diff --git a/.dockerignore b/.dockerignore
index 3466d3150..9f1a0a399 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,3 @@
-*
 !*.py
 !requirements.txt
 !images/*
diff --git a/Escrita_de_Minuta.py b/Escrita_de_Minuta.py
new file mode 100644
index 000000000..06dbe3060
--- /dev/null
+++ b/Escrita_de_Minuta.py
@@ -0,0 +1,172 @@
+import streamlit as st
+from rag_utils.config import init
+from rag_utils.pipeline import RAG_document_retrieval
+import base64
+import threading
+import logging
+import time
+
+
+session_state_status_percent = 0
+
+
+def parte_compradora_agents_thread(uploaded_files):
+    """Simulate per-file processing, publishing progress through the module-level counter."""
+    global session_state_status_percent
+    if uploaded_files:
+        st.session_state.status = "Processando documentos da parte compradora..."
+        logging.info("Parte compradora: Iniciando o processamento dos documentos.")
+        # Restart the shared counter; the button callback polls it to draw the progress bar.
+        session_state_status_percent = 0
+        len_uploaded_files = len(uploaded_files)
+        # NOTE(review): this runs on a worker thread; confirm these st.session_state writes reach the session.
+        # Placeholder loop: no real extraction happens yet, each file only costs a sleep.
+        for p, uploaded_file in enumerate(uploaded_files):
+            # Stand-in for the processing time of one document.
+            time.sleep(1)
+            st.session_state.status = f"Processando {uploaded_file.name}..."
+            session_state_status_percent = (p+1) / len_uploaded_files
+            logging.info(f"Parte compradora: Processando {uploaded_file.name}...")
+            logging.info(f"Parte compradora (Thread): Progresso {session_state_status_percent:.2%}")
+
+            # TODO: replace the sleep above with the real extraction call,
+            # for example RAG_document_retrieval(uploaded_file).
+
+        st.session_state.status = "Documentos da parte compradora processados com sucesso!"
+        logging.info("Parte compradora: Documentos processados com sucesso!")
+
+
+def parte_compradora_button_callback(uploaded_files, container):
+    """Run the buyer-side worker in a thread and mirror its progress inside `container`."""
+    global session_state_status_percent
+    thread = threading.Thread(
+        target=parte_compradora_agents_thread, args=(uploaded_files,), daemon=True
+    )
+    thread.start()
+
+    with container:
+        bar = st.progress(0, text_ocr)
+        # Poll thread liveness rather than the counter: if the worker raises (or gets
+        # no files) the counter never reaches 1.0 and the old loop would spin forever.
+        while thread.is_alive():
+            time.sleep(0.1)
+            bar.progress(session_state_status_percent, text_ocr)
+            logging.info(f"Parte compradora: Progresso {session_state_status_percent:.2%}")
+        bar.empty()
+        thread.join()
+
+    session_state_status_percent = 0
+    st.session_state.status = "Processamento finalizado!"
+    logging.info("Parte compradora: Processamento finalizado!")
+
+
+def parte_vendedora_button_callback(uploaded_files=None, container=None):
+    """Placeholder seller callback; accepts the (uploaded_files, container) args the button passes."""
+
+
+def imovel_button_callback(uploaded_files=None, container=None):
+    """Placeholder property-deed callback; accepts the (uploaded_files, container) args the button passes."""
+
+
+def container_files_uploader_and_text_writer(container, labels: dict, key, callback):
+    """Render a bold title, a multi-file uploader and a trigger button inside `container`."""
+    container.markdown(f"**{labels['markdown_label']}**")
+    uploaded_files = container.file_uploader(
+        labels['file_uploader_label'],
+        type=["pdf", "jpg", "jpeg", "png"],
+        key=f"{key}_file_uploader",
+        accept_multiple_files=True
+    )
+    # `key` prefixes both widget keys; `callback` is registered as on_click with (uploaded_files, container) as args.
+    write_text_button = container.button(
+        labels['button_label'],
+        help="Clique para gerar o parágrafo com as informações extraídas dos documentos.",
+        disabled=not uploaded_files,
+        on_click=callback,
+        args=(uploaded_files, container),
+        key=f"{key}_button"
+    )
+    # The status line is written only when files are attached and the button call returned a truthy value.
+    if uploaded_files and write_text_button:
+        container.write(f"Status: {st.session_state.status}")
+
+logging.basicConfig(level = logging.INFO)
+
+# 'init' is shared with the review pages, which set it without creating 'status';
+# keep the status default independent so it exists whichever page ran first.
+if 'status' not in st.session_state:
+    st.session_state.status = "Aguardando o upload dos documentos..."
+if 'init' not in st.session_state:
+    st.session_state.init = True
+    init()
+if 'init_buyer_writer_page' not in st.session_state:  # guard the same key that is set below
+    st.session_state.init_buyer_writer_page = True
+    st.session_state.buyer_documents_list = [
+        'CNH Comprador',
+        'Comprovante de Residência Comprador',
+        'Certidão de Casamento Comprador',
+        'Pacto Antenupcial ou Declaração de União Estável',
+        'CNH Cônjuge',
+        'Quitação ITBI'
+    ]
+    st.session_state.owner_documents_list = [
+        'CNH Vendedor',
+        'Comprovante de Residência Vendedor',
+        'Matrícula do Imóvel'
+    ]
+
+text_ocr = "Extraindo informações dos documentos..."
+
+st.title(body='✍️ StartLegal - Escritor de Minutas')
+st.header("Assistente de Elaboração de Escrituras", divider='gray', )
+
+st.write(
+    "Anexe os documentos necessários das partes compradora e vendedora e a escritura do imóvel."
+)
+# Buyer section: the only one whose button callback does any work so far.
+parte_compradora = st.container()
+
+container_files_uploader_and_text_writer(
+    container=parte_compradora,
+    labels={
+        'markdown_label': '**Parte Compradora**',
+        'file_uploader_label': 'Anexe os documentos da parte compradora',
+        'button_label': 'Gerar Parágrafo',
+        'progress_text': text_ocr  # not read by container_files_uploader_and_text_writer
+    },
+    key='parte_compradora',
+    callback=parte_compradora_button_callback
+)
+
+st.divider()
+# Seller section: wired to parte_vendedora_button_callback, which is still a stub.
+parte_vendedora = st.container()
+
+container_files_uploader_and_text_writer(
+    container=parte_vendedora,
+    labels={
+        'markdown_label': '**Parte Vendedora**',
+        'file_uploader_label': 'Anexe os documentos da parte vendedora',
+        'button_label': 'Gerar Parágrafo',
+        'progress_text': text_ocr
+    },
+    key='parte_vendedora',
+    callback=parte_vendedora_button_callback
+)
+
+st.divider()
+# Property-deed section: wired to imovel_button_callback, which is still a stub.
+imovel = st.container()
+
+container_files_uploader_and_text_writer(
+    container=imovel,
+    labels={
+        'markdown_label': '**Escritura do Imóvel**',
+        'file_uploader_label': 'Anexe a escritura do imóvel',
+        'button_label': 'Gerar Parágrafo',
+        'progress_text': text_ocr
+    },
+    key='imovel',
+    callback=imovel_button_callback
+)
diff --git a/Revisor_de_Minuta.py b/Revisor_de_Minuta.py
new file mode 100644
index 000000000..65369e8de
--- /dev/null
+++ b/Revisor_de_Minuta.py
@@ -0,0 +1,27 @@
+import streamlit as st
+# Static landing page of the review flow: title, header and the usage guide below.
+st.title(body='📄 StartLegal - Revisor de Minutas')
+st.header("Revisão de Minuta de Escrituras", divider='gray', )
+
+st.write(
+    "Anexe a minuta de uma escritura e em seguida os documentos necessários para revisão."
+)
+# Markdown source of the step-by-step guide; passed unchanged to st.markdown at the bottom.
+doc_ = '''Siga os passos abaixo para revisar informações da Minuta:
+1. No menu à esquerda, clique em "Anexar Minuta" para inserir uma minuta no sistema e iniciar o processo de revisão.
+2. Em seguida clique em "Parte Compradora" e insira no sistema os documentos necessários em cada aba disponível (se necessário).
+    
+    2.1. Aguarde o sistema extrair as informações e realizar a comparação com a Minuta fornecida.
+
+    2.2. Caso encontre alguma inconsistência, reportar o escrivão e finalizar o processo de revisão.
+
+3. Por último, clique em "Parte Vendedora" e insira os documentos solicitados.
+
+    3.1. Aguarde o sistema extrair as informações e realizar a comparação com a Minuta fornecida.
+
+    3.2 Caso encontre alguma inconsistência, reportar o escrivão e finalizar o processo de revisão.
+'''
+
+st.markdown(
+    doc_
+)
\ No newline at end of file
diff --git a/StartLegal.py b/StartLegal.py
new file mode 100644
index 000000000..8879df430
--- /dev/null
+++ b/StartLegal.py
@@ -0,0 +1,22 @@
+import streamlit as st
+from streamlit.logger import get_logger
+
+# App entry point: configures the page and registers every sub-page with the navigation.
+st.set_page_config(page_title="StartLegal - IA para Cartórios", page_icon="🤖", layout="wide")
+
+logger = get_logger(__name__)
+# Single page of the "Escrita de Minutas" (writer) section.
+escritor_page = st.Page("Escrita_de_Minuta.py", title="Escrita de Minuta", icon="✍️")
+# Pages of the "Revisão de Minutas" (review) section, in the order they are listed below.
+revisor_page = st.Page("Revisor_de_Minuta.py", title="Guia de Usabilidade", icon="📄")
+upload_minuta_page = st.Page("pages/1_Anexar_Minuta.py", title="Minuta", icon="📄")
+parte_compradora_page = st.Page("pages/2_Parte_Compradora.py", title="Parte Compradora", icon="📄")
+parte_vendedora_page = st.Page("pages/3_Parte_Vendedora.py", title="Parte Vendedora", icon="📄")
+# Group the pages under their section titles, then run the page object st.navigation returns.
+pg = st.navigation(
+    {
+        "Escrita de Minutas": [escritor_page],
+        "Revisão de Minutas": [revisor_page, upload_minuta_page, parte_compradora_page, parte_vendedora_page],
+    }
+)
+pg.run()
\ No newline at end of file
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/chains.py b/chains.py
index 926ced7ee..f84eca120 100644
--- a/chains.py
+++ b/chains.py
@@ -63,12 +63,9 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=
 
 
 def load_llm(llm_name: str, logger=BaseLogger(), config={}):
-    if llm_name in ["gpt-4", "gpt-4o", "gpt-4-turbo"]:
-        logger.info("LLM: Using GPT-4")
-        return ChatOpenAI(temperature=0, model_name=llm_name, streaming=True)
-    elif llm_name == "gpt-3.5":
-        logger.info("LLM: Using GPT-3.5")
-        return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
+    if llm_name.startswith("gpt"):
+        logger.info(f"LLM: Using OPENAI: {llm_name}")
+        return ChatOpenAI(temperature=0, model_name=llm_name)
     elif llm_name == "claudev2":
         logger.info("LLM: ClaudeV2")
         return ChatBedrock(
diff --git a/docker-compose.yml b/docker-compose.yml
index 7dacfd59c..dfbad246c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -30,7 +30,7 @@ services:
     tty: true
 
   database:
-    user: neo4j:neo4j
+    #user: neo4j:neo4j
     image: neo4j:5.23
     ports:
       - 7687:7687
@@ -86,6 +86,7 @@ services:
             - bot.py
             - pdf_bot.py
             - api.py
+            - multiple_files_bot.py
             - front-end/
     ports:
       - 8081:8080
@@ -129,6 +130,7 @@ services:
             - loader.py
             - pdf_bot.py
             - api.py
+            - multiple_files_bot.py
             - front-end/
     ports:
       - 8501:8501
@@ -168,10 +170,50 @@ services:
             - loader.py
             - bot.py
             - api.py
+            - multiple_files_bot.py
             - front-end/
     ports:
       - 8503:8503
 
+  multiple_files_bot:
+    build:
+      context: .
+      dockerfile: multiple_files_bot.Dockerfile
+    environment:
+      - NEO4J_URI=${NEO4J_URI-neo4j://database:7687}
+      - NEO4J_PASSWORD=${NEO4J_PASSWORD-password}
+      - NEO4J_USERNAME=${NEO4J_USERNAME-neo4j}
+      - OPENAI_API_KEY=${OPENAI_API_KEY-}
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY-}
+      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL-http://host.docker.internal:11434}
+      - LLM=${LLM-llama2}
+      - EMBEDDING_MODEL=${EMBEDDING_MODEL-sentence_transformer}
+      - LANGCHAIN_ENDPOINT=${LANGCHAIN_ENDPOINT-"https://api.smith.langchain.com"}
+      - LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2-false}
+      - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT}
+      - LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
+      - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
+    networks:
+      - net
+    depends_on:
+      database:
+        condition: service_healthy
+      pull-model:
+        condition: service_completed_successfully
+    x-develop:
+      watch:
+        - action: rebuild
+          path: .
+          ignore:
+            - loader.py
+            - bot.py
+            - api.py
+            - front-end/
+    ports:
+      - 8505:8505
+
   api:
     build:
       context: .
@@ -209,6 +251,7 @@ services:
             - loader.py
             - bot.py
             - pdf_bot.py
+            - multiple_files_bot.py
             - front-end/
     ports:
       - 8504:8504
diff --git a/multiple_files_bot.Dockerfile b/multiple_files_bot.Dockerfile
new file mode 100644
index 000000000..041e367a1
--- /dev/null
+++ b/multiple_files_bot.Dockerfile
@@ -0,0 +1,35 @@
+FROM langchain/langchain
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    software-properties-common \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN apt-get update && apt-get install -y \
+    poppler-utils \
+    tesseract-ocr \
+    libtesseract-dev \
+    tesseract-ocr-por
+
+COPY requirements.txt .
+
+RUN pip install --upgrade -r requirements.txt
+
+ADD rag_utils rag_utils
+ADD pages pages
+COPY __init__.py .
+COPY StartLegal.py .
+COPY Revisor_de_Minuta.py .
+COPY Escrita_de_Minuta.py .
+COPY prompts.json .
+COPY utils.py .
+COPY chains.py .
+
+EXPOSE 8505
+
+HEALTHCHECK CMD curl --fail http://localhost:8505/_stcore/health
+
+ENTRYPOINT ["streamlit", "run", "StartLegal.py", "--server.port=8505", "--server.address=0.0.0.0"]
diff --git a/pages/1_Anexar_Minuta.py b/pages/1_Anexar_Minuta.py
new file mode 100644
index 000000000..fdc5fbdea
--- /dev/null
+++ b/pages/1_Anexar_Minuta.py
@@ -0,0 +1,133 @@
+import streamlit as st
+import logging
+from streamlit.logger import get_logger
+from rag_utils.config import init
+from rag_utils.pipeline import RAG_document_retrieval
+from utils import StreamHandler
+import base64
+
+
+logging.basicConfig(level = logging.INFO)
+
+logger = get_logger(__name__)
+
+if 'init' not in st.session_state:
+    st.session_state.init = True
+    init()
+    
+st.subheader(
+    "Anexe a minuta da escritura para iniciar a revisão.",
+    divider='gray'
+)
+
+# Upload the minuta file
+uploaded_file_minuta = st.file_uploader(
+    "Suba o documento da Minuta em formato PDF.", 
+    accept_multiple_files=False,
+    type="pdf"
+)
+
+if uploaded_file_minuta:
+    st.write("A IA irá coletar as informações presentes no documento...")
+    
+    col1, col2 = st.columns(2, vertical_alignment="center")
+
+    with col2:
+        base64_pdf = base64.b64encode(uploaded_file_minuta.getvalue()).decode("utf-8")
+        pdf_display = (
+            f'<embed src="data:application/pdf;base64,{base64_pdf}" '
+            'width="960" height="2160" type="application/pdf"></embed>'
+        )
+        
+        st.markdown(pdf_display, unsafe_allow_html=True)
+        st.session_state.minuta_file = uploaded_file_minuta
+
+    with col1:
+        # Collect and structure data from Buyers 
+        answer = RAG_document_retrieval(
+                    document='Minuta Comprador',
+                    file=uploaded_file_minuta,
+                    prompts=st.session_state.prompts,
+                    logger=logger,
+                    embeddings=st.session_state.embeddings,
+                    vectordb_config=st.session_state.vectorstore_config,
+                    llm=st.session_state.llm,
+                    ocr_params={
+                        'pages': [0],
+                        'lang': 'por'
+                    }
+                )
+        
+        st.session_state.minuta_comprador = answer
+
+        # Print output answer
+        stream_handler = StreamHandler(st.empty())
+        for token in st.session_state.minuta_comprador:
+            stream_handler.on_llm_new_token(token=token)
+
+        # Collect and structure data from Sellers 
+        answer = RAG_document_retrieval(
+                    document='Minuta Vendedor',
+                    file=uploaded_file_minuta,
+                    prompts=st.session_state.prompts,
+                    logger=logger,
+                    embeddings=st.session_state.embeddings,
+                    vectordb_config=st.session_state.vectorstore_config,
+                    llm=st.session_state.llm,
+                    ocr_params={
+                        'pages': [0],
+                        'lang': 'por'
+                    }
+                )
+
+        st.session_state.minuta_vendedor = answer
+
+        # Print output answer
+        stream_handler2 = StreamHandler(st.empty())
+        for token in st.session_state.minuta_vendedor:
+            stream_handler2.on_llm_new_token(token=token)
+
+        # Collect and structure data from Real Estate/Land
+        answer = RAG_document_retrieval(
+                    document='Minuta Imóvel',
+                    file=uploaded_file_minuta,
+                    prompts=st.session_state.prompts,
+                    logger=logger,
+                    embeddings=st.session_state.embeddings,
+                    vectordb_config=st.session_state.vectorstore_config,
+                    llm=st.session_state.llm,
+                    ocr_params={
+                        'pages': [0,1],
+                        'lang': 'por'
+                    }
+                )
+
+        st.session_state.minuta_imovel = answer
+
+        # Print output answer
+        stream_handler3 = StreamHandler(st.empty())
+        for token in st.session_state.minuta_imovel:
+            stream_handler3.on_llm_new_token(token=token)
+
+else:
+    if 'minuta_file' in st.session_state:
+        col3, col4 = st.columns(2, vertical_alignment="center")
+
+        with col4:
+            base64_pdf = base64.b64encode(st.session_state.minuta_file.getvalue()).decode("utf-8")
+            pdf_display = (
+                f'<embed src="data:application/pdf;base64,{base64_pdf}" '
+                'width="960" height="2160" type="application/pdf"></embed>'
+            )
+            
+            st.markdown(pdf_display, unsafe_allow_html=True)
+        
+        with col3:
+            if 'minuta_comprador' in st.session_state:
+                st.write(st.session_state.minuta_comprador)
+
+            if 'minuta_vendedor' in st.session_state:
+                st.write(st.session_state.minuta_vendedor)
+
+            if 'minuta_imovel' in st.session_state:
+                st.write(st.session_state.minuta_imovel)
\ No newline at end of file
diff --git a/pages/2_Parte_Compradora.py b/pages/2_Parte_Compradora.py
new file mode 100644
index 000000000..7487378d3
--- /dev/null
+++ b/pages/2_Parte_Compradora.py
@@ -0,0 +1,114 @@
+import streamlit as st
+from streamlit.logger import get_logger
+import logging
+from utils import StreamHandler
+from rag_utils.config import init
+from rag_utils.pipeline import RAG_document_retrieval, RAG_document_validator
+import base64
+
+
+logging.basicConfig(level = logging.INFO)
+
+if 'init' not in st.session_state:
+    st.session_state.init = True
+    init()
+
+if 'init_buyer_review_page' not in st.session_state:
+    st.session_state.init_buyer_review_page = True
+
+    st.session_state.buyer_documents_list = [
+        'CNH Comprador', 
+        'Comprovante de Residência Comprador', 
+        'Certidão de Casamento Comprador',
+        'Pacto Antenupcial ou Declaração de União Estável',
+        'CNH Cônjuge',
+        'Quitação ITBI'
+    ]
+    st.session_state.buyer_documents_list_tab = [
+        'CNH', 
+        'Comprovante de Residência', 
+        'Certidão de Casamento',
+        'Pacto Antenupcial ou Declaração de União Estável',
+        'CNH Cônjuge',
+        'Quitação ITBI'
+    ]
+    st.session_state.final_answer = dict().fromkeys(st.session_state.buyer_documents_list)
+
+logger = get_logger(__name__)
+if 'minuta_file' not in st.session_state or 'minuta_comprador' not in st.session_state:
+    st.warning("Anexe a minuta na página 'Minuta' antes de validar os documentos.")
+    st.stop()  # the tabs below read the minuta state unconditionally and would raise
+tabs = st.tabs(st.session_state.buyer_documents_list_tab)
+for tab, document in zip(tabs, st.session_state.buyer_documents_list):
+    with tab:
+        # Upload the file for this document
+        uploaded_file = st.file_uploader(
+            "Suba o documento em algum desses formatos: PDF, png, jpeg, ou txt.", 
+            accept_multiple_files=False,
+            type=["png", "jpg", "jpeg", "pdf", "txt"],
+            key=document
+        )
+    
+        if uploaded_file:
+            st.write("A IA irá coletar e validar as informações presentes...")
+
+            col1, col2, col3 = st.columns(3, vertical_alignment="top")
+
+            with col1:
+                base64_pdf = base64.b64encode(uploaded_file.getvalue()).decode("utf-8")
+                pdf_display = (
+                    f'<embed src="data:application/pdf;base64,{base64_pdf}" '
+                    'width="640" height="1080" type="application/pdf"></embed>'
+                )
+                
+                st.markdown(pdf_display, unsafe_allow_html=True)
+            
+            with col3:
+                base64_pdf = base64.b64encode(st.session_state.minuta_file.getvalue()).decode("utf-8")
+                pdf_display = (
+                    f'<embed src="data:application/pdf;base64,{base64_pdf}" '
+                    'width="640" height="1080" type="application/pdf"></embed>'
+                )
+                
+                st.markdown(pdf_display, unsafe_allow_html=True)
+
+            with col2:
+                # Collect and structure data from Buyers 
+                answer = RAG_document_retrieval(
+                        document=document,
+                        file=uploaded_file,
+                        prompts=st.session_state.prompts,
+                        logger=logger,
+                        embeddings=st.session_state.embeddings,
+                        vectordb_config=st.session_state.vectorstore_config,
+                        llm=st.session_state.llm,
+                        ocr_params={
+                            'pages': None,
+                            'lang': 'por'
+                        }
+                    )
+            
+                stream_handler = StreamHandler(st.empty())
+                for token in answer:
+                    stream_handler.on_llm_new_token(token=token)
+
+                # Ask the LLM for a table showing the Document data and the Minuta data
+                st.write(f"Validando de {document} com os dados da Minuta.")
+
+                final_answer = RAG_document_validator(
+                    document=document,
+                    document_answer=answer,
+                    minuta_answer=st.session_state.minuta_comprador,
+                    llm=st.session_state.llm,
+                    logger=logger
+                )
+                
+                st.session_state.final_answer[document] = final_answer
+
+                stream_handler = StreamHandler(st.empty())
+                for token in final_answer:
+                    stream_handler.on_llm_new_token(token=token)
+        
+        else:
+            if st.session_state.final_answer[document]:
+                st.write(st.session_state.final_answer[document])
\ No newline at end of file
diff --git a/pages/3_Parte_Vendedora.py b/pages/3_Parte_Vendedora.py
new file mode 100644
index 000000000..eb4ebab50
--- /dev/null
+++ b/pages/3_Parte_Vendedora.py
@@ -0,0 +1,102 @@
+import streamlit as st
+from streamlit.logger import get_logger
+import logging
+from utils import StreamHandler
+from rag_utils.config import init
+from rag_utils.pipeline import RAG_document_retrieval, RAG_document_validator
+import base64
+
+
+logging.basicConfig(level = logging.INFO)
+
+logger = get_logger(__name__)
+
+if 'init' not in st.session_state:
+    st.session_state.init = True
+    init()
+
+st.session_state.owner_documents_list = [
+    'CNH Vendedor', 
+    'Comprovante de Residência Vendedor',
+    'Matrícula do Imóvel'
+]
+
+if 'init_owner_review_page' not in st.session_state:
+    st.session_state.init_owner_review_page = True
+    st.session_state.final_answer_owner = dict.fromkeys(st.session_state.owner_documents_list)
+if not all(k in st.session_state for k in ('minuta_file', 'minuta_vendedor', 'minuta_imovel')):
+    st.warning("Anexe a minuta na página 'Minuta' antes de validar os documentos.")
+    st.stop()  # the tabs below read the minuta state unconditionally and would raise
+tabs = st.tabs(st.session_state.owner_documents_list)
+for tab, document in zip(tabs, st.session_state.owner_documents_list):
+    with tab:
+        # Upload the file for this document
+        uploaded_file = st.file_uploader(
+            "Suba o documento em algum desses formatos: PDF, png, jpeg, ou txt.", 
+            accept_multiple_files=False,
+            type=["png", "jpg", "jpeg", "pdf", "txt"],
+            key=document
+        )
+    
+        if uploaded_file:
+            st.write("A IA irá coletar e validar as informações presentes...")
+
+            col1, col2, col3 = st.columns(3, vertical_alignment="top")
+
+            with col1:
+                base64_pdf = base64.b64encode(uploaded_file.getvalue()).decode("utf-8")
+                pdf_display = (
+                    f'<embed src="data:application/pdf;base64,{base64_pdf}" '
+                    'width="640" height="1080" type="application/pdf"></embed>'
+                )
+                
+                st.markdown(pdf_display, unsafe_allow_html=True)
+            
+            with col3:
+                base64_pdf = base64.b64encode(st.session_state.minuta_file.getvalue()).decode("utf-8")
+                pdf_display = (
+                    f'<embed src="data:application/pdf;base64,{base64_pdf}" '
+                    'width="640" height="1080" type="application/pdf"></embed>'
+                )
+                
+                st.markdown(pdf_display, unsafe_allow_html=True)
+
+            with col2:
+                answer = RAG_document_retrieval(
+                            document=document,
+                            file=uploaded_file,
+                            prompts=st.session_state.prompts,
+                            logger=logger,
+                            embeddings=st.session_state.embeddings,
+                            vectordb_config=st.session_state.vectorstore_config,
+                            llm=st.session_state.llm
+                        )
+                # Print output answer
+                stream_handler = StreamHandler(st.empty())
+                for token in answer:
+                    stream_handler.on_llm_new_token(token=token)
+
+                # Ask the LLM for a table showing the Document data and the Minuta data
+                st.write(f"Validando dados de {document} com os dados da Minuta.")
+
+                minuta_answer = st.session_state.minuta_vendedor 
+                if document == 'Matrícula do Imóvel':
+                    minuta_answer = st.session_state.minuta_imovel
+                
+                final_answer = RAG_document_validator(
+                    document=document,
+                    document_answer=answer,
+                    minuta_answer=minuta_answer,
+                    llm=st.session_state.llm,
+                    logger=logger
+                )
+                st.session_state.final_answer_owner[document] = final_answer
+                
+                # Print output answer
+                stream_handler = StreamHandler(st.empty())
+                for token in final_answer:
+                    stream_handler.on_llm_new_token(token=token)
+            
+        else:
+            if st.session_state.final_answer_owner[document]:
+                st.write(st.session_state.final_answer_owner[document])
\ No newline at end of file
diff --git a/prompts.json b/prompts.json
new file mode 100644
index 000000000..55b4d0ca9
--- /dev/null
+++ b/prompts.json
@@ -0,0 +1,100 @@
+{
+    "Minuta Comprador": {
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Minuta Comprador'. Return all the text from the paragraph where the term 'Outorgada Compradora' exists and put it at the beginning of the answer as a quote string. The user will require data from this text, always return a structured table with that data.",
+            "input": "Extraia os dados da parte compradora e cônjuge referentes a: identificação e outros documentos pessoais apresentados, profissão, estado civil, regime de separação de bens (se casado ou união estável), cartório de notas e número de registro do Pacto Antenupcial ou União Estável (se declarado), e endereço de residência. Estruture esses dados em uma tabela com título 'Minuta Dados da Parte Compradora'."
+        }
+    },
+    "Minuta Vendedor": {
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Minuta Vendedor'. Return all the text from the paragraph where the term 'Outorgante Vendedora' exists and put it at the beginning of the answer as a quote string. The user will require data from this text, always return a structured table with that data.",
+            "input": "Extraia os dados da parte vendedora e cônjuge referentes a: identificação e outros documentos pessoais apresentados, profissão, estado civil, regime de separação de bens (se casado ou união estável), cartório de notas e número de registro do Pacto Antenupcial ou União Estável (se declarado), e endereço de residência. Estruture esses dados em uma tabela com título 'Minuta Dados da Parte Vendedora'."
+
+        }
+    },
+    "Minuta Imóvel": {
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Minuta Imóvel'. Return all the text from the paragraph where the term 'Cláusula 1a' exists and put it at the beginning of the answer as a quote string. The user will require data from this text, always return a structured table with that data.",
+            "input": "Extraia todas as informações acerca do imóvel: nome completo do proprietário, descrição do imóvel, logradouro, número, bairro e município, identificação de lote ou quadra, natureza do terreno (se pertence às forças armadas), características de cômodos, dimensões de tamanho e localização do imóvel, número de matrícula do imóvel e cartório que registrou a matrícula. Retorne uma tabela com título 'Dados do Imóvel' com essas informações."
+        }
+    },
+    "CNH Comprador": {
+        "v1": "Extraia do documento CNH os dados nos campos: Nome completo, nacionalidade, data de nascimento, RG e órgão expedidor e CPF.",
+        "v2": "Extraia do documento CNH os dados nos campos: Nome completo (1º Nome encontrado), nacionalidade, data de nascimento, RG com órgão expedidor e CPF (localizado após o RG).",
+        "v3": "Extraia do documento CNH os dados nos campos: Nome completo (1º Nome encontrado), nacionalidade (faça inferência, se necessário), data de nascimento, RG com órgão expedidor e CPF (11 dígitos, localizado após o RG). Informe se a data de vencimento é maior que a data atual: 25/02/2025 (em caso negativo, retorne DOCUMENTO INVÁLIDO).",
+        "v4": "Extraia do documento CNH os dados nos campos (alguns campos podem possuir escrita parecida com os dados a seguir, tente buscar campo com nome parecido): Nome completo (1º Nome encontrado), nacionalidade (faça inferência, se necessário), data de nascimento, RG com órgão expedidor e CPF (11 dígitos, localizado após o RG). Informe se a data de validade é maior que a data atual: 25/02/2025 (em caso negativo, retorne DOCUMENTO INVÁLIDO).",
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'CNH Comprador'. Always return a structured table consolidating the information in the end of the answer.",
+            "input": "Extraia os dados nos campos: Nome completo (1º Nome encontrado), nacionalidade (infira, se necessário e não declare ao usuário que foi inferido), data de nascimento, RG com órgão expedidor e CPF (11 dígitos separados por '.' e '-', localizado após o RG) remova '.', '-' e '/' dos valores. Retorne uma tabela com título 'Dados do CNH do Comprador' com essas informações."
+        },
+        "resposta": "Nome Completo: MARLI SILVA DE ANDRADE; Nacionalidade: Brasileira (inferente do órgão emitente); Data de Nascimento: 19/08/1968; RG com Órgão Expedidor: 3198072 - SSP PE; CPF: Não localizado na informação fornecida; Validade do Documento: Até 29/04/2026. Como esta data está após 25/02/2025, o documento é válido."
+    },
+    "CNH Vendedor": {
+        "v1": "Extraia do documento CNH os dados nos campos: Nome completo, nacionalidade, data de nascimento, RG e órgão expedidor e CPF.",
+        "v2": "Extraia do documento CNH os dados nos campos: Nome completo (1º Nome encontrado), nacionalidade, data de nascimento, RG com órgão expedidor e CPF (localizado após o RG).",
+        "v3": "Extraia do documento CNH os dados nos campos: Nome completo (1º Nome encontrado), nacionalidade (faça inferência, se necessário), data de nascimento, RG com órgão expedidor e CPF (11 dígitos, localizado após o RG). Informe se a data de vencimento é maior que a data atual: 25/02/2025 (em caso negativo, retorne DOCUMENTO INVÁLIDO).",
+        "v4": "Extraia do documento CNH os dados nos campos (alguns campos podem possuir escrita parecida com os dados a seguir, tente buscar campo com nome parecido): Nome completo (1º Nome encontrado), nacionalidade (faça inferência, se necessário), data de nascimento, RG com órgão expedidor e CPF (11 dígitos, localizado após o RG). Informe se a data de validade é maior que a data atual: 25/02/2025 (em caso negativo, retorne DOCUMENTO INVÁLIDO).",
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'CNH Vendedor'. Always return a structured table consolidating the information in the end of the answer.",
+            "input": "Extraia os dados nos campos: Nome completo (1º Nome encontrado), nacionalidade (infira, se necessário e não declare ao usuário que foi inferido), data de nascimento, RG com órgão expedidor e CPF (11 dígitos separados por '.' e '-', localizado após o RG) remova '.', '-' e '/' dos valores. Retorne uma tabela com título 'Dados do CNH do Vendedor' com essas informações."
+        }
+    },
+    "Quitação ITBI": {
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Quitação ITBI'. Always return a structured table consolidating the information in the end of the answer.",
+            "input": "Extraia a Inscrição e/ou sequencial do imóvel na Prefeitura onde está o imóvel, nome completo da pessoa no documento e valor financeiro presente. Retorne uma tabela com título 'Dados do Comprovante de ITBI' com essas informações."
+        }
+    },
+    "Matrícula do Imóvel": {
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Matrícula do Imóvel'. Always return a structured table consolidating the information in the end of the answer.",
+            "input": "Extraia todas as informações acerca do imóvel: nome completo do proprietário, descrição do imóvel, logradouro, número, bairro e município, identificação de lote ou quadra, natureza do terreno (se pertence às forças armadas), características de cômodos, dimensões de tamanho e localização do imóvel, número de matrícula do imóvel, cartório que registrou a matrícula, e Inscrição e/ou sequencial do imóvel na Prefeitura onde está o imóvel. Retorne uma tabela com título 'Dados do Imóvel Matrícula' com essas informações."
+        }
+    },
+    "Comprovante de Residência Comprador": {
+        "v1": "Extraia do documento 'Comprovante de Residência' os dados relacionados a endereço e CEP (Caixa Postal/ZIP Code).",
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Comprovante de Residência Comprador'. Always return a structured table consolidating the information in the end of the answer.",
+            "input": "Extraia os dados relacionados a endereço e CEP (Caixa Postal/ZIP Code). Retorne uma tabela com título 'Dados do Comprovante de Residência Comprador' com essas informações."
+        }
+    },
+    "Comprovante de Residência Vendedor": {
+        "v1": "Extraia do documento 'Comprovante de Residência' os dados relacionados a endereço e CEP (Caixa Postal/ZIP Code).",
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Comprovante de Residência Vendedor'. Always return a structured table consolidating the information in the end of the answer.",
+            "input": "Extraia os dados relacionados a endereço e CEP (Caixa Postal/ZIP Code). Retorne uma tabela com título 'Dados do Comprovante de Residência Vendedor' com essas informações."
+        }
+    },
+    "Certidão de Casamento Comprador": {
+        "v1": "Extraia do documento 'Certidão de Casamento' os dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens.",
+        "v2": "Extraia do documento 'Certidão de Casamento' os dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens e extrair os dados de registro da certidão e onde a certidão foi emitida.",
+        "v3": "Extraia do documento 'Certidão de Casamento' os dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens. Extrair o número de registro da certidão, onde a certidão foi emitida e a data de emissão da Certidão.",
+        "v4": "Extraia do documento 'Certidão de Casamento' os dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens. Extrair o número de registro da certidão, onde a certidão foi emitida e a data de emissão da Certidão.",
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brazil). Always return a structured table gathering the information at the end. Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Certidão de Casamento Comprador'.",
+            "input": "Extraia dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens. Extrair o número de registro da certidão, onde a certidão foi emitida e a data de emissão da Certidão. Retorne uma tabela com título 'Dados da Certidão de Casamento Comprador' com essas informações."
+        }
+    },
+    "Certidão de Casamento Vendedor": {
+        "v1": "Extraia do documento 'Certidão de Casamento' os dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens.",
+        "v2": "Extraia do documento 'Certidão de Casamento' os dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens e extrair os dados de registro da certidão e onde a certidão foi emitida.",
+        "v3": "Extraia do documento 'Certidão de Casamento' os dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens. Extrair o número de registro da certidão, onde a certidão foi emitida e a data de emissão da Certidão.",
+        "v4": "Extraia do documento 'Certidão de Casamento' os dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens. Extrair o número de registro da certidão, onde a certidão foi emitida e a data de emissão da Certidão.",
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brazil). Always return a structured table gathering the information at the end. Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Certidão de Casamento Vendedor'.",
+            "input": "Extraia dados relacionados ao Cônjuge: Nome, Documento de Identificação e Data do Casamento. Extrair o dado sobre o tipo de Regime de Bens. Extrair o número de registro da certidão, onde a certidão foi emitida e a data de emissão da Certidão. Retorne uma tabela com título 'Dados da Certidão de Casamento Vendedor' com essas informações."
+        }
+    },
+    "CNH Cônjuge": {
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'CNH Cônjuge'. Always return a structured table consolidating the information in the end of the answer.",
+            "input": "Extraia os dados nos campos: Nome completo (1º Nome encontrado), nacionalidade (infira, se necessário e não declare ao usuário que foi inferido), data de nascimento, RG com órgão expedidor e CPF (11 dígitos separados por '.' e '-', localizado após o RG) remova '.', '-' e '/' dos valores. Retorne uma tabela com título 'Dados do CNH do Cônjuge' com essas informações."
+        }
+    },
+    "Pacto Antenupcial ou Declaração de União Estável": {
+        "latest": {
+            "prompt": "You are an assistant for question-answering tasks. Always answer in Portuguese (Brasil). Retrieve information from Context where the token 'NOME_DO_DOCUMENTO' contains 'Pacto Antenupcial ou Declaração de União Estável'. Always return a structured table consolidating the information in the end of the answer.",
+            "input": "Extrair os dados sobre o tipo de Regime de Bens, número da Escritura, cartório onde foi lavrada, informações do livro e data. Retorne uma tabela com título 'Dados do Pacto/Declaração' com essas informações."
+        }
+    }
+}
\ No newline at end of file
diff --git a/pull_model.Dockerfile b/pull_model.Dockerfile
index b06625f7d..eb0ad4cd4 100644
--- a/pull_model.Dockerfile
+++ b/pull_model.Dockerfile
@@ -6,40 +6,7 @@ FROM babashka/babashka:latest
 # just using as a client - never as a server
 COPY --from=ollama /bin/ollama ./bin/ollama
 
-COPY <<EOF pull_model.clj
-(ns pull-model
-  (:require [babashka.process :as process]
-            [clojure.core.async :as async]))
+COPY pull_model.clj /usr/src/pull_model.clj
 
-(try
-  (let [llm (get (System/getenv) "LLM")
-        url (get (System/getenv) "OLLAMA_BASE_URL")]
-    (println (format "pulling ollama model %s using %s" llm url))
-    (if (and llm 
-         url 
-         (not (#{"gpt-4" "gpt-3.5" "claudev2" "gpt-4o" "gpt-4-turbo"} llm))
-         (not (some #(.startsWith llm %) ["ai21.jamba-instruct-v1:0"
-                                          "amazon.titan"
-                                          "anthropic.claude"
-                                          "cohere.command"
-                                          "meta.llama"
-                                          "mistral.mi"])))
-
-      ;; ----------------------------------------------------------------------
-      ;; just call `ollama pull` here - create OLLAMA_HOST from OLLAMA_BASE_URL
-      ;; ----------------------------------------------------------------------
-      ;; TODO - this still doesn't show progress properly when run from docker compose
-
-      (let [done (async/chan)]
-        (async/go-loop [n 0]
-          (let [[v _] (async/alts! [done (async/timeout 5000)])]
-            (if (= :stop v) :stopped (do (println (format "... pulling model (%ss) - will take several minutes" (* n 10))) (recur (inc n))))))
-        (process/shell {:env {"OLLAMA_HOST" url "HOME" (System/getProperty "user.home")} :out :inherit :err :inherit} (format "bash -c './bin/ollama show %s --modelfile > /dev/null || ./bin/ollama pull %s'" llm llm))
-        (async/>!! done :stop))
-
-      (println "OLLAMA model only pulled if both LLM and OLLAMA_BASE_URL are set and the LLM model is not gpt")))
-  (catch Throwable _ (System/exit 1)))
-EOF
-
-ENTRYPOINT ["bb", "-f", "pull_model.clj"]
+ENTRYPOINT ["bb", "-f", "/usr/src/pull_model.clj"]
 
diff --git a/pull_model.clj b/pull_model.clj
new file mode 100644
index 000000000..ed9d3b0be
--- /dev/null
+++ b/pull_model.clj
@@ -0,0 +1,33 @@
+(ns pull-model
+  (:require [babashka.process :as process]
+            [clojure.core.async :as async]))
+
+(try
+  (let [llm (get (System/getenv) "LLM")
+        url (get (System/getenv) "OLLAMA_BASE_URL")]
+    (println (format "pulling ollama model %s using %s" llm url))
+    (if (and llm
+         url
+         (not (some #(.startsWith llm %) ["gpt"
+                                          "claudev2"]))
+         (not (some #(.startsWith llm %) ["ai21.jamba-instruct-v1:0"
+                                          "amazon.titan"
+                                          "anthropic.claude"
+                                          "cohere.command"
+                                          "meta.llama"
+                                          "mistral.mi"])))
+      ;; Pull path: both env vars are set and the model name matches none of the prefixes above.
+      ;; ----------------------------------------------------------------------
+      ;; just call `ollama pull` here - create OLLAMA_HOST from OLLAMA_BASE_URL
+      ;; ----------------------------------------------------------------------
+      ;; TODO - this still doesn't show progress properly when run from docker compose
+      ;; Heartbeat: one message per 5000 ms timeout until :stop arrives on `done`; (inc n) * 5 = elapsed seconds.
+      (let [done (async/chan)]
+        (async/go-loop [n 0]
+          (let [[v _] (async/alts! [done (async/timeout 5000)])]
+            (if (= :stop v) :stopped (do (println (format "... pulling model (%ss) - will take several minutes" (* (inc n) 5))) (recur (inc n))))))
+        (process/shell {:env {"OLLAMA_HOST" url "HOME" (System/getProperty "user.home")} :out :inherit :err :inherit} (format "bash -c './bin/ollama show %s --modelfile > /dev/null || ./bin/ollama pull %s'" llm llm))
+        (async/>!! done :stop))
+      ;; Skip path: a variable is missing or the model name matched an excluded prefix.
+      (println "OLLAMA model only pulled if both LLM and OLLAMA_BASE_URL are set and the LLM model is not gpt")))
+  (catch Throwable e (binding [*out* *err*] (println "pull_model failed:" (ex-message e))) (System/exit 1)))
diff --git a/rag_utils/__init__.py b/rag_utils/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/rag_utils/config.py b/rag_utils/config.py
new file mode 100644
index 000000000..327797aa3
--- /dev/null
+++ b/rag_utils/config.py
@@ -0,0 +1,49 @@
+import streamlit as st
+from streamlit.logger import get_logger
+import os
+import json
+
+from chains import (
+    load_embedding_model,
+    load_llm,
+)
+
+
+logger = get_logger(__name__)
+
+# load api key lib
+from dotenv import load_dotenv
+
+
+def init():
+    """Load environment settings and cache the shared RAG resources in Streamlit session state.
+
+    Reads `.env`, then stores in `st.session_state`: the Neo4j connection settings, the
+    embedding model and its dimension, the prompts parsed from `prompts.json`, and the LLM.
+
+    Raises:
+        RuntimeError: if NEO4J_URI is not set (os.environ cannot hold a None value).
+    """
+    load_dotenv(".env")
+
+    neo4j_uri = os.getenv("NEO4J_URI")
+    if neo4j_uri is None:
+        raise RuntimeError("NEO4J_URI is not set; define it in the environment or in .env")
+
+    st.session_state.vectorstore_config = {
+        'url': neo4j_uri,
+        'username': os.getenv("NEO4J_USERNAME"),
+        'password': os.getenv("NEO4J_PASSWORD"),
+    }
+    # Remapping for Langchain Neo4j integration
+    os.environ["NEO4J_URL"] = neo4j_uri
+
+    ollama_base_url = os.getenv("OLLAMA_BASE_URL")
+    embeddings, dimension = load_embedding_model(os.getenv("EMBEDDING_MODEL"), config={"ollama_base_url": ollama_base_url}, logger=logger)
+    st.session_state.embeddings = embeddings
+    st.session_state.dimension = dimension
+
+    with open('prompts.json', 'rb') as f:
+        st.session_state.prompts = json.load(f)
+
+    st.session_state.llm = load_llm(os.getenv("LLM"), logger=logger, config={"ollama_base_url": ollama_base_url})
diff --git a/rag_utils/content_indexing.py b/rag_utils/content_indexing.py
new file mode 100644
index 000000000..e8a1ad91b
--- /dev/null
+++ b/rag_utils/content_indexing.py
@@ -0,0 +1,127 @@
+from pdf2image import convert_from_bytes
+from PIL import Image
+import pytesseract
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Neo4jVector
+import logging
+
+
+def document_encoder_retriever(
+        document_name: str,
+        uploaded_file,
+        ocr_params: dict,
+        logger: logging.Logger,
+        vectorstore_config: dict,
+        embeddings
+):
+    '''
+    Indexing Phase:
+        Documents are transformed into vector representations using dense embeddings.
+        These vectors are stored in a vector database.
+
+    Args:
+        document_name: name prefixed to every chunk and used to build the node label.
+        uploaded_file: upload object exposing `.name` and `.getvalue()`; falsy means no file.
+        ocr_params: optional OCR settings; keys 'pages' and 'lang' are read.
+        logger: logger handed to the OCR step.
+        vectorstore_config: dict with 'url', 'username' and 'password'.
+        embeddings: embedding model passed to the vector store.
+
+    Returns:
+        The populated vector store, or None when no file was uploaded.
+    '''
+    if not uploaded_file:
+        return None
+
+    ocr_params = ocr_params or {}
+    ocr_kwargs = {}
+    ocr_lang = ocr_params.get('lang')
+    if ocr_lang and isinstance(ocr_lang, str):
+        # As before, a page selection is only forwarded together with an explicit language.
+        ocr_kwargs = {'pages': ocr_params.get('pages'), 'lang': ocr_lang}
+
+    bytes_data = uploaded_file.getvalue()
+    # Use the text after the LAST dot so names such as "scan.v2.pdf" resolve to "pdf";
+    # a name without any dot yields '' and is reported as unsupported by the OCR step.
+    file_format = uploaded_file.name.rpartition('.')[2].lower() if '.' in uploaded_file.name else ''
+
+    text = documents_OCR(uploaded_file, logger, bytes_data, file_format, **ocr_kwargs)
+    return build_vectorstore(document_name, vectorstore_config, embeddings, text_chunking(document_name, text))
+
+
+def documents_OCR(uploaded_file, logger, bytes_data, file_format, pages=None, lang='por'):
+    '''
+    OCR Step:
+        Extract text from PDFs, images and txt files.
+
+    Args:
+        uploaded_file: upload object; opened as an image for png/jpg/jpeg, `.name` is logged on error.
+        logger: receives an error entry when the format is not supported.
+        bytes_data: raw file content, used for the pdf and txt formats.
+        file_format: extension without the dot ('pdf', 'txt', 'png', 'jpg' or 'jpeg').
+        pages: zero-based pdf page indexes to OCR; None or empty means every page.
+        lang: language code forwarded to pytesseract.
+
+    Returns:
+        The extracted text, or '' for an unsupported format.
+    '''
+    match file_format:
+        case 'pdf':
+            images = convert_from_bytes(bytes_data)
+            wanted = pages if pages else range(len(images))
+            # Each selected page is prefixed with its zero-based index.
+            return "".join(
+                f"Página: {i} \n\n" + pytesseract.image_to_string(image, lang=lang)
+                for i, image in enumerate(images)
+                if i in wanted
+            )
+
+        case 'txt':
+            # bytes_data already holds the whole file; iterating a binary upload yields
+            # bytes lines, which cannot be appended to a str, so decode once instead.
+            return bytes_data.decode('utf-8', errors='replace') if isinstance(bytes_data, bytes) else str(bytes_data)
+
+        case 'png' | 'jpg' | 'jpeg':
+            return pytesseract.image_to_string(Image.open(uploaded_file), lang=lang)
+        case _:
+            logger.error(f"Formato do arquivo: {uploaded_file.name} não é suportado!")
+            return ""
+
+
+def text_chunking(document_name, text, size=10000, overlap=200, text_splitter=None):
+    '''
+    Chunking Step:
+        Break the document text into smaller pieces (chunks) and prefix each one with
+        "NOME_DO_DOCUMENTO: <document_name> " so the chunk carries its document name.
+
+    `size` and `overlap` (measured with len) only configure the default splitter, which is
+    built when no `text_splitter` is supplied. Returns the list of tagged chunks.
+    '''
+    splitter = text_splitter
+    if not splitter:
+        # Default: recursive splitter that breaks on blank lines first, then on newlines.
+        splitter = RecursiveCharacterTextSplitter(
+            chunk_size=size, chunk_overlap=overlap, length_function=len, separators=['\n\n', '\n']
+        )
+
+    tag = f"NOME_DO_DOCUMENTO: {document_name} "
+    return [tag + piece for piece in splitter.split_text(text=text)]
+
+
+def build_vectorstore(reference_name, vectorstore_config, embeddings, chunks):
+    '''
+    Store Embeddings Step:
+        Hand the chunks and the embedding model to Neo4jVector.from_texts, using a node
+        label derived from `reference_name`.
+    `vectorstore_config` must provide 'url', 'username' and 'password'.
+    Returns the Neo4jVector store built from `chunks`.
+    '''
+    connection = {key: vectorstore_config[key] for key in ('url', 'username', 'password')}
+    return Neo4jVector.from_texts(
+        chunks,
+        embedding=embeddings,
+        node_label=f"MultipleFilesBotChunk_{reference_name}",
+        pre_delete_collection=True,  # Delete existing data in collection
+        **connection,
+    )
+
diff --git a/rag_utils/pipeline.py b/rag_utils/pipeline.py
new file mode 100644
index 000000000..26b27aacf
--- /dev/null
+++ b/rag_utils/pipeline.py
@@ -0,0 +1,87 @@
+from rag_utils.content_indexing import document_encoder_retriever
+from rag_utils.qa_document_retrieval import build_agent
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_community.callbacks import get_openai_callback
+import logging
+
+
+def RAG_document_retrieval(
+    document, file, prompts, logger: logging.Logger, embeddings, vectordb_config, llm,
+    ocr_params=None,
+) -> str:
+    """Index one uploaded document and answer its configured extraction query.
+
+    Args:
+        document: key into `prompts`; also passed on as the document name for indexing.
+        file: uploaded file handed to the OCR/indexing step.
+        prompts: mapping whose `[document]['latest']` holds the 'prompt' and 'input' texts.
+        logger: receives the token and cost figures collected by the OpenAI callback.
+        embeddings: embedding model for the vector store.
+        vectordb_config: vector-store connection settings.
+        llm: model used by the retrieval chain.
+        ocr_params: OCR options; None means {'pages': None, 'lang': 'por'}.
+
+    Returns:
+        The 'answer' entry of the chain output.
+
+    Raises:
+        KeyError: if `prompts` has no 'latest' entry for `document` (checked before indexing).
+    """
+    if ocr_params is None:
+        # Fresh dict per call: a dict literal in the signature would be shared by every call.
+        ocr_params = {'pages': None, 'lang': 'por'}
+    latest = prompts[document]['latest']
+
+    # Text extraction and embedding using OCR and LLM to build a QA RAG
+    document_retriever = document_encoder_retriever(
+        document_name=document, uploaded_file=file, ocr_params=ocr_params,
+        logger=logger, embeddings=embeddings, vectorstore_config=vectordb_config,
+    )
+    agent = build_agent(prompt=latest['prompt'], vectorstore=document_retriever, logger=logger, llm=llm)
+    with get_openai_callback() as cb:
+        answer = agent.invoke({'input': latest['input']})['answer']
+
+    logger.info(f"Total Tokens: {cb.total_tokens}")
+    logger.info(f"Prompt Tokens: {cb.prompt_tokens}")
+    logger.info(f"Completion Tokens: {cb.completion_tokens}")
+    logger.info(f"Total Cost (USD): ${cb.total_cost}")
+    return answer
+
+
+def RAG_document_validator(document, document_answer, minuta_answer, llm, logger: logging.Logger):
+    """Ask the LLM for a table comparing a document's extracted data with the Minuta data.
+
+    `document_answer` and `minuta_answer` are strings; they reach the prompt through the
+    `context` template variable, so braces inside them are not parsed as placeholders.
+    Token and cost figures from the OpenAI callback are written to `logger`.
+
+    Returns the model reply as produced by StrOutputParser.
+    """
+    # Build context aggregating information from document and Minuta
+    context = f"Tabela {document} " + document_answer + "| Tabela Minuta" + minuta_answer
+
+    # Instructions of how to check if Minuta information matches document information
+    system_prompt = """ 
+    Você é um assistente que compara dados obtidos de diferentes documentos. 
+    O usuário fornecerá duas tabelas após o termo 'Contexto'.
+    Auxilie o usuário a checar se os dados nessas duas tabelas estão escritos da mesma forma. 
+    A comparação dos dados em comum precisa estar numa tabela. 
+    Dados que aparecem em apenas uma das tabelas fornecidas não precisa aparecer na tabela de comparação.
+    A comparação pode ignorar diferenças entre letras maiúsculas e minúsculas, e a presença de símbolos '.', '-', ou '/'.
+    A tabela de comparação precisa ter uma coluna 'Validação' que indica se os dados foram escritos de forma idêntica. 
+    """ + " Contexto: {context} "
+    prompt = ChatPromptTemplate([("system", system_prompt), ("human", "{input}")])
+    chain = prompt | llm | StrOutputParser()
+    question = f"Compare apenas os dados do {document} os quais também estejam presentes na Minuta."
+
+    # QA RAG document validation
+    with get_openai_callback() as cb:
+        answer = chain.invoke({"input": question, "context": context})
+
+    logger.info(f"Total Tokens: {cb.total_tokens}")
+    logger.info(f"Prompt Tokens: {cb.prompt_tokens}")
+    logger.info(f"Completion Tokens: {cb.completion_tokens}")
+    logger.info(f"Total Cost (USD): ${cb.total_cost}")
+
+    return answer
\ No newline at end of file
diff --git a/rag_utils/qa_document_retrieval.py b/rag_utils/qa_document_retrieval.py
new file mode 100644
index 000000000..9e89eb05f
--- /dev/null
+++ b/rag_utils/qa_document_retrieval.py
@@ -0,0 +1,31 @@
+from langchain_core.prompts import ChatPromptTemplate
+from langchain.chains import create_retrieval_chain
+from langchain_core.output_parsers import StrOutputParser
+from langchain.chains.combine_documents import create_stuff_documents_chain
+import logging
+
+
+def build_agent(prompt, vectorstore, logger: logging.Logger, history_context="", llm=None):
+    """Assemble a retrieval chain that answers questions from `vectorstore` content.
+
+    Args:
+        prompt: system instructions; a generic fallback is used when falsy.
+        vectorstore: store exposing `as_retriever()`.
+        logger: receives an error entry when `llm` is missing.
+        history_context: extra text appended to the system prompt after the context slot.
+        llm: chat model; when falsy nothing is built.
+
+    Returns:
+        The retrieval chain, or None when no `llm` was supplied.
+    """
+    if not llm:
+        logger.error("LLM is not available!")
+        return None
+
+    fallback = "You are a user assistant. Answer the questions using only the context provided."
+    # "{context}" stays a template placeholder; it is not substituted here.
+    system_text = (prompt or fallback) + " Context: {context} " + history_context + " "
+    template = ChatPromptTemplate([("system", system_text), ("human", "{input}")])
+
+    answer_chain = create_stuff_documents_chain(llm, template)
+    return create_retrieval_chain(vectorstore.as_retriever(), answer_chain)
diff --git a/requirements.txt b/requirements.txt
index 2670d2535..02aca6e66 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,15 +2,16 @@ python-dotenv
 wikipedia
 tiktoken
 neo4j
-streamlit
+streamlit==1.44.0
 Pillow
 fastapi
+pdf2image==1.17.0
 PyPDF2
+pytesseract
 pydantic
 uvicorn
 sse-starlette
 boto3
-streamlit==1.32.1
 # missing from the langchain base image?
 langchain-openai==0.2.4
 langchain-community==0.3.3
diff --git a/utils.py b/utils.py
index 23ad5b63d..15ecf6764 100644
--- a/utils.py
+++ b/utils.py
@@ -1,8 +1,21 @@
+from langchain.callbacks.base import BaseCallbackHandler
+
+
 class BaseLogger:
     def __init__(self) -> None:
         self.info = print
 
 
+class StreamHandler(BaseCallbackHandler):
+    """Accumulate streamed LLM tokens and re-render the running text as markdown in `container`."""
+    def __init__(self, container, initial_text=""):
+        self.container, self.text = container, initial_text
+
+    def on_llm_new_token(self, token: str, **kwargs) -> None:
+        self.text = self.text + token  # keep everything received so far
+        self.container.markdown(self.text)
+
+
 def extract_title_and_question(input_string):
     lines = input_string.strip().split("\n")