Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
dc3613d
fix: Dockerfile to build and pull images.
leonardovaleriano Jan 21, 2025
6940459
feat: bot using tesseract to extract text from images and implement RAG.
leonardovaleriano Feb 13, 2025
88e27bb
Creating a prompt for each document.
leonardovaleriano Mar 29, 2025
8ff1437
refactoring app to process and run queries about the documents.
leonardovaleriano Mar 29, 2025
69003b9
remove commented code lines.
leonardovaleriano Mar 29, 2025
0efcd32
feat: minuta prompt and input query.
leonardovaleriano Apr 8, 2025
f41f7ff
feat: minuta pdf content extraction and RAG agent.
leonardovaleriano Apr 8, 2025
c95ea88
feat: prompt to compare and validate data from two tables.
leonardovaleriano Apr 13, 2025
df0b7eb
refactor:add package for rag systems and split steps on 3 pages.
leonardovaleriano Apr 16, 2025
3a31f6b
Merge pull request #1 from leonardovaleriano/feature/bot_using_multip…
leonardovaleriano Apr 16, 2025
a89cf38
feat: RAG package for multiple documents content retrieval.
leonardovaleriano Apr 23, 2025
4b8e992
refactor:streamlit pages for minuta, parte compradora and parte vende…
leonardovaleriano Apr 23, 2025
a91895e
refactor:rename file.
leonardovaleriano Apr 23, 2025
6794c3e
refactor: prompt engineering and add Streaming Output tokens at utils…
leonardovaleriano Apr 23, 2025
14e471a
Changing AI to OpenAI API
leonardovaleriano Apr 24, 2025
43fac6b
feat:output text used as Context
leonardovaleriano Apr 24, 2025
a0c1e49
feat:add monitoring logs about tokens
leonardovaleriano Apr 24, 2025
e9b04ef
refactor: allow more versions from OpenAI models
leonardovaleriano Apr 24, 2025
b255394
Merge pull request #2 from leonardovaleriano/feature/bot_using_multip…
leonardovaleriano Apr 25, 2025
b6b2de5
Merge branch 'feature/bot_using_multiple_files' into develop
leonardovaleriano Apr 25, 2025
ab4cd41
feature:add document viewer at all pages
leonardovaleriano Apr 25, 2025
4699162
Merge pull request #3 from leonardovaleriano/feature/document_viewer
leonardovaleriano Apr 25, 2025
b577f41
feat:changing layout to top.
leonardovaleriano May 20, 2025
bd34290
Merge branch 'feature/document_viewer' into develop
leonardovaleriano Jun 17, 2025
35186bd
Merge branch 'release/startlegal_modulo_revisor'
leonardovaleriano Jun 17, 2025
0e5e6f2
Merge tag 'vstartlegal_modulo_revisor' into develop
leonardovaleriano Jun 17, 2025
85e7def
Adding page for new feature. Organizing home page to route multiple p…
leonardovaleriano Jun 17, 2025
6a3b029
Feature layout.
leonardovaleriano Jun 17, 2025
9003a7a
Setting up Wide layout configuration.
leonardovaleriano Jun 17, 2025
bf2821f
feat:Threading processing and progress bar synchronization.
leonardovaleriano Jun 18, 2025
cf90b95
fix: counter percentage instead of cumulative counter. reset counter …
leonardovaleriano Jun 18, 2025
417fe3f
Merge branch 'feature/escrita_minuta' into develop
leonardovaleriano Jun 18, 2025
71290a2
Merge branch 'release/2.0'
leonardovaleriano Jun 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
*
!*.py
!requirements.txt
!images/*
Expand Down
172 changes: 172 additions & 0 deletions Escrita_de_Minuta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import streamlit as st
from rag_utils.config import init
from rag_utils.pipeline import RAG_document_retrieval
import base64
import threading
import logging
import time


# Progress of the buyer-side document processing as a fraction (0 = not started,
# 1 = all files done). Written by parte_compradora_agents_thread and polled by
# parte_compradora_button_callback through `global`.
session_state_status_percent = 0


def parte_compradora_agents_thread(uploaded_files):
    """Process the buyer-side documents one by one, publishing progress.

    Used as the target of the thread started by
    ``parte_compradora_button_callback``. Progress is exposed through the
    module-level ``session_state_status_percent`` (a fraction of files done)
    and a human-readable message is stored in ``st.session_state.status``.

    Args:
        uploaded_files: Sequence of uploaded file objects; each must expose a
            ``name`` attribute. A falsy value makes the call a no-op.
    """
    global session_state_status_percent

    # Without files there is nothing to report or process.
    if not uploaded_files:
        return

    st.session_state.status = "Processando documentos da parte compradora..."
    logging.info("Parte compradora: Iniciando o processamento dos documentos.")

    session_state_status_percent = 0
    total_files = len(uploaded_files)

    for files_done, current_file in enumerate(uploaded_files, start=1):
        # Placeholder for the real work (e.g. a RAG_document_retrieval call):
        # for now each file just costs one second.
        time.sleep(1)
        st.session_state.status = f"Processando {current_file.name}..."
        session_state_status_percent = files_done / total_files
        logging.info(f"Parte compradora: Processando {current_file.name}...")
        logging.info(f"Parte compradora (Thread): Progresso {session_state_status_percent:.2%}")

    st.session_state.status = "Documentos da parte compradora processados com sucesso!"
    logging.info("Parte compradora: Documentos processados com sucesso!")


def parte_compradora_button_callback(uploaded_files, container):
    """Run buyer-side processing in a worker thread and mirror it in a progress bar.

    Starts ``parte_compradora_agents_thread`` as a daemon thread, then polls the
    module-level ``session_state_status_percent`` every 100 ms to update a
    progress bar rendered inside ``container``. When the worker is done the bar
    is removed, the shared counter is reset and the final status is stored in
    ``st.session_state.status``.

    Args:
        uploaded_files: Files handed over to the worker thread.
        container: Streamlit container in which the progress bar is drawn.
    """
    global session_state_status_percent

    worker = threading.Thread(
        target=parte_compradora_agents_thread,
        args=(uploaded_files,),
        daemon=True
    )
    worker.start()

    with container:
        bar = st.progress(0, text_ocr)
        # Stop polling as soon as the worker exits, not only at 100%: if the
        # worker raises or has nothing to process, the counter never reaches 1
        # and waiting on it alone would block this callback forever.
        while worker.is_alive() and session_state_status_percent < 1:
            time.sleep(0.1)
            bar.progress(session_state_status_percent, text_ocr)
            logging.info(f"Parte compradora: Progresso {session_state_status_percent:.2%}")
        bar.empty()
    worker.join()

    # Reset the shared counter so the next run starts from an empty bar.
    session_state_status_percent = 0
    st.session_state.status = "Processamento finalizado!"
    logging.info("Parte compradora: Processamento finalizado!")


def parte_vendedora_button_callback(uploaded_files=None, container=None):
    """Placeholder click handler for the seller-side section (does nothing yet).

    The handler is registered through ``on_click`` together with
    ``args=(uploaded_files, container)``, so it must accept those two
    positional arguments; both default to ``None`` so existing zero-argument
    calls keep working.

    Args:
        uploaded_files: Files uploaded in the seller section (currently unused).
        container: Container of the seller section (currently unused).
    """
    # TODO: implement seller-side processing.
    return None


def imovel_button_callback(uploaded_files=None, container=None):
    """Placeholder click handler for the property-deed section (does nothing yet).

    The handler is registered through ``on_click`` together with
    ``args=(uploaded_files, container)``, so it must accept those two
    positional arguments; both default to ``None`` so existing zero-argument
    calls keep working.

    Args:
        uploaded_files: Files uploaded in the property section (currently unused).
        container: Container of the property section (currently unused).
    """
    # TODO: implement property-deed processing.
    return None


def container_files_uploader_and_text_writer(container, labels: dict, key, callback):
    """Render one upload section: heading, multi-file uploader and action button.

    Args:
        container: Object providing ``markdown``, ``file_uploader``, ``button``
            and ``write`` (a Streamlit container in this page).
        labels: Mapping with the keys ``'markdown_label'``,
            ``'file_uploader_label'`` and ``'button_label'``.
        key: Prefix used to derive the widget keys ``"<key>_file_uploader"``
            and ``"<key>_button"``.
        callback: Click handler for the button; it is registered with
            ``args=(uploaded_files, container)``.
    """
    heading = labels['markdown_label']
    container.markdown(f"**{heading}**")

    uploaded_files = container.file_uploader(
        labels['file_uploader_label'],
        key=f"{key}_file_uploader",
        type=["pdf", "jpg", "jpeg", "png"],
        accept_multiple_files=True,
    )

    # The button stays disabled until at least one file has been uploaded.
    nothing_uploaded = not uploaded_files
    write_text_button = container.button(
        labels['button_label'],
        key=f"{key}_button",
        on_click=callback,
        args=(uploaded_files, container),
        disabled=nothing_uploaded,
        help="Clique para gerar o parágrafo com as informações extraídas dos documentos.",
    )

    # Only show the status line on the run triggered by a click with files present.
    if nothing_uploaded or not write_text_button:
        return
    container.write(f"Status: {st.session_state.status}")

# Configure the root logger so INFO-level records from this page are emitted.
logging.basicConfig(level = logging.INFO)
# Session bootstrap: mark the session as initialised, seed the default status
# message shown to the user, and call init() from rag_utils.config.
# NOTE(review): indentation was lost in this view, so it is unclear whether the
# 'status' check and the init() call are nested under the 'init' guard — confirm
# against the real file.
if 'init' not in st.session_state:
st.session_state.init = True
if 'status' not in st.session_state:
st.session_state.status = "Aguardando o upload dos documentos..."
init()

# One-time, per-session setup of the writer page: registers the document types
# expected from the buyer and from the seller/owner.
# The guard checks 'init_writer_page', so that flag must actually be set here;
# previously only 'init_buyer_writer_page' was written and the guard was always
# true. The old flag is still written in case other code reads it.
if 'init_writer_page' not in st.session_state:
    st.session_state.init_writer_page = True
    st.session_state.init_buyer_writer_page = True

    st.session_state.buyer_documents_list = [
        'CNH Comprador',
        'Comprovante de Residência Comprador',
        'Certidão de Casamento Comprador',
        'Pacto Antenupcial ou Declaração de União Estável',
        'CNH Cônjuge',
        'Quitação ITBI'
    ]

    st.session_state.owner_documents_list = [
        'CNH Vendedor',
        'Comprovante de Residência Vendedor',
        'Matrícula do Imóvel'
    ]

# Text displayed next to the progress bar while documents are processed; also
# read as a module-level name by parte_compradora_button_callback.
text_ocr = "Extraindo informações dos documentos..."


def _section_labels(heading, uploader_prompt):
    """Build the labels mapping for one upload section of the page."""
    return {
        'markdown_label': heading,
        'file_uploader_label': uploader_prompt,
        'button_label': 'Gerar Parágrafo',
        'progress_text': text_ocr,
    }


# Page header and short usage instruction.
st.title(body='✍️ StartLegal - Escritor de Minutas')
st.header("Assistente de Elaboração de Escrituras", divider='gray')
st.write(
    "Anexe os documentos necessários das partes compradora e vendedora e a escritura do imóvel."
)

# Section 1: buyer documents.
parte_compradora = st.container()
container_files_uploader_and_text_writer(
    parte_compradora,
    _section_labels('**Parte Compradora**', 'Anexe os documentos da parte compradora'),
    'parte_compradora',
    parte_compradora_button_callback,
)

st.divider()

# Section 2: seller documents.
parte_vendedora = st.container()
container_files_uploader_and_text_writer(
    parte_vendedora,
    _section_labels('**Parte Vendedora**', 'Anexe os documentos da parte vendedora'),
    'parte_vendedora',
    parte_vendedora_button_callback,
)

st.divider()

# Section 3: property deed.
imovel = st.container()
container_files_uploader_and_text_writer(
    imovel,
    _section_labels('**Escritura do Imóvel**', 'Anexe a escritura do imóvel'),
    'imovel',
    imovel_button_callback,
)
27 changes: 27 additions & 0 deletions Revisor_de_Minuta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import streamlit as st

# Landing page of the draft-review flow: title, header and a short instruction.
st.title(body='📄 StartLegal - Revisor de Minutas')
st.header("Revisão de Minuta de Escrituras", divider='gray', )

st.write(
"Anexe a minuta de uma escritura e em seguida os documentos necessários para revisão."
)

# Step-by-step usage guide (Portuguese, user-facing) rendered as Markdown below.
doc_ = '''Siga os passos abaixo para revisar informações da Minuta:
1. No menu à esquerda, clique em "Anexar Minuta" para inserir uma minuta no sistema e iniciar o processo de revisão.
2. Em seguida clique em "Parte Compradora" e insira no sistema os documentos necessários em cada aba disponível (se necessário).

2.1. Aguarde o sistema extrair as informações e realizar a comparação com a Minuta fornecida.

2.2. Caso encontre alguma inconsistência, reportar o escrivão e finalizar o processo de revisão.

3. Por último, clique em "Parte Vendedora" e insira os documentos solicitados.

3.1. Aguarde o sistema extrair as informações e realizar a comparação com a Minuta fornecida.

3.2 Caso encontre alguma inconsistência, reportar o escrivão e finalizar o processo de revisão.
'''

# Render the guide.
st.markdown(
doc_
)
22 changes: 22 additions & 0 deletions StartLegal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import streamlit as st
from streamlit.logger import get_logger


# Global page configuration for the multi-page app (wide layout).
st.set_page_config(
    page_title="StartLegal - IA para Cartórios",
    page_icon="🤖",
    layout="wide",
)

logger = get_logger(__name__)

# Page of the "write a draft" feature.
escritor_page = st.Page("Escrita_de_Minuta.py", title="Escrita de Minuta", icon="✍️")

# Pages of the "review a draft" feature, in the order they appear in the menu.
revisor_page = st.Page("Revisor_de_Minuta.py", title="Guia de Usabilidade", icon="📄")
upload_minuta_page = st.Page("pages/1_Anexar_Minuta.py", title="Minuta", icon="📄")
parte_compradora_page = st.Page("pages/2_Parte_Compradora.py", title="Parte Compradora", icon="📄")
parte_vendedora_page = st.Page("pages/3_Parte_Vendedora.py", title="Parte Vendedora", icon="📄")

# Navigation menu: section title -> pages listed under it.
navigation_sections = {
    "Escrita de Minutas": [escritor_page],
    "Revisão de Minutas": [
        revisor_page,
        upload_minuta_page,
        parte_compradora_page,
        parte_vendedora_page,
    ],
}

pg = st.navigation(navigation_sections)
pg.run()
Empty file added __init__.py
Empty file.
9 changes: 3 additions & 6 deletions chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,9 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=


def load_llm(llm_name: str, logger=BaseLogger(), config={}):
if llm_name in ["gpt-4", "gpt-4o", "gpt-4-turbo"]:
logger.info("LLM: Using GPT-4")
return ChatOpenAI(temperature=0, model_name=llm_name, streaming=True)
elif llm_name == "gpt-3.5":
logger.info("LLM: Using GPT-3.5")
return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
if llm_name.startswith("gpt"):
logger.info(f"LLM: Using OPENAI: {llm_name}")
return ChatOpenAI(temperature=0, model_name=llm_name)
elif llm_name == "claudev2":
logger.info("LLM: ClaudeV2")
return ChatBedrock(
Expand Down
45 changes: 44 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ services:
tty: true

database:
user: neo4j:neo4j
#user: neo4j:neo4j
image: neo4j:5.23
ports:
- 7687:7687
Expand Down Expand Up @@ -86,6 +86,7 @@ services:
- bot.py
- pdf_bot.py
- api.py
- multiple_files_bot.py
- front-end/
ports:
- 8081:8080
Expand Down Expand Up @@ -129,6 +130,7 @@ services:
- loader.py
- pdf_bot.py
- api.py
- multiple_files_bot.py
- front-end/
ports:
- 8501:8501
Expand Down Expand Up @@ -168,10 +170,50 @@ services:
- loader.py
- bot.py
- api.py
- multiple_files_bot.py
- front-end/
ports:
- 8503:8503

# Streamlit app built from multiple_files_bot.Dockerfile, published on port 8505.
multiple_files_bot:
  build:
    context: .
    dockerfile: multiple_files_bot.Dockerfile
  environment:
    - NEO4J_URI=${NEO4J_URI-neo4j://database:7687}
    - NEO4J_PASSWORD=${NEO4J_PASSWORD-password}
    - NEO4J_USERNAME=${NEO4J_USERNAME-neo4j}
    - OPENAI_API_KEY=${OPENAI_API_KEY-}
    - GOOGLE_API_KEY=${GOOGLE_API_KEY-}
    - OLLAMA_BASE_URL=${OLLAMA_BASE_URL-http://host.docker.internal:11434}
    - LLM=${LLM-llama2}
    - EMBEDDING_MODEL=${EMBEDDING_MODEL-sentence_transformer}
    - LANGCHAIN_ENDPOINT=${LANGCHAIN_ENDPOINT-"https://api.smith.langchain.com"}
    - LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2-false}
    - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT}
    - LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY}
    - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
    - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
    - AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
  networks:
    - net
  depends_on:
    database:
      condition: service_healthy
    pull-model:
      condition: service_completed_successfully
  x-develop:
    watch:
      - action: rebuild
        path: .
        # Entry files of the other services; pdf_bot.py is listed as well so
        # this list matches the pattern used by the sibling services.
        ignore:
          - loader.py
          - bot.py
          - pdf_bot.py
          - api.py
          - front-end/
  ports:
    - 8505:8505

api:
build:
context: .
Expand Down Expand Up @@ -209,6 +251,7 @@ services:
- loader.py
- bot.py
- pdf_bot.py
- multiple_files_bot.py
- front-end/
ports:
- 8504:8504
Expand Down
35 changes: 35 additions & 0 deletions multiple_files_bot.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Image for the StartLegal multi-page Streamlit app, served on port 8505.
FROM langchain/langchain

WORKDIR /app

RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    software-properties-common \
    && rm -rf /var/lib/apt/lists/*

# System packages for PDF handling (poppler) and OCR (tesseract plus the "por"
# language pack). The package lists are removed afterwards, as in the layer above.
RUN apt-get update && apt-get install -y \
    poppler-utils \
    tesseract-ocr \
    libtesseract-dev \
    tesseract-ocr-por \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .

RUN pip install --upgrade -r requirements.txt

ADD rag_utils rag_utils
ADD pages pages
COPY __init__.py .
COPY StartLegal.py .
COPY Revisor_de_Minuta.py .
COPY Escrita_de_Minuta.py .
COPY prompts.json .
COPY utils.py .
COPY chains.py .

EXPOSE 8505

# Probe the same port the app listens on (see --server.port below); the check
# previously targeted 8503, which nothing in this container serves.
HEALTHCHECK CMD curl --fail http://localhost:8505/_stcore/health

ENTRYPOINT ["streamlit", "run", "StartLegal.py", "--server.port=8505", "--server.address=0.0.0.0"]
Loading