Skip to content

Commit 5147453

Browse files
fix autopep8 errors
1 parent 5b9b0e2 commit 5147453

File tree

2 files changed

+59
-51
lines changed

2 files changed

+59
-51
lines changed
Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
1+
from langchain_text_splitters import RecursiveCharacterTextSplitter
2+
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
3+
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
4+
from langchain.prompts import PromptTemplate
5+
from langchain.chains import RetrievalQA
6+
from langchain_chroma import Chroma
7+
from optimum.intel import OVModelForCausalLM
8+
from transformers import AutoTokenizer, pipeline
9+
import openvino as ov
110
import os
211
os.environ['USER_AGENT'] = "SummarizeBot"
312

4-
import openvino as ov
5-
from transformers import AutoTokenizer, pipeline
6-
from optimum.intel import OVModelForCausalLM
7-
from langchain_chroma import Chroma
8-
from langchain.chains import RetrievalQA
9-
from langchain.prompts import PromptTemplate
10-
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
11-
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
12-
from langchain_text_splitters import RecursiveCharacterTextSplitter
1313

1414
def get_device():
1515
core = ov.Core()
@@ -23,11 +23,11 @@ def get_device():
2323
class TextSummarizerEngine:
2424
"""
2525
A class for managing text summarization and Q&A operations using LLMs and vector stores.
26-
26+
2727
This class encapsulates the functionality for loading LLMs, processing documents,
2828
and generating summaries and answers for both web content and PDFs.
2929
"""
30-
30+
3131
# Prompt Templates for Summarization & QA Bot
3232
SUMMARY_TEMPLATE = """Write a concise summary of the following: "{context}" CONCISE SUMMARY: """
3333
QUERY_TEMPLATE = """Use the following pieces of context to answer the question at the end.
@@ -37,41 +37,41 @@ class TextSummarizerEngine:
3737
{context}
3838
Question: {question}
3939
Helpful Answer:"""
40-
40+
4141
# Embedding model name
4242
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
43-
43+
4444
def __init__(self):
    """Create an engine with no model loaded and no document indexed.

    The model identifier, model path, LLM wrapper, tokenizer and vector
    store all start as ``None``. Only the embedding model is built here,
    from the class-level ``EMBEDDING_MODEL`` name.
    """
    # Model-related state; all unset at construction time.
    self.model_id = self.model_path = None
    self.llm = self.tokenizer = None

    # Vector store for document processing; unset at construction time.
    self.vectorstore = None

    # Embedding model, built last exactly as in the original ordering.
    embedding_name = self.EMBEDDING_MODEL
    self.embeddings = HuggingFaceEmbeddings(model_name=embedding_name)
56-
56+
5757
def load_model(self, model_id):
5858
"""
5959
Load and initialize the specified LLM model using OpenVINO optimization.
6060
"""
6161
self.model_id = model_id
62-
62+
6363
if model_id == "Meta LLama 2":
6464
self.model_path = "../models/ov_llama_2"
6565
elif model_id == "Qwen 7B Instruct":
6666
self.model_path = "../models/ov_qwen7b"
6767
else:
6868
raise ValueError(f"Unsupported model ID: {model_id}")
69-
69+
7070
# Load the model with OpenVINO optimization
7171
device = get_device()
7272
model = OVModelForCausalLM.from_pretrained(self.model_path, device=device)
7373
self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
74-
74+
7575
# Create a text generation pipeline
7676
pipe = pipeline(
7777
"text-generation",
@@ -80,10 +80,10 @@ def load_model(self, model_id):
8080
max_new_tokens=4096,
8181
device=model.device,
8282
)
83-
83+
8484
# Create a LangChain compatible model
8585
self.llm = HuggingFacePipeline(pipeline=pipe)
86-
86+
8787
def _process_document(self, loader):
8888
"""
8989
Process document content from a loader and create a vector store.
@@ -94,36 +94,36 @@ def _process_document(self, loader):
9494
chunk_size=1000, chunk_overlap=20
9595
)
9696
all_splits = text_splitter.split_documents(page_data)
97-
97+
9898
# Create and return a vector store from the document chunks
9999
vectorstore = Chroma.from_documents(
100-
documents=all_splits,
100+
documents=all_splits,
101101
embedding=self.embeddings
102102
)
103103
return vectorstore
104-
104+
105105
async def process_document(self, source, is_url=True):
106106
"""
107107
Process a document (URL or PDF) to generate a summary of its content.
108108
"""
109109
if not self.llm:
110110
raise ValueError("Model not loaded. Call load_model first.")
111-
111+
112112
# Create the appropriate loader based on the document type
113113
if is_url:
114114
loader = WebBaseLoader(source)
115115
else:
116116
loader = PyPDFLoader(source, extract_images=False)
117-
117+
118118
# Process the document content
119119
self.vectorstore = self._process_document(loader)
120-
120+
121121
# Create a prompt for summarization
122122
prompt = PromptTemplate(
123-
template=self.SUMMARY_TEMPLATE,
123+
template=self.SUMMARY_TEMPLATE,
124124
input_variables=["context"]
125125
)
126-
126+
127127
# Create a retrieval QA chain
128128
qa_chain = RetrievalQA.from_chain_type(
129129
llm=self.llm,
@@ -132,7 +132,7 @@ async def process_document(self, source, is_url=True):
132132
chain_type_kwargs={"prompt": prompt},
133133
return_source_documents=False,
134134
)
135-
135+
136136
# Generate a summary
137137
question = "Please summarize the entire content in one paragraph of 100 words"
138138
summary = qa_chain.invoke(question)["result"]
@@ -142,20 +142,20 @@ async def process_document(self, source, is_url=True):
142142
else:
143143
summary = "No summary found."
144144
return summary
145-
145+
146146
async def answer_question(self, query):
147147
"""
148148
Answer a question about previously processed document content.
149149
"""
150150
if not self.llm or not self.vectorstore:
151151
raise ValueError("Document content not processed or model not loaded.")
152-
152+
153153
# Create a prompt for Q&A
154154
prompt = PromptTemplate(
155-
template=self.QUERY_TEMPLATE,
155+
template=self.QUERY_TEMPLATE,
156156
input_variables=["context", "question"]
157157
)
158-
158+
159159
# Create a retrieval QA chain
160160
reduce_chain = RetrievalQA.from_chain_type(
161161
llm=self.llm,
@@ -164,7 +164,7 @@ async def answer_question(self, query):
164164
chain_type_kwargs={"prompt": prompt},
165165
return_source_documents=False,
166166
)
167-
167+
168168
# Generate an answer
169169
response = reduce_chain.invoke({"query": query})['result']
170170
start_idx = response.find("Helpful Answer:")
@@ -173,7 +173,7 @@ async def answer_question(self, query):
173173
else:
174174
response = "No answer found."
175175
return response
176-
176+
177177
def cleanup(self):
178178
"""Clean up resources when done."""
179179
if self.vectorstore:
@@ -182,12 +182,12 @@ def cleanup(self):
182182
except Exception:
183183
print("Failed to delete vector store collection")
184184

185+
185186
if __name__ == "__main__":
    # Example usage: load a model, summarize one URL, then release resources.
    import asyncio

    engine = TextSummarizerEngine()
    try:
        engine.load_model("Qwen 7B Instruct")

        # Process a document (URL or PDF).
        # process_document is declared with ``async def``; calling it without
        # an event loop only creates a coroutine object and never runs the
        # summarization, so drive it to completion with asyncio.run().
        summary = asyncio.run(
            engine.process_document("https://example.com/document", is_url=True)
        )
        print(summary)
    finally:
        # Always attempt cleanup, even if loading or summarizing failed.
        engine.cleanup()
193-

Text-Summarizer-Browser-Plugin/backend/server.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,15 @@
1111
# Suppress warnings
1212
warnings.filterwarnings("ignore")
1313

14+
1415
# Payload model taken by the /select-model route; `model_id` is restricted to the two literal names below.
class ModelSelectionRequest(BaseModel):
1516
model_id: Literal["Meta LLama 2", "Qwen 7B Instruct"]
1617

18+
1719
# Model with a single `url` field typed as HttpUrl; process_url reads it as `url_req.url`.
class UrlRequest(BaseModel):
1820
url: HttpUrl
1921

22+
2023
# Create FastAPI app instance
2124
app = FastAPI()
2225
app.add_middleware(
@@ -30,6 +33,8 @@ class UrlRequest(BaseModel):
3033
engine_instance = None
3134

3235
# Startup event to initialize the engine
36+
37+
3338
@app.on_event("startup")
3439
async def startup_event():
3540
"""Initialize the TextSummarizerEngine when the application starts"""
@@ -38,20 +43,23 @@ async def startup_event():
3843
print("TextSummarizerEngine initialized on startup")
3944

4045
# Dependency to get the engine
46+
47+
4148
async def get_engine():
    """Return the module-level engine for routes that depend on it.

    Raises:
        HTTPException: with status 500 when ``engine_instance`` is ``None``.
    """
    if engine_instance is not None:
        return engine_instance
    raise HTTPException(status_code=500, detail="Engine not initialized")
4653

54+
4755
@app.get("/")
async def root():
    """Return a fixed status message for the root path."""
    status_text = "The backend server is running. Please use the browser plugin to interact with it."
    return {"message": status_text}
5058

5159

5260
@app.post("/select-model")
5361
async def select_model(
54-
req: ModelSelectionRequest,
62+
req: ModelSelectionRequest,
5563
engine: TextSummarizerEngine = Depends(get_engine)
5664
):
5765
"""
@@ -62,7 +70,6 @@ async def select_model(
6270
return {"message": f"Model {req.model_id} loaded successfully."}
6371

6472

65-
6673
# Add a cleanup event to free resources when the server shuts down
6774
@app.on_event("shutdown")
6875
async def shutdown_event():
@@ -84,11 +91,11 @@ async def process_url(
8491
url = str(url_req.url)
8592
if not url:
8693
return JSONResponse(content={"message": "No URL provided"}, status_code=400)
87-
94+
8895
# Make sure the model is loaded
8996
if engine.llm is None:
9097
return JSONResponse(content={"message": "Model not loaded. Please select a model first."}, status_code=400)
91-
98+
9299
# Process the URL and return summary
93100
summary = await engine.process_document(url, is_url=True)
94101
return JSONResponse(content={"message": summary}, status_code=200)
@@ -99,7 +106,7 @@ async def process_url(
99106

100107
@app.post("/upload-pdf")
101108
async def upload_pdf(
102-
pdf_file: UploadFile = File(...),
109+
pdf_file: UploadFile = File(...),
103110
engine: TextSummarizerEngine = Depends(get_engine)
104111
):
105112
"""
@@ -117,17 +124,18 @@ async def upload_pdf(
117124
# Make sure the model is loaded
118125
if engine.llm is None:
119126
return JSONResponse(content={"message": "Model not loaded. Please select a model first."}, status_code=400)
120-
127+
121128
# Process the PDF and return summary
122129
summary = await engine.process_document(temp_pdf_path, is_url=False)
123-
return JSONResponse(content={"message": summary}, status_code=200)
130+
return JSONResponse(content={"message": summary}, status_code=200)
124131

125132
except Exception as e:
126133
return JSONResponse(content={"message": f"Error processing PDF: {str(e)}"}, status_code=500)
127134

128135
else:
129136
return JSONResponse(content={"message": "Invalid file type. Please upload a PDF."}, status_code=400)
130137

138+
131139
@app.post("/query")
132140
async def query(
133141
query: str,
@@ -136,14 +144,14 @@ async def query(
136144
try:
137145
if not query:
138146
return JSONResponse(content={"message": "No query provided"}, status_code=400)
139-
147+
140148
# Make sure the model and vectorstore are ready
141149
if engine.llm is None or engine.vectorstore is None:
142150
return JSONResponse(
143-
content={"message": "No document processed yet or model not loaded"},
151+
content={"message": "No document processed yet or model not loaded"},
144152
status_code=400
145153
)
146-
154+
147155
# Get answer to the query
148156
response_message = await engine.answer_question(query)
149157
return {"message": response_message}
@@ -155,13 +163,13 @@ async def query(
155163
print("""
156164
🎉 FastAPI server is Ready! 🎉
157165
Your application is now live and waiting for interaction!
158-
166+
159167
**🚀 Essential Step: Activate Your Browser Plugin!**
160-
168+
161169
- This application operates through its dedicated browser extension.
162170
- To begin, please open your web browser and locate the plugin's icon, which looks like `T`, in your toolbar (it's often in the top-right corner).
163171
- Click on the `T` icon to access the browser extension
164-
172+
165173
**Having trouble?**
166174
- Is the plugin loaded? If you haven't already, please load it by following the Readme.md file.
167175
- Is it enabled? Double-check your browser's extension settings to ensure the plugin isn't disabled.

0 commit comments

Comments
 (0)