+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
+from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
+from langchain.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+from langchain_chroma import Chroma
+from optimum.intel import OVModelForCausalLM
+from transformers import AutoTokenizer, pipeline
+import openvino as ov
+import asyncio  # needed so the example under __main__ can drive the async API
 import os
 os.environ['USER_AGENT'] = "SummarizeBot"
 
-import openvino as ov
-from transformers import AutoTokenizer, pipeline
-from optimum.intel import OVModelForCausalLM
-from langchain_chroma import Chroma
-from langchain.chains import RetrievalQA
-from langchain.prompts import PromptTemplate
-from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
-from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 def get_device():
     core = ov.Core()
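
Note: the rest of get_device() falls between hunks and is not shown here. For readers following along, a minimal sketch of such a helper, assuming it simply prefers GPU over CPU based on what the OpenVINO runtime reports:

    import openvino as ov

    def get_device():
        # Query the OpenVINO runtime for available devices and prefer GPU.
        core = ov.Core()
        return "GPU" if "GPU" in core.available_devices else "CPU"
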
@@ -23,11 +23,11 @@ def get_device():
 class TextSummarizerEngine:
     """
     A class for managing text summarization and Q&A operations using LLMs and vector stores.
-
+
     This class encapsulates the functionality for loading LLMs, processing documents,
     and generating summaries and answers for both web content and PDFs.
     """
-
+
     # Prompt Templates for Summarization & QA Bot
     SUMMARY_TEMPLATE = """Write a concise summary of the following: "{context}" CONCISE SUMMARY: """
     QUERY_TEMPLATE = """Use the following pieces of context to answer the question at the end.
@@ -37,41 +37,41 @@ class TextSummarizerEngine:
     {context}
     Question: {question}
     Helpful Answer:"""
-
+
     # Embedding model name
     EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
-
+
     def __init__(self):
         """Initialize the TextSummarizerEngine with empty attributes."""
         self.model_id = None
         self.model_path = None
         self.llm = None
         self.tokenizer = None
-
+
         # Vector store for document processing
         self.vectorstore = None
-
+
         # Initialize embeddings
         self.embeddings = HuggingFaceEmbeddings(model_name=self.EMBEDDING_MODEL)
-
+
     def load_model(self, model_id):
         """
         Load and initialize the specified LLM model using OpenVINO optimization.
         """
         self.model_id = model_id
-
+
         if model_id == "Meta LLama 2":
             self.model_path = "../models/ov_llama_2"
         elif model_id == "Qwen 7B Instruct":
             self.model_path = "../models/ov_qwen7b"
         else:
             raise ValueError(f"Unsupported model ID: {model_id}")
-
+
         # Load the model with OpenVINO optimization
         device = get_device()
         model = OVModelForCausalLM.from_pretrained(self.model_path, device=device)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
-
+
         # Create a text generation pipeline
         pipe = pipeline(
             "text-generation",
@@ -80,10 +80,10 @@ def load_model(self, model_id):
             max_new_tokens=4096,
             device=model.device,
         )
-
+
         # Create a LangChain compatible model
         self.llm = HuggingFacePipeline(pipeline=pipe)
-
+
     def _process_document(self, loader):
         """
         Process document content from a loader and create a vector store.
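
Note: the head of _process_document() is elided by the hunk boundary. Based on the visible splitter arguments, the missing lines presumably load the documents and build the splitter, along these lines (a sketch, not the confirmed source):

    page_data = loader.load()  # raw Document objects from WebBaseLoader / PyPDFLoader
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=20
    )
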
@@ -94,36 +94,36 @@ def _process_document(self, loader):
             chunk_size=1000, chunk_overlap=20
         )
         all_splits = text_splitter.split_documents(page_data)
-
+
         # Create and return a vector store from the document chunks
         vectorstore = Chroma.from_documents(
-            documents=all_splits, 
+            documents=all_splits,
             embedding=self.embeddings
         )
         return vectorstore
-
+
     async def process_document(self, source, is_url=True):
         """
         Process a document (URL or PDF) to generate a summary of its content.
         """
         if not self.llm:
             raise ValueError("Model not loaded. Call load_model first.")
-
+
         # Create the appropriate loader based on the document type
         if is_url:
             loader = WebBaseLoader(source)
         else:
             loader = PyPDFLoader(source, extract_images=False)
-
+
         # Process the document content
         self.vectorstore = self._process_document(loader)
-
+
         # Create a prompt for summarization
         prompt = PromptTemplate(
-            template=self.SUMMARY_TEMPLATE, 
+            template=self.SUMMARY_TEMPLATE,
             input_variables=["context"]
         )
-
+
         # Create a retrieval QA chain
         qa_chain = RetrievalQA.from_chain_type(
             llm=self.llm,
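
Note: the middle arguments of RetrievalQA.from_chain_type() fall outside this hunk. A typical construction that matches the visible pieces, assuming a "stuff" chain over the default Chroma retriever (both are assumptions):

    qa_chain = RetrievalQA.from_chain_type(
        llm=self.llm,
        chain_type="stuff",                         # assumed chain type
        retriever=self.vectorstore.as_retriever(),  # assumed retriever wiring
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=False,
    )
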
@@ -132,7 +132,7 @@ async def process_document(self, source, is_url=True):
             chain_type_kwargs={"prompt": prompt},
             return_source_documents=False,
         )
-
+
         # Generate a summary
         question = "Please summarize the entire content in one paragraph of 100 words"
         summary = qa_chain.invoke(question)["result"]
@@ -142,20 +142,20 @@ async def process_document(self, source, is_url=True):
         else:
             summary = "No summary found."
         return summary
-
+
     async def answer_question(self, query):
         """
         Answer a question about previously processed document content.
         """
         if not self.llm or not self.vectorstore:
             raise ValueError("Document content not processed or model not loaded.")
-
+
         # Create a prompt for Q&A
         prompt = PromptTemplate(
-            template=self.QUERY_TEMPLATE, 
+            template=self.QUERY_TEMPLATE,
             input_variables=["context", "question"]
         )
-
+
         # Create a retrieval QA chain
         reduce_chain = RetrievalQA.from_chain_type(
             llm=self.llm,
@@ -164,7 +164,7 @@ async def answer_question(self, query):
             chain_type_kwargs={"prompt": prompt},
             return_source_documents=False,
         )
-
+
         # Generate an answer
         response = reduce_chain.invoke({"query": query})['result']
         start_idx = response.find("Helpful Answer:")
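
Note: the branch that consumes start_idx is elided. Given the else-branch shown below, the hidden lines presumably slice off everything before the marker, roughly (a sketch under that assumption):

    if start_idx != -1:
        response = response[start_idx + len("Helpful Answer:"):].strip()
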
@@ -173,7 +173,7 @@ async def answer_question(self, query):
         else:
             response = "No answer found."
         return response
-
+
     def cleanup(self):
         """Clean up resources when done."""
         if self.vectorstore:
@@ -182,12 +182,12 @@ def cleanup(self):
         except Exception:
             print("Failed to delete vector store collection")
 
+
 if __name__ == "__main__":
     # Example usage
     engine = TextSummarizerEngine()
     engine.load_model("Qwen 7B Instruct")
-
+
     # Process a document (URL or PDF). process_document is a coroutine, so it
     # must be driven with asyncio.run rather than called bare.
     summary = asyncio.run(engine.process_document("https://example.com/document", is_url=True))
     print(summary)
     engine.cleanup()
-
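
For completeness, a hedged end-to-end sketch of the intended usage, including the Q&A path that the __main__ example does not exercise (the PDF path and the question are hypothetical):

    import asyncio

    async def main():
        engine = TextSummarizerEngine()
        engine.load_model("Qwen 7B Instruct")
        # Summarize a local PDF instead of a URL.
        print(await engine.process_document("report.pdf", is_url=False))
        # Ask a follow-up question against the same vector store.
        print(await engine.answer_question("What are the key findings?"))
        engine.cleanup()

    asyncio.run(main())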