Summary

LangGraph is an open-source framework developed by LangChain for building complex, multi-step and multi-agent workflows using large language models (LLMs). This notebook walks through a multi-RAG workflow built with LangGraph, executing each stage in turn: retrieving documents from multiple sources, evaluating relevance with an LLM grader, refining queries when needed, and falling back to web search when information is missing. The notebook then generates a final answer, evaluates its quality, and emails the results.

The notebook also explains how to automate the entire pipeline—including sending the final report by email—using a GitHub Actions cron job.

RAG Pipeline Steps

  • How documents are fetched from provided URLs and split into chunks
  • How an LLM filters and scores relevance
  • How queries are automatically rewritten when retrieval fails
  • How the pipeline falls back to web search
  • How LangGraph coordinates multi-step RAG generation
  • How multiple questions can be processed and summarized
  • How the final synthesized report is emailed automatically

Running the Notebook

  • Local execution: Add keys/credentials (OpenAI, email, Tavily) to a .env file.
  • Scheduled automation: The same workflow can run via GitHub Actions, with credentials stored in GitHub Secrets.

Python functions and data files needed to run this notebook are available via this link.


In [1]:
import warnings
warnings.filterwarnings('ignore')
import os
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
# pydantic library for defining the expected input and output
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
load_dotenv(override=True)

# Load API keys
# Provide your credentials and API keys in a `.env` file to authenticate services
# (OpenAI API key, email credentials, Tavily API key).
# load_dotenv() has already populated os.environ; just verify the keys exist.
for key in ("OPENAI_API_KEY", "TAVILY_API_KEY", "EMAIL_API_KEY"):
    assert os.environ.get(key), f"{key} is missing from the environment/.env file"
USER_AGENT environment variable not set, consider setting it to identify your requests.
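The USER_AGENT warning above is emitted by the langchain_community web loaders; one way to silence it is to set the variable yourself before loading pages (the value below is an arbitrary example):

import os
os.environ.setdefault("USER_AGENT", "multi-rag-notebook/0.1")  # any identifying string works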

Table of Contents

  • 1  Enhanced Retrieval Augmented Generation
    • 1.1  Introduction
    • 1.2  Load Documents
    • 1.3  Corrected Retrieval
    • 1.4  Enhance Questions to Optimize Document Search
    • 1.5  Building Rubric to Evaluate Response
  • 2  LangGraph to Construct a Graph
    • 2.1  Function for Graph's Nodes
    • 2.2  Functions for Graph's Edges
    • 2.3  Build the Graph
    • 2.4  Run the Graph
  • 3  Multi RAG-Driven Auto Explorer
  • 4  Send Email
    • 4.1  Use an App Password
    • 4.2  Automatic Email Delivery Tool
      • 4.2.1  Put Python script in a GitHub repository
      • 4.2.2  Move app password into GitHub Secrets
      • 4.2.3  Create the GitHub Action (scheduler)
      • 4.2.4  Update Python script to read secrets from env vars

Enhanced Retrieval Augmented Generation¶

Introduction¶

Corrective RAG (CRAG) is an enhanced version of the standard Retrieval-Augmented Generation (RAG) framework that introduces self-reflection and corrective behavior into the retrieval and generation process.

🔍 What is RAG?

Traditional RAG systems:

  1. Retrieve documents relevant to a query from a knowledge base.
  2. Augment the query with those documents.
  3. Generate an answer using a language model.

But: RAG assumes the retriever always returns useful content—which isn’t always true.


✅ What is Corrective RAG (CRAG)?

CRAG introduces a feedback loop where the system evaluates the quality of the retrieved documents before generating an answer.

Key Concepts in CRAG:

  1. Self-grading or Self-reflection
    After retrieval, each document is graded for relevance using a language model.

  2. Corrective Behavior

    • If some documents are relevant, proceed with generation (possibly filtering out irrelevant ones).
    • If no documents are relevant or the model is uncertain, the system:
      • Supplements or replaces retrieval by using a secondary data source (e.g., a web search like Tavily).
      • Optionally rewrites the query to improve results.
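Putting these ideas together, the corrective loop can be sketched in a few lines of Python. Note that retrieve, grade, rewrite, web_search, and generate are hypothetical placeholders here; the real components are built step by step in the rest of this notebook.

# Minimal sketch of the CRAG control flow (hypothetical helper functions).
def corrective_rag(question, max_rewrites=3):
    for _ in range(max_rewrites):
        docs = retrieve(question)                      # standard RAG retrieval
        relevant = [d for d in docs if grade(question, d) == "Yes"]
        if relevant:                                   # proceed with generation
            return generate(question, relevant)
        question = rewrite(question)                   # corrective step: rewrite query
    return generate(question, web_search(question))    # corrective step: web fallback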

Load Documents¶

In [2]:
# Define source URLs to load articles from
source_links = [
    "https://www.geeksforgeeks.org/region-proposal-network-rpn-in-object-detection/",
    "https://www.geeksforgeeks.org/machine-learning/faster-r-cnn-ml/",
    "https://www.geeksforgeeks.org/what-is-ordinal-data/",
    "https://www.geeksforgeeks.org/introduction-convolution-neural-network/",
    "https://d2l.ai/chapter_computer-vision/"
]

# Load documents from the URLs
raw_documents = [WebBaseLoader(link).load() for link in source_links]
flattened_docs = [doc for group in raw_documents for doc in group]

# Initialize a text splitter for chunking the documents
chunker = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=100
)

chunked_documents = chunker.split_documents(flattened_docs)

# Create a vector store with embeddings
doc_vectorstore = Chroma.from_documents(
    documents=chunked_documents,
    collection_name="cnn-rpn-knowledge-base",
    embedding=OpenAIEmbeddings(),
)

# Create a retriever from the vector store
doc_retriever = doc_vectorstore.as_retriever()
In [3]:
query = "What is Intersection-Over-Union?"
results = doc_retriever.invoke(query)

for result in results:
    print(result.page_content[:100], '...', result.metadata['source'])
Intersection-Over-Union (IoU)The foreground and background labels are assigned based on a metric cal ... https://www.geeksforgeeks.org/region-proposal-network-rpn-in-object-detection/
14.4. Anchor Boxes
14.4.1. Generating Multiple Anchor Boxes
14.4.2. Intersection over Union (IoU)
14 ... https://d2l.ai/chapter_computer-vision/
training allows RPN and the detection network to share features.3. Region of Interest(RoI) PoolingRe ... https://www.geeksforgeeks.org/machine-learning/faster-r-cnn-ml/
14.1. Image Augmentation
14.1.1. Common Image Augmentation Methods
14.1.2. Training with Image Augme ... https://d2l.ai/chapter_computer-vision/

Corrected Retrieval¶

Grade each retrieved document as relevant ('Yes') or not relevant ('No') to the question.

In [4]:
# Custom schema for grading document relevance
class RelevanceScore(BaseModel):
    """Binary relevance indicator for retrieved content."""

    relevance: str = Field(
        description="Return strictly 'Yes' if the document contains the information needed to answer the question directly. Otherwise return 'No'."
    )
    justification: str = Field(
        description="A short explanation (one sentence) why you said Yes or No."
    )

# LLM setup with structured output schema
grader_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Bind LLM with the output schema
structured_grader = grader_model.with_structured_output(RelevanceScore)

# Prompt definition
grading_instruction = """Return 'Yes' or 'No' to indicate if the document is relevant to the given question."""

grading_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", grading_instruction),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

# Composing the grader pipeline
document_grader = grading_prompt | structured_grader

# Iterate over retrieved documents and evaluate relevance
print(f'Question: {query}\n')
for result in results:
    content = result.page_content
    evaluation = document_grader.invoke({"document": content, "question": query})
    source = result.metadata['source']
    print(f'{evaluation}\ndocument: {content[:100]}\nsource: {source}')
    print('--------------------------------------------')
Question: What is Intersection-Over-Union?

relevance='Yes' justification='The document provides a clear definition of Intersection-Over-Union (IoU) and explains how it is calculated.'
document: Intersection-Over-Union (IoU)The foreground and background labels are assigned based on a metric cal
source: https://www.geeksforgeeks.org/region-proposal-network-rpn-in-object-detection/
--------------------------------------------
relevance='Yes' justification="The document includes a section titled 'Intersection over Union (IoU)', which suggests it contains information about the concept."
document: 14.4. Anchor Boxes
14.4.1. Generating Multiple Anchor Boxes
14.4.2. Intersection over Union (IoU)
14
source: https://d2l.ai/chapter_computer-vision/
--------------------------------------------
relevance='No' justification='The document does not mention Intersection-Over-Union or provide any information related to it.'
document: training allows RPN and the detection network to share features.3. Region of Interest(RoI) PoolingRe
source: https://www.geeksforgeeks.org/machine-learning/faster-r-cnn-ml/
--------------------------------------------
relevance='Yes' justification="The document includes a section on 'Intersection over Union (IoU)' which directly addresses the user's question."
document: 14.1. Image Augmentation
14.1.1. Common Image Augmentation Methods
14.1.2. Training with Image Augme
source: https://d2l.ai/chapter_computer-vision/
--------------------------------------------
In [5]:
# Load a prebuilt RAG prompt template from the LangChain hub
from langchain import hub

rag_prompt_template = hub.pull("rlm/rag-prompt")

# Display the type and content of each message in the prompt template
for msg in rag_prompt_template.messages:
    print(type(msg))
    print(msg.prompt.template)
    print('--------------------------------------------')
<class 'langchain_core.prompts.chat.HumanMessagePromptTemplate'>
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:
--------------------------------------------
In [6]:
from langchain_core.output_parsers import StrOutputParser

# Initialize the language model
chat_model = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# Helper function to format retrieved documents
def join_documents(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Construct the RAG pipeline
# Note: StrOutputParser() ensures clean string output from the LLM
rag_pipeline = rag_prompt_template | chat_model | StrOutputParser()

# Execute the chain with context and question
response = rag_pipeline.invoke({
    "context": join_documents(results),
    "question": query
})

print(response)
Intersection-Over-Union (IoU) is a metric used to measure the overlap between an anchor box and the object of interest. It is calculated as the ratio of the area of intersection to the area of the union of the two boxes. An IoU greater than 0.7 typically indicates that the area is classified as foreground.

Enhance Questions to Optimize Document Search¶

In [7]:
# Using a different LLM model to demonstrate flexibility in multi-model usage
search_optimizer_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Prompt template for rewriting questions to be more effective for web search
system_instruction = """You are a query rewriter that improves an input question for optimal web search results. 
Analyze the question and identify its underlying semantic intent or meaning."""

query_rewrite_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_instruction),
        (
            "human",
            "Here is the original query: \n\n {question} \n Please rewrite it to be more effective.",
        ),
    ]
)

# Construct the pipeline: prompt -> LLM -> output parser
query_optimizer = query_rewrite_prompt | search_optimizer_llm | StrOutputParser()

# Execute with a sample question
print(f"Original question: {query}")
optimized_query = query_optimizer.invoke({"question": query})
print(f"Optimized question: {optimized_query}")
Original question: What is Intersection-Over-Union?
Optimized question: What is the concept of Intersection-Over-Union (IoU) in image processing and machine learning?

Building Rubric to Evaluate Response¶

A Chain of Thought prompt encourages the model to explicitly explain its reasoning steps before giving a final answer. It's especially useful in evaluation or multi-step decision tasks.

The prompt below is guided to produce a reasoning step before each rating, which aligns with Chain of Thought prompting.

In [8]:
import json

template_reason = '''
You are evaluating whether an assistant’s response fully and correctly answers all parts of a user’s question.

Your task:
- Provide a concise **reason** explaining whether the response answers all questions.
- Assign a **score** from 1 to 10 based on completeness.

### User Question
{query}

### Assistant's Response
{llm_output}

### Output Format (JSON)
{{"reason": "...", "score": ...}}

'''

template_prompt = ChatPromptTemplate.from_template(template_reason)
# message template
messages = template_prompt.format_messages(query=query, 
                                           llm_output=response)
eval_json = chat_model.invoke(messages).content
json.loads(eval_json)
Out[8]:
{'reason': "The response provides a clear definition of Intersection-Over-Union (IoU) and explains how it is calculated, but it lacks details about its applications and significance in various fields such as computer vision. Therefore, it does not fully address the user's question in a comprehensive manner.",
 'score': 7}

LangGraph to Construct a Graph¶

LangGraph is an open-source framework developed by LangChain for building complex, multi-agent workflows using large language models (LLMs). Unlike traditional linear pipelines, LangGraph employs a graph-based architecture where agents—each handling specific tasks—are represented as nodes, and their interactions are defined by edges, allowing for dynamic, stateful, and iterative processes.

This design enables the creation of sophisticated AI systems where agents can collaborate, adapt based on feedback, and incorporate human-in-the-loop interactions, making it particularly effective for applications requiring modularity, scalability, and flexibility.
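As a minimal illustration of this node/edge model (separate from the pipeline built below), a two-node graph might look like the following sketch:

# Minimal two-node LangGraph: each node returns an update to the shared
# state, and edges define the execution order.
from typing_extensions import TypedDict
from langgraph.graph import START, END, StateGraph

class DemoState(TypedDict):
    text: str

def shout(state: DemoState):
    return {"text": state["text"].upper()}

def punctuate(state: DemoState):
    return {"text": state["text"] + "!"}

demo = StateGraph(DemoState)
demo.add_node("shout", shout)
demo.add_node("punctuate", punctuate)
demo.add_edge(START, "shout")
demo.add_edge("shout", "punctuate")
demo.add_edge("punctuate", END)
print(demo.compile().invoke({"text": "hello"}))  # {'text': 'HELLO!'}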

In [9]:
from typing import Any, List
from typing_extensions import TypedDict

# Define a structured dictionary to track the workflow state of the graph process
class PipelineState(TypedDict):
    """
    A dictionary-style representation of the current state in the graph-based pipeline.

    Attributes:
        query (str): The original or reformulated question.
        doc_retriever (Any): Retriever used to fetch documents for the query.
        llm_output (str): The response generated by the language model.
        retrieved_docs (List[str]): A collection of documents retrieved based on the query.
        refine_query_count (int): Number of times the query has been rephrased.
        web_search_count (int): Number of web searches performed.
        perform_web_search (str): 'Yes'/'No' flag indicating whether a web search
                                  should be triggered.
        response_score (int): Completeness score (1-10) assigned to the generated answer.
    """

    query: str
    doc_retriever: Any
    llm_output: str
    retrieved_docs: List[str]
    refine_query_count: int
    web_search_count: int
    perform_web_search: str
    response_score: int

Function for Graph's Nodes¶

This section defines a set of modular functions that operate on a shared query state to build a retrieval-augmented generation (RAG) pipeline. Each function is responsible for a specific step in the process and modifies or extends the shared PipelineState dictionary.

The overall flow is structured to be stateful and composable, making it suitable for integration with graph-based execution frameworks like LangGraph.

In [10]:
from langchain.schema import Document
import uuid
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter


def initialize_state(query_state):
    """
    Initializes values in the query state.

    Args:
        query_state (dict): The current state of the query workflow.

    Returns:
        dict: State dictionary with initialized refine_query_count and web_search_count.
    """
    print("---INITIALIZE QUERY STATE---")
    return {"refine_query_count": 0, 
            "web_search_count": 0,
            "doc_retriever": query_state["doc_retriever"],
           }

def fetch_documents(query_state):
    """
    Retrieves relevant documents for the given query.

    Args:
        query_state (dict): The current state of the query workflow.

    Returns:
        dict: An updated state dictionary with a new key 'retrieved_docs'.
    """
    print("---FETCH DOCUMENTS---")

    query = query_state["query"]

    # Perform document retrieval
    doc_retriever = query_state["doc_retriever"]
    relevant_docs = doc_retriever.invoke(query)
    return {"retrieved_docs": relevant_docs}

def get_retriever(source_links):
    """
    Args:
        source_links: list of website URLs to load documents from

    Returns:
        doc_retriever: retriever backed by an in-memory vector store
    """
    # Load documents from the URLs
    raw_documents = [WebBaseLoader(link).load() for link in source_links]
    flattened_docs = [doc for group in raw_documents for doc in group]
    
    # Initialize a text splitter for chunking the documents
    chunker = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=250, chunk_overlap=100
    )
    
    chunked_documents = chunker.split_documents(flattened_docs)
    collection_id = f"collection-{uuid.uuid4()}"
    
    # Create a vector store with embeddings
    doc_vectorstore = Chroma.from_documents(
        documents=chunked_documents,
        collection_name=collection_id,
        embedding=OpenAIEmbeddings(),
        persist_directory=None, # <- in-memory, no history
    )
    
    # Create a retriever from the vector store
    doc_retriever = doc_vectorstore.as_retriever()
    return doc_retriever

def filter_relevant_documents(query_state):
    """
    Evaluates and filters retrieved documents for relevance to the query.

    Args:
        query_state (dict): The current state of the query workflow.

    Returns:
        dict: Updated state with only relevant documents and web search indicator.
    """
    print("---EVALUATE DOCUMENT RELEVANCE---")
    query = query_state["query"]
    documents = query_state["retrieved_docs"]

    filtered_results = []
    should_perform_web_search = "No"

    for doc in documents:
        score = document_grader.invoke({
            "question": query,
            "document": doc.page_content
        })
        is_relevant = score.relevance
        print(doc.metadata.get('source', 'Unknown'), f'Score: {is_relevant}')

        if is_relevant == "Yes":
            print("---DOCUMENT IS RELEVANT---")
            filtered_results.append(doc)

    if not filtered_results:
        print("---NO RELEVANT DOCUMENTS FOUND---")
        should_perform_web_search = "Yes"

    return {
        "retrieved_docs": filtered_results,
        "perform_web_search": should_perform_web_search
    }


def refine_query(query_state):
    """
    Rewrites the input query to improve clarity and search effectiveness.

    Args:
        query_state (dict): The current state of the query workflow.

    Returns:
        dict: Updated state with a refined query and incremented transformation count.
    """
    print("---REFINE QUERY---")

    query = query_state["query"]
    refine_query_count = query_state["refine_query_count"] + 1

    # Rewrite the query using a question rewriter
    improved_query = query_optimizer.invoke({"question": query})

    print("---IMPROVED QUERY---")
    print(improved_query)

    return {
        "query": improved_query,
        "refine_query_count": refine_query_count}


def generate_response(query_state):
    """
    Generates a response using RAG (Retrieval-Augmented Generation).

    Args:
        query_state (dict): The current state of the query workflow.

    Returns:
        dict: An updated state dictionary with a new key 'llm_output'.
    """
    print("---GENERATE RESPONSE---")

    query = query_state["query"]
    documents = query_state["retrieved_docs"]

    # Generate an answer using RAG
    response = rag_pipeline.invoke({
        "context": join_documents(documents),
        "question": query
    })

    return {"llm_output": response}

Functions for Graph's Edges¶

This section defines a set of modular functions for the graph's edges. You need to get an API key from https://app.tavily.com for web search.
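Before wiring the search into the graph, you can sanity-check the key with a standalone call (the query string is just an example):

# Quick standalone check of the Tavily key. Each result is a dict whose
# "content" and "url" fields are used by the web_search node below.
from langchain.utilities.tavily_search import TavilySearchAPIWrapper

tavily = TavilySearchAPIWrapper()  # reads TAVILY_API_KEY from the environment
for hit in tavily.results(query="What is Intersection-Over-Union?", max_results=2):
    print(hit["url"])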

In [11]:
from langchain.schema.document import Document
import json

def evaluate_response(query_state):
    """
    Args:
        query_state: The current query state containing the search question.

    Returns:
        dict: Updated state with 'response_score' (1-10) and the original 'llm_output'.
    """
    #
    template_reason = '''
    You are evaluating whether an assistant’s response fully and correctly answers all parts of a user’s question.
    
    Your task:
    - Provide a concise **reason** explaining whether the response answers all questions.
    - Assign a **score** from 1 to 10 based on completeness.
    
    ### User Question
    {query}
    
    ### Assistant's Response
    {llm_output}
    
    ### Output Format (JSON)
    {{"reason": "...", "score": ...}}
    
    '''
    
    print(
        "---ASSESSING RESPONSE: PREDICT SCORE FOR LLM RESPONSE---"
    )  
    query = query_state["query"]
    generate_response = query_state["llm_output"]
    template_prompt = ChatPromptTemplate.from_template(template_reason)
    # message template
    messages = template_prompt.format_messages(query=query, 
                                               llm_output=generate_response)
    respose = chat_model.invoke(messages).content
    

    return {
        "response_score": json.loads(respose)['score'],
        "llm_output": generate_response
    }

from langchain.utilities.tavily_search import TavilySearchAPIWrapper
def web_search(query_state):
    """
    Executes a web search using the given query and returns a list of document objects.

    Args:
        query_state: The current query state containing the search question.

    Returns:
        Dict[str, Any]: Updated state including the original question and retrieved documents.
    """
    query = query_state["query"]
    web_search_count = query_state["web_search_count"] + 1
    #
    search_client = TavilySearchAPIWrapper()
    search_results = search_client.results(query=query, max_results=3)

    retrieved_docs = [
        Document(page_content=item["content"], metadata={"source": item["url"]})
        for item in search_results
    ]
    print(
        "---WEB SEARCH: RETRIEVED DOCS FROM WEB SEARCH---"
    ) 
    
    return {
        "query": query,
        "retrieved_docs": retrieved_docs,
        "web_search_count": web_search_count
    }


def decide_next_action(query_state):
    """
    Determines the next step in the workflow: whether to generate an answer
    or rephrase the query again for better document retrieval.

    Args:
        query_state (dict): The current state of the query process.

    Returns:
        str: The next action to take - 'apply_generate', 'apply_transform_query', or 'apply_web_search'.
    """

    print("---ASSESSING DOCUMENT RELEVANCE---")
    requires_web_search = query_state["perform_web_search"]

    if requires_web_search == "Yes":
        # If the query has already been rewritten multiple times with no success,
        # fall back to a web search instead of rewriting again.
        if query_state["refine_query_count"] >= 3:
            print(
                "---DECISION: MAX REWRITES REACHED AND NO RELEVANT DOCUMENTS FOUND → LETS APPLY WEB SEARCH---"
            )        

            return "apply_web_search"

        # Still below the rewrite threshold; attempt another reformulation.
        print(
            "---DECISION: NO RELEVANT DOCUMENTS FOUND YET → TRANSFORM QUERY AGAIN---"
        )
        return "apply_transform_query"

    else:
        # Relevant documents are present; move on to answer generation.
        print("---DECISION: RELEVANT DOCUMENTS FOUND → GENERATE---")
        return "apply_generate"
    
    
def decide_to_end(query_state):
    """
    Determines whether to end the workflow or retry with a web search.

    Args:
        query_state (dict): The current state of the query process.

    Returns:
        str: Either 'apply_end' or 'apply_web_search'.
    """

    print("---ASSESSING to END the WORKFLOW OR NOT ---")
    response_score_value = query_state["response_score"]

    if response_score_value >= 6:
        # A score of at least 6 is treated as good enough to end the run:
        # without ground truth, a score at this threshold is acceptable.

        print(
            "---DECISION: THE SCORE IS REASONABLE → END---"
        )

        return "apply_end"

    elif query_state["web_search_count"] <= 2:
        # The score is low and web-search retries remain; search again.
        print("---DECISION: THE SCORE IS LOW → LETS APPLY WEB SEARCH (again)---")
        return "apply_web_search" 
    
    else:
        # The score is still low after multiple web searches; give up and end.
        print("---DECISION: THE SCORE IS LOW AFTER MULTIPLE WEB SEARCHES → END---")
        return "apply_end"    

Build the Graph¶

In [12]:
from langgraph.graph import START, END, StateGraph

# Create a new stateful graph using the defined PipelineState structure
pipeline_graph = StateGraph(PipelineState)

# Register nodes
pipeline_graph.add_node("initialize_state", initialize_state)
pipeline_graph.add_node("fetch_documents", fetch_documents)
pipeline_graph.add_node("filter_relevant_documents", filter_relevant_documents)
pipeline_graph.add_node("generate_response", generate_response)
pipeline_graph.add_node("evaluate_response", evaluate_response)
pipeline_graph.add_node("refine_query", refine_query)
pipeline_graph.add_node("web_search", web_search)

# --- GRAPH LOGIC ---

# Start → Initialize
pipeline_graph.add_edge(START, "initialize_state")

# Initialize → Fetch Documents
pipeline_graph.add_edge("initialize_state", "fetch_documents")

# Fetch → Filter Relevant
pipeline_graph.add_edge("fetch_documents", "filter_relevant_documents")

# Filter → Branch
pipeline_graph.add_conditional_edges(
    "filter_relevant_documents",
    decide_next_action,
    {
        "apply_transform_query": "refine_query",
        "apply_web_search": "web_search",
        "apply_generate": "generate_response",
    }
)

# If refine_query → go back to fetch
pipeline_graph.add_edge("refine_query", "fetch_documents")

# If web_search → ALWAYS → generate_response
pipeline_graph.add_edge("web_search", "generate_response")

# Normal generation → evaluation
pipeline_graph.add_edge("generate_response", "evaluate_response")

# Evaluate → choose end or web_search retry
pipeline_graph.add_conditional_edges(
    "evaluate_response",
    decide_to_end,
    {
        "apply_end": END,
        "apply_web_search": "web_search",
    }
)

# Compile graph
retrieval_qa_pipeline = pipeline_graph.compile()
retrieval_qa_pipeline
Out[12]:
(Rendered diagram of the compiled LangGraph workflow.)

Run the Graph¶

Here are example runs of the developed graph. Make sure to provide the source links for the documents and the questions:

  • Query 1
In [13]:
source_links = [
    "https://www.geeksforgeeks.org/region-proposal-network-rpn-in-object-detection/",
    "https://www.geeksforgeeks.org/machine-learning/faster-r-cnn-ml/",
    "https://www.geeksforgeeks.org/what-is-ordinal-data/",
    "https://www.geeksforgeeks.org/introduction-convolution-neural-network/",
    "https://d2l.ai/chapter_computer-vision/"
]

doc_retriever = get_retriever(source_links)

inputs = {
    "query": "What is feature map?",
    "doc_retriever": doc_retriever
} 
    
for output in retrieval_qa_pipeline.stream(inputs):
    for key, value in output.items():
        # Node
        print(f'----------Node "{key}" Completed-------------')
        print("\n")
# Final generation
print(value["llm_output"])
---INITIALIZE QUERY STATE---
----------Node "initialize_state" Completed-------------


---FETCH DOCUMENTS---
----------Node "fetch_documents" Completed-------------


---EVALUATE DOCUMENT RELEVANCE---
https://www.geeksforgeeks.org/region-proposal-network-rpn-in-object-detection/ Score: No
https://www.geeksforgeeks.org/introduction-convolution-neural-network/ Score: Yes
---DOCUMENT IS RELEVANT---
https://www.geeksforgeeks.org/introduction-convolution-neural-network/ Score: Yes
---DOCUMENT IS RELEVANT---
https://www.geeksforgeeks.org/machine-learning/faster-r-cnn-ml/ Score: No
---ASSESSING DOCUMENT RELEVANCE---
---DECISION: RELEVANT DOCUMENTS FOUND → GENERATE---
----------Node "filter_relevant_documents" Completed-------------


---GENERATE RESPONSE---
----------Node "generate_response" Completed-------------


---ASSESSING RESPONSE: PREDICT SCORE FOR LLM RESPONSE---
---ASSESSING to END the WORKFLOW OR NOT ---
---DECISION: THE SCORE IS REASONABLE → END---
----------Node "evaluate_response" Completed-------------


A feature map is the output generated by applying a set of learnable filters (kernels) to an input image in a convolutional layer. It represents the detected features of the input image, with dimensions determined by the number of filters used. For example, using 12 filters on a 32 x 32 input image results in a feature map of dimension 32 x 32 x 12.
  • Query 2
In [14]:
source_links = [
    "https://www.ratehub.ca/"
]

doc_retriever = get_retriever(source_links)

inputs = {
    "query": "What is the best mortgage rate for 5‑year fixed in Calgary, also give name of the bank",
    "doc_retriever": doc_retriever
} 
for output in retrieval_qa_pipeline.stream(inputs):
    for key, value in output.items():
        # Node
        print(f'----------Node "{key}" Completed-------------')
        print("\n")
# Final generation
print(value["llm_output"])
---INITIALIZE QUERY STATE---
----------Node "initialize_state" Completed-------------


---FETCH DOCUMENTS---
----------Node "fetch_documents" Completed-------------


---EVALUATE DOCUMENT RELEVANCE---
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
---NO RELEVANT DOCUMENTS FOUND---
---ASSESSING DOCUMENT RELEVANCE---
---DECISION: NO RELEVANT DOCUMENTS FOUND YET → TRANSFORM QUERY AGAIN---
----------Node "filter_relevant_documents" Completed-------------


---REFINE QUERY---
---IMPROVED QUERY---
What are the current best mortgage rates for a 5-year fixed term in Calgary, and which banks offer them?
----------Node "refine_query" Completed-------------


---FETCH DOCUMENTS---
----------Node "fetch_documents" Completed-------------


---EVALUATE DOCUMENT RELEVANCE---
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
---NO RELEVANT DOCUMENTS FOUND---
---ASSESSING DOCUMENT RELEVANCE---
---DECISION: NO RELEVANT DOCUMENTS FOUND YET → TRANSFORM QUERY AGAIN---
----------Node "filter_relevant_documents" Completed-------------


---REFINE QUERY---
---IMPROVED QUERY---
What are the best current mortgage rates for a 5-year fixed term in Calgary, and which banks provide these rates?
----------Node "refine_query" Completed-------------


---FETCH DOCUMENTS---
----------Node "fetch_documents" Completed-------------


---EVALUATE DOCUMENT RELEVANCE---
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
---NO RELEVANT DOCUMENTS FOUND---
---ASSESSING DOCUMENT RELEVANCE---
---DECISION: NO RELEVANT DOCUMENTS FOUND YET → TRANSFORM QUERY AGAIN---
----------Node "filter_relevant_documents" Completed-------------


---REFINE QUERY---
---IMPROVED QUERY---
What are the current best mortgage rates for a 5-year fixed term in Calgary, and which banks offer these rates?
----------Node "refine_query" Completed-------------


---FETCH DOCUMENTS---
----------Node "fetch_documents" Completed-------------


---EVALUATE DOCUMENT RELEVANCE---
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
https://www.ratehub.ca/ Score: No
---NO RELEVANT DOCUMENTS FOUND---
---ASSESSING DOCUMENT RELEVANCE---
---DECISION: MAX REWRITES REACHED AND NO RELEVANT DOCUMENTS FOUND → LETS APPLY WEB SEARCH---
----------Node "filter_relevant_documents" Completed-------------


---WEB SEARCH: RETRIEVED DOCS FROM WEB SEARCH---
----------Node "web_search" Completed-------------


---GENERATE RESPONSE---
----------Node "generate_response" Completed-------------


---ASSESSING RESPONSE: PREDICT SCORE FOR LLM RESPONSE---
---ASSESSING to END the WORKFLOW OR NOT ---
---DECISION: THE SCORE IS REASONABLE → END---
----------Node "evaluate_response" Completed-------------


The current best mortgage rates for a 5-year fixed term in Calgary are 3.94% offered by a Big 6 Bank and 3.99% offered by Meridian Credit Union and a Canadian Lender. Other competitive rates include 4.09% from Simplii Financial and 4.14% from Alterna Savings. These rates may vary, so it's advisable to check with the banks for the most accurate information.

Visualize the Graph¶

In [16]:
# Visualize our graph
from IPython.display import Image, display
try:
    display(Image(retrieval_qa_pipeline.get_graph().draw_mermaid_png()))
except Exception:
    pass
(Rendered Mermaid diagram of the workflow graph.)
In [17]:
# if the above fails try this (requires grandalf)
print(retrieval_qa_pipeline.get_graph().draw_ascii())
                                                    +-----------+                                                
                                                    | __start__ |                                                
                                                    +-----------+                                                
                                                           *                                                     
                                                           *                                                     
                                                           *                                                     
                                                 +------------------+                                            
                                                 | initialize_state |                                            
                                                 +------------------+                                            
                                                           *                                                     
                                                           *                                                     
                                                           *                                                     
                                                  +-----------------+                                            
                                                  | fetch_documents |*                                           
                                                  +-----------------+ ********                                   
                                                 ***                          ********                           
                                              ***                                     *******                    
                                            **                                               ********            
                           +---------------------------+                                             ****        
                           | filter_relevant_documents |..                                              *        
                           +---------------------------+  ...........                                   *        
                                 ...                   .....         ...........                        *        
                               ..                           .....               ...........             *        
                             ..                                  .....                     ......       *        
                +-------------------+                                 ...                       +--------------+ 
                | generate_response |                                   .                       | refine_query | 
                +-------------------+                                   .                       +--------------+ 
                 ***             ***                                    .                                        
               **                   ***                                 .                                        
             **                        **                               .                                        
+-------------------+                    ***                           ..                                        
| evaluate_response |..                     **                       ..                                          
+-------------------+  .......                ***                  ..                                            
          .                   .........          **             ...                                              
          .                            .......     **         ..                                                 
          .                                   .....  **     ..                                                   
     +---------+                                  +------------+                                                 
     | __end__ |                                  | web_search |                                                 
     +---------+                                  +------------+                                                 

Multi RAG-Driven Auto Explorer¶

This project implements a multi-source Retrieval-Augmented Generation (RAG) system that collects car brand promotions for a specified date. It retrieves offers from various online sources, normalizes and summarizes the information, and allows users to compare promotions across brands in one place. The pipeline combines web retrieval, structured extraction, ranking, and LLM-based reasoning to produce accurate, up-to-date results.

In [18]:
from datetime import date

today = date.today()
formatted_date = today.strftime("%B %Y") 

rag_inputs = [
    {
        "question": f"What current Nissan promotions are available in Alberta as of {formatted_date}?",
        "links": ["https://www.stadiumnissan.com/our-promotions.html"]
    },
    {
        "question": f"What Honda vehicle offers are available in Alberta as of {formatted_date}?",
        "links": ["https://www.honda.ca/special-offers/alberta"]
    },
    {
        "question": f"What are Toyota’s latest special deals in Alberta as of {formatted_date}?",
        "links": ["https://www.shoptoyota.ca/alberta/en"]
    },
    {
        "question": f"What Mazda offers are available in Alberta as of {formatted_date}?",
        "links": ["https://albertamazdaoffers.ca/"]
    },
    {
        "question": f"What are Mercedes-Benz’s current special offers in Alberta as of {formatted_date}?",
        "links": ["https://www.mercedes-benz-countryhills.ca/en/special-offers"]
    }
]
In [ ]:
rag_outputs = []
for inp in rag_inputs:
    doc_retriever = get_retriever(inp["links"])
    inputs = {
        "query": inp["question"],
        "doc_retriever": doc_retriever
    }
    for output in retrieval_qa_pipeline.stream(inputs):
        for key, value in output.items():
            # Node
            print(f'----------Node "{key}" Completed-------------')
            print("\n")
    # Final generation
    rag_outputs.append(value["llm_output"])
In [20]:
from datetime import date

today = date.today()

def aggregate_answers(llm, rag_outputs):
    # Build the joined results first: backslash escapes are not allowed
    # inside f-string expressions before Python 3.12.
    rag_results = "\n\n".join(
        f"RAG #{i+1}\nQuestion: {inp['question']}\nAnswer:\n{out}"
        for i, (inp, out) in enumerate(zip(rag_inputs, rag_outputs))
    )

    aggregation_prompt = f"""
    You are an expert synthesizer.

    Combine the following RAG results into a clear and cohesive final summary,
    keeping each question's findings separate. Start with today's date: {today}

    RAG Results:
    {rag_results}
    """

    final = llm.invoke(aggregation_prompt)
    return final
In [21]:
final_answer = aggregate_answers(chat_model, rag_outputs)
In [22]:
from rich.console import Console
from rich.markdown import Markdown

console = Console()
console.print(Markdown(final_answer.content))
Date: December 11, 2025                                                                                            

Nissan Promotions in Alberta: For December 2025, Nissan is offering a $500 Nissan Bonus and a 0.5% Loyalty Rate    
Reduction for qualifying Nissan owners on the 2025 KICKS Play S. Additionally, there is a $2,500 Cash Purchase     
Bonus available for new and previously unregistered 2025 Sentra models purchased with cash from December 2, 2025,  
to January 2, 2026. These offers are available through Nissan Canada Finance, subject to approved credit.          

Honda Vehicle Offers in Alberta: In December 2025, Honda's lineup for sale in Alberta includes the 2025 Honda Civic
(various trims), Honda CR-V, Honda Odyssey, Honda Ridgeline, and Honda Passport. The 2025 Honda HR-V and Honda     
Accord may also be available, with specific trims and configurations varying.                                      

Toyota Special Deals in Alberta: As of December 2025, Toyota is offering savings of $3,299 on select 2026 Prius    
Plug-Ins and promotions on new 2026 bZ models. New 2026 Tundra models are also arriving, along with Toyota loyalty 
offers.                                                                                                            

Mazda Offers in Alberta: In December 2025, Mazda has promotions on the 2025 Mazda3, CX-5, CX-30, CX-90 PHEV, CX-70 
PHEV, CX-70 MHEV, and CX-90 MHEV models. A bonus of up to $4,000 is available for new and previously unregistered  
vehicles purchased or financed/leased during this period. For more details, it's recommended to check mazda.ca or  
contact a local Mazda dealer.                                                                                      

Mercedes-Benz Special Offers in Alberta: The December 2025 special offers for Mercedes-Benz vehicles in Alberta    
include cash credits on in-stock models like the 2025 AMG® GT55 and interest rate offers on new, unregistered 2025 
C 300 Sedans for a 48-month lease term. These offers are available for a limited time and may be combined, but are 
subject to change or cancellation without notice. For complete details, contacting an authorized Mercedes-Benz     
dealer or the Mercedes-Benz Customer Relations Centre is advised.                                                  

Send Email¶

The output from LangGraph can be emailed to yourself or multiple recipients.

You can send emails programmatically without embedding your real email password in the code, but only through secure, official methods provided by email services.

Here are the recommended and safe options:

Use an App Password¶

Email providers allow you to generate a special app-specific password. This is not your real password and can be revoked anytime.

Example (Gmail)

  1. Go to https://myaccount.google.com/security
  2. Turn on 2-Step Verification in your Google Account.
  3. Go to Security → App Passwords
  4. Generate one for “Mail”.
  5. Use that password in your script instead of your real password.

🔒 This keeps your real password completely safe.

In [23]:
# Put the App Password in `.env` 

APP_PASSWORD = os.getenv("APP_PASSWORD")

import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import markdown

def send_email_report(subject, markdown_text, to_emails):
    msg = MIMEMultipart("alternative")
    msg["From"] = "mrezvandehy@gmail.com"
    msg["To"] = ", ".join(to_emails)
    msg["Subject"] = subject

    # Convert markdown → HTML
    html_text = markdown.markdown(markdown_text)

    # Attach both plain and HTML versions
    msg.attach(MIMEText(markdown_text, "plain"))   # fallback
    msg.attach(MIMEText(html_text, "html"))        # formatted version

    # Send via Gmail
    with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
        server.login("mrezvandehy@gmail.com", APP_PASSWORD)
        server.send_message(msg)

    print(f"✅ Email sent to: {', '.join(to_emails)}")

    
markdown_report = final_answer.content

send_email_report(
    subject="🚗 Alberta Car Promotions Update",
    markdown_text=markdown_report,
    to_emails=["mrezvandehy@gmail.com"]
)    
✅ Email sent to: mrezvandehy@gmail.com

Automatic Email Delivery Tool¶

Put Python script in a GitHub repository¶

To run your Python script automatically on a schedule (daily/weekly/hourly) and send emails — you can use GitHub Actions. This is the easiest way to run a script regularly without keeping your computer on.

Here’s the full, simple setup.


Your repo structure should look like:

your-repo/
  ├── send_report.py
  ├── requirements.txt
  └── .github/
        └── workflows/
              └── email.yml
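For reference, a plausible requirements.txt for this pipeline might look like the list below; pin versions and trim packages to whatever your script actually imports:

langchain
langchain-community
langchain-openai
langgraph
chromadb
tiktoken
python-dotenv
markdown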

Move app password into GitHub Secrets¶

Never hard-code the Gmail app password in GitHub.

  1. Open your repo on GitHub
  2. Go to Settings → Secrets and variables → Actions


  3. Click New repository secret
  4. Add these secrets:
Secret Name      Value
EMAIL_ADDRESS    your email (you@gmail.com)
APP_PASSWORD     your Gmail App Password
RECIPIENTS       comma-separated recipient emails


Create the GitHub Action (scheduler)¶

In your repo, create:

.github/workflows/email.yml

Add this content:

name: Send Email Report

on:
  push:
  schedule:
    - cron: "0 14 * * *"   # Runs every day at 14:00 UTC
  workflow_dispatch:        # allows manual runs

jobs:
  send-email:
    runs-on: ubuntu-latest

    steps:
    - name: Checkout repository
      uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: "3.10"

    - name: Install dependencies
      run: pip install -r requirements.txt

    - name: Run email script
      env:
        EMAIL_ADDRESS: ${{ secrets.EMAIL_ADDRESS }}
        APP_PASSWORD: ${{ secrets.APP_PASSWORD }}
        RECIPIENTS: ${{ secrets.RECIPIENTS }}
      run: python send_report.py

Update Python script to read secrets from env vars¶

Use the script below (saved as send_report.py in your repo) to send the email report using Gmail's SMTP server. It securely loads all sensitive information (email address, app password, and recipient list) from environment variables, making it safe to use with GitHub Actions or other automated workflows.

Environment variables required:

  • EMAIL_ADDRESS : The Gmail address used to send emails
  • APP_PASSWORD : Gmail App Password (not your real password)
  • RECIPIENTS : Comma-separated list of recipient emails

The script builds a simple email message and sends it over an SSL-secured connection to Gmail's SMTP server.

import os
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

EMAIL_ADDRESS = os.getenv("EMAIL_ADDRESS")
APP_PASSWORD = os.getenv("APP_PASSWORD")
RECIPIENTS = os.getenv("RECIPIENTS").split(",")

def send_email_report():
    msg = MIMEMultipart()
    msg["From"] = EMAIL_ADDRESS
    msg["To"] = ", ".join(RECIPIENTS)
    msg["Subject"] = "📊 Alberta Car Promotions Update"

    msg.attach(MIMEText("Your automated report goes here!", "plain"))

    with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
        server.login(EMAIL_ADDRESS, APP_PASSWORD)
        server.send_message(msg)

    print("Email sent!")

if __name__ == "__main__":
    send_email_report()

After pushing the code to GitHub, it will run automatically.

  • GitHub Actions will execute the script every day at 14:00 UTC.

  • You can adjust the schedule using cron syntax, for example (see the snippet after this list):

    • Hourly: 0 * * * *
    • Daily: 0 0 * * *
    • Every Monday: 0 0 * * MON
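In context, switching email.yml to a weekly Monday run would look like:

on:
  schedule:
    - cron: "0 0 * * MON"   # weekly, Mondays at 00:00 UTC
  workflow_dispatch: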

Your script now runs on GitHub automatically—no need to keep your computer on. Here is the automated email:

(Screenshot of the automated email received.)
