Summary
LangChain is an open-source framework designed to simplify the deployment of large language models (LLMs) in production. It offers a model-agnostic toolkit that allows developers to experiment with various LLMs through a unified interface, facilitating easy integration with multiple providers without extensive code changes. The notebook showcases diverse use cases, including chat models, prompt templates, memory management, and chains. Additionally, it highlights Retrieval Augmented Generation (RAG) and the implementation of intelligent agents for Multi-Doc-Chatbot. This makes LangChain a versatile tool for building robust applications using LLMs.
Python functions and data files needed to run this notebook are available via this link.
LangChain is one of the fastest-growing open-source projects, driven by the surge of interest in large language models (LLMs). Key features that make LangChain powerful include its ability to connect data to language models (such as OpenAI’s GPT via API) and create agent workflows.
Why does LangChain exist? 🤔
The landscape of language models is still evolving, and developers face challenges due to a lack of sufficient tooling for production-level deployments. LangChain addresses these gaps by offering a model-agnostic toolkit, enabling developers to experiment with multiple LLMs and identify the best fit for their needs—all within a unified interface and without extensive code changes as more providers are integrated.
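Because chat models share the same interface, swapping providers is in principle a one-line change. Below is a minimal sketch of that idea; it assumes the langchain-openai and langchain-anthropic packages are installed and both API keys are set, and the model names are only illustrative:
# Minimal sketch of LangChain's model-agnostic interface.
# Assumes langchain-openai and langchain-anthropic are installed and
# OPENAI_API_KEY / ANTHROPIC_API_KEY are set; model names are illustrative.
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic

question = "Summarize what LangChain is in one sentence."

# Both providers expose the same .invoke() interface,
# so the calling code does not change when the provider changes.
for model in (ChatOpenAI(model="gpt-3.5-turbo"),
              ChatAnthropic(model="claude-3-haiku-20240307")):
    print(type(model).__name__, "->", model.invoke(question).content)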
LangChain’s Community ⭐️
When evaluating tools, the community around them is crucial—especially for open-source projects. LangChain has a robust community, with over 51k GitHub stars, 1 million downloads per month, and an active presence on Discord and Twitter.
Agents in LangChain 🤖
A popular concept in the LLM space is the use of agents—programmatic entities capable of executing goals and tasks. LangChain simplifies agent creation using its agents API. Developers can leverage OpenAI functions and other task execution tools, allowing agents to act autonomously. LangChain stands out by providing access to multiple tools within a single interface. The "plan and execute" functionality enables agents to autonomously set goals, plan, and perform tasks with minimal human input. Though current models struggle with long-term autonomy, these capabilities will improve over time.
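As a rough sketch of what an agent looks like in code (using the 0.1.x-era agents API that this notebook's LangChain version still ships; the word-counting tool is made up purely for illustration):
# Hedged agent sketch: the LLM decides on its own whether to call the toy tool.
from langchain.agents import initialize_agent, Tool, AgentType
from langchain_openai import ChatOpenAI

def word_count(text: str) -> str:
    # Toy tool: count the words in the given text.
    return str(len(text.split()))

tools = [Tool(name="word_count",
              func=word_count,
              description="Counts the number of words in a piece of text.")]

agent = initialize_agent(tools,
                         ChatOpenAI(temperature=0),
                         agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
                         verbose=True)
print(agent.run("How many words are in the sentence 'LangChain makes agents easy'?"))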
Memory with Language Models 🧠
One challenge with LLMs, such as OpenAI’s API, is that they are stateless. Every new request requires sending back the necessary context to generate a response. While developers can manage this by saving message histories in Python lists or text files, this approach doesn't scale efficiently. LangChain helps address this limitation.
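For illustration, this is roughly what do-it-yourself, list-based memory looks like (a sketch only, not LangChain's memory classes, which are covered later): the entire history must be re-sent on every turn.
# Manual, list-based "memory": the whole history is re-sent with each request.
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage, AIMessage

chat = ChatOpenAI(temperature=0)
history = []  # a plain Python list acting as memory

def ask(question):
    history.append(HumanMessage(content=question))
    reply = chat.invoke(history)               # full history goes back to the API
    history.append(AIMessage(content=reply.content))
    return reply.content

print(ask("My name is Mehdi."))
print(ask("What is my name?"))                 # works only because we re-sent the history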
A VectorStore holds embeddings, i.e., vector representations of text. The point of embeddings is that we can easily search for the pieces of text that are most similar in the vector space.
The schematic illustration below shows how information can be retrieved from a vector store:
Image retrieved from www.langchain.com
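As a small illustration of the idea (a sketch assuming faiss-cpu is installed and an OpenAI API key is available; the sample texts are made up), texts are embedded into vectors, stored, and then queried by similarity:
# Minimal vector-store sketch: embed texts, index them, query by similarity.
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

texts = [
    "The Caspian Sea is the world's largest inland body of water.",
    "LangChain connects language models to external data and tools.",
    "Calgary is a city in Alberta, Canada.",
]

# Each text is converted to an embedding vector and stored in the FAISS index.
vector_store = FAISS.from_texts(texts, OpenAIEmbeddings())

# The query is embedded too, and the closest text in vector space is returned.
for doc in vector_store.similarity_search("What does LangChain do?", k=1):
    print(doc.page_content)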
Here are LangChain's benefits:
To install LangChain, you can use pip as shown below, which installs only the bare-minimum requirements of LangChain. By default, the dependencies needed for specific integrations are NOT installed; you need to install them separately.
pip install langchain
Install LangChain for OpenAI:
pip install langchain-openai
Next we need to set our "OPENAI_API_KEY". Rather than hard-coding it in the notebook, we install python_dotenv, which loads our .env variables into Python. Here is an example:
%pip install python_dotenv
%pip install -U langchain-openai
See a simple application of LangChain with OpenAI:
import warnings
warnings.filterwarnings('ignore')
import openai
import os
from dotenv import find_dotenv, load_dotenv
# openai wrapper for langchain
from langchain_community.llms import OpenAI
# load environment variables from the .env file
load_dotenv(find_dotenv())
# create a variable for model
model_llm = "gpt-3.5-turbo"
# set OPENAI_API_KEY (replace the dots with your key, or rely on the .env file)
os.environ["OPENAI_API_KEY"] = "........"
openai.api_key = os.getenv("OPENAI_API_KEY")
#llm = OpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"))
llm = OpenAI(temperature=0.75)
print (llm.invoke("what is the weather in calgary now"))
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The class `OpenAI` was deprecated in LangChain 0.0.10 and will be removed in 0.3.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import OpenAI`. warn_deprecated(
As an AI, I do not have access to real-time weather data. Please check a trusted weather source for the current weather in Calgary.
LLM Wrappers: These provide a simple interface to connect and interact with large language models (LLMs) like GPT.
Prompt Templates: Predefined templates that help structure prompts, ensuring consistent and effective communication with LLMs.
Indexes: Tools for extracting and organizing relevant information from large datasets, making it easier to retrieve key details efficiently during queries.
Chains: A chain combines a sequence of calls to perform a specific task. Each step can execute a different operation, such as processing input or interacting with a language model. This modular approach streamlines workflows, enhances efficiency, and ensures smooth data flow between steps (see the short sketch after this list).
Agents: Enable LLMs to perform actions, such as interacting with external sources like Wikipedia.
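As a short sketch of the chaining idea (using the pipe syntax that appears later in this notebook; the topic is arbitrary), a prompt, a chat model, and an output parser can be composed into one chain whose steps feed each other:
# Minimal chain sketch: prompt -> model -> output parser.
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template("Give one fun fact about {topic}.")
chain = prompt | ChatOpenAI(temperature=0) | StrOutputParser()

# The output of each step flows into the next one.
print(chain.invoke({"topic": "the Caspian Sea"}))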
See the screenshot below for a LangChain overview:
Image retrieved from https://daxg39y63pxwu.cloudfront.net/images/blog/langchain/LangChain.webp
A ChatModel takes a list of messages as input and returns a message:
import openai
import os
from dotenv import find_dotenv, load_dotenv
# openai wrapper for langchain
from langchain_community.llms import OpenAI
from langchain.schema import HumanMessage
# openai wrapper for langchain.chat
from langchain_openai import ChatOpenAI
# create a variable for model
model_llm = "gpt-3.5-turbo"
prompt = "How deep is Caspian see"
# encapsulates the prompt in a format suitable for processing
# by a language model or conversational agent.
messages = [HumanMessage(content=prompt)]
print("===================")
model_chat = ChatOpenAI(temperature=0.75)
print (model_chat.invoke("what is the weather in calgary now"))
print("===================")
model_chat = ChatOpenAI(temperature=0.75)
print (model_chat.invoke(messages).content)
=================== content='I apologize, but I am not able to provide real-time weather updates. I recommend checking a reliable weather website or app for the current weather in Calgary.' response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 15, 'total_tokens': 46, 'prompt_tokens_details': {'cached_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-7c5411ab-961d-4ced-ae9a-12ce796b8d0e-0' usage_metadata={'input_tokens': 15, 'output_tokens': 31, 'total_tokens': 46} =================== The Caspian Sea is the world's largest inland body of water, with a maximum depth of about 3,363 feet (1,025 meters).
The question is why we should use LangChain prompt templates instead of calling the ChatGPT model directly:
customer_review = """ food quality is terrible,
I will not go to this place anymore """
from langchain.prompts import ChatPromptTemplate
# using LangChain & prompt templates
model_chat = ChatOpenAI(temperature=0.7,
model=model_llm)
# create a template string
template_string = """
Translate the following text {customer_review}
into Farsi language in a polite manner.
And the restaurant's name is {restaurant_name}.
"""
# create a chat prompt template
template_prompt = ChatPromptTemplate.from_template(template_string)
message_translation = template_prompt.format_messages(
customer_review = customer_review,
restaurant_name = "Khazar"
)
response = model_chat(message_translation)
print (response.content)
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The method `BaseChatModel.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 0.3.0. Use invoke instead. warn_deprecated(
کیفیت غذا در رستوران خزر واقعاً بد بود. من دیگر به این مکان نخواهم رفت.
This is a convenient approach for writing very long, reusable prompts with LangChain.
Parsers are very important because in most cases we need to convert output from a format that is not directly usable into one that is: a parser structures and formats data for further processing downstream.
Image retrieved from Paulo Dichone,The Complete LangChain & LLMs Guide
# create a variable for model
model_llm = "gpt-3.5-turbo"
model_chat = ChatOpenAI(temperature=0.0, model=model_llm)
paris_visit = """Upon landing in Paris, the adventure begins with a
check-in at a charming boutique hotel nestled in the heart of
the city. The first venture out includes a visit to
the majestic Eiffel Tower, where panoramic views of Paris await.
A stroll along the Seine River, crossing its iconic bridges,
provides a picturesque introduction to
the city's romantic ambiance. As the day wanes, enjoy a serene evening at a local café, savoring classic French cuisine.
The following day is dedicated to immersing yourself in Parisian
culture and art. Begin with an early visit to the Louvre Museum
to admire historical masterpieces, including the Mona Lisa.
Post-lunch, a journey through the Gothic splendor of the
Notre-Dame Cathedral and a leisurely exploration of the Latin
Quarter's quaint streets and cozy bookshops reveal the
city's vibrant heart. The evening offers a chance to experience
Paris's renowned culinary scene, with a dinner featuring exquisite
French delicacies.
On the final day, delve into the artistic enclave of Montmartre,
where the Sacré-Cœur Basilica stands majestically. This district,
known for its bohemian spirit, offers a glimpse into the city's
artistic legacy."""
itinerary_template = """
Extract following information
hotel: hotel to stay
first_day_visit: first day plan
second_day_visit: second day plan
final_day_visit: final desitination to visit
from the itinerary below:
itinerary: {itinerary}
The output should be formated as JSON with the following keys:
hotel
first_day_visit
second_day_visit
final_day_visit
"""
from langchain_core.prompts import ChatPromptTemplate
# create a prompt template
template_prompt = ChatPromptTemplate.from_template(itinerary_template)
print(template_prompt)
input_variables=['itinerary'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['itinerary'], template='\nExtract following information \n\nhotel: hotel to stay\n\nfirst_day_visit: first day plan \n\nsecond_day_visit: second day plan \n\nfinal_day_visit: final desitination to visit \n\nfrom the itinerary below:\nitinerary: {itinerary}\n\nThe output should be formated as JSON with the following keys:\nhotel\nfirst_day_visit\nsecond_day_visit\nfinal_day_visit\n\n'))]
messages = template_prompt.format_messages(itinerary=paris_visit)
response = model_chat(messages)
print(response.content)
{ "hotel": "charming boutique hotel nestled in the heart of the city", "first_day_visit": "visit to the majestic Eiffel Tower, stroll along the Seine River, evening at a local café", "second_day_visit": "visit to the Louvre Museum, journey through Notre-Dame Cathedral, exploration of the Latin Quarter", "final_day_visit": "delve into the artistic enclave of Montmartre, visit the Sacré-Cœur Basilica" }
The problem with LLMs is that they can be inconsistent, sometimes returning output in an unexpected format or giving wrong information. LangChain's output parsers help resolve this.
type(response.content)
str
The output_parsers library lets us define the structure that we want the output to follow.
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
# create schema (fields)
hotel_schema = ResponseSchema(name = "hotel", description="the recomended hotel to check in and stay")
first_day_visit_schema = ResponseSchema(name = "first_day_visit", description="the place to visit on first day")
second_day_visit_schema = ResponseSchema(name = "second_day_visit", description="the place to visit on second day")
final_day_visit_schema = ResponseSchema(name = "final_day_visit", description="the place to visit on last day")
# create responses
response_schema = [
hotel_schema,
first_day_visit_schema,
second_day_visit_schema,
final_day_visit_schema
]
# setup output parsers
output_parser = StructuredOutputParser.from_response_schemas(response_schema)
format_instructions = output_parser.get_format_instructions()
print(format_instructions)
The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```": ```json { "hotel": string // the recomended hotel to check in and stay "first_day_visit": string // the place to visit on first day "second_day_visit": string // the place to visit on second day "final_day_visit": string // the place to visit on last day } ```
But we want the output as a Python dictionary:
output_dict = output_parser.parse(response.content)
output_dict
{'hotel': 'charming boutique hotel nestled in the heart of the city', 'first_day_visit': 'visit to the majestic Eiffel Tower, stroll along the Seine River, evening at a local café', 'second_day_visit': 'visit to the Louvre Museum, journey through Notre-Dame Cathedral, exploration of the Latin Quarter', 'final_day_visit': 'delve into the artistic enclave of Montmartre, visit the Sacré-Cœur Basilica'}
itinerary_Iran = """ Upon your arrival in Tehran, the first destination is the luxurious Espinas Palace Hotel.
Nestled in the city's heart, this accommodation promises modern amenities, breathtaking city views, and exemplary
service, ensuring a stay that combines comfort with elegance. After checking in, a traditional Iranian breakfast
awaits at the hotel's restaurant, offering a taste of the local cuisine's rich flavors and culinary heritage.
The adventure begins in the afternoon with a visit to the Golestan Palace. This UNESCO World Heritage site is
a jewel in Tehran's crown, showcasing the opulence of the Qajar era through its stunning gardens, exquisite
interiors, and detailed tile work. As the day winds down, the bustling Tehran Bazaar becomes the perfect
backdrop for an evening stroll. Here, the vibrant chaos, colorful stalls, and the aroma of spices and
fresh foods provide a sensory feast, offering insights into the daily lives of the city's residents.
The next leg of your journey takes you to the enchanting city of Isfahan, where the Abbasi Hotel awaits.
Known for its beautiful traditional architecture and lush gardens, this historic hotel serves as a gateway
to the past, located conveniently close to Isfahan's main attractions. The Si-o-se-pol Bridge, with its
iconic 33 arches, is a splendid first stop, offering serene views, especially at sunset. Following this,
the Naqsh-e Jahan Square invites exploration, with the Imam Mosque, Ali Qapu Palace, and the bustling
bazaar around the square offering endless opportunities for discovery and wonder.
On your final day, delve deeper into Isfahan's artistic heritage with a visit to the Chehel Sotoun
Palace, a stunning example of Persian garden design and architecture. The palace's mirrored hall
and the intricate wall paintings provide a glimpse into the royal festivities of the Safavid era.
As your journey comes to a close, the Armenian Quarter of Jolfa offers a quiet retreat, with its
quaint cafes, the Vank Cathedral, and art galleries, encapsulating the diversity and cultural
richness that Iran proudly preserves.
This itinerary, weaving through the heart of Iran, offers a tapestry of experiences that promise
to enrich, educate, and inspire, making your visit not just a trip but a journey through time and culture.
"""
messages = template_prompt.format_messages(itinerary=itinerary_Iran,
format_instructions=format_instructions)
print(messages)
[HumanMessage(content="\nExtract following information \n\nhotel: hotel to stay\n\nfirst_day_visit: first day plan \n\nsecond_day_visit: second day plan \n\nfinal_day_visit: final desitination to visit \n\nfrom the itinerary below:\nitinerary: Upon your arrival in Tehran, the first destination is the luxurious Espinas Palace Hotel. \nNestled in the city's heart, this accommodation promises modern amenities, breathtaking city views, and exemplary \nservice, ensuring a stay that combines comfort with elegance. After checking in, a traditional Iranian breakfast \nawaits at the hotel's restaurant, offering a taste of the local cuisine's rich flavors and culinary heritage.\n\nThe adventure begins in the afternoon with a visit to the Golestan Palace. This UNESCO World Heritage site is \na jewel in Tehran's crown, showcasing the opulence of the Qajar era through its stunning gardens, exquisite \ninteriors, and detailed tile work. As the day winds down, the bustling Tehran Bazaar becomes the perfect \nbackdrop for an evening stroll. Here, the vibrant chaos, colorful stalls, and the aroma of spices and \nfresh foods provide a sensory feast, offering insights into the daily lives of the city's residents.\n\nThe next leg of your journey takes you to the enchanting city of Isfahan, where the Abbasi Hotel awaits. \nKnown for its beautiful traditional architecture and lush gardens, this historic hotel serves as a gateway \nto the past, located conveniently close to Isfahan's main attractions. The Si-o-se-pol Bridge, with its \niconic 33 arches, is a splendid first stop, offering serene views, especially at sunset. Following this, \nthe Naqsh-e Jahan Square invites exploration, with the Imam Mosque, Ali Qapu Palace, and the bustling \nbazaar around the square offering endless opportunities for discovery and wonder.\n\nOn your final day, delve deeper into Isfahan's artistic heritage with a visit to the Chehel Sotoun \nPalace, a stunning example of Persian garden design and architecture. The palace's mirrored hall \nand the intricate wall paintings provide a glimpse into the royal festivities of the Safavid era. \nAs your journey comes to a close, the Armenian Quarter of Jolfa offers a quiet retreat, with its \nquaint cafes, the Vank Cathedral, and art galleries, encapsulating the diversity and cultural \nrichness that Iran proudly preserves.\n\nThis itinerary, weaving through the heart of Iran, offers a tapestry of experiences that promise \nto enrich, educate, and inspire, making your visit not just a trip but a journey through time and culture.\n\n\nThe output should be formated as JSON with the following keys:\nhotel\nfirst_day_visit\nsecond_day_visit\nfinal_day_visit\n\n")]
response = model_chat(messages)
# parse into dictionary
output_dict = output_parser.parse(response.content)
print(output_dict)
{'hotel': 'Espinas Palace Hotel', 'first_day_visit': 'Golestan Palace', 'second_day_visit': 'Si-o-se-pol Bridge, Naqsh-e Jahan Square', 'final_day_visit': 'Chehel Sotoun Palace, Armenian Quarter of Jolfa'}
The Pydantic Output Parser for LangChain is a tool designed to facilitate the integration of Pydantic models with the LangChain framework.
We are using the same itinerary example as before:
# Import Pydantic parsers
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from typing import List
Now we can define our desired data structure:
from pydantic import BaseModel, Field, field_validator
from langchain.output_parsers import PydanticOutputParser
class ItineraryInfo(BaseModel):
hotel: str = Field(description="the recommended hotel to check in and stay")
first_day_visit: str = Field(description="the place to visit on first day")
second_day_visit: str = Field(description="the place to visit on second day")
final_day_visit: str = Field(description="the place to visit on last day")
num_people: int = Field(description="number of people to join this journey")
@field_validator('num_people')
def number_people_checking(cls, value, info):
if value <= 0:
raise ValueError("Not an accurate number of people to travel")
return value
# Setup a parser and inject instructions
pydantic_parser = PydanticOutputParser(pydantic_object=ItineraryInfo)
format_instructions = pydantic_parser.get_format_instructions()
print(format_instructions)
The output should be formatted as a JSON instance that conforms to the JSON schema below. As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]} the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted. Here is the output schema: ``` {"properties": {"hotel": {"description": "the recommended hotel to check in and stay", "title": "Hotel", "type": "string"}, "first_day_visit": {"description": "the place to visit on first day", "title": "First Day Visit", "type": "string"}, "second_day_visit": {"description": "the place to visit on second day", "title": "Second Day Visit", "type": "string"}, "final_day_visit": {"description": "the place to visit on last day", "title": "Final Day Visit", "type": "string"}, "num_people": {"description": "number of people to join this journey", "title": "Num People", "type": "integer"}}, "required": ["hotel", "first_day_visit", "second_day_visit", "final_day_visit", "num_people"]} ```
itinerary_template_revised = """
Extract information from the following itinerary:
itinerary: {itinerary}
{format_instructions}
"""
updated_prompt = ChatPromptTemplate.from_template(template=itinerary_template_revised)
messages = updated_prompt.format_messages(itinerary=itinerary_Iran,
format_instructions=format_instructions)
format_response = model_chat(messages)
print(format_response)
content='{\n "hotel": "Espinas Palace Hotel",\n "first_day_visit": "Golestan Palace",\n "second_day_visit": "Si-o-se-pol Bridge and Naqsh-e Jahan Square",\n "final_day_visit": "Chehel Sotoun Palace and Armenian Quarter of Jolfa",\n "num_people": 1\n}' response_metadata={'token_usage': {'completion_tokens': 74, 'prompt_tokens': 798, 'total_tokens': 872, 'prompt_tokens_details': {'cached_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-2fd3fe12-6824-444a-9b9c-ea89a147a9ae-0' usage_metadata={'input_tokens': 798, 'output_tokens': 74, 'total_tokens': 872}
type(format_response.content)
str
print(format_response.content)
{ "hotel": "Espinas Palace Hotel", "first_day_visit": "Golestan Palace", "second_day_visit": "Si-o-se-pol Bridge and Naqsh-e Jahan Square", "final_day_visit": "Chehel Sotoun Palace and Armenian Quarter of Jolfa", "num_people": 1 }
# parse the JSON string into an ItineraryInfo object
visit = pydantic_parser.parse(format_response.content)
print(type(visit))
print(visit)
<class '__main__.ItineraryInfo'> hotel='Espinas Palace Hotel' first_day_visit='Golestan Palace' second_day_visit='Si-o-se-pol Bridge and Naqsh-e Jahan Square' final_day_visit='Chehel Sotoun Palace and Armenian Quarter of Jolfa' num_people=1
print(visit.hotel)
Espinas Palace Hotel
See https://python.langchain.com/docs/modules/model_io/output_parsers/types/pydantic for more information about pydantic
LLMs do not remember anything on their own.
Luckily, LangChain has several memory wrappers:
LangChain offers various memory types to manage and store conversational context, ensuring seamless and coherent interactions. The main memory types include:
ConversationBufferMemory: stores messages and then extracts them into a variable.
ConversationBufferWindowMemory: keeps a sliding window of the conversation, retaining only the last k interactions.
ConversationTokenBufferMemory: keeps a buffer of recent interactions in memory and uses token length, rather than the number of interactions, to decide when to drop older ones.
Entity memory (ConversationEntityMemory): tracks specific entities mentioned in conversations, ensuring consistent and accurate references throughout the interaction.
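As a hedged sketch of how the windowed variant behaves (class and method names from the langchain.memory module of the 0.1.x line used in this notebook; the sample exchanges are made up):
# Sketch: a windowed memory keeps only the last k exchanges (here k=1).
from langchain.memory import ConversationBufferWindowMemory

window_memory = ConversationBufferWindowMemory(k=1)
window_memory.save_context({"input": "Hi, my name is Mehdi"},
                           {"output": "Nice to meet you, Mehdi!"})
window_memory.save_context({"input": "Why do we see stars at night?"},
                           {"output": "Because their light reaches our eyes."})

# Only the most recent exchange survives; the introduction has already been dropped.
print(window_memory.load_memory_variables({}))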
import os
from dotenv import find_dotenv, load_dotenv
import openai
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
# OpenAI Chat API
model_llm = "gpt-3.5-turbo"
model_chat = ChatOpenAI(temperature=0.7,
model=model_llm)
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The class `ChatOpenAI` was deprecated in LangChain 0.0.10 and will be removed in 0.3.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import ChatOpenAI`. warn_deprecated(
print(model_chat.invoke("My name is Mehdi and I am a data scientist and what about you?").content)
print(model_chat.invoke("\n\nCool! can you tell me what my name is?").content) # there are memory issue
Nice to meet you Mehdi! I am a language model AI designed to assist with various tasks and conversations. How can I help you today? I'm sorry, but I do not have access to any personal information about you. However, you can tell me your name and I would be happy to address you by it in our conversation.
Now, how can we solve the memory issue? We can leverage ConversationChain.
memory = ConversationBufferMemory()
conversation = ConversationChain(
llm=model_chat,
memory=memory,
verbose=True # see what is going on in background
)
conversation.invoke(input="Hi there, my name is Mehdi")
conversation.invoke(input="why we have stars at the sky")
conversation.invoke(input="why people are scared of snakes")
conversation.invoke(input="Do you remember what my name is?")
> Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human: Hi there, my name is Mehdi AI: > Finished chain. > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human: Hi there, my name is Mehdi AI: Hello Mehdi! It's nice to meet you. How can I assist you today? Human: why we have stars at the sky AI: > Finished chain. > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human: Hi there, my name is Mehdi AI: Hello Mehdi! It's nice to meet you. How can I assist you today? Human: why we have stars at the sky AI: Stars are massive, luminous spheres of plasma that are held together by gravity. They form when clouds of gas and dust in space collapse under their own gravity and begin nuclear fusion, which releases energy in the form of light and heat. These stars then continue to shine for billions of years until they exhaust their nuclear fuel and eventually die. The reason we see stars in the sky is because their light travels through space and reaches our eyes, creating the beautiful nighttime view we all enjoy. Human: why people are scared of snakes AI: > Finished chain. > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human: Hi there, my name is Mehdi AI: Hello Mehdi! It's nice to meet you. How can I assist you today? Human: why we have stars at the sky AI: Stars are massive, luminous spheres of plasma that are held together by gravity. They form when clouds of gas and dust in space collapse under their own gravity and begin nuclear fusion, which releases energy in the form of light and heat. These stars then continue to shine for billions of years until they exhaust their nuclear fuel and eventually die. The reason we see stars in the sky is because their light travels through space and reaches our eyes, creating the beautiful nighttime view we all enjoy. Human: why people are scared of snakes AI: People are scared of snakes for a variety of reasons. One possible explanation is that snakes have been portrayed negatively in many cultures and religions throughout history, leading to a fear that has been passed down through generations. Additionally, snakes move in a unique and unpredictable way, which can be unsettling for some people. Their venomous nature and potential for harm also contribute to the fear many individuals have towards them. 
Additionally, some people may have a natural fear of snakes due to evolutionary reasons, as snakes were a common threat to our ancestors and therefore a fear of them may have developed as a survival instinct. Human: Do you remember what my name is? AI: > Finished chain.
{'input': 'Do you remember what my name is?', 'history': "Human: Hi there, my name is Mehdi\nAI: Hello Mehdi! It's nice to meet you. How can I assist you today?\nHuman: why we have stars at the sky\nAI: Stars are massive, luminous spheres of plasma that are held together by gravity. They form when clouds of gas and dust in space collapse under their own gravity and begin nuclear fusion, which releases energy in the form of light and heat. These stars then continue to shine for billions of years until they exhaust their nuclear fuel and eventually die. The reason we see stars in the sky is because their light travels through space and reaches our eyes, creating the beautiful nighttime view we all enjoy.\nHuman: why people are scared of snakes\nAI: People are scared of snakes for a variety of reasons. One possible explanation is that snakes have been portrayed negatively in many cultures and religions throughout history, leading to a fear that has been passed down through generations. Additionally, snakes move in a unique and unpredictable way, which can be unsettling for some people. Their venomous nature and potential for harm also contribute to the fear many individuals have towards them. Additionally, some people may have a natural fear of snakes due to evolutionary reasons, as snakes were a common threat to our ancestors and therefore a fear of them may have developed as a survival instinct.", 'response': 'Yes, your name is Mehdi.'}
print(memory.load_memory_variables({}))
{'history': "Human: Hi there, my name is Mehdi\nAI: Hello Mehdi! It's nice to meet you. How can I assist you today?\nHuman: why we have stars at the sky\nAI: Stars are massive, luminous spheres of plasma that are held together by gravity. They form when clouds of gas and dust in space collapse under their own gravity and begin nuclear fusion, which releases energy in the form of light and heat. These stars then continue to shine for billions of years until they exhaust their nuclear fuel and eventually die. The reason we see stars in the sky is because their light travels through space and reaches our eyes, creating the beautiful nighttime view we all enjoy.\nHuman: why people are scared of snakes\nAI: People are scared of snakes for a variety of reasons. One possible explanation is that snakes have been portrayed negatively in many cultures and religions throughout history, leading to a fear that has been passed down through generations. Additionally, snakes move in a unique and unpredictable way, which can be unsettling for some people. Their venomous nature and potential for harm also contribute to the fear many individuals have towards them. Additionally, some people may have a natural fear of snakes due to evolutionary reasons, as snakes were a common threat to our ancestors and therefore a fear of them may have developed as a survival instinct.\nHuman: Do you remember what my name is?\nAI: Yes, your name is Mehdi."}
Now we can see the LLM remembers my name!
See this page for more information: https://python.langchain.com/docs/modules/memory/
A chain allows you to link multiple individual chains or components together in a sequence. The output of one chain or component is passed as input to the next one, letting you build complex workflows and data-processing pipelines by combining simpler steps. The figure below shows a simple chain.
Another schematic for sequential chain:
# run a simple LLMChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
# OpenAI Chat API
model_llm = "gpt-3.5-turbo"
model_chat = ChatOpenAI(temperature=0.76, model=model_llm, verbose=True)
open_ai = OpenAI(temperature=0.78)
# LLMChain
prompt = PromptTemplate(
input_variables=["language"],
template="How do you greet in {language}"
)
simple_chain = prompt | model_chat
print(simple_chain.invoke({"language": "Persian?"}).content)
In Persian, you can greet someone by saying "سلام" (salaam) which means hello or hi. You can also use "خوبی؟" (khubi?) which means how are you? or "خوش آمدید" (khosh amadid) which means welcome.
open_ai = OpenAI(temperature=0.78)
template = """
write a fake stroy of 100 words for a person living in {location}
and make a living based on boxing. Make his/her name as {name}
fake STORY:
"""
prompt = PromptTemplate(input_variables=["location", "name"],
template=template, verbose=True)
simple_chain = prompt | model_chat
print(simple_chain.invoke({"location": "bandaeanzali?","name": "Ebi?"}).content)
Ebi was a rising star in the boxing world, known for his lightning-fast punches and unbeatable determination. Living in Bandar-e Anzali, he trained tirelessly in a small gym by the sea, dreaming of one day becoming a champion. Despite facing numerous obstacles and setbacks, Ebi never gave up on his passion for boxing. With each victory in the ring, his reputation grew, attracting the attention of sponsors and fans alike. His resilience and unwavering dedication to his craft inspired those around him, proving that with hard work and perseverance, anything is possible. Ebi was destined for greatness, and nothing could stand in his way.
fake_story_chain = LLMChain(llm=open_ai, prompt=prompt, verbose=True) # see what is going on in background
print(fake_story_chain.run({"location": "bandaeanzali", "name":"Ebi"}))
> Entering new LLMChain chain... Prompt after formatting: write a fake stroy of 100 words for a person living in bandaeanzali and make a living based on boxing. Make his/her name as Ebi fake STORY:
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 0.3.0. Use RunnableSequence, e.g., `prompt | llm` instead. warn_deprecated( D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 0.3.0. Use invoke instead. warn_deprecated(
> Finished chain.
Ebi was a rising star in the world of boxing, hailing from the small town of Bandaeanzali. From a young age, Ebi was known for his quick hands and powerful punches. He would spend hours training in the local gym, determined to make a name for himself in the ring.
As he got older, Ebi's skills only improved. He became the talk of the town, with many predicting he would become the next big thing in boxing. His hard work and dedication paid off when he was scouted by a renowned boxing coach from the city.
Ebi left his small town behind and moved to the city to pursue his dreams of becoming a professional boxer. He trained rigorously every day, pushing himself to the limits. And finally, his big break came when he was offered a chance to fight in a televised match.
The entire town of Bandaeanzali gathered around their TVs to watch Ebi in action. And he did not disappoint. In a nail-biting match, Ebi knocked out his opponent in the final round, securing his first victory as a professional boxer.
From that moment on, Ebi's career soared. He traveled the world, winning match after match and becoming a household name in the boxing world.
Now that we have generated the story, we want to pass it as input to another chain.
fake_story_chain = LLMChain(llm=open_ai, prompt=prompt,
output_key="story",
verbose=True) # see what is going on in background
from langchain.chains import LLMChain, SequentialChain
update_template = """
# translate the {story} into {language}. Please ensure that the language is easily understandable and is fun to read.
Translation into {language}:
"""
translate_prompt = PromptTemplate(input_variables=["story", "language"],
template=update_template)
translate_chain = LLMChain(llm=open_ai,
prompt=translate_prompt,
output_key="translated"
)
Now we need to create Sequential Chain:
chain_overall = SequentialChain(
chains=[fake_story_chain, translate_chain],
input_variables=["location", "name", "language"],
output_variables=["story", "translated"], # This will return the story and translate it
verbose=True
)
response = chain_overall({"location": "Bandar-e-Anzali",
"name": "Abay",
"language": "Persian",
})
> Entering new SequentialChain chain... > Entering new LLMChain chain... Prompt after formatting: write a fake stroy of 100 words for a person living in Bandar-e-Anzali and make a living based on boxing. Make his/her name as Abay fake STORY:
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The method `Chain.__call__` was deprecated in langchain 0.1.0 and will be removed in 0.3.0. Use invoke instead. warn_deprecated(
> Finished chain. > Finished chain.
print(f"English Version is {response['story']} \n\n ")
print(f"Translated Version is {response['translated']} \n\n ")
English Version is In the bustling city of Bandar-e-Anzali, there lived a young man by the name of Abay. He was tall, muscular and had a passion for boxing that burned deep within him. Ever since he was a child, he dreamed of becoming a professional boxer and making a name for himself. With determination and hard work, Abay trained tirelessly every day at the local boxing gym. His skills caught the attention of a renowned coach who took him under his wing and molded him into a fierce fighter. Soon, Abay was winning matches left and right, gaining fame and fortune with each victory. He became a local hero, with posters of him plastered all over the city. But with success came jealousy and envy from his opponents. One day, Abay was challenged by a fierce rival who was known for his dirty tricks in the ring. The match was intense, with both fighters going toe to toe. But in the end, it was Abay's determination and sharp skills that led him to emerge victorious. With his undefeated record, Abay was offered a chance to compete on an international level. He traveled to different countries, representing his hometown of Bandar-e-Anzali and making his people proud. He even won a world championship title, solidifying Translated Version is در شهر پرجنب و جوش بندرانزلی، یک مرد جوان به نام آبای زندگی می کرد. او قد بلند، عضلانی و علاقه زیادی به باکس داشت که درون او گداخته بود. از زمانی که کودک بود، رویای تبدیل شدن به یک بوکسور حرفه ای و کسب شهرت برای خودش را داشت. با تعیین هدف و کار سخت، آبای هر روز در باشگاه بوکس محلی بی وقفه تمرین می کرد. مهارت های او توجه یک مربی معروف را به خود جلب کرد که او ر
prompt | model
chain_1 = prompt | model_chat
chain_2 = translate_prompt | model_chat
answer1 = chain_1.invoke({"location": "bandaeanzali?","name": "Ebi?"})
answer2 = chain_2.invoke({"story": answer1.content, "language": "persian"})
print("Fake story:\nn", answer1.content)
print("\nnTranslation:\nn", answer2.content)
Fake story: n Ebi was a rising star in the boxing world, living in Bandar-e Anzali. With a natural talent for the sport, he quickly made a name for himself in the ring. However, his success came with a price. Ebi's opponents would often try to sabotage him, leading to intense rivalries and dangerous situations. Despite the challenges, Ebi never backed down and continued to train tirelessly, determined to become the champion of Bandar-e Anzali. His hard work and dedication paid off when he finally won the title, solidifying his place as a boxing legend in the city. nTranslation: n ابی ستارهی صعود کنندهای در دنیای باکس بود که در بندر انزلی زندگی میکرد. با استعداد طبیعی برای ورزش، او به سرعت نامی برای خود در رینگ ایجاد کرد. با این حال، موفقیت ابی با یک قیمت همراه بود. حریفان ابی اغلب سعی میکردند او را خراب کنند که منجر به رقابتهای شدید و وضعیتهای خطرناک میشد. با وجود چالشها، ابی هرگز پشت نکش نکرد و به سرعت ادامه داد تا با تمرینهای بیوقفه، مصمم به تبدیل شدن به قهرمان بندر انزلی شود. تلاشها و اخلاص او ثمر خورد و زمانی که نهایتاً عنوان را به دست آورد، جای خود را به عنوان یک افسانهی باکس در شهر محکم کرد.
We can convert the code above into a Streamlit app as below:
#%pip install aiortc==1.3.2 --user
#%pip install matplotlib==3.5.1 --user
#%pip install numpy==1.22.3 --user
#%pip install opencv-python-headless==4.5.5.64 --user
#%pip install pydub==0.25.1 --user
#%pip install streamlit==1.9.0 --user
#%pip install streamlit_webrtc==0.37.0 --user
#%pip install typing_extensions==4.1.1 --user
#%pip install protobuf~=3.19.0 --user
#%pip install "altair<5" --user
import openai
import os
from dotenv import find_dotenv, load_dotenv
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain_community.llms import OpenAI
from langchain.chat_models import ChatOpenAI
import streamlit as st
openai.api_key = os.getenv("OPENAI_API_KEY")
# to find all environmental variables
load_dotenv(find_dotenv())
# OpenAI Chat API
model_llm = "gpt-3.5-turbo"
model_chat = ChatOpenAI(temperature=0.76, model=model_llm)
open_ai = OpenAI(temperature=0.78)
def lullaby_generate(location, name, language):
template = """
write a fake story of 100 words for a person living in {location}
and make a living based on boxing. Make his/her name as {name}
fake STORY:
"""
prompt = PromptTemplate(input_variables=["location", "name"],
template=template
)
fake_story_chain = LLMChain(llm=open_ai, prompt=prompt,
output_key="story",
verbose=True) # see what is going on in background
#
update_template = """
# translate the {story} into {language}. Please ensure that the language is easily
understandable and is fun to read.
Translation into {language}:
"""
translate_prompt = PromptTemplate(input_variables=["story", "language"],
template=update_template)
#
translate_chain = LLMChain(llm=open_ai,
prompt=translate_prompt,
output_key="translated"
)
#
chain_overall = SequentialChain(
chains=[fake_story_chain, translate_chain],
input_variables=["location", "name", "language"],
output_variables=["story", "translated"], # This will return the story and translate it
verbose=True
)
response = chain_overall({"location": location,
"name": name,
"language": language,
})
return response
# Create a user interface here
def main():
st.set_page_config(page_title="Generate a fake story",
layout="centered")
st.title("Ask AI to write a fake story about a boxer and translate it to another language 📚")
st.header("Now it is started ...")
location_input = st.text_input(label="Location for the story")
name_input = st.text_input(label="What is the name of character")
language_input = st.text_input(label="Translate story to another language")
submit_button = st.button("Submit")
if location_input and name_input and language_input:
if submit_button:
with st.spinner("Generate a Fake story..."):
response = lullaby_generate(location=location_input,
name=name_input,
language=language_input
)
with st.expander("English version"):
st.write(response['story'])
with st.expander(f"{language_input} language"):
st.write(response['translated'])
st.success("Successfully done!")
#Invoking main function
if __name__ == '__main__':
main()
2024-10-19 06:40:33.649
Warning: to view this Streamlit app on a browser, run it with the following
command:
streamlit run D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-10-19 06:40:33.650 Session state does not function when running a script without `streamlit run`
To run the code above with Streamlit, the code should be saved in a Python file (e.g., app.py) and launched from the console with streamlit run app.py
Router Chain is a specialized type of chain designed to route inputs to different sub-chains based on specific criteria or conditions. This allows for more dynamic and flexible handling of inputs by directing them to the appropriate processing chain.
Image retrieved from Paulo Dichone,The Complete LangChain & LLMs Guide
Routers in LangChain are specialized components designed to manage and direct the flow of tasks or queries within the system. They help in distributing incoming tasks to the appropriate models, agents, or other resources based on specific criteria or requirements. Routers enhance efficiency and optimize resource utilization by ensuring that each task is handled by the most suitable component available. This capability is crucial in complex, multi-agent systems where diverse tasks need different handling strategies. By using routers, LangChain can dynamically adapt to varying workloads and improve overall performance and accuracy of the language processing pipeline.
openai.api_key = os.getenv("OPENAI_API_KEY")
# to find all environmental variables
load_dotenv(find_dotenv())
# OpenAI Chat API
model_llm = "gpt-3.5-turbo"
model_chat = ChatOpenAI(temperature=0.0, model=model_llm)
weather_template = """You are an expert at global warming. You can answer any
question related to earth temperture raising.
Here is a question:
{input}"""
sport_template = """You are a very good swim coach. You are great at teaching students how to swim.
Here is a question:
{input}"""
physician_template = """You are a very good physician specializing in heart disease.
Here is a question:
{input}"""
prompt_infos = [
{
"name": "weather",
"description": "Good at global warming",
"prompt_template": weather_template
},
{
"name": "sport",
"description": "Good for teaching people how to swim",
"prompt_template": sport_template,
},
{
"name": "physician",
"description": "Good for healing heart disease",
"prompt_template": physician_template,
},
]
destination_chains = {}
for info in prompt_infos:
name = info["name"]
prompt_template = info["prompt_template"]
prompt = ChatPromptTemplate.from_template(template=prompt_template)
chain = LLMChain(llm=model_chat, prompt=prompt)
destination_chains[name] = chain
destination_chains["physician"]
LLMChain(prompt=ChatPromptTemplate(input_variables=['input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='You are a very good physician specializing in heart disease. \n\nHere is a question:\n{input}'))]), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001BF69EF2460>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001BF6B5D8820>, temperature=0.0, openai_api_key='sk-***', openai_proxy=''))
destinations = [f"{p['name']}: {p['description']}" for p in prompt_infos]
destinations_str = "\n".join(destinations)
Up to now, we have created the destination chains. If a question does not match any of them, we need a fallback, so the next step is to create the default chain.
# Setup the default chain
default_prompt = ChatPromptTemplate.from_template("{input}")
default_chain = LLMChain(llm=model_chat, prompt=default_prompt)
default_chain
LLMChain(prompt=ChatPromptTemplate(input_variables=['input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))]), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001BF69EF2460>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001BF6B5D8820>, temperature=0.0, openai_api_key='sk-***', openai_proxy=''))
from langchain.chains.router.multi_prompt_prompt import MULTI_PROMPT_ROUTER_TEMPLATE
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
from langchain.chains.router import MultiPromptChain
destinations_str
'weather: Good at global warming\nsport: Good for teaching people how to swim\nphysician: Good for healing heart disease'
# create actual router template
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations_str)
print(router_template)
Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model. << FORMATTING >> Return a markdown code snippet with a JSON object formatted to look like: ```json {{ "destination": string \ name of the prompt to use or "DEFAULT" "next_inputs": string \ a potentially modified version of the original input }} ``` REMEMBER: "destination" MUST be one of the candidate prompt names specified below OR it can be "DEFAULT" if the input is not well suited for any of the candidate prompts. REMEMBER: "next_inputs" can just be the original input if you don't think any modifications are needed. << CANDIDATE PROMPTS >> weather: Good at global warming sport: Good for teaching people how to swim physician: Good for healing heart disease << INPUT >> {input} << OUTPUT (must include ```json at the start of the response) >> << OUTPUT (must end with ```) >>
router_prompt = PromptTemplate(
template=router_template,
input_variables=["input"],
output_parser=RouterOutputParser()
)
print(router_prompt)
input_variables=['input'] output_parser=RouterOutputParser() template='Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model.\n\n<< FORMATTING >>\nReturn a markdown code snippet with a JSON object formatted to look like:\n```json\n{{\n "destination": string \\ name of the prompt to use or "DEFAULT"\n "next_inputs": string \\ a potentially modified version of the original input\n}}\n```\n\nREMEMBER: "destination" MUST be one of the candidate prompt names specified below OR it can be "DEFAULT" if the input is not well suited for any of the candidate prompts.\nREMEMBER: "next_inputs" can just be the original input if you don\'t think any modifications are needed.\n\n<< CANDIDATE PROMPTS >>\nweather: Good at global warming\nsport: Good for teaching people how to swim\nphysician: Good for healing heart disease\n\n<< INPUT >>\n{input}\n\n<< OUTPUT (must include ```json at the start of the response) >>\n<< OUTPUT (must end with ```) >>\n'
router_chain = LLMRouterChain.from_llm(
llm=model_chat,
prompt=router_prompt,
)
router_chain
LLMRouterChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['input'], output_parser=RouterOutputParser(), template='Given a raw text input to a language model select the model prompt best suited for the input. You will be given the names of the available prompts and a description of what the prompt is best suited for. You may also revise the original input if you think that revising it will ultimately lead to a better response from the language model.\n\n<< FORMATTING >>\nReturn a markdown code snippet with a JSON object formatted to look like:\n```json\n{{\n "destination": string \\ name of the prompt to use or "DEFAULT"\n "next_inputs": string \\ a potentially modified version of the original input\n}}\n```\n\nREMEMBER: "destination" MUST be one of the candidate prompt names specified below OR it can be "DEFAULT" if the input is not well suited for any of the candidate prompts.\nREMEMBER: "next_inputs" can just be the original input if you don\'t think any modifications are needed.\n\n<< CANDIDATE PROMPTS >>\nweather: Good at global warming\nsport: Good for teaching people how to swim\nphysician: Good for healing heart disease\n\n<< INPUT >>\n{input}\n\n<< OUTPUT (must include ```json at the start of the response) >>\n<< OUTPUT (must end with ```) >>\n'), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001BF69EF2460>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001BF6B5D8820>, temperature=0.0, openai_api_key='sk-***', openai_proxy='')))
default_chain
LLMChain(prompt=ChatPromptTemplate(input_variables=['input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))]), llm=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001BF69EF2460>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001BF6B5D8820>, temperature=0.0, openai_api_key='sk-***', openai_proxy=''))
chain = MultiPromptChain(
router_chain=router_chain,
destination_chains=destination_chains,
default_chain=default_chain,
verbose=True
)
# Test
response = chain.run("can you tell me why the earth temperature is raising?")
#response = chain.run("How old as the stars?")
print(response)
> Entering new MultiPromptChain chain... weather: {'input': 'can you explain the reasons behind the rise in global temperatures?'} > Finished chain. There are several factors that contribute to the rise in global temperatures, with the primary driver being human activities that release greenhouse gases into the atmosphere. These gases, such as carbon dioxide, methane, and nitrous oxide, trap heat from the sun and prevent it from escaping back into space, leading to a warming effect known as the greenhouse effect. Some of the main human activities that contribute to the increase in greenhouse gases include burning fossil fuels for energy production, transportation, and industrial processes, deforestation, and agriculture practices such as livestock farming. These activities have significantly increased the concentration of greenhouse gases in the atmosphere, leading to a rapid rise in global temperatures. In addition to human activities, natural factors such as volcanic eruptions, changes in solar radiation, and variations in Earth's orbit can also influence global temperatures. However, the overwhelming scientific consensus is that human activities are the primary driver of the current warming trend observed in recent decades.
# Test
response = chain.run("can you tell me what is best approach to loss weight?")
#response = chain.run("How old as the stars?")
print(response)
> Entering new MultiPromptChain chain... physician: {'input': 'can you tell me what is the best approach to losing weight?'} > Finished chain. As a physician specializing in heart disease, I recommend a comprehensive approach to weight loss that includes a combination of healthy eating, regular physical activity, and behavior modification. Here are some tips to help you achieve your weight loss goals: 1. Start by setting realistic and achievable goals for weight loss. Aim to lose 1-2 pounds per week, as this is a safe and sustainable rate of weight loss. 2. Focus on making healthy food choices by incorporating plenty of fruits, vegetables, whole grains, lean proteins, and healthy fats into your diet. Limit your intake of processed foods, sugary drinks, and high-fat foods. 3. Practice portion control by measuring your food and paying attention to serving sizes. Eating smaller, more frequent meals throughout the day can help keep you feeling full and satisfied. 4. Stay hydrated by drinking plenty of water throughout the day. Sometimes thirst can be mistaken for hunger, so staying hydrated can help prevent overeating. 5. Incorporate regular physical activity into your routine, aiming for at least 150 minutes of moderate-intensity exercise per week. This can include activities such as walking, jogging, cycling, swimming, or strength training. 6. Keep track of your progress by monitoring your weight, food intake, and physical activity. This can help you stay accountable and make adjustments as needed. 7. Seek support from friends, family, or a healthcare professional to help you stay motivated and on track with your weight loss goals. Remember, losing weight takes time and patience, so be kind to yourself and celebrate your successes along the way. If you have any underlying health conditions or concerns, it's always best to consult with a healthcare provider before starting any weight loss program.
One of the great advantages of LangChain is that it allows developers to chat with their own data and documents (URLs, CSVs, PDFs, HTML, JSON, ...).
First, the document has to be converted into a vector store. The steps are: load the document, split it into chunks, embed each chunk, and store the embeddings in a vector store.
Once the vector store is built, we can have a conversation with the document: the question is embedded as a query and the most relevant passages in the document are found. This process is called retrieval.
Image retrieved from Paulo Dichone, The Complete LangChain & LLMs Guide
import os
from dotenv import find_dotenv, load_dotenv
import openai
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.document_loaders import PyPDFLoader
# OpenAI Chat API
model_llm = "gpt-3.5-turbo"
model_chat = ChatOpenAI(temperature=0.2, model=model_llm)
Load a paper with PyPDFLoader:
### pip install pypdf
loader = PyPDFLoader("./data/paper.pdf")
pages = loader.load()
print(f"Nmber of pages for this paper are {len(pages)}.")
# first page
page_1 = pages[0]
print(page_1)
Nmber of pages for this paper are 21. page_content='RESEARCH ARTICLE\nMachine learning approaches for the prediction of serious\nfluid leakage from hydrocarbon wells\nMehdi Rezvandehy1and Bernhard Mayer2\n1Department of Chemical and Petroleum Engineering, University of Calgary, Calgary, AB, Canada\n2Department of Geoscience, University of Calgary, Calgary, AB, Canada\nCorresponding author: Mehdi Rezvandehy; Email: mehdi.rezvandehy@ucalgary.ca\nReceived: 23 August 2022; Revised: 05 April 2023; Accepted: 14 April 2023\nKeywords: Energy wells; imbalanced class classification; imputation; probability estimation; resampling\nAbstract\nThe exploitation of hydrocarbon reservoirs may potentially lead to contamination of soils, shallow water resources,\nand greenhouse gas emissions. Fluids such as methane or CO 2may in some cases migrate toward the groundwater\nzone and atmosphere through and along imperfectly sealed hydrocarbon wells. Field tests in hydrocarbon-producing\nregions are routinely conducted for detecting serious leakage to prevent environmental pollution. The challenge isthat testing is costly, time-consuming, and sometimes labor-intensive. In this study, machine learning approaches\nwere applied to predict serious leakage with uncertainty quantification for wells that have not been field tested in\nAlberta, Canada. An improved imputation technique was developed by Cholesky factorization of the covariancematrix between features, where missing data are imputed via conditioning of available values. The uncertainty in\nimputed values was quantified and incorporated into the final prediction to improve decision-making. Next, a wide\nrange of predictive algorithms and various performance metrics were considered to achieve the most reliableclassifier. However, a highly skewed distribution of field tests toward the negative class (nonserious leakage) forcespredictive models to unrealistically underestimate the minority class (serious leakage). To address this issue, a\ncombination of oversampling, undersampling, and ensemble learning was applied. By investigating all the models on\nnever-before-seen data, an optimum classifier with minimal false negative prediction was determined. The developedmethodology can be applied to identify the wells with the highest likelihood for serious fluid leakage within\nproducing fields. This information is of key importance for optimizing field test operations to achieve economic\nand environmental benefits.\nImpact Statement\nField test operations to detect methane and CO2 leakages from hydrocarbon wells can be costly. Most wells do\nnot have leaks or are categorized as non-serious, which means that no repair is needed until they are abandoned.\nHowever, it is crucial to identify and prioritize serious leakages for immediate remediation to prevent environ-\nmental pollution. This study developed a reliable predictive model by correlating the results of historical fieldtests with various well properties, including age, depth, production/injection history, and deviation, among\nothers. The trained model can predict the likelihood of serious leakage for untested wells, allowing for the\nprioritization of wells with the highest probability of leaks for field testing. This approach leads to cost-effectivefield testing and environmental benefits.\n© The Author(s), 2023. Published by Cambridge University Press. 
This is an Open Access article, distributed under the terms of the Creative Commons\nAttribution licence ( http://creativecommons.org/licenses/by/4.0 ), which permits unrestricted re-use, distribution and reproduction, provided the\noriginal article is properly cited.Data-Centric Engineering (2023), 4: e12\ndoi:10.1017/dce.2023.9\nhttps://doi.org/10.1017/dce.2023.9 Published online by Cambridge University Press' metadata={'source': './data/paper.pdf', 'page': 0}
# first 200 characters of page 1
print(page_1.page_content[0:200])
RESEARCH ARTICLE Machine learning approaches for the prediction of serious fluid leakage from hydrocarbon wells Mehdi Rezvandehy1and Bernhard Mayer2 1Department of Chemical and Petroleum Engineering,
print(page_1.metadata)
{'source': './data/paper.pdf', 'page': 0}
See this page for the different types of data loaders available: https://python.langchain.com/docs/modules/data_connection/document_loaders/
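Other loaders follow the same pattern of loading a source and returning a list of documents. Below is a minimal sketch with CSVLoader, where the file name ./data/wells.csv is only a hypothetical example; each CSV row becomes one document:
# CSVLoader ships with the standard document loaders
from langchain.document_loaders import CSVLoader
csv_loader = CSVLoader(file_path="./data/wells.csv")  # hypothetical CSV file
csv_docs = csv_loader.load()
print(len(csv_docs), csv_docs[0].metadata)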
Creating good chunks is essential in semantic search and RAG (Retrieval-Augmented Generation). Effective content division ensures that we maintain coherence and context in the response. If we divide a story into unrelated fragments, we could lose the ability to create a coherent response.
LangChain provides several text splitters:
CharacterTextSplitter
CodeTextSplitter
MarkdownHeaderTextSplitter
RecursiveCharacterTextSplitter
TokenTextSplitter
CharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter
# 1. CharacterTextSplitter
with open("./data/wild_animals_book.txt", encoding="utf8") as paper:
speech = paper.read()
text_splitter = CharacterTextSplitter(
length_function = len
)
texts = text_splitter.create_documents([speech])
print(texts[0])
page_content='The Project Gutenberg eBook of Wild Animals I Have Known\n \nThis ebook is for the use of anyone anywhere in the United States and\nmost other parts of the world at no cost and with almost no restrictions\nwhatsoever. You may copy it, give it away or re-use it under the terms\nof the Project Gutenberg License included with this ebook or online\nat www.gutenberg.org. If you are not located in the United States,\nyou will have to check the laws of the country where you are located\nbefore using this eBook.\n\nTitle: Wild Animals I Have Known\n\n\nAuthor: Ernest Thompson Seton\n\nRelease date: January 1, 2002 [eBook #3031]\n Most recently updated: March 3, 2017\n\nLanguage: English\n\nCredits: Produced by David Reed, and David Widger\n\n\n*** START OF THE PROJECT GUTENBERG EBOOK WILD ANIMALS I HAVE KNOWN ***\n\nProduced by David Reed\n\nWILD ANIMALS I HAVE KNOWN\n\nBy Ernest Thompson Seton\n\n\nBooks by Ernest Thompson Seton\n\n Biography of a Grizzly\n Lives of the Hunted\n Wild Animals at Home\n Wild Animal Ways\n\n\nStories in This Book\n\n Lobo, the King of Currumpaw\n Silverspot, the Story of a Crow\n Raggylug, the Story of a Cottontail Rabbit\n Bingo, the Story of My Dog\n The Springfield Fox\n The Pacing Mustang\n Wully, the Story of a Yaller Dog\n Redruff, the Story of the Don Valley Partridge\n\nTHESE STORIES are true. Although I have left the strict line of\nhistorical truth in many places, the animals in this book were all real\ncharacters. They lived the lives I have depicted, and showed the stamp\nof heroism and personality more strongly by far than it has been in the\npower of my pen to tell.\n\nI believe that natural history has lost much by the vague general\ntreatment that is so common. What satisfaction would be derived from\na ten-page sketch of the habits and customs of Man? How much more\nprofitable it would be to devote that space to the life of some one\ngreat man. This is the principle I have endeavored to apply to my\nanimals. The real personality of the individual, and his view of life\nare my theme, rather than the ways of the race in general, as viewed by\na casual and hostile human eye.\n\nThis may sound inconsistent in view of my having pieced together some of\nthe characters, but that was made necessary by the fragmentary nature\nof the records. There is, however, almost no deviation from the truth in\nLobo, Bingo, and the Mustang.\n\nLobo lived his wild romantic life from 1889 to 1894 in the Currumpaw\nregion, as the ranchmen know too well, and died, precisely as related,\non January 31, 1894.\n\nBingo was my dog from 1882 to 1888, in spite of interruptions, caused by\nlengthy visits to New York, as my Manitoban friends will remember. And\nmy old friend, the owner of Tan, will learn from these pages how his dog\nreally died.\n\nThe Mustang lived not far from Lobo in the early nineties. The story is\ngiven strictly as it occurred, excepting that there is a dispute as to\nthe manner of his death. According to some testimony he broke his neck\nin the corral that he was first taken to. Old Turkeytrack is where he\ncannot be consulted to settle it.\n\nWully is, in a sense, a compound of two dogs; both were mongrels, of\nsome collie blood, and were raised as sheep-dogs. The first part of\nWully is given as it happened, after that it was known only that he\nbecame a savage, treacherous sheep-killer. 
The details of the second\npart belong really to another, a similar yaller dog, who long lived\nthe double-life---a faithful sheep-dog by day, and a bloodthirsty,\ntreacherous monster by night. Such things are less rare than is\nsupposed, and since writing these stories I have heard of another\ndouble-lived sheep-dog that added to its night amusements the crowning\nbarbarity of murdering the smaller dogs of the neighborhood. He had\nkilled twenty, and hidden them in a sandpit, when discovered by his\nmaster. He died just as Wully did.'
RecursiveCharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
# 2. RecursiveCharacterTextSplitter
with open("./data/wild_animals_book.txt", encoding="utf8") as paper:
speech = paper.read()
text_splitter = RecursiveCharacterTextSplitter(
chunk_size = 20,
chunk_overlap = 5,
length_function = len,
add_start_index=True
)
docs = text_splitter.create_documents([speech])
print(len(docs))
print(f"Doc 1: {docs[0]}")
print(f"Doc 2: {docs[1]}")
20661 Doc 1: page_content='The Project' metadata={'start_index': 0} Doc 2: page_content='Gutenberg eBook of' metadata={'start_index': 12}
s = "Python can be easy to pick up whether you're a professional or a beginner."
text = text_splitter.split_text(s)
print(text)
['Python can be easy', 'easy to pick up', "up whether you're a", 'a professional or a', 'or a beginner.']
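The TokenTextSplitter listed above splits on token counts rather than characters, which matches how the LLM actually sees the text. A minimal sketch, assuming tiktoken is installed and reusing the speech variable loaded above:
# pip install tiktoken
from langchain.text_splitter import TokenTextSplitter
token_splitter = TokenTextSplitter(chunk_size=100, chunk_overlap=10)  # sizes are in tokens, not characters
token_docs = token_splitter.create_documents([speech])
print(len(token_docs))
print(token_docs[0].page_content[:200])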
After splitting the document, an embedding should be computed for each chunk and stored in a vector store:
Image retrieved from Paulo Dichone, The Complete LangChain & LLMs Guide
Embedding vectors are numerical representations that capture the content and meaning of text. These vectors are designed so that texts with similar content and meaning will have similar vectors in the high-dimensional space.
After embedding, the query and the text chunks can be compared in the same vector space to find the passages most similar to the query:
Image retrieved from Paulo Dichone, The Complete LangChain & LLMs Guide
Image retrieved from Paulo Dichone, The Complete LangChain & LLMs Guide
import numpy as np
# langchain wrapper for embedding
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
corpus = ["Global warming is happening",
"The weather is not good to play golf today",
"Never compare an apple to an orange",
"Apple and orange are completely different from each other"]
# embed each sentence once, then compare every pair with a dot product
embeds = [embeddings.embed_query(text) for text in corpus]
for itext, embed1 in zip(corpus, embeds):
    for jtext, embed2 in zip(corpus, embeds):
        similarity = np.dot(embed1, embed2)
        print(f"{itext}, {jtext}: Similarity %: {similarity*100}")
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The class `OpenAIEmbeddings` was deprecated in LangChain 0.0.9 and will be removed in 0.3.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import OpenAIEmbeddings`. warn_deprecated(
Global warming is happening, Global warming is happening: Similarity %: 99.99999999999997 Global warming is happening, The weather is not good to play golf today: Similarity %: 80.02220475419695 Global warming is happening, Never compare an apple to an orange: Similarity %: 73.22129176649644 Global warming is happening, Apple and orange are completely different from each other: Similarity %: 72.1563466439271 The weather is not good to play golf today, Global warming is happening: Similarity %: 80.02220475419695 The weather is not good to play golf today, The weather is not good to play golf today: Similarity %: 99.99999999999999 The weather is not good to play golf today, Never compare an apple to an orange: Similarity %: 74.39217044351511 The weather is not good to play golf today, Apple and orange are completely different from each other: Similarity %: 72.88751127003663 Never compare an apple to an orange, Global warming is happening: Similarity %: 73.21530565210482 Never compare an apple to an orange, The weather is not good to play golf today: Similarity %: 74.39217044351511 Never compare an apple to an orange, Never compare an apple to an orange: Similarity %: 100.0 Never compare an apple to an orange, Apple and orange are completely different from each other: Similarity %: 88.8812849499048 Apple and orange are completely different from each other, Global warming is happening: Similarity %: 72.1563466439271 Apple and orange are completely different from each other, The weather is not good to play golf today: Similarity %: 72.88751127003663 Apple and orange are completely different from each other, Never compare an apple to an orange: Similarity %: 88.8812849499048 Apple and orange are completely different from each other, Apple and orange are completely different from each other: Similarity %: 100.00000000000004
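OpenAI embeddings are returned close to unit length, so the dot product above behaves like cosine similarity. For embedding models that are not normalized, cosine similarity should be computed explicitly; a minimal sketch using the same corpus:
def cosine_similarity(a, b):
    # normalize by the vector lengths so the scale of the embeddings does not matter
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

embed1 = embeddings.embed_query(corpus[0])
embed2 = embeddings.embed_query(corpus[1])
print(f"Cosine similarity: {cosine_similarity(embed1, embed2):.4f}")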
# 1. Load a pdf file
loader = PyPDFLoader("./data/paper.pdf")
pages = loader.load()
# 2. Split the document into chunks
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
chunk_size = 1000,
chunk_overlap = 400
)
splits = text_splitter.split_documents(pages)
print(len(splits))
# =============== ==================== #
91
# Real-world example with embeddings!
# Chroma vector database
# pip install chromadb
from langchain.vectorstores import Chroma
persist_directory = "./data/chroma"
vectorstore = Chroma.from_documents(
documents=splits,
embedding=embeddings, # openai embeddings
persist_directory=persist_directory
)
vectorstore.persist() # save this for later usage!
print(vectorstore._collection.count())
91
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: Since Chroma 0.4.x the manual persistence method is no longer supported as docs are automatically persisted. warn_deprecated(
Now we can find the best answer to a query:
query = "what is gas migration?"
docs_resp = vectorstore.similarity_search(query=query, k=3)
print(len(docs_resp))
print(docs_resp[0].page_content)
3 Abboud et al., 2021 ). The Alberta Energy Regulator (AER) in Alberta, Canada, conducts such field tests for energy wells within the province. The AER applies two field tests for the identification of fluid migration after a well is completed to produce hydrocarbon or to inject any fluid: 1. SCVF is the flow of gas (methane, CO 2, etc.) out of the casing annulus or surface casing. SCVF is often referred to as internal migration. Wells with positive SCVF are considered serious in the province of Alberta under one or several of the following conditions: (a) gas-flow rates higher than 300 m3/d, (b) stabilized pressure >9.8 kPa/m, (c) liquid-hydrocarbons, and (d) hydrogen sulfide (H2S) flow (see Alberta Energy Regulator, 2003 , for more information). 2. GM is a flow of any gas that is detectable at surface outside of the outermost casing string. GM is often referred to as seepage or external migration (Alberta Energy Regulator, 2003 ). A GM is
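Chroma can also return a distance score with each document, which is useful for filtering out weak matches; a minimal sketch on the same vector store (lower distance means a closer match):
docs_scores = vectorstore.similarity_search_with_score(query=query, k=3)
for doc, score in docs_scores:
    # print the distance followed by the first 80 characters of the chunk
    print(round(score, 3), doc.page_content[:80])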
import sqlite3
import pandas as pd
# Create a SQL connection to our SQLite database
con = sqlite3.connect("data/chroma/chroma.sqlite3")
cur = con.cursor()
# Return the first 10 rows of the embedding_metadata table
cur.execute('SELECT * FROM embedding_metadata limit 10')
cur.fetchall()
[(1, 'source', './data/paper.pdf', None, None, None), (1, 'page', None, 0, None, None), (1, 'chroma:document', 'RESEARCH ARTICLE\nMachine learning approaches for the prediction of serious\nfluid leakage from hydrocarbon wells\nMehdi Rezvandehy1and Bernhard Mayer2\n1Department of Chemical and Petroleum Engineering, University of Calgary, Calgary, AB, Canada\n2Department of Geoscience, University of Calgary, Calgary, AB, Canada\nCorresponding author: Mehdi Rezvandehy; Email: mehdi.rezvandehy@ucalgary.ca\nReceived: 23 August 2022; Revised: 05 April 2023; Accepted: 14 April 2023\nKeywords: Energy wells; imbalanced class classification; imputation; probability estimation; resampling\nAbstract\nThe exploitation of hydrocarbon reservoirs may potentially lead to contamination of soils, shallow water resources,\nand greenhouse gas emissions. Fluids such as methane or CO 2may in some cases migrate toward the groundwater\nzone and atmosphere through and along imperfectly sealed hydrocarbon wells. Field tests in hydrocarbon-producing', None, None, None), (2, 'source', './data/paper.pdf', None, None, None), (2, 'page', None, 0, None, None), (2, 'chroma:document', 'Abstract\nThe exploitation of hydrocarbon reservoirs may potentially lead to contamination of soils, shallow water resources,\nand greenhouse gas emissions. Fluids such as methane or CO 2may in some cases migrate toward the groundwater\nzone and atmosphere through and along imperfectly sealed hydrocarbon wells. Field tests in hydrocarbon-producing\nregions are routinely conducted for detecting serious leakage to prevent environmental pollution. The challenge isthat testing is costly, time-consuming, and sometimes labor-intensive. In this study, machine learning approaches\nwere applied to predict serious leakage with uncertainty quantification for wells that have not been field tested in\nAlberta, Canada. An improved imputation technique was developed by Cholesky factorization of the covariancematrix between features, where missing data are imputed via conditioning of available values. The uncertainty in', None, None, None), (3, 'source', './data/paper.pdf', None, None, None), (3, 'page', None, 0, None, None), (3, 'chroma:document', 'were applied to predict serious leakage with uncertainty quantification for wells that have not been field tested in\nAlberta, Canada. An improved imputation technique was developed by Cholesky factorization of the covariancematrix between features, where missing data are imputed via conditioning of available values. The uncertainty in\nimputed values was quantified and incorporated into the final prediction to improve decision-making. Next, a wide\nrange of predictive algorithms and various performance metrics were considered to achieve the most reliableclassifier. However, a highly skewed distribution of field tests toward the negative class (nonserious leakage) forcespredictive models to unrealistically underestimate the minority class (serious leakage). To address this issue, a\ncombination of oversampling, undersampling, and ensemble learning was applied. By investigating all the models on', None, None, None), (4, 'source', './data/paper.pdf', None, None, None)]
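The pandas import above can make this inspection easier to read; a minimal sketch that loads the same table into a DataFrame:
# load the metadata table into a DataFrame and close the connection
df_meta = pd.read_sql_query("SELECT * FROM embedding_metadata LIMIT 10", con)
print(df_meta.head())
con.close()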
In LangChain, retrievers help you search and retrieve information from your indexed documents. A retriever is an interface that returns documents based on an unstructured query, which makes it a more general tool than a vector store. Unlike a vector store, a retriever does not need to be able to store documents.
There are three primary steps to RAG development in LangChain: indexing the documents, retrieving the most relevant chunks for a query, and generating an answer from the retrieved context.
Image retrieved from Paulo Dichone, The Complete LangChain & LLMs Guide
## load the persisted db
vector_store = Chroma(persist_directory=persist_directory,
embedding_function=embeddings)
# make a retriever
retriever = vector_store.as_retriever(search_kwargs={"k": 5}) # number of documents to retrieve is 5
docs = retriever.get_relevant_documents("Tell me more about ReAct prompting")
print(retriever.search_type)
similarity
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The method `BaseRetriever.get_relevant_documents` was deprecated in langchain-core 0.1.46 and will be removed in 0.3.0. Use invoke instead. warn_deprecated(
print(docs[0].page_content)
and environmental benefits. Impact Statement Field test operations to detect methane and CO2 leakages from hydrocarbon wells can be costly. Most wells do not have leaks or are categorized as non-serious, which means that no repair is needed until they are abandoned. However, it is crucial to identify and prioritize serious leakages for immediate remediation to prevent environ- mental pollution. This study developed a reliable predictive model by correlating the results of historical fieldtests with various well properties, including age, depth, production/injection history, and deviation, among others. The trained model can predict the likelihood of serious leakage for untested wells, allowing for the prioritization of wells with the highest probability of leaks for field testing. This approach leads to cost-effectivefield testing and environmental benefits.
docs[0]
Document(page_content='and environmental benefits.\nImpact Statement\nField test operations to detect methane and CO2 leakages from hydrocarbon wells can be costly. Most wells do\nnot have leaks or are categorized as non-serious, which means that no repair is needed until they are abandoned.\nHowever, it is crucial to identify and prioritize serious leakages for immediate remediation to prevent environ-\nmental pollution. This study developed a reliable predictive model by correlating the results of historical fieldtests with various well properties, including age, depth, production/injection history, and deviation, among\nothers. The trained model can predict the likelihood of serious leakage for untested wells, allowing for the\nprioritization of wells with the highest probability of leaks for field testing. This approach leads to cost-effectivefield testing and environmental benefits.', metadata={'page': 0, 'source': './data/paper.pdf'})
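As the deprecation warning above suggests, invoke is the forward-compatible way to call a retriever; the sketch below is equivalent to the get_relevant_documents call:
# same retrieval as above, using the newer Runnable interface
docs = retriever.invoke("Tell me more about ReAct prompting")
print(len(docs))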
# Make a chain to answer questions
from langchain.chains import RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
llm=model_chat,
chain_type="stuff",
retriever=retriever,
verbose=True,
return_source_documents=True
)
## Cite sources - helper function to prettify responses
def process_llm_response(llm_response):
print(llm_response['result'])
print('\n\nSources:')
for source in llm_response["source_documents"]:
print(source.metadata['source'])
query = "tell me what is gas migration"
llm_response = qa_chain(query)
process_llm_response(llm_response=llm_response)
> Entering new RetrievalQA chain... > Finished chain. Gas migration refers to the movement of gases, such as methane and CO2, from underground reservoirs to the surface or into surrounding areas like soils, shallow groundwater, or the atmosphere. In the context of energy wells, gas migration can occur due to improperly sealed wells, leading to the escape of gases through surface casing vent flows or other pathways. Monitoring gas migration is essential to detect leakage and prioritize repairs to prevent environmental contamination and greenhouse gas emissions. Sources: ./data/paper.pdf ./data/paper.pdf ./data/paper.pdf ./data/paper.pdf ./data/paper.pdf
query = "what is the application of LU simulation?"
llm_response = qa_chain(query)
print(llm_response['result'])
> Entering new RetrievalQA chain... > Finished chain. The application of LU simulation includes geostatistics for geomo-del modeling, spatial resampling, imputation of missing data, and oversampling to improve the imbalance number of classes for classification. It can also be used for simulating correlated Gaussian realizations and for carrying out conditional simulation to simulate missing data conditioned based on non-missing values.
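The chain_type="stuff" option concatenates every retrieved chunk into a single prompt, which can overflow the context window when k is large. Other chain types process chunks separately before combining them; a minimal sketch with map_reduce, reusing the same retriever and chat model (output not shown):
# map_reduce answers each retrieved chunk separately, then combines the partial answers
qa_chain_mr = RetrievalQA.from_chain_type(
    llm=model_chat,
    chain_type="map_reduce",
    retriever=retriever,
    return_source_documents=True
)
llm_response_mr = qa_chain_mr("tell me what is gas migration")
process_llm_response(llm_response=llm_response_mr)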
LangChain is a framework for developing applications that utilize large language models (LLMs). A core feature of LangChain is its use of "agents"—systems capable of performing diverse tasks by leveraging LLMs, built around modular components that can interact with each other and external data sources.
Image retrieved from Paulo Dichone, The Complete LangChain & LLMs Guide
# Simple agent
llm = OpenAI(temperature=0.25)
print(llm.predict("what is the result of 4.2^3.2"))
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The method `BaseLLM.predict` was deprecated in langchain-core 0.1.7 and will be removed in 0.3.0. Use invoke instead. warn_deprecated(
The result of 4.2^3.2 is approximately 55.78.
4.2**3.2
98.71831395268974
The LLM gives a wrong answer.
print(llm.predict("what is LangChain"))
LangChain is a decentralized blockchain platform that aims to provide a secure and efficient environment for language learning and teaching. It utilizes blockchain technology to create a transparent and decentralized ecosystem where students and teachers can connect, interact, and exchange language learning services without the need for intermediaries. The platform also offers features such as smart contracts, peer-to-peer payments, and a reputation system to ensure fair and reliable transactions. LangChain aims to revolutionize the traditional language learning industry by providing a more accessible, affordable, and personalized learning experience for individuals around the world.
from langchain.agents import Tool, initialize_agent, load_tools
from langchain.chains import LLMMathChain # to fix math issue
llm_math = LLMMathChain.from_llm(llm=llm)
math_tool = Tool(
name="Calculator",
func=llm_math.run,
description="Useful for when you need to answer questions related to Math."
)
tools = [math_tool]
print(tools[0].name, tools[0].description)
Calculator Useful for when you need to answer questions related to Math.
#ReAct framework = Reasoning and Action
# if the LLM cannot answer reliably on its own, the agent can call a tool
agent = initialize_agent(
agent="zero-shot-react-description",
tools=tools,
llm=llm,
verbose=True,
max_iterations=3 # to avoid high bills from the LLM
)
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The function `initialize_agent` was deprecated in LangChain 0.1.0 and will be removed in 0.3.0. Use Use new agent constructor methods like create_react_agent, create_json_agent, create_structured_chat_agent, etc. instead. warn_deprecated(
print(agent("what is the result of 4.2^3.2"))
> Entering new AgentExecutor chain... I should use a calculator to solve this problem Action: Calculator Action Input: 4.2^3.2 Observation: Answer: 98.71831395268974 Thought: I now know the final answer Final Answer: 98.71831395268974 > Finished chain. {'input': 'what is the result of 4.2^3.2', 'output': '98.71831395268974'}
4.2**3.2
98.71831395268974
With the agent, the answer is now correct.
tools = load_tools(  # load_tools gives access to LangChain's pre-built tools
['llm-math'],
llm=llm
)
tools
[Tool(name='Calculator', description='Useful for when you need to answer questions about math.', func=<bound method Chain.run of LLMMathChain(llm_chain=LLMChain(prompt=PromptTemplate(input_variables=['question'], template='Translate a math problem into a expression that can be executed using Python\'s numexpr library. Use the output of running this code to answer the question.\n\nQuestion: ${{Question with math problem.}}\n```text\n${{single line mathematical expression that solves the problem}}\n```\n...numexpr.evaluate(text)...\n```output\n${{Output of running the code}}\n```\nAnswer: ${{Answer}}\n\nBegin.\n\nQuestion: What is 37593 * 67?\n```text\n37593 * 67\n```\n...numexpr.evaluate("37593 * 67")...\n```output\n2518731\n```\nAnswer: 2518731\n\nQuestion: 37593^(1/5)\n```text\n37593**(1/5)\n```\n...numexpr.evaluate("37593**(1/5)")...\n```output\n8.222831614237718\n```\nAnswer: 8.222831614237718\n\nQuestion: {question}\n'), llm=OpenAI(client=<openai.resources.completions.Completions object at 0x000001BF69EC5850>, async_client=<openai.resources.completions.AsyncCompletions object at 0x000001BF69B868B0>, temperature=0.25, openai_api_key='sk-***REDACTED***', openai_proxy='')))>, coroutine=<bound method Chain.arun of LLMMathChain(...)>)]
agent = initialize_agent(
agent="zero-shot-react-description",
tools=tools,
llm=llm,
verbose=True,
max_iterations=3 # to avoid high bills from the LLM
)
print(agent("what is the result of 4.9^3.2"))
> Entering new AgentExecutor chain... I should use a calculator to solve this problem Action: Calculator Action Input: 4.9^3.2 Observation: Answer: 161.66926210092953 Thought: I now know the final answer Final Answer: 161.66926210092953 > Finished chain. {'input': 'what is the result of 4.9^3.2', 'output': '161.66926210092953'}
query = """If I have $ 100.45, and give 20% of that to my brother and 10% to my
sister then receive 56.9 from my father, how much monery I will have at the end?"""
result = agent(query)
print(result['output'])
> Entering new AgentExecutor chain... You should always think about what to do Action: Calculator Action Input: 100.45 - (100.45 * 0.2) - (100.45 * 0.1) + 56.9 Observation: Answer: 127.215 Thought: I now know the final answer Final Answer: $127.215 > Finished chain. $127.215
print(agent("how far (in km) from here to the moon?"))
> Entering new AgentExecutor chain... We can use the distance formula to calculate the distance between two points. Action: Calculator Action Input: distance formula
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) ... KeyError: 'x1' During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) ... ValueError: LLMMathChain._evaluate(" sqrt((x2-x1)**2 + (y2-y1)**2) ") raised error: 'x1'. Please try again with a valid numerical expression
This agent cannot answer this question.
print(agent("What is the capital city of Iran?"))
> Entering new AgentExecutor chain... I don't know the answer, but I can use a calculator to find it. Action: Calculator Action Input: "capital city of Iran"
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) ... ValueError: data type must provide an itemsize During handling of the above exception, another exception occurred: ValueError Traceback (most recent call last) ... ValueError: LLMMathChain._evaluate(" "Tehran" ") raised error: data type must provide an itemsize. Please try again with a valid numerical expression
Since the only tool we loaded is `llm-math`, the agent can only pick the **Calculator** action, so it cannot answer this question.
import os
from dotenv import find_dotenv, load_dotenv
import openai
from langchain.llms import OpenAI
from langchain.agents import Tool, initialize_agent, load_tools
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
#==== Using OpenAI Chat API =======
llm_model = "gpt-3.5-turbo"
llm = OpenAI(temperature=0.2)
# A second, general-purpose tool: a plain LLM chain
prompt = PromptTemplate(
input_variables=["query"],
template="{query}"
)
llm_chain = LLMChain(llm=llm, prompt=prompt)
# Initialize the LLM Tool
llm_tool = Tool(
name="Language Model",
func=llm_chain.run,
description="Use this tool for general queries and logic"
)
tools = load_tools(
['llm-math'],
llm=llm
)
tools.append(llm_tool) # adding the new tool to our tools list
#ReAct framework = Reasoning and Action
agent = initialize_agent(
agent="zero-shot-react-description",
tools=tools,
llm=llm,
verbose=True,
max_iterations=3 # to avoid high bills from the LLM
)
query = "What is the capital city of Iran?"
print(agent.agent.llm_chain.prompt.template)
Answer the following questions as best you can. You have access to the following tools: Calculator(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Useful for when you need to answer questions about math. Language Model(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Use this tool for general queries and logic Use the following format: Question: the input question you must answer Thought: you should always think about what to do Action: the action to take, should be one of [Calculator, Language Model] Action Input: the input to the action Observation: the result of the action ... (this Thought/Action/Action Input/Observation can repeat N times) Thought: I now know the final answer Final Answer: the final answer to the original input question Begin! Question: {input} Thought:{agent_scratchpad}
result = agent(query)
print(result['output'])
> Entering new AgentExecutor chain... I should use a language model to answer this question. Action: Language Model Action Input: "What is the capital city of Iran?" Observation: The capital city of Iran is Tehran. Thought: I now know the final answer. Final Answer: Tehran > Finished chain. Tehran
query = """If I have $100.45, and give 20% of that to my brother and 10% to my
sister then receive 56.9 from my father, how much monery I will have at the end?"""
result = agent(query)
print(result['output'])
> Entering new AgentExecutor chain...
 I need to calculate the total amount of money I have and then subtract 20% and 10% from it, and then add 56.9 to the result.
Action: Calculator
Action Input: 100.45 - (20% of 100.45) - (10% of 100.45) + 56.9
Observation: Answer: 127.215
Thought: I now know the final answer
Final Answer: At the end, I will have $127.215.

> Finished chain.
At the end, I will have $127.215.
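As a quick sanity check of the Calculator result (not part of the original run), the arithmetic can be reproduced directly in Python:
# reproduce the agent's calculation: subtract 20% and 10% of 100.45, then add 56.9
total = 100.45 - 0.20 * 100.45 - 0.10 * 100.45 + 56.9
print(round(total, 3))  # 127.215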
LangChain provides several agent types:
zero-shot-react-description
#ReAct framework = Reasoning and Action
agent = initialize_agent(
agent="zero-shot-react-description",
tools=tools,
llm=llm,
verbose=True,
max_iterations=3 # to avoid high bills from the LLM
)
query = "What is the capital city of Iran?"
# show the template used by our agent to represent what is going on under the hood
print(agent.agent.llm_chain.prompt.template)
Answer the following questions as best you can. You have access to the following tools:

Calculator(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Useful for when you need to answer questions about math.
Language Model(*args: Any, callbacks: Union[List[langchain_core.callbacks.base.BaseCallbackHandler], langchain_core.callbacks.base.BaseCallbackManager, NoneType] = None, tags: Optional[List[str]] = None, metadata: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Any - Use this tool for general queries and logic

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [Calculator, Language Model]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}
query = """who is donald trump?"""
result = agent(query)
print(result['output'])
> Entering new AgentExecutor chain...
 I should use a language model to answer this question.
Action: Language Model
Action Input: "Who is Donald Trump?"
Observation: Donald Trump is a businessman, television personality, and politician who served as the 45th President of the United States from 2017 to 2021. He was born on June 14, 1946 in New York City and grew up in Queens. Trump is known for his real estate empire, including the development of luxury properties such as Trump Tower in New York City. He also gained fame as the host of the reality TV show "The Apprentice." In 2016, Trump ran for president as the Republican nominee and won the election. During his presidency, he implemented policies on immigration, trade, and taxes, and faced numerous controversies and impeachment proceedings. He left office in January 2021 after losing the 2020 election to Joe Biden.
Thought: I now know the final answer.
Final Answer: Donald Trump is a businessman, television personality, and politician who served as the 45th President of the United States from 2017 to 2021.

> Finished chain.
Donald Trump is a businessman, television personality, and politician who served as the 45th President of the United States from 2017 to 2021.
conversational-react-description
import os
from dotenv import find_dotenv, load_dotenv
import openai
from langchain.llms import OpenAI
from langchain.agents import Tool, initialize_agent, load_tools
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
#==== Using OpenAI Chat API =======
llm_model = "gpt-3.5-turbo"
llm = OpenAI(temperature=0.0)
# memory
memory = ConversationBufferMemory(memory_key="chat_history")
# Second Generic Tool
prompt = PromptTemplate(
input_variables=["query"],
template="{query}"
)
llm_chain = LLMChain(llm=llm, prompt=prompt)
# Initialize the LLM Tool
llm_tool = Tool(
name="Language Model",
func=llm_chain.run,
description="Use this tool for general queries and logic"
)
tools = load_tools(
['llm-math'],
llm=llm
)
tools.append(llm_tool) # adding the new tool to our tools list
# Conversational Agent
conversational_agent = initialize_agent(
agent="conversational-react-description",
tools=tools,
llm=llm,
verbose=True,
max_iterations=3,
memory=memory
)
print(conversational_agent.agent.llm_chain.prompt.template)
Assistant is a large language model trained by OpenAI.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.

Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.

TOOLS:
------
Assistant has access to the following tools:

> Calculator: Useful for when you need to answer questions about math.
> Language Model: Use this tool for general queries and logic

To use a tool, please use the following format:

```
Thought: Do I need to use a tool? Yes
Action: the action to take, should be one of [Calculator, Language Model]
Action Input: the input to the action
Observation: the result of the action
```

When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:

```
Thought: Do I need to use a tool? No
AI: [your response here]
```

Begin!

Previous conversation history:
{chat_history}

New input: {input}
{agent_scratchpad}
query_1 = "I was married at 2012"
query_2 = "I completed my phd 5 years after that"
query_3 = "At what year, I completed my PhD?"
result = conversational_agent(query_1)
results = conversational_agent(query_2)
results = conversational_agent(query_3)
print(result['output'])
> Entering new AgentExecutor chain...
Thought: Do I need to use a tool? Yes
Action: Calculator
Action Input: 2021 - 2012
Observation: Answer: 9
Thought: Do I need to use a tool? No
AI: Congratulations on your marriage! How has your relationship evolved since then?
> Finished chain.

> Entering new AgentExecutor chain...
Thought: Do I need to use a tool? Yes
Action: Language Model
Action Input: Can you tell me more about your PhD?
Observation: Sure, my PhD is in the field of psychology, specifically in the area of social psychology. My research focuses on the influence of social media on self-esteem and body image in young adults. I am interested in understanding how social media use affects individuals' perceptions of themselves and their bodies, and how this can impact their mental health and well-being. To conduct my research, I have been using a combination of quantitative and qualitative methods, such as surveys, interviews, and content analysis. I have also been conducting experiments to test the causal relationship between social media use and self-esteem and body image. My PhD journey has been both challenging and rewarding. I have had the opportunity to collaborate with other researchers, present my work at conferences, and publish my findings in academic journals. I have also had the chance to teach undergraduate courses and mentor students, which has been a fulfilling experience. Overall, my goal with my PhD is to contribute to the understanding of the effects of social media on individuals' mental health and to provide evidence-based recommendations for promoting positive body image and self-esteem in the digital age.
Thought: Do I need to use a tool? No
AI: That's really interesting! It sounds like your research has important implications for our society today. How do you plan on using your findings to make a positive impact?
> Finished chain.

> Entering new AgentExecutor chain...
Thought: Do I need to use a tool? Yes
Action: Calculator
Action Input: 2012 + 5
Observation: Answer: 2017
Thought: Do I need to use a tool? No
AI: You completed your PhD in 2017. That's quite an accomplishment! What was your area of study?
> Finished chain.

Congratulations on your marriage! How has your relationship evolved since then?
react-docstore (docstore)
import os
from dotenv import find_dotenv, load_dotenv
import openai
from langchain.llms import OpenAI
from langchain.agents import Tool, initialize_agent, load_tools
from langchain import Wikipedia
from langchain.agents.react.base import DocstoreExplorer
#==== Using OpenAI Chat API =======
llm_model = "gpt-3.5-turbo"
llm = OpenAI(temperature=0.0)
#pip install wikipedia
docstore = DocstoreExplorer(Wikipedia())
tools = [
Tool(
name="Search",
func=docstore.search,
description="search wikipedia"
),
Tool(
name="Lookup",
func=docstore.lookup,
description="lookup a term in wikipedia"
)
]
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The class `DocstoreExplorer` was deprecated in LangChain 0.1.0 and will be removed in 0.3.0
  warn_deprecated(
# initialize our agent
docstore_agent = initialize_agent(
tools,
llm,
agent="react-docstore",
verbose=True,
max_iterations=3
)
query = "Who is Ali Daei?"
result = docstore_agent.run(query)
# print(docstore_agent.agent.llm_chain.prompt.template)
> Entering new AgentExecutor chain...
Thought: I need to search Ali Daei and find out who he is.
Action: Search[Ali Daei]
Observation: Could not find [Ali Daei]. Similar: ['Ali Daei', 'Almoez Ali', "List of men's footballers with 50 or more international goals", 'Persepolis F.C.', 'List of international goals scored by Ali Daei', 'Ali Karimi', 'Saipa F.C.', 'Iran national football team', 'List of Iran national football team managers', 'Saba Qom F.C.']
Thought: Ali Daei is not a well-known person, so I need to search for more information about him.
Action: Search[Ali Daei football]
Observation: Ali Daei (Persian: pronounced [ʔæliː dɑːjiː]; born 21 March 1969) is an Iranian football manager and former professional footballer. A striker, he was the captain of the Iranian national team between 2000 and 2006. He played in the German Bundesliga for Arminia Bielefeld, Bayern Munich and Hertha Berlin. He is regarded as one of the greatest Iranian footballers of all time as well as one of the greatest footballers from Asia. A tall forward, Daei was a prolific goal-scorer, who was known for his heading accuracy and ability in the air. He was the world's top international goal-scorer with 108 goals until his record was broken by Cristiano Ronaldo in 2021 and went to third after being surpassed again by Lionel Messi in 2024. During his playing career, Daei was appointed a UNICEF Goodwill Ambassador in 2001. Following his retirement, Daei served as a member of the FIFA Football Committee between 2007 and 2013. In 2014, he was inducted into the Asian Football Hall of Fame.
Thought: Ali Daei is a retired Iranian football player, considered one of the greatest in Asia.
Action: Finish[retired Iranian football player]
> Finished chain.
import os
from dotenv import find_dotenv, load_dotenv
import openai
from langchain.llms import OpenAI
from langchain.agents import Tool, initialize_agent, load_tools
from langchain import SerpAPIWrapper
First, we need SerpAPI for web search. Register for an API key at https://serpapi.com/ and add it to the .env file:
os.environ["SERPAPI_API_KEY"] = "......"
SERP_API_KEY = os.getenv("SERPAPI_API_KEY") # must get the api key and add to .env go to https://serpapi.com/
# pip install google-search-results
#==== Using OpenAI Chat API =======
llm_model = "gpt-3.5-turbo"
llm = OpenAI(temperature=0.7)
search = SerpAPIWrapper(serpapi_api_key=SERP_API_KEY)
# tools
tools = [
Tool(
name="Intermediate Answer",
func=search.run,
description="google search"
)
]
# initialize our agent
self_ask_with_search = initialize_agent(
tools,
llm,
agent='self-ask-with-search',
handle_parsing_errors=True,
verbose=True
)
query = "What is largest ocean in the world?"
result = self_ask_with_search(query)
> Entering new AgentExecutor chain...
Could not parse output: No.
Intermediate answer: Invalid or incomplete response
So the final answer is: Pacific Ocean
> Finished chain.
See this page for other agent types https://python.langchain.com/docs/modules/agents/agent_types/
This section shows how to extract the required fields (previous balance, individual utility charges, due date, and total amount due) from monthly bills that are in PDF format:
from langchain.llms import OpenAI
from pypdf import PdfReader
import pandas as pd
import re
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_types import AgentType
import PyPDF2
import openai
import os
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
openai.api_key = os.getenv("OPENAI_API_KEY")
# get Info from PDF file
def pdf_text(pdf_doc):
text="" # make empty text
pdf_reader = PyPDF2.PdfReader(pdf_doc)
for page in pdf_reader.pages: # read each page and convert to text
text += page.extract_text()
return text
openai.api_key = os.getenv("OPENAI_API_KEY")
# get data from text of pdf
def extracted_data(pages_data):
template = """Extract all the following values: "Previous balance", "Electricity", "Natural Gas",
"Water Treatment and Supply", "Wastewater Collection and Treatment",
"Stormwater Management", "Waste and Recycling", "Due Date" and "Total Amount Due".
First read the text to find the key phrase.
{pages}
Expected output: dollar sign should be removed
{{"Due Date": "2024 March 05", "Total Amount Due": 4568, "Previous balance": 546, "Electricity": 124, "Natural Gas": 452, "Water Treatment and Supply": 456, "Wastewater Collection and Treatment": 145, "Stormwater Management": 12, "Waste and Recycling": 12}}
Please notice "Due Date" comes after "If payment is received after".
"""
prompt_template = PromptTemplate(input_variables=["pages"], template=template)
llm = OpenAI(temperature=0.0)
full_response = llm(prompt_template.format(pages=pages_data))
full_response = full_response.replace('\n','')
return full_response
# create documents from the uploaded pdfs
def create_docs(user_pdf_list):
df = pd.DataFrame({"Due Date": pd.Series(dtype='str'),
"Total Amount Due": pd.Series(dtype='str'),
"Previous balance": pd.Series(dtype='int'),
"Electricity": pd.Series(dtype='str'),
"Natural Gas": pd.Series(dtype='str'),
"Wastewater Collection and Treatment": pd.Series(dtype='str'),
"Stormwater Management": pd.Series(dtype='int'),
"Water Treatment and Supply": pd.Series(dtype='str'),
"Waste and Recycling": pd.Series(dtype='str')
})
ir = 1
for filename in user_pdf_list:
print(f"File {ir}: {filename}")
ir+=1
raw_data = pdf_text(filename)
#key_phrase1 = "If payment "
#key_phrase2 = "Free Outside Alberta:"
llm_extracted_data = extracted_data(raw_data)
pattern = r'{(.+)}' # capture one or more of any character, except newline
match = re.search(pattern, llm_extracted_data, re.DOTALL)
if match:
extracted_text = match.group(1)
# Converting the extracted text to a dictionary
data_dict = eval('{' + extracted_text + '}')
else:
print("Nothing found.")
df = pd.concat([df, pd.DataFrame([data_dict])], ignore_index=True)
#df=df.append(save_to_dataframe(llm_extracted_data), ignore_index=True)
return df
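The parsing step inside create_docs relies on eval, which executes whatever string the model returns. A safer drop-in sketch (assuming the model follows the prompt and returns valid JSON with double-quoted keys) is to parse it with json.loads:
import json
# safer parsing sketch: treat the extracted text as JSON instead of evaluating it
match = re.search(r'\{.+\}', llm_extracted_data, re.DOTALL)
if match:
    data_dict = json.loads(match.group(0))
else:
    print("Nothing found.")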
print("----------Load PDF files----------")
print("")
user_pdf_list=[
"BillExtractor/pdfs/2024_January.pdf",
"BillExtractor/pdfs/2024_February.pdf",
"BillExtractor/pdfs/2024_March.pdf",
]
df = create_docs(user_pdf_list)
print('\n')
print('Here is extracted information from the bills')
df
----------Load PDF files----------

File 1: BillExtractor/pdfs/2024_January.pdf
D:\Learning\MyWebsite\LangChain\vm_langchain\lib\site-packages\langchain_core\_api\deprecation.py:119: LangChainDeprecationWarning: The method `BaseLLM.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 0.3.0. Use invoke instead.
  warn_deprecated(
File 2: BillExtractor/pdfs/2024_February.pdf
File 3: BillExtractor/pdfs/2024_March.pdf

Here is extracted information from the bills
| | Due Date | Total Amount Due | Previous balance | Electricity | Natural Gas | Wastewater Collection and Treatment | Stormwater Management | Water Treatment and Supply | Waste and Recycling |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2024 February 12 | 362.97 | 355.22 | 101.48 | 113.66 | 54.95 | 17.71 | 40.25 | 24.17 |
| 1 | 2024 March 11 | 463.31 | 362.97 | 102.19 | 224.10 | 44.77 | 14.59 | 33.11 | 28.23 |
| 2 | 2024 April 08 | 361.63 | 463.31 | 104.71 | 131.92 | 42.58 | 15.11 | 30.63 | 24.84 |
In this section, a simple chatbot is developed to retrieve information from a PDF.
import os
from dotenv import find_dotenv, load_dotenv
import openai
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
# this helper makes our life easier when it
# comes to chatting with a document
from langchain.chains.question_answering import load_qa_chain
load_dotenv(find_dotenv())
openai.api_key = os.getenv("OPENAI_API_KEY")
#==== Using OpenAI Chat API =======
llm_model = "gpt-3.5-turbo"
llm = ChatOpenAI(temperature=0.0, model=llm_model)
#### === packages to install ====
# pip install langchain pypdf openai chromadb tiktoken docx2txt
# load a pdf file
loader_pdf = PyPDFLoader('./MultiDocsChat/SampleResume.pdf')
docs = loader_pdf.load()
#set up question answering chain
chain = load_qa_chain(llm, verbose=True)
query = 'What is the name of person?'
response = chain.run(input_documents=docs,
question=query)
print(response)
> Entering new StuffDocumentsChain chain...

> Entering new LLMChain chain...
Prompt after formatting:
System: Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
DANA LEE
Sales Representative
danalee@email.com (123) 456-7890 Provo, UT LinkedIn

WORK EXPERIENCE
Sales Representative, Allied, November 2017 - current, Provo, UT
Promoted the value of the customer loyalty program, leading to a 12% out-performance of sign-up targets
Performed in the top 5% of sales representatives in 2019 and 2020 in the intermountain region
Collaborated directly with potential clients, providing contract estimates and building trust that resulted in 5-year customer loyalty on average
Ensured customers received quality customer service, reducing the likelihood of negative customer reviews by 80%
Attended 4 networking events annually, building relations with 15+ commercial business customers

Entry Level Sales Representative, Sparrow Electric, April 2014 - November 2017, Provo, UT
Cold called 30+ potential customers per shift, providing pertinent information that resulted in a 20% sign-up rate
Maintained up-to-date knowledge on 150+ Sparrow Electric products, promoting specific products to meet customer needs
Collaborated with 20+ sales representatives, ensuring all team members felt equipped with information to succeed
Attended job training events, including courses, symposiums, and workshops that improved job performance by 17%

Sales Associate, RMI Distributing, January 2012 - April 2014, Provo, UT
Established trust in customer relations, leading to a 19% increase in customer retention
Collaborated on marketing projects to improve brand awareness and increase inbound customer interest, resulting in $500K revenue beyond targets
Overhauled lead generation techniques, increasing the number of new customers by 8% per quarter
Understood customer needs, leading to a 40% close rate

EDUCATION
High School Diploma, Provo High School, August 2006 - May 2010, Provo, UT

SKILLS
Product knowledge, Negotiation, Interpersonal communication, Attention to detail, Customer relationships, Cold-prospecting
Human: What is the name of person?

> Finished chain.

> Finished chain.
The name of the person is Dana Lee.
Now we want to chat with the document. The first step is to split the document into chunks.
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
# load a pdf file
loader_pdf = PyPDFLoader('./MultiDocsChat/SampleResume.pdf')
docs = loader_pdf.load()
# Split the data into chunks
text_splitter = CharacterTextSplitter(
chunk_size=500,
chunk_overlap=100
)
docs_chunks = text_splitter.split_documents(docs)
# create our vector db chromadb
vectordb = Chroma.from_documents(
documents=docs_chunks,
embedding=OpenAIEmbeddings(),
persist_directory='./data'
)
vectordb.persist()
# RetrievalQA chain to get info from the vectorstore
chain_qa = RetrievalQA.from_chain_type(
llm,
retriever=vectordb.as_retriever(search_kwargs={'k':4}),
return_source_documents=True
)
#result_qa = chain_qa('What is LEE SKILLS in bullet points?')
result_qa = chain_qa('Where did LEE work from 2012 to 2013?')
print(result_qa['result'])
Dana Lee worked as a Sales Associate at RMI Distributing from January 2012 to April 2014.
This chatbot does not have any memory: each question is answered independently of the previous ones. A minimal sketch of adding memory is shown below; the Streamlit app that follows uses ConversationalRetrievalChain and passes a chat_history list to it.
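A minimal sketch (not from the original notebook) of giving the retrieval chatbot memory, assuming the llm and vectordb objects created above:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# the memory object keeps the running conversation under the "chat_history" key
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

chat_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectordb.as_retriever(search_kwargs={'k': 4}),
    memory=memory
)

# follow-up questions can now refer back to earlier turns
print(chat_chain({'question': 'Where did Dana Lee work first?'})['answer'])
print(chat_chain({'question': 'When did she start working there?'})['answer'])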
import os
from dotenv import find_dotenv, load_dotenv
import openai
import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader # to load pdf files
from langchain.document_loaders import Docx2txtLoader # to load word files
from langchain.document_loaders import TextLoader # to load text files
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
# this helper makes our life easier when it
# comes to chatting with a document
from langchain.chains.question_answering import load_qa_chain
from streamlit_chat import message # pip install streamlit_chat
load_dotenv(find_dotenv())
openai.api_key = os.getenv("OPENAI_API_KEY")
#==== Using OpenAI Chat API =======
llm_model = "gpt-3.5-turbo"
llm = ChatOpenAI(temperature=0.0, model=llm_model)
#### === packages to install ====
# pip install langchain pypdf openai chromadb tiktoken docx2txt
# load a pdf file
files = st.file_uploader("Please upload your files", accept_multiple_files=True,
type=["txt", "docx", "pdf"])
if files:
documents = []
if files is not None:
for ifiles in files:
if ifiles.name[-4:] == '.txt':
loader = TextLoader(ifiles.name)
documents.extend(loader.load())
elif ifiles.name[-5:] == '.docx' or ifiles.name[-4:] == '.doc':
loader = Docx2txtLoader(ifiles.name)
documents.extend(loader.load())
elif ifiles.name[-4:] == '.pdf':
loader = PyPDFLoader(ifiles.name)
documents.extend(loader.load())
# load files
chat_history = []
# split the data into chunks
text_splitter = CharacterTextSplitter(
chunk_size=500,
chunk_overlap=5
)
docs = text_splitter.split_documents(documents)
# create vector db chromadb
vectordb = Chroma.from_documents(
documents=docs, # use the split chunks, not the raw documents
embedding=OpenAIEmbeddings(),
persist_directory='./MultiDocsChat/data'
)
vectordb.persist()
chain_qa = ConversationalRetrievalChain.from_llm(
llm,
vectordb.as_retriever(search_kwargs={'k': 5}),
return_source_documents=True,
verbose=False
)
#-------- Streamlit front-end #--------
st.title("QA Bot for Documents by Langchain")
st.header("You can ask anything about your document... 🤖")
if 'produced' not in st.session_state:
st.session_state['produced'] = []
if 'old' not in st.session_state:
st.session_state['old'] = []
# get the user input
user_input = st.chat_input("Ask a question from your documents...")
if user_input:
result = chain_qa({'question': user_input, 'chat_history': chat_history})
st.session_state.old.append(user_input)
st.session_state.produced.append(result['answer'])
if st.session_state['produced']:
for i in range(len(st.session_state['produced'])):
message(st.session_state['old'][i], is_user=True, key=str(i)+ '_user')
message(st.session_state['produced'][i], key=str(i))
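The cell above is meant to run as a Streamlit script rather than inside the notebook. Assuming it is saved as multi_doc_chatbot.py (the file name here is just an example), it can be launched with:
streamlit run multi_doc_chatbot.py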
The dataset is sourced from Kaggle.
Overview:
It contains three question files, one for each student year: S08, S09, and S10, along with 690,000 words of cleaned text from Wikipedia, which was used to generate the questions.
The "question_answer_pairs.txt" files include both the questions and answers. The columns in this file are:
Questions deemed poor in quality were excluded from this dataset. We only need Question and answer Answer column.
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_loaders.merge import MergedDataLoader
import pandas as pd
pd.set_option('display.max_colwidth', None)
# List of files
csv_files = ['./question_answering/S08_question_answer_pairs.csv',
'./question_answering/S09_question_answer_pairs.csv',
'./question_answering/S10_question_answer_pairs.csv']
## Load and concatenate the DataFrames
#dataframes = [load_txt_file(file) for file in txt_files]
#merged_df = pd.concat(dataframes, ignore_index=True)
#
## Display the merged DataFrame
#merged_df
clmns = ['ArticleTitle', 'Question', 'Answer']
pd.read_csv(csv_files[0])
df = pd.concat(map(pd.read_csv, [csv_files[0], csv_files[1], csv_files[2]]))[clmns]
df.reset_index(drop=True, inplace=True)
df = df.drop_duplicates(
subset=['ArticleTitle',
'Question'],
keep='first').reset_index(drop=True)
df.head()
| | ArticleTitle | Question | Answer |
|---|---|---|---|
| 0 | Abraham_Lincoln | Was Abraham Lincoln the sixteenth President of the United States? | yes |
| 1 | Abraham_Lincoln | Did Lincoln sign the National Banking Act of 1863? | yes |
| 2 | Abraham_Lincoln | Did his mother die of pneumonia? | no |
| 3 | Abraham_Lincoln | How many long was Lincoln's formal education? | 18 months |
| 4 | Abraham_Lincoln | When did Lincoln begin his political career? | 1832 |
questions_answers = []
for index, row in df.iterrows():
txt = f"ArticleTitle: {row['ArticleTitle']}, Question: {row['Question']}, Answer: {row['Answer']}"
questions_answers.append(txt+"\n")
questions_answers = ' '.join(questions_answers)
len(questions_answers)
303781
# Split the data into chunks
text_splitter = CharacterTextSplitter(separator="\n",
chunk_size=800,
chunk_overlap=400
)
qa_chunks = text_splitter.split_text(questions_answers)
Created a chunk of size 988, which is longer than the specified 800
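The warning appears because CharacterTextSplitter only splits on the single "\n" separator, so any piece longer than chunk_size is kept whole. A sketch (not used in this notebook) of avoiding it with RecursiveCharacterTextSplitter, which falls back to smaller separators:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# try to split on newlines first, then commas, then spaces
recursive_splitter = RecursiveCharacterTextSplitter(
    separators=["\n", ", ", " "],
    chunk_size=800,
    chunk_overlap=400
)
qa_chunks_alt = recursive_splitter.split_text(questions_answers)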
qa_chunks[:2]
["ArticleTitle: Abraham_Lincoln, Question: Was Abraham Lincoln the sixteenth President of the United States?, Answer: yes\n ArticleTitle: Abraham_Lincoln, Question: Did Lincoln sign the National Banking Act of 1863?, Answer: yes\n ArticleTitle: Abraham_Lincoln, Question: Did his mother die of pneumonia?, Answer: no\n ArticleTitle: Abraham_Lincoln, Question: How many long was Lincoln's formal education?, Answer: 18 months\n ArticleTitle: Abraham_Lincoln, Question: When did Lincoln begin his political career?, Answer: 1832\n ArticleTitle: Abraham_Lincoln, Question: What did The Legal Tender Act of 1862 establish?, Answer: the United States Note, the first paper currency in United States history", "ArticleTitle: Abraham_Lincoln, Question: How many long was Lincoln's formal education?, Answer: 18 months\n ArticleTitle: Abraham_Lincoln, Question: When did Lincoln begin his political career?, Answer: 1832\n ArticleTitle: Abraham_Lincoln, Question: What did The Legal Tender Act of 1862 establish?, Answer: the United States Note, the first paper currency in United States history\n ArticleTitle: Abraham_Lincoln, Question: Who suggested Lincoln grow a beard?, Answer: 11-year-old Grace Bedell\n ArticleTitle: Abraham_Lincoln, Question: When did the Gettysburg address argue that America was born?, Answer: 1776\n ArticleTitle: Abraham_Lincoln, Question: Did Lincoln beat John C. Breckinridge in the 1860 election?, Answer: yes"]
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_core.retrievers import BaseRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain.schema.document import Document
from typing import List
embedding = OpenAIEmbeddings()
persist_directory = "./chroma_qa"
# create our vector db chromadb
vectordb = Chroma.from_texts(
texts=qa_chunks,
embedding=embedding,
persist_directory=persist_directory
)
vectordb.persist()
import sqlite3
import pandas as pd
persist_directory = "./chroma_qa/chroma.sqlite3"
# Create a SQL connection to our SQLite database
con = sqlite3.connect(persist_directory)
cur = con.cursor()
# Return all results of query
cur.execute('SELECT * FROM embedding_metadata limit 10')
#cur.fetchall()
<sqlite3.Cursor at 0x1bf154902d0>
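Since fetchall() is commented out, the cell only shows the cursor object. A quick sketch (assuming the embedding_metadata table layout of the installed Chroma version) to view the rows with pandas instead:
# load a few rows of the persisted metadata into a DataFrame
rows = pd.read_sql_query("SELECT * FROM embedding_metadata LIMIT 10", con)
rows.head()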
## load the persisted db
vector_store = Chroma(persist_directory="./chroma_qa",
embedding_function=OpenAIEmbeddings())
# make a retriever
retriever_qa = vector_store.as_retriever(search_type="similarity",
search_kwargs={"k": 1}) # number of document to get is 4
docs = retriever_qa.get_relevant_documents("When was the Six Day War?")
print(retriever_qa.search_type)
similarity
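To see what the retriever actually returned for the question above, the docs list can be inspected (a quick check, not part of the original output):
print(len(docs)) # 1, because k=1
print(docs[0].page_content) # the chunk most similar to the question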
llm_model = "gpt-3.5-turbo"
llm = ChatOpenAI(temperature=0.0, model=llm_model)
# RetrievalQA chain to get info from the vectorstore
chain_qa = RetrievalQA.from_chain_type(
llm,
chain_type="map_reduce",
retriever=retriever_qa,
verbose=True,
return_source_documents=True,
)
#result_qa = chain_qa('What is LEE SKILLS in bullet points?')
result_qa = chain_qa("When was the Six Day War?")
print(result_qa['result'])
> Entering new RetrievalQA chain...

> Finished chain.
The Six-Day War took place from June 5 to June 10, 1967.
prompt_template = """
{question}
"""
# Define the PromptTemplate with the custom template
prompt = PromptTemplate(
input_variables=["question"], # The variable used inside the template
template=prompt_template # The custom template defined above
)
# Create a custom query with the template
query = "When was the Six Day War?"
formatted_query = prompt.format(question=query)
llm_response = chain_qa({'query': formatted_query})
print(llm_response['result'])
> Entering new RetrievalQA chain...

> Finished chain.
The Six-Day War took place from June 5 to June 10, 1967.
import os
import time
from dotenv import find_dotenv, load_dotenv
import openai
import pandas as pd
import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader # to load pdf files
from langchain.document_loaders import Docx2txtLoader # to load word files
from langchain.document_loaders import TextLoader # to load text files
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
# this helper makes our life easier when it
# comes to chatting with a document
from langchain.chains.question_answering import load_qa_chain
from streamlit_chat import message # pip install streamlit_chat
load_dotenv(find_dotenv())
openai.api_key = os.getenv("OPENAI_API_KEY")
#==== Using OpenAI Chat API =======
llm_model = "gpt-3.5-turbo"
llm = ChatOpenAI(temperature=0.0, model=llm_model)
#### === packages to install ====
# pip install langchain pypdf openai chromadb tiktoken docx2txt
#-------- Streamlit front-end #--------
st.title("Document QA Bot powered by LangChain")
st.header("Feel free to ask any questions about your document... 🤖")
# Load CSV files
files = st.file_uploader("Please upload your files", accept_multiple_files=True, type=["csv"])
if files:
dfs = []
clmns = ['ArticleTitle', 'Question', 'Answer']
# Read each uploaded CSV file and filter the required columns
for file in files:
df = pd.read_csv(file)
if all(col in df.columns for col in clmns):
dfs.append(df[clmns])
else:
st.warning(f"File {file.name} does not contain the required columns: {clmns}")
# Concatenate all DataFrames
if dfs:
merged_df = pd.concat(dfs, ignore_index=True)
else:
st.warning("No valid files uploaded.")
# Example function to simulate a time-consuming task
def long_task():
time.sleep(3) # Simulate a 3-second task
# Display spinner while running the long task
with st.spinner("Please wait, processing..."):
long_task()
merged_df.reset_index(drop=True, inplace=True)
merged_df = merged_df.drop_duplicates(
subset=['ArticleTitle',
'Question'],
keep='first').reset_index(drop=True)
st.write(merged_df.head())
questions_answers = []
for index, row in merged_df.iterrows():
txt = f"ArticleTitle: {row['ArticleTitle']}, Question: {row['Question']}, Answer: {row['Answer']}"
questions_answers.append(txt+"\n")
questions_answers = ' '.join(questions_answers)
# Split the data into chunks
text_splitter = CharacterTextSplitter(separator="\n",
chunk_size=800,
chunk_overlap=400
)
qa_chunks = text_splitter.split_text(questions_answers)
prompt_template = """
{question}
"""
# Define the PromptTemplate with the custom template
prompt = PromptTemplate(
input_variables=["question"], # The variable used inside the template
template=prompt_template # The custom template defined above
)
embedding = OpenAIEmbeddings()
# create our vector db chromadb
vectordb = Chroma.from_texts(
texts=qa_chunks,
embedding=embedding,
persist_directory='./chroma_qa'
)
vectordb.persist()
# Display spinner while running the long task
with st.spinner("Please wait, processing..."):
long_task()
# RetrievalQA chain to get info from the vectorstore
chain_qa = RetrievalQA.from_chain_type(
llm,
retriever=vectordb.as_retriever(search_kwargs={'k':2}),
return_source_documents=True,
verbose=True
)
# load files
chat_history = []
if 'produced' not in st.session_state:
st.session_state['produced'] = []
if 'old' not in st.session_state:
st.session_state['old'] = []
# get the user input
user_input = st.chat_input("Ask a question from your documents...")
if user_input:
formatted_query = prompt.format(question=user_input)
result = chain_qa({'query': formatted_query, 'chat_history': chat_history})
st.session_state.old.append(formatted_query)
st.session_state.produced.append(result['result'])
if st.session_state['produced']:
for i in range(len(st.session_state['produced'])):
message(st.session_state['old'][i], is_user=True, key=str(i)+ '_user')
message(st.session_state['produced'][i], key=str(i))
# Display spinner while running the long task
with st.spinner("Please wait, processing..."):
long_task()