LangChain¶
In [ ]:
Copied!
# Install RAGatouille (ColBERT-based retrieval), LangChain plus its OpenAI /
# core / community integration packages, and pypdf for PDF loading.
# (The scraped page repeated this cell; a single run installs everything.)
!pip install -q -U ragatouille
!pip install -q langchain langchain-openai langchain-core langchain-community pypdf
In [ ]:
Copied!
# Load the pretrained ColBERTv2 checkpoint from the Hugging Face Hub.
# `RAG` is the entry point for indexing and searching in the cells below.
# (Deduplicated: the scrape repeated this cell, loading the model twice.)
from ragatouille import RAGPretrainedModel

RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
In [ ]:
Copied!
# Load the Orca paper PDF and split it into one LangChain Document per page.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("Orca_paper.pdf")
pages = loader.load_and_split()
In [ ]:
Copied!
# How many page-level Documents were extracted from the PDF.
len(pages)
In [ ]:
Copied!
# Concatenate every page's text into a single string for indexing.
# str.join builds the result in one pass, avoiding the quadratic cost
# of repeated `+=` string concatenation in a loop.
full_document = "".join(page.page_content for page in pages)
In [ ]:
Copied!
# Inspect the concatenated text of the whole paper.
print(full_document)
In [ ]:
Copied!
# Sanity check: full_document is a plain str (what RAG.index expects below).
type(full_document)
In [ ]:
Copied!
# Build an on-disk ColBERT index named "orca_paper" over the document.
# With split_documents=True RAGatouille chunks the single long string
# itself, capping each passage at max_document_length=512 tokens.
RAG.index(
    collection=[full_document],
    index_name="orca_paper",
    max_document_length=512,
    split_documents=True,
)
Do Retrieval¶
In [ ]:
Copied!
# Query the index directly: return the k=3 best-matching passages.
results = RAG.search(query="What is instruction tuning?", k=3)
In [ ]:
Copied!
# Display the retrieved passages with their relevance scores.
results
Use as LangChain Retriever¶
In [ ]:
Copied!
# Wrap the ColBERT index as a LangChain-compatible retriever (top-3 hits),
# so it can be composed into LangChain chains.
retriever = RAG.as_langchain_retriever(k=3)
In [ ]:
Copied!
# Exercise the retriever on its own: returns a list of Documents.
retriever.invoke("What is instruction tuning?")
In [ ]:
Copied!
Create a Chain¶
In [ ]:
Copied!
# Read the OpenAI key from Colab's secret store (named 'openai') and
# expose it via the environment variable that ChatOpenAI reads.
import os

from google.colab import userdata

os.environ["OPENAI_API_KEY"] = userdata.get('openai')
In [ ]:
Copied!
# Build a retrieval-augmented QA chain:
#   * retriever (defined above) fetches passages for the user's question,
#   * the "stuff" documents chain inserts all of them into {context} of the
#     prompt and sends the filled prompt to the LLM.
# The scrape showed two diverging copies of this cell; this is the version
# whose prompt delimits the retrieved passages with <context> tags, which
# makes the context boundary explicit to the model.
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_template(
    """Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}"""
)

llm = ChatOpenAI()
document_chain = create_stuff_documents_chain(llm, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)
In [ ]:
Copied!
# Run the full chain; the result dict includes the input, retrieved
# context, and the generated answer.
retrieval_chain.invoke({"input": "What is instruction tuning?"})
In [ ]:
Copied!
# Same invocation, but keep the result for inspection below.
response = retrieval_chain.invoke({"input": "What is instruction tuning?"})
In [ ]:
Copied!
# Extract just the generated answer text from the chain's output dict.
response["answer"]
In [ ]:
Copied!
Llama-Index¶
In [ ]:
Copied!
# Install LlamaIndex, its hub of community packs, and the OpenAI LLM
# integration used below. (Deduplicated from the scraped page.)
!pip install -q llama-index llama-hub llama-index-core llama-index-llms-openai
In [ ]:
Copied!
# Re-read the same PDF with LlamaIndex's reader (one Document per page).
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

reader = SimpleDirectoryReader(input_files=["Orca_paper.pdf"])
docs = reader.load_data()
In [ ]:
Copied!
# docs
# docs
In [ ]:
Copied!
from llama_index.core.llama_pack import download_llama_pack

# Download the RAGatouille retriever LlamaPack into ./ragatouille_pack
# and install its dependencies; returns the pack class to instantiate.
RAGatouilleRetrieverPack = download_llama_pack(
    "RAGatouilleRetrieverPack", "./ragatouille_pack"
)
In [ ]:
Copied!
from llama_index.llms.openai import OpenAI
from llama_index.llms.openai import OpenAI
In [ ]:
Copied!
# Instantiate the pack: it ColBERT-indexes `docs` under "orca_paper" and
# wires a top-5 retriever to GPT-3.5-turbo for end-to-end answering.
ragatouille_pack = RAGatouilleRetrieverPack(
    docs,  # List[Document]
    llm=OpenAI(model="gpt-3.5-turbo"),
    index_name="orca_paper",
    top_k=5,
)
In [ ]:
Copied!
# Ask the pack a question end-to-end (retrieve passages, then generate).
response = ragatouille_pack.run("What is instruction tuning? ")
In [ ]:
Copied!
# Display the pack's Response object.
response
In [ ]:
Copied!
# Print just the response text.
print(response)
In [ ]:
Copied!