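"""Chainlit RAG chatbot over the Rubilabs HTML document.

Pipeline: load HTML -> split into chunks -> embed with cached OpenAI
embeddings -> index in FAISS -> answer questions via a RetrievalQA chain,
streaming the final answer back through Chainlit."""
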
import chainlit as cl
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import BSHTMLLoader
from langchain.embeddings import CacheBackedEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.storage import LocalFileStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
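
# Chunking: ~1000-character pieces with 100 characters of overlap so text
# spanning a chunk boundary still appears intact in at least one chunk.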
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

system_template = """
Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source of the document from which you got your answer.
An example response:
```
The answer is foo
SOURCES: xyz
```
Begin!
----------------
{context}"""
messages = [
    SystemMessagePromptTemplate.from_template(system_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
prompt = ChatPromptTemplate.from_messages(messages)
chain_type_kwargs = {"prompt": prompt}
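
# Chainlit invokes this hook to relabel message authors shown in the UI.
# The @cl.author_rename decorator matches the Chainlit API this app was
# written against; newer Chainlit releases may expose renaming differently.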
@cl.author_rename
def rename(orig_author: str):
    rename_dict = {"RetrievalQA": "the Rubilab Records", "Chatbot": "RubiChat"}
    return rename_dict.get(orig_author, orig_author)

@cl.on_chat_start
async def init():
    msg = cl.Message(content="Building index...")
    await msg.send()

    # Load and parse the HTML source document (BSHTMLLoader uses BeautifulSoup).
    loader = BSHTMLLoader(file_path="./data/Rubilabs.html")
    data = loader.load()
    documents = text_splitter.transform_documents(data)
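
    # Cache embeddings on disk so unchanged chunks are not re-embedded on
    # restart; namespacing by model name keeps caches for different
    # embedding models separate.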
    store = LocalFileStore("./cache/")
    core_embeddings_model = OpenAIEmbeddings()
    embedder = CacheBackedEmbeddings.from_bytes_store(
        core_embeddings_model, store, namespace=core_embeddings_model.model
    )

    # FAISS.from_documents is blocking, so run it in a worker thread via
    # cl.make_async to keep the Chainlit event loop responsive.
    docsearch = await cl.make_async(FAISS.from_documents)(documents, embedder)

    # "stuff" chain: retrieved chunks are stuffed into the prompt's {context}.
    chain = RetrievalQA.from_chain_type(
        ChatOpenAI(model="gpt-3.5-turbo", temperature=0, streaming=True),
        chain_type="stuff",
        return_source_documents=True,
        retriever=docsearch.as_retriever(),
        chain_type_kwargs=chain_type_kwargs,
    )

    msg.content = "Index built!"
    await msg.send()
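
    # Store the chain in the per-user session so each connection gets its own.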
    cl.user_session.set("chain", chain)

@cl.on_message
async def main(message):
    chain = cl.user_session.get("chain")

    # Stream tokens to the UI; answer_reached=True starts streaming
    # immediately rather than waiting for the answer-prefix tokens.
    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    cb.answer_reached = True

    res = await chain.acall(message, callbacks=[cb])
    answer = res["result"]

    if cb.has_streamed_final_answer:
        await cb.final_stream.update()
    else:
        await cl.Message(content=answer).send()
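
# Run locally (assuming this file is saved as app.py and OPENAI_API_KEY is set):
#   chainlit run app.py -w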