Spaces:

unstructuredio
/

chat-your-data-isw

Running

App Files Files Community

Matt Robinson committed on Feb 10, 2023

Commit

01095ae

1 Parent(s): fe39603

chat isw app files

Browse files

Files changed (8) hide show

LICENSE +21 -0
README.md +17 -13
app.py +103 -0
cli_app.py +17 -0
ingest_data.py +30 -0
query_data.py +34 -0
requirements.txt +6 -0
vectorstore.pkl +0 -0

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2023 Harrison Chase
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,13 +1,17 @@
----
-title: Chat Your Data Isw
-emoji: 📚
-colorFrom: indigo
-colorTo: red
-sdk: gradio
-sdk_version: 3.18.0
-app_file: app.py
-pinned: false
-license: apache-2.0
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# Chat-Your-Data
+Create a ChatGPT-like experience over your custom docs using [LangChain](https://github.com/hwchase17/langchain).
+See [this blog post](https://blog.langchain.dev/tutorial-chatgpt-over-your-data/) for a more detailed explanation.
+## Ingest data
+Ingestion of data is done over the ISW campaign-assessment URLs listed in `ingest_data.py`.
+Therefore, the only thing needed to ingest data is to run `python ingest_data.py`.
+## Query data
+Custom prompts are used to ground the answers in the ingested ISW reports on the war in Ukraine.
+## Running the Application
+By running `python app.py` from the command line you can easily interact with your ChatGPT over your own data.

app.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import os
+from typing import Optional, Tuple
+import gradio as gr
+import pickle
+from query_data import get_chain
+from threading import Lock
+# Load the pre-built vector store once at import time; set_openai_api_key passes it to get_chain.
+# NOTE(review): pickle.load runs code embedded in the file, so vectorstore.pkl must come from a
+# trusted source (presumably the output of ingest_data.py — confirm).
+with open("vectorstore.pkl", "rb") as f:
+    vectorstore = pickle.load(f)
+def set_openai_api_key(api_key: str):
+    """Set the api key and return chain.
+    If no api_key, then None is returned.
+    """
+    if api_key:
+        os.environ["OPENAI_API_KEY"] = api_key
+        chain = get_chain(vectorstore)
+        os.environ["OPENAI_API_KEY"] = ""
+        return chain
+class ChatWrapper:
+    def __init__(self):
+        self.lock = Lock()
+    def __call__(
+        self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain
+    ):
+        """Execute the chat functionality."""
+        self.lock.acquire()
+        try:
+            history = history or []
+            # If chain is None, that is because no API key was provided.
+            if chain is None:
+                history.append((inp, "Please paste your OpenAI key to use"))
+                return history, history
+            # Set OpenAI key
+            import openai
+            openai.api_key = api_key
+            # Run chain and append input.
+            output = chain({"question": inp, "chat_history": history})["answer"]
+            history.append((inp, output))
+        except Exception as e:
+            raise e
+        finally:
+            self.lock.release()
+        return history, history
+# One shared handler instance; both the button click and the textbox submit below call it.
+chat = ChatWrapper()
+block = gr.Blocks(css=".gradio-container {background-color: lightgray}")
+with block:
+    # Header row: title plus the password-style textbox for the OpenAI API key.
+    with gr.Row():
+        gr.Markdown("<h3><center>Chat-Your-Data (ISW Updates)</center></h3>")
+        openai_api_key_textbox = gr.Textbox(
+            placeholder="Paste your OpenAI API key (sk-...)",
+            show_label=False,
+            lines=1,
+            type="password",
+        )
+    chatbot = gr.Chatbot()
+    # Input row: question textbox and the Send button.
+    with gr.Row():
+        message = gr.Textbox(
+            label="What's your question?",
+            placeholder="Ask questions about the war in Ukraine",
+            lines=1,
+        )
+        submit = gr.Button(value="Send", variant="secondary").style(full_width=False)
+    # Example questions that fill the message textbox.
+    gr.Examples(
+        examples=[
+            "What is the focus of the Russian offensive?",
+            "Where are the frontlines?",
+            "How are they consolidating power?",
+        ],
+        inputs=message,
+    )
+    gr.HTML("Demo application of a LangChain chain.")
+    gr.HTML("""<center>
+            Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a>
+            and <a href='https://github.com/unstructured-io/unstructured'>Unstructured.IO</a>
+        </center>""")
+    # state is passed to chat as `history`; agent_state is passed as `chain`
+    # and is filled by set_openai_api_key.
+    state = gr.State()
+    agent_state = gr.State()
+    # The inputs map positionally onto ChatWrapper.__call__(api_key, inp, history, chain);
+    # the two returned values go to the chatbot display and back into state.
+    submit.click(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])
+    message.submit(chat, inputs=[openai_api_key_textbox, message, state, agent_state], outputs=[chatbot, state])
+    # Rebuild the chain whenever the API key textbox changes.
+    openai_api_key_textbox.change(
+        set_openai_api_key,
+        inputs=[openai_api_key_textbox],
+        outputs=[agent_state],
+    )
+# Module-level launch: running (or importing) this file starts the app.
+block.launch(debug=True)

cli_app.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import pickle
+from query_data import get_chain
+if __name__ == "__main__":
+    with open("vectorstore.pkl", "rb") as f:
+        vectorstore = pickle.load(f)
+    qa_chain = get_chain(vectorstore)
+    chat_history = []
+    print("Chat with your docs!")
+    while True:
+        print("Human:")
+        question = input()
+        result = qa_chain({"question": question, "chat_history": chat_history})
+        chat_history.append((question, result["answer"]))
+        print("AI:")
+        print(result["answer"])

ingest_data.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import UnstructuredURLLoader
+from langchain.vectorstores.faiss import FAISS
+from langchain.embeddings import OpenAIEmbeddings
+import pickle
+# Load Data
+urls = [
+    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-4-2023",
+    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-6-2023",
+    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-7-2023",
+    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023",
+    "https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-9-2023",
+]
+loader = UnstructuredURLLoader(urls=urls)
+raw_documents = loader.load()
+# Split text
+text_splitter = RecursiveCharacterTextSplitter()
+documents = text_splitter.split_documents(raw_documents)
+# Load Data to vectorstore
+embeddings = OpenAIEmbeddings()
+vectorstore = FAISS.from_documents(documents, embeddings)
+# Save vectorstore
+with open("vectorstore.pkl", "wb") as f:
+    pickle.dump(vectorstore, f)

query_data.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from langchain.prompts.prompt import PromptTemplate
+from langchain.llms import OpenAI
+from langchain.chains import ChatVectorDBChain
+# Prompt that rewrites a follow-up question, together with the chat history,
+# into a standalone question. Placeholders: {chat_history}, {question}.
+_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
+You can assume the question about the war in Ukraine.
+Chat History:
+{chat_history}
+Follow Up Input: {question}
+Standalone question:"""
+CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+# Prompt used to answer the question from the supplied context.
+# Placeholders: {question}, {context}.
+template = """You are an AI assistant for answering questions about the war in Ukraine.
+You are given the following extracted parts of a long document and a question. Provide a conversational answer.
+If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
+If the question is not about the war in Ukraine, politely inform them that you are tuned to only answer questions about the war in Ukraine.
+Question: {question}
+=========
+{context}
+=========
+Answer in Markdown:"""
+# Built with explicit input_variables, unlike CONDENSE_QUESTION_PROMPT which uses from_template.
+QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
+def get_chain(vectorstore):
+    llm = OpenAI(temperature=0)
+    qa_chain = ChatVectorDBChain.from_llm(
+        llm,
+        vectorstore,
+        qa_prompt=QA_PROMPT,
+        condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+    )
+    return qa_chain

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+beautifulsoup4
+langchain
+openai
+unstructured>=0.4.7
+faiss-cpu
+gradio

vectorstore.pkl ADDED Viewed

Binary file (499 kB). View file