diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 000000000..26d33521a
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/caches/deviceStreaming.xml b/.idea/caches/deviceStreaming.xml
new file mode 100644
index 000000000..17b82fc87
--- /dev/null
+++ b/.idea/caches/deviceStreaming.xml
@@ -0,0 +1,860 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/genai-stack.iml b/.idea/genai-stack.iml
new file mode 100644
index 000000000..d6ebd4805
--- /dev/null
+++ b/.idea/genai-stack.iml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 000000000..6e8667213
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 000000000..7ba5e54d4
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 000000000..35eb1ddfb
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/api.py b/api.py
index b7e8c9969..d05b7d00d 100644
--- a/api.py
+++ b/api.py
@@ -1,6 +1,6 @@
 import os
 
-from langchain_community.graphs import Neo4jGraph
+from langchain_neo4j import Neo4jGraph
 from dotenv import load_dotenv
 from utils import (
     create_vector_index,
@@ -128,10 +128,7 @@ def qstream(question: Question = Depends()):
     q = Queue()
 
     def cb():
-        output_function(
-            {"question": question.text, "chat_history": []},
-            callbacks=[QueueCallback(q)],
-        )
+        output_function.invoke(question.text, config={"callbacks": [QueueCallback(q)]})
 
     def generate():
         yield json.dumps({"init": True, "model": llm_name})
@@ -146,9 +143,7 @@ async def ask(question: Question = Depends()):
     output_function = llm_chain
     if question.rag:
         output_function = rag_chain
-    result = output_function(
-        {"question": question.text, "chat_history": []}, callbacks=[]
-    )
+    result = output_function.invoke(question.text)
 
     return {"result": result["answer"], "model": llm_name}
diff --git a/bot.py b/bot.py
index 970a1a446..4f3cf7fe5 100644
--- a/bot.py
+++ b/bot.py
@@ -3,7 +3,7 @@
 import streamlit as st
 from streamlit.logger import get_logger
 from langchain.callbacks.base import BaseCallbackHandler
-from langchain_community.graphs import Neo4jGraph
+from langchain_neo4j import Neo4jGraph
 from dotenv import load_dotenv
 from utils import (
     create_vector_index,
@@ -92,10 +92,10 @@ def chat_input():
         with st.chat_message("assistant"):
             st.caption(f"RAG: {name}")
             stream_handler = StreamHandler(st.empty())
-            result = output_function(
-                {"question": user_input, "chat_history": []}, callbacks=[stream_handler]
-            )["answer"]
-            output = result
+            output = output_function.invoke(
+                user_input, config={"callbacks": [stream_handler]}
+            )
+
             st.session_state[f"user_input"].append(user_input)
             st.session_state[f"generated"].append(output)
             st.session_state[f"rag_mode"].append(name)
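A quick illustration of the calling convention change running through api.py and bot.py above (and pdf_bot.py further down): with the chains now built as LCEL runnables, per-call callbacks travel inside the config dict passed to invoke() rather than as a callbacks= keyword argument. The snippet below is an illustrative sketch only; LoggingHandler and the RunnableLambda stand-in are not part of this diff, and in the stack the chain would be the one returned by configure_llm_only_chain or configure_qa_rag_chain.

    from langchain_core.callbacks import BaseCallbackHandler
    from langchain_core.runnables import RunnableLambda

    class LoggingHandler(BaseCallbackHandler):
        # Minimal illustrative handler: report whenever a runnable in the chain starts.
        def on_chain_start(self, serialized, inputs, **kwargs) -> None:
            print(f"chain started with input: {inputs}")

    # Trivial stand-in for the real chain (hypothetical, for demonstration only).
    chain = RunnableLambda(lambda question: f"echo: {question}")

    # Callbacks are supplied per call via config, mirroring the .invoke() calls above.
    answer = chain.invoke(
        "How do I create a vector index in Neo4j?",
        config={"callbacks": [LoggingHandler()]},
    )
    print(answer)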
diff --git a/chains.py b/chains.py
index 138ced667..584988224 100644
--- a/chains.py
+++ b/chains.py
@@ -1,4 +1,3 @@
-
 from langchain_openai import OpenAIEmbeddings
 from langchain_ollama import OllamaEmbeddings
 from langchain_aws import BedrockEmbeddings
@@ -8,21 +7,30 @@
 from langchain_ollama import ChatOllama
 from langchain_aws import ChatBedrock
 
-from langchain_community.vectorstores import Neo4jVector
+from langchain_neo4j import Neo4jVector
 
-from langchain.chains import RetrievalQAWithSourcesChain
-from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
 from langchain.prompts import (
     ChatPromptTemplate,
     HumanMessagePromptTemplate,
-    SystemMessagePromptTemplate
+    SystemMessagePromptTemplate,
 )
 from typing import List, Any
-from utils import BaseLogger, extract_title_and_question
+from utils import BaseLogger, extract_title_and_question, format_docs
 from langchain_google_genai import GoogleGenerativeAIEmbeddings
 
+AWS_MODELS = (
+    "ai21.jamba-instruct-v1:0",
+    "amazon.titan",
+    "anthropic.claude",
+    "cohere.command",
+    "meta.llama",
+    "mistral.mi",
+)
+
 
 def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config={}):
     if embedding_model_name == "ollama":
@@ -39,10 +47,8 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=
         embeddings = BedrockEmbeddings()
         dimension = 1536
         logger.info("Embedding: Using AWS")
-    elif embedding_model_name == "google-genai-embedding-001":
-        embeddings = GoogleGenerativeAIEmbeddings(
-            model="models/embedding-001"
-        )
+    elif embedding_model_name == "google-genai-embedding-001":
+        embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
         dimension = 768
         logger.info("Embedding: Using Google Generative AI Embeddings")
     else:
@@ -55,9 +61,9 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=
 
 
 def load_llm(llm_name: str, logger=BaseLogger(), config={}):
-    if llm_name == "gpt-4":
+    if llm_name in ["gpt-4", "gpt-4o", "gpt-4-turbo"]:
         logger.info("LLM: Using GPT-4")
-        return ChatOpenAI(temperature=0, model_name="gpt-4", streaming=True)
+        return ChatOpenAI(temperature=0, model_name=llm_name, streaming=True)
     elif llm_name == "gpt-3.5":
         logger.info("LLM: Using GPT-3.5")
         return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
@@ -68,6 +74,14 @@ def load_llm(llm_name: str, logger=BaseLogger(), config={}):
             model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
             streaming=True,
         )
+    elif llm_name.startswith(AWS_MODELS):
+        logger.info(f"LLM: {llm_name}")
+        return ChatBedrock(
+            model_id=llm_name,
+            model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
+            streaming=True,
+        )
+
     elif len(llm_name):
         logger.info(f"LLM: Using Ollama: {llm_name}")
         return ChatOllama(
@@ -96,17 +110,8 @@ def configure_llm_only_chain(llm):
     chat_prompt = ChatPromptTemplate.from_messages(
         [system_message_prompt, human_message_prompt]
     )
-
-    def generate_llm_output(
-        user_input: str, callbacks: List[Any], prompt=chat_prompt
-    ) -> str:
-        chain = prompt | llm
-        answer = chain.invoke(
-            {"question": user_input}, config={"callbacks": callbacks}
-        ).content
-        return {"answer": answer}
-
-    return generate_llm_output
+    chain = chat_prompt | llm | StrOutputParser()
+    return chain
 
 
 def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, password):
@@ -136,12 +141,6 @@ def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, pass
     ]
     qa_prompt = ChatPromptTemplate.from_messages(messages)
 
-    qa_chain = load_qa_with_sources_chain(
-        llm,
-        chain_type="stuff",
-        prompt=qa_prompt,
-    )
-
     # Vector + Knowledge Graph response
     kg = Neo4jVector.from_existing_index(
         embedding=embeddings,
@@ -167,12 +166,16 @@ def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, pass
     ORDER BY similarity ASC // so that best answers are the last
     """,
     )
-
-    kg_qa = RetrievalQAWithSourcesChain(
-        combine_documents_chain=qa_chain,
-        retriever=kg.as_retriever(search_kwargs={"k": 2}),
-        reduce_k_below_max_tokens=False,
-        max_tokens_limit=3375,
+    kg_qa = (
+        RunnableParallel(
+            {
+                "summaries": kg.as_retriever(search_kwargs={"k": 2}) | format_docs,
+                "question": RunnablePassthrough(),
+            }
+        )
+        | qa_prompt
+        | llm
+        | StrOutputParser()
     )
     return kg_qa
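For orientation on the rewritten configure_qa_rag_chain: RunnableParallel fans the incoming question out to two branches, the retriever piped through format_docs (which produces the "summaries" string) and RunnablePassthrough (which forwards the raw "question"), and the resulting dict then flows through qa_prompt, the LLM, and StrOutputParser. Below is a minimal, self-contained sketch of that fan-out with a plain function standing in for the retriever; the fake_summaries helper and the prompt wording are illustrative, not taken from this diff.

    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

    def fake_summaries(question: str) -> str:
        # Stand-in for kg.as_retriever(...) | format_docs: returns an already-joined string.
        return "Vector indexes are created with CREATE VECTOR INDEX ... IF NOT EXISTS."

    prompt = ChatPromptTemplate.from_messages(
        [("human", "Based on the provided summary: {summaries}\nAnswer the following question: {question}")]
    )

    chain = (
        RunnableParallel(
            {
                "summaries": RunnableLambda(fake_summaries),
                "question": RunnablePassthrough(),
            }
        )
        | prompt
    )

    # The parallel step turns the single input string into the dict the prompt expects.
    print(chain.invoke("How do I create a vector index?").to_string())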
diff --git a/docker-compose.yml b/docker-compose.yml
index 7dacfd59c..3a1bbc084 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -31,7 +31,7 @@ services:
 
   database:
     user: neo4j:neo4j
-    image: neo4j:5.23
+    image: neo4j:5.26
     ports:
       - 7687:7687
       - 7474:7474
diff --git a/env.example b/env.example
index 88e33cc33..7d9574f38 100644
--- a/env.example
+++ b/env.example
@@ -1,7 +1,7 @@
 #*****************************************************************
 # LLM and Embedding Model
 #*****************************************************************
-LLM=llama2 #or any Ollama model tag, gpt-4, gpt-3.5, or claudev2
+LLM=llama2 #or any Ollama model tag, gpt-4 (o or turbo), gpt-3.5, or any bedrock model
 EMBEDDING_MODEL=sentence_transformer #or google-genai-embedding-001 openai, ollama, or aws
 
 #*****************************************************************
diff --git a/front-end/.vscode/extensions.json b/front-end/.vscode/extensions.json
index bdef82015..9de591fce 100644
--- a/front-end/.vscode/extensions.json
+++ b/front-end/.vscode/extensions.json
@@ -1,3 +1,5 @@
+
 {
+
   "recommendations": ["svelte.svelte-vscode"]
 }
diff --git a/loader.py b/loader.py
index a02027996..cf06a7367 100644
--- a/loader.py
+++ b/loader.py
@@ -1,7 +1,7 @@
 import os
 import requests
 from dotenv import load_dotenv
-from langchain_community.graphs import Neo4jGraph
+from langchain_neo4j import Neo4jGraph
 import streamlit as st
 from streamlit.logger import get_logger
 from chains import load_embedding_model
@@ -15,8 +15,6 @@
 password = os.getenv("NEO4J_PASSWORD")
 ollama_base_url = os.getenv("OLLAMA_BASE_URL")
 embedding_model_name = os.getenv("EMBEDDING_MODEL")
-# Remapping for Langchain Neo4j integration
-os.environ["NEO4J_URL"] = url
 
 logger = get_logger(__name__)
diff --git a/pdf_bot.py b/pdf_bot.py
index fb8f4d461..96acee2cf 100644
--- a/pdf_bot.py
+++ b/pdf_bot.py
@@ -1,16 +1,19 @@
 import os
 
 import streamlit as st
-from langchain.chains import RetrievalQA
 from PyPDF2 import PdfReader
 from langchain.callbacks.base import BaseCallbackHandler
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Neo4jVector
+from langchain.prompts import ChatPromptTemplate
+from langchain_neo4j import Neo4jVector
 from streamlit.logger import get_logger
 from chains import (
     load_embedding_model,
     load_llm,
 )
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
+from utils import format_docs
 
 # load api key lib
 from dotenv import load_dotenv
@@ -67,6 +70,14 @@ def main():
         )
         chunks = text_splitter.split_text(text=text)
 
+        qa_prompt = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "human",
+                    "Based on the provided summary: {summaries} \n Answer the following question:{question}",
+                )
+            ]
+        )
         # Store the chunks part in db (vector)
         vectorstore = Neo4jVector.from_texts(
@@ -79,8 +90,17 @@ def main():
             node_label="PdfBotChunk",
             pre_delete_collection=True,  # Delete existing PDF data
         )
-        qa = RetrievalQA.from_chain_type(
-            llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever()
+        qa = (
+            RunnableParallel(
+                {
+                    "summaries": vectorstore.as_retriever(search_kwargs={"k": 2})
+                    | format_docs,
+                    "question": RunnablePassthrough(),
+                }
+            )
+            | qa_prompt
+            | llm
+            | StrOutputParser()
         )
 
         # Accept user questions/query
@@ -88,7 +108,7 @@ def main():
 
         if query:
             stream_handler = StreamHandler(st.empty())
-            qa.run(query, callbacks=[stream_handler])
+            qa.invoke(query, {"callbacks": [stream_handler]})
 
 
 if __name__ == "__main__":
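One detail worth noting in the pdf_bot.py chain above: the keys produced by the RunnableParallel step ("summaries" and "question") have to match qa_prompt's input variables, otherwise the prompt raises a missing-variable error at invoke time. A small illustrative check, not part of the diff:

    from langchain.prompts import ChatPromptTemplate

    qa_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "human",
                "Based on the provided summary: {summaries} \n Answer the following question:{question}",
            )
        ]
    )

    # These must be exactly the keys the RunnableParallel dict produces.
    print(sorted(qa_prompt.input_variables))  # ['question', 'summaries']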
diff --git a/pull_model.Dockerfile b/pull_model.Dockerfile
index e59398f75..b06625f7d 100644
--- a/pull_model.Dockerfile
+++ b/pull_model.Dockerfile
@@ -15,7 +15,15 @@
 COPY <
diff --git a/utils.py b/utils.py
--- a/utils.py
+++ b/utils.py
@@ ... @@ def create_vector_index(...) -> None:
         driver.query(index_query)
     except:  # Already exists
         pass
-    index_query = "CREATE VECTOR INDEX top_answers IF NOT EXISTS FOR (m:Answer) ON m.embedding"
+    index_query = (
+        "CREATE VECTOR INDEX top_answers IF NOT EXISTS FOR (m:Answer) ON m.embedding"
+    )
     try:
         driver.query(index_query)
     except:  # Already exists
@@ -52,3 +54,7 @@ def create_constraints(driver):
     driver.query(
         "CREATE CONSTRAINT tag_name IF NOT EXISTS FOR (t:Tag) REQUIRE (t.name) IS UNIQUE"
     )
+
+
+def format_docs(docs):
+    return "\n\n".join(doc.page_content for doc in docs)
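Finally, the new format_docs helper added to utils.py above is what collapses retriever output into the single string consumed as {summaries} by the prompts in chains.py and pdf_bot.py. A minimal usage sketch; the sample documents are made up, and it assumes the snippet runs from the repository root so that utils is importable:

    from langchain_core.documents import Document

    from utils import format_docs

    docs = [
        Document(page_content="Neo4j stores data as nodes and relationships."),
        Document(page_content="Cypher is the query language used to retrieve them."),
    ]

    # Joins the page_content of each retrieved Document with a blank line in between.
    print(format_docs(docs))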