diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 000000000..26d33521a
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/caches/deviceStreaming.xml b/.idea/caches/deviceStreaming.xml
new file mode 100644
index 000000000..17b82fc87
--- /dev/null
+++ b/.idea/caches/deviceStreaming.xml
@@ -0,0 +1,860 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/genai-stack.iml b/.idea/genai-stack.iml
new file mode 100644
index 000000000..d6ebd4805
--- /dev/null
+++ b/.idea/genai-stack.iml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 000000000..6e8667213
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 000000000..7ba5e54d4
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 000000000..35eb1ddfb
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/api.py b/api.py
index b7e8c9969..d05b7d00d 100644
--- a/api.py
+++ b/api.py
@@ -1,6 +1,6 @@
import os
-from langchain_community.graphs import Neo4jGraph
+from langchain_neo4j import Neo4jGraph
from dotenv import load_dotenv
from utils import (
create_vector_index,
@@ -128,10 +128,7 @@ def qstream(question: Question = Depends()):
q = Queue()
def cb():
- output_function(
- {"question": question.text, "chat_history": []},
- callbacks=[QueueCallback(q)],
- )
+ output_function.invoke(question.text, config={"callbacks": [QueueCallback(q)]})
def generate():
yield json.dumps({"init": True, "model": llm_name})
@@ -146,9 +143,7 @@ async def ask(question: Question = Depends()):
output_function = llm_chain
if question.rag:
output_function = rag_chain
- result = output_function(
- {"question": question.text, "chat_history": []}, callbacks=[]
- )
+ result = output_function.invoke(question.text)
-    return {"result": result["answer"], "model": llm_name}
+    return {"result": result, "model": llm_name}
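For context on the new call style in `qstream`: the chain is now a plain Runnable, so callbacks travel in the `config` argument and tokens can be drained from a queue while the chain runs in a background thread. A minimal, self-contained sketch of that pattern (modeled on the `QueueCallback` referenced above; the class body and helper here are illustrative, not the file's exact implementation):

```python
from queue import Queue
from threading import Thread

from langchain.callbacks.base import BaseCallbackHandler


class TokenQueueCallback(BaseCallbackHandler):
    """Illustrative handler: push each streamed token onto a queue, then a None sentinel."""

    def __init__(self, q: Queue):
        self.q = q

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.q.put(token)

    def on_llm_end(self, *args, **kwargs) -> None:
        self.q.put(None)  # signal that streaming is finished


def stream_answer(chain, question: str):
    q = Queue()
    # Runnables take a plain string input; callbacks are passed via `config`.
    Thread(
        target=lambda: chain.invoke(question, config={"callbacks": [TokenQueueCallback(q)]})
    ).start()
    while (token := q.get()) is not None:
        yield token
```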
diff --git a/bot.py b/bot.py
index 970a1a446..4f3cf7fe5 100644
--- a/bot.py
+++ b/bot.py
@@ -3,7 +3,7 @@
import streamlit as st
from streamlit.logger import get_logger
from langchain.callbacks.base import BaseCallbackHandler
-from langchain_community.graphs import Neo4jGraph
+from langchain_neo4j import Neo4jGraph
from dotenv import load_dotenv
from utils import (
create_vector_index,
@@ -92,10 +92,10 @@ def chat_input():
with st.chat_message("assistant"):
st.caption(f"RAG: {name}")
stream_handler = StreamHandler(st.empty())
- result = output_function(
- {"question": user_input, "chat_history": []}, callbacks=[stream_handler]
- )["answer"]
- output = result
+ output = output_function.invoke(
+ user_input, config={"callbacks": [stream_handler]}
+ )
+
st.session_state[f"user_input"].append(user_input)
st.session_state[f"generated"].append(output)
st.session_state[f"rag_mode"].append(name)
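bot.py's `StreamHandler` keeps working unchanged, because callback handlers still receive `on_llm_new_token`; only the call site moves the handler into `config`, and the final answer now comes back as a plain string rather than a dict. A hedged sketch of the handler shape being assumed (bot.py defines its own version; this one is illustrative):

```python
from langchain.callbacks.base import BaseCallbackHandler


class StreamHandler(BaseCallbackHandler):
    """Accumulate streamed tokens and re-render a Streamlit placeholder."""

    def __init__(self, container, initial_text: str = ""):
        self.container = container  # e.g. st.empty()
        self.text = initial_text

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        self.text += token
        self.container.markdown(self.text)


# New call shape: handler in `config`, string result back.
# output = output_function.invoke(user_input, config={"callbacks": [StreamHandler(st.empty())]})
```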
diff --git a/chains.py b/chains.py
index 138ced667..584988224 100644
--- a/chains.py
+++ b/chains.py
@@ -1,4 +1,3 @@
-
from langchain_openai import OpenAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_aws import BedrockEmbeddings
@@ -8,21 +7,30 @@
from langchain_ollama import ChatOllama
from langchain_aws import ChatBedrock
-from langchain_community.vectorstores import Neo4jVector
+from langchain_neo4j import Neo4jVector
-from langchain.chains import RetrievalQAWithSourcesChain
-from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
- SystemMessagePromptTemplate
+ SystemMessagePromptTemplate,
)
from typing import List, Any
-from utils import BaseLogger, extract_title_and_question
+from utils import BaseLogger, extract_title_and_question, format_docs
from langchain_google_genai import GoogleGenerativeAIEmbeddings
+AWS_MODELS = (
+ "ai21.jamba-instruct-v1:0",
+ "amazon.titan",
+ "anthropic.claude",
+ "cohere.command",
+ "meta.llama",
+ "mistral.mi",
+)
+
def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config={}):
if embedding_model_name == "ollama":
@@ -39,10 +47,8 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=
embeddings = BedrockEmbeddings()
dimension = 1536
logger.info("Embedding: Using AWS")
- elif embedding_model_name == "google-genai-embedding-001":
- embeddings = GoogleGenerativeAIEmbeddings(
- model="models/embedding-001"
- )
+ elif embedding_model_name == "google-genai-embedding-001":
+ embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
dimension = 768
logger.info("Embedding: Using Google Generative AI Embeddings")
else:
@@ -55,9 +61,9 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config=
def load_llm(llm_name: str, logger=BaseLogger(), config={}):
- if llm_name == "gpt-4":
+ if llm_name in ["gpt-4", "gpt-4o", "gpt-4-turbo"]:
logger.info("LLM: Using GPT-4")
- return ChatOpenAI(temperature=0, model_name="gpt-4", streaming=True)
+ return ChatOpenAI(temperature=0, model_name=llm_name, streaming=True)
elif llm_name == "gpt-3.5":
logger.info("LLM: Using GPT-3.5")
return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
@@ -68,6 +74,14 @@ def load_llm(llm_name: str, logger=BaseLogger(), config={}):
model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
streaming=True,
)
+ elif llm_name.startswith(AWS_MODELS):
+ logger.info(f"LLM: {llm_name}")
+ return ChatBedrock(
+ model_id=llm_name,
+ model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
+ streaming=True,
+ )
+
elif len(llm_name):
logger.info(f"LLM: Using Ollama: {llm_name}")
return ChatOllama(
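The new Bedrock branch relies on `str.startswith` accepting a tuple of prefixes, so any model id beginning with one of the `AWS_MODELS` entries is routed to `ChatBedrock`. A quick illustration of the matching behavior:

```python
AWS_MODELS = (
    "ai21.jamba-instruct-v1:0",
    "amazon.titan",
    "anthropic.claude",
    "cohere.command",
    "meta.llama",
    "mistral.mi",
)

# startswith() with a tuple is True if any prefix matches.
print("anthropic.claude-3-sonnet-20240229-v1:0".startswith(AWS_MODELS))  # True
print("mistral.mixtral-8x7b-instruct-v0:1".startswith(AWS_MODELS))       # True
print("gpt-4o".startswith(AWS_MODELS))                                   # False
```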
@@ -96,17 +110,8 @@ def configure_llm_only_chain(llm):
chat_prompt = ChatPromptTemplate.from_messages(
[system_message_prompt, human_message_prompt]
)
-
- def generate_llm_output(
- user_input: str, callbacks: List[Any], prompt=chat_prompt
- ) -> str:
- chain = prompt | llm
- answer = chain.invoke(
- {"question": user_input}, config={"callbacks": callbacks}
- ).content
- return {"answer": answer}
-
- return generate_llm_output
+ chain = chat_prompt | llm | StrOutputParser()
+ return chain
def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, password):
@@ -136,12 +141,6 @@ def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, pass
]
qa_prompt = ChatPromptTemplate.from_messages(messages)
- qa_chain = load_qa_with_sources_chain(
- llm,
- chain_type="stuff",
- prompt=qa_prompt,
- )
-
# Vector + Knowledge Graph response
kg = Neo4jVector.from_existing_index(
embedding=embeddings,
@@ -167,12 +166,16 @@ def configure_qa_rag_chain(llm, embeddings, embeddings_store_url, username, pass
ORDER BY similarity ASC // so that best answers are the last
""",
)
-
- kg_qa = RetrievalQAWithSourcesChain(
- combine_documents_chain=qa_chain,
- retriever=kg.as_retriever(search_kwargs={"k": 2}),
- reduce_k_below_max_tokens=False,
- max_tokens_limit=3375,
+ kg_qa = (
+ RunnableParallel(
+ {
+ "summaries": kg.as_retriever(search_kwargs={"k": 2}) | format_docs,
+ "question": RunnablePassthrough(),
+ }
+ )
+ | qa_prompt
+ | llm
+ | StrOutputParser()
)
return kg_qa
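The LCEL pipeline above replaces `RetrievalQAWithSourcesChain`: the retriever's documents are collapsed into a single `summaries` string by `format_docs` while the raw question passes through untouched, and both feed `qa_prompt`. A runnable sketch of the same shape with stand-in components (the fake retriever and prompt text are assumptions for illustration, not the repository's configuration):

```python
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# Stand-in retriever so the shape runs without Neo4j or an LLM.
fake_retriever = RunnableLambda(
    lambda q: [Document(page_content=f"Top answer related to: {q}")]
)

prompt = ChatPromptTemplate.from_messages(
    [("human", "Use these sources:\n{summaries}\n\nQuestion: {question}")]
)

rag_shape = (
    RunnableParallel(
        {
            "summaries": fake_retriever | format_docs,  # documents -> one text block
            "question": RunnablePassthrough(),          # user question, unchanged
        }
    )
    | prompt  # chains.py continues with: | llm | StrOutputParser()
)

print(rag_shape.invoke("How do I parse JSON in Python?").to_string())
```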
diff --git a/docker-compose.yml b/docker-compose.yml
index 7dacfd59c..3a1bbc084 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -31,7 +31,7 @@ services:
database:
user: neo4j:neo4j
- image: neo4j:5.23
+ image: neo4j:5.26
ports:
- 7687:7687
- 7474:7474
diff --git a/env.example b/env.example
index 88e33cc33..7d9574f38 100644
--- a/env.example
+++ b/env.example
@@ -1,7 +1,7 @@
#*****************************************************************
# LLM and Embedding Model
#*****************************************************************
-LLM=llama2 #or any Ollama model tag, gpt-4, gpt-3.5, or claudev2
+LLM=llama2 #or any Ollama model tag, gpt-4, gpt-4o, gpt-4-turbo, gpt-3.5, or any AWS Bedrock model id
EMBEDDING_MODEL=sentence_transformer #or google-genai-embedding-001 openai, ollama, or aws
#*****************************************************************
diff --git a/front-end/.vscode/extensions.json b/front-end/.vscode/extensions.json
index bdef82015..9de591fce 100644
--- a/front-end/.vscode/extensions.json
+++ b/front-end/.vscode/extensions.json
@@ -1,3 +1,5 @@
+
{
+
"recommendations": ["svelte.svelte-vscode"]
}
diff --git a/loader.py b/loader.py
index a02027996..cf06a7367 100644
--- a/loader.py
+++ b/loader.py
@@ -1,7 +1,7 @@
import os
import requests
from dotenv import load_dotenv
-from langchain_community.graphs import Neo4jGraph
+from langchain_neo4j import Neo4jGraph
import streamlit as st
from streamlit.logger import get_logger
from chains import load_embedding_model
@@ -15,8 +15,6 @@
password = os.getenv("NEO4J_PASSWORD")
ollama_base_url = os.getenv("OLLAMA_BASE_URL")
embedding_model_name = os.getenv("EMBEDDING_MODEL")
-# Remapping for Langchain Neo4j integration
-os.environ["NEO4J_URL"] = url
logger = get_logger(__name__)
diff --git a/pdf_bot.py b/pdf_bot.py
index fb8f4d461..96acee2cf 100644
--- a/pdf_bot.py
+++ b/pdf_bot.py
@@ -1,16 +1,19 @@
import os
import streamlit as st
-from langchain.chains import RetrievalQA
from PyPDF2 import PdfReader
from langchain.callbacks.base import BaseCallbackHandler
from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Neo4jVector
+from langchain.prompts import ChatPromptTemplate
+from langchain_neo4j import Neo4jVector
from streamlit.logger import get_logger
from chains import (
load_embedding_model,
load_llm,
)
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
+from utils import format_docs
# load api key lib
from dotenv import load_dotenv
@@ -67,6 +70,14 @@ def main():
)
chunks = text_splitter.split_text(text=text)
+ qa_prompt = ChatPromptTemplate.from_messages(
+ [
+ (
+ "human",
+ "Based on the provided summary: {summaries} \n Answer the following question:{question}",
+ )
+ ]
+ )
# Store the chunks part in db (vector)
vectorstore = Neo4jVector.from_texts(
@@ -79,8 +90,17 @@ def main():
node_label="PdfBotChunk",
pre_delete_collection=True, # Delete existing PDF data
)
- qa = RetrievalQA.from_chain_type(
- llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever()
+ qa = (
+ RunnableParallel(
+ {
+ "summaries": vectorstore.as_retriever(search_kwargs={"k": 2})
+ | format_docs,
+ "question": RunnablePassthrough(),
+ }
+ )
+ | qa_prompt
+ | llm
+ | StrOutputParser()
)
# Accept user questions/query
@@ -88,7 +108,7 @@ def main():
if query:
stream_handler = StreamHandler(st.empty())
- qa.run(query, callbacks=[stream_handler])
+ qa.invoke(query, {"callbacks": [stream_handler]})
if __name__ == "__main__":
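One note on the final call: `Runnable.invoke` accepts the run config as an optional second positional argument, so the positional form used here is equivalent to the keyword form used elsewhere in this change (shown with this file's `qa`, `query`, and `stream_handler` names purely for illustration):

```python
# Positional config, as written in pdf_bot.py ...
qa.invoke(query, {"callbacks": [stream_handler]})

# ... behaves the same as the keyword form.
qa.invoke(query, config={"callbacks": [stream_handler]})
```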
diff --git a/pull_model.Dockerfile b/pull_model.Dockerfile
index e59398f75..b06625f7d 100644
--- a/pull_model.Dockerfile
+++ b/pull_model.Dockerfile
@@ -15,7 +15,15 @@ COPY <<EOF pull_model.clj
diff --git a/utils.py b/utils.py
--- a/utils.py
+++ b/utils.py
@@ -33,7 +33,9 @@ def create_vector_index(driver) -> None:
driver.query(index_query)
except: # Already exists
pass
- index_query = "CREATE VECTOR INDEX top_answers IF NOT EXISTS FOR (m:Answer) ON m.embedding"
+ index_query = (
+ "CREATE VECTOR INDEX top_answers IF NOT EXISTS FOR (m:Answer) ON m.embedding"
+ )
try:
driver.query(index_query)
except: # Already exists
@@ -52,3 +54,7 @@ def create_constraints(driver):
driver.query(
"CREATE CONSTRAINT tag_name IF NOT EXISTS FOR (t:Tag) REQUIRE (t.name) IS UNIQUE"
)
+
+
+def format_docs(docs):
+ return "\n\n".join(doc.page_content for doc in docs)
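`format_docs` collapses the retriever's `Document` list into the single newline-separated string that fills the `{summaries}` slot in the prompts above. A tiny usage example (the document contents are made up):

```python
from langchain_core.documents import Document

docs = [
    Document(page_content="Use json.loads to parse a JSON string."),
    Document(page_content="Use json.load to read JSON from a file object."),
]

print(format_docs(docs))
# Use json.loads to parse a JSON string.
#
# Use json.load to read JSON from a file object.
```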