from collections import defaultdict, deque
from dotenv import load_dotenv


from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate

# --------------------------------------------------
# VECTOR STORE
# --------------------------------------------------
# Load variables from a .env file into the environment before any OpenAI
# client object is constructed below.
load_dotenv()
# Directory holding the persisted FAISS index that is loaded at import time.
VECTOR_DIR = "vector_store/tracwater_faiss"
# Embedding model used to embed incoming queries.
# NOTE(review): presumably this must match the model the index was built
# with — confirm against the ingestion script.
EMBEDDING_MODEL = "text-embedding-3-large"

embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

# Load the persisted index once, at import time; query_search() reuses it.
vectordb = FAISS.load_local(
    VECTOR_DIR,
    embeddings,
    # SECURITY: this flag opts in to deserializing pickled data stored next to
    # the index. Only load VECTOR_DIR contents that come from a trusted source.
    allow_dangerous_deserialization=True
)

# --------------------------------------------------
# LLM
# --------------------------------------------------

# Shared chat model instance used by query_search() to generate answers.
# It is created once at import time with a low sampling temperature.
llm = ChatOpenAI(
    model="gpt-4.1-mini",
    temperature=0.2
)

# --------------------------------------------------
# SESSION MEMORY (IN-MEMORY)
# --------------------------------------------------

# Maximum number of question/answer exchanges remembered per session; older
# exchanges are dropped automatically by the bounded deque.
MAX_TURNS = 14
# Maps session_id -> deque of {"q": ..., "a": ...} dicts appended by
# query_search(). Lives only in process memory, so it is lost on restart.
# NOTE(review): nothing in the code visible here evicts old session ids, so
# the number of sessions held can grow without bound — confirm whether
# cleanup happens elsewhere.
SESSION_MEMORY = defaultdict(lambda: deque(maxlen=MAX_TURNS))

# --------------------------------------------------
# RETRIEVAL CONFIG
# --------------------------------------------------

# A retrieved chunk is kept only when its score is strictly below this value
# (query_search() filters with `score < SIMILARITY_THRESHOLD`), i.e. the score
# is treated as a distance where lower means more relevant.
# NOTE(review): the score scale depends on how the FAISS index was built. If
# it is an L2 distance over unit-normalized embeddings, 25 would admit every
# result and the filter would never trigger — confirm and tune.
SIMILARITY_THRESHOLD = 25
# Number of chunks requested from the vector store per query.
TOP_K = 6

# --------------------------------------------------
# PROMPT
# --------------------------------------------------

# PROMPT = ChatPromptTemplate.from_template("""
# You are an expert assistant answering questions using ONLY the provided context.
                                          

# Conversation history:
# {history}

# Context:
# {context}

# Rules:
# - You are allowed to answer and greet people politely.
# - Use ONLY the context.
# - If the answer is not present, say:
#   "Sorry no information for this query in TracWater sources. Kindly rephrase or ask another question."
# - Be clear, professional, and concise.

# Question:
# {question}
# """)


# Prompt template used by query_search(). It exposes three placeholders:
#   {history}  - prior exchanges of the current session
#   {context}  - retrieved chunks, each prefixed with its source
#   {question} - the current user question
# The template text is sent to the model verbatim, so any edit to the wording
# changes runtime behavior.
PROMPT = ChatPromptTemplate.from_template("""
You are a domain-specific expert assistant representing TracWater.

You must answer all user questions using ONLY the information provided in the context below and speak strictly in the first person plural (e.g., "we", "our", "us") when referring to TracWater, its products, or services.

Conversation History:
{history}

Context:
{context}

Mandatory Rules:
- ALWAYS refer to TracWater using first-person language ("we", "our", "us").
- NEVER use third-person references such as "they", "the company", or "TracWater provides".
- Do NOT use any external knowledge, assumptions, or hallucinations.
- Base your response strictly on the provided context.
- If the required information is not present in the context, respond EXACTLY with:
  "Sorry, no information for this query is available in TracWater sources. Kindly rephrase or ask another question."
- Keep responses clear, professional, and concise.

User Question:
{question}
""")



# --------------------------------------------------
# QUERY FUNCTION
# --------------------------------------------------

def query_search(q: str, session_id: str) -> dict:
    """
    Answer a question with session-aware RAG, gated by a relevance threshold.

    Args:
        q: The user's question.
        session_id: Key selecting this conversation's history in
            SESSION_MEMORY.

    Returns:
        A dict with two keys: "query" (the question exactly as given) and
        "answer". When the question is blank, or no retrieved chunk scores
        below SIMILARITY_THRESHOLD, "answer" is a fixed fallback message; in
        that case the LLM is not called and the session history is left
        untouched.
    """
    # NOTE(review): this fallback wording differs from the "Sorry, no
    # information ..." sentence the prompt instructs the model to use. It is
    # kept as-is because callers may match on it — confirm and unify.
    not_available = {
        "query": q,
        "answer": "Information not available in the provided sources.",
    }

    # A blank question has nothing to embed or answer, so skip the API calls.
    if not q or not q.strip():
        return not_available

    # 1. Retrieve chunks with similarity scores; keep those under the
    #    threshold (lower score is treated as more relevant).
    results = vectordb.similarity_search_with_score(q, k=TOP_K)
    docs = [doc for doc, score in results if score < SIMILARITY_THRESHOLD]

    if not docs:
        return not_available

    # 2. Build context. Use .get() so a chunk that lacks "source_url" in its
    #    metadata is labelled "unknown" instead of raising KeyError.
    context = "\n\n".join(
        f"[Source: {d.metadata.get('source_url', 'unknown')}]\n{d.page_content}"
        for d in docs
    )

    # 3. Build session history from the exchanges remembered for this session.
    memory = SESSION_MEMORY[session_id]
    history = "\n".join(
        f"User: {turn['q']}\nAssistant: {turn['a']}"
        for turn in memory
    )

    # 4. Generate answer. format_messages() hands the chat model real chat
    #    messages; .format() would flatten the template into a single string
    #    carrying a "Human: " role prefix.
    messages = PROMPT.format_messages(
        history=history or "None",
        context=context,
        question=q,
    )
    response = llm.invoke(messages)
    answer = response.content

    # 5. Update memory (the deque drops the oldest exchange once it is full).
    memory.append({"q": q, "a": answer})

    return {
        "query": q,
        "answer": answer,
    }


# print(query_search("What wireless sensors does TracWater offer?", "test_session_1"))