import json
from pathlib import Path
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from dotenv import load_dotenv
# --------------------------------------------------
# CONFIGURATION
# --------------------------------------------------
# Read environment variables (e.g. the OpenAI API key) from a local .env
# file before the OpenAIEmbeddings client is constructed further down.
load_dotenv()
# Input: JSON file containing a list of chunk dicts; each chunk is expected
# to carry "text", "chunk_id", and a "metadata" sub-dict (see the
# Document-building loop below for the exact keys read).
CHUNKS_FILE = "chunked_data/tracwater_chunks.json"
# Output: directory where the serialized FAISS index is written.
VECTOR_DIR = Path("vector_store/tracwater_faiss")
VECTOR_DIR.mkdir(parents=True, exist_ok=True)  # idempotent — safe on re-runs

# OpenAI embedding model used for all chunks.
EMBEDDING_MODEL = "text-embedding-3-large"

# --------------------------------------------------
# LOAD CHUNKS
# --------------------------------------------------

# Read and parse the chunk file in one step; json.loads on the file's
# text is equivalent to json.load on an open handle.
chunks = json.loads(Path(CHUNKS_FILE).read_text(encoding="utf-8"))

print(f"Loaded {len(chunks)} chunks")

# --------------------------------------------------
# CONVERT TO LANGCHAIN DOCUMENTS
# --------------------------------------------------

def _to_document(chunk: dict) -> Document:
    """Map one raw chunk dict onto a LangChain Document.

    Copies only the metadata keys the retrieval layer needs. A missing
    key raises KeyError immediately, surfacing malformed input before
    any embedding cost is incurred (same failure mode as the original
    inline loop).
    """
    # Hoist the nested dict once instead of subscripting it per key.
    meta = chunk["metadata"]
    return Document(
        page_content=chunk["text"],
        metadata={
            "chunk_id": chunk["chunk_id"],
            "source_url": meta["source_url"],
            "source_type": meta["source_type"],
            "chunk_index": meta["chunk_index"],
            "total_chunks": meta["total_chunks"],
        },
    )

# Comprehension instead of for-and-append (ruff PERF401); same list,
# same order.
documents = [_to_document(chunk) for chunk in chunks]

# --------------------------------------------------
# CREATE EMBEDDINGS
# --------------------------------------------------

# Requires the OpenAI API key to be present in the environment
# (loaded from .env by load_dotenv() at the top of the file).
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

# Embed every Document and build an in-memory FAISS index over the vectors.
# NOTE(review): this embeds the whole corpus in one call — confirm rate-limit
# / batching behavior if the chunk count grows large.
vectordb = FAISS.from_documents(documents, embeddings)

# --------------------------------------------------
# SAVE VECTOR STORE
# --------------------------------------------------

# Persist the FAISS index to disk so later runs can reload it instead of
# re-embedding the corpus.
vectordb.save_local(str(VECTOR_DIR))

# Console summary; the divider literal is reused rather than repeated.
DIVIDER = "----------------------------------"
print(DIVIDER)
print("Embeddings created successfully")
print(f"Vector store saved at: {VECTOR_DIR}")
print(DIVIDER)
