from langchain_openai import ChatOpenAI
from langchain.chains import QAGenerationChain
from langchain.text_splitter import TokenTextSplitter
from langchain_community.docstore.document import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains.summarize import load_summarize_chain
from langchain.chains import RetrievalQA
import os
import json
import time
from PyPDF2 import PdfReader
import csv
import json
import argparse
from datetime import datetime
from dotenv import load_dotenv
load_dotenv()




# Propagate OPENAI_API_KEY from the .env file into the process environment.
# Guarded: the original unconditional assignment raised TypeError when the
# variable was absent (os.environ values must be str, not None).
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key:
    os.environ["OPENAI_API_KEY"] = _openai_api_key

# Set file path
# file_path = 'SDG.pdf'

def count_pdf_pages(pdf_path):
    """Return the number of pages in the PDF at *pdf_path*, or None on failure.

    Any error (missing file, corrupt PDF, ...) is reported to stdout and
    swallowed; the caller receives None instead of an exception.
    """
    try:
        return len(PdfReader(pdf_path).pages)
    except Exception as exc:
        print("Error:", exc)
        return None

def file_processing(file_path):
    """Load a PDF or JSON file and split its text for question/answer generation.

    Args:
        file_path: Path to a ``.pdf`` file (pages are concatenated) or a
            ``.json`` file containing a list of strings (joined with spaces).

    Returns:
        Tuple ``(document_ques_gen, document_answer_gen)``:
        large-chunk Documents for question generation and smaller re-split
        Documents for answer retrieval.

    Raises:
        ValueError: If the file extension is neither ``pdf`` nor ``json``.
            (Previously an unsupported extension crashed later with an
            opaque NameError because ``question_gen`` was never bound.)
    """
    # Extension check is case-insensitive; 'PDF' and 'pdf' behave the same.
    file_type = file_path.split('.')[-1].lower()
    if file_type == 'pdf':
        loader = PyPDFLoader(file_path)
        data = loader.load()
        # Concatenate the raw text of every page into one string.
        question_gen = ""
        for page in data:
            question_gen += page.page_content
    elif file_type == "json":
        with open(file_path, 'r') as file:
            data = json.load(file)
        # Assumes the JSON document is a list of strings — TODO confirm
        # against the files actually fed to this script.
        question_gen = " ".join(data)
    else:
        raise ValueError(f"Unsupported file type: '{file_type}' (expected 'pdf' or 'json')")

    # Large chunks (~10k tokens) so each question-generation prompt sees
    # plenty of context.
    splitter_ques_gen = TokenTextSplitter(
        model_name='gpt-3.5-turbo',
        chunk_size=10000,
        chunk_overlap=200
    )

    chunks_ques_gen = splitter_ques_gen.split_text(question_gen)

    document_ques_gen = [Document(page_content=t) for t in chunks_ques_gen]

    # Smaller chunks (~1k tokens) so answer retrieval gets focused passages.
    splitter_ans_gen = TokenTextSplitter(
        model_name='gpt-3.5-turbo',
        chunk_size=1000,
        chunk_overlap=100
    )

    document_answer_gen = splitter_ans_gen.split_documents(
        document_ques_gen
    )

    return document_ques_gen, document_answer_gen

def llm_pipeline(file_path):
    """Build the question list and an answer-retrieval chain for *file_path*.

    Loads and splits the document, generates exam-style questions with a
    refine-style summarize chain, embeds the answer chunks into a Chroma
    vector store, and wires up a RetrievalQA chain to answer each question.

    Args:
        file_path: Path to the source document (PDF or JSON, see
            ``file_processing``).

    Returns:
        Tuple ``(answer_generation_chain, filtered_ques_list)``: a
        RetrievalQA chain and the generated question strings (only lines
        ending in '?' or '.' are kept, to drop headers/blank lines).
    """
    document_ques_gen, document_answer_gen = file_processing(file_path)

    # Slightly creative model for generating varied questions.
    llm_ques_gen_pipeline = ChatOpenAI(
        temperature = 0.3,
        model = "gpt-3.5-turbo"
    )

    prompt_template = """
    You are an expert at creating questions based on coding materials and documentation.
    Your goal is to prepare a coder or programmer for their exam and coding tests.
    You do this by asking questions about the text below:

    ------------
    {text}
    ------------

    Create questions that will prepare the coders or programmers for their tests.
    Make sure not to lose any important information.

    QUESTIONS:
    """

    PROMPT_QUESTIONS = PromptTemplate(template=prompt_template, input_variables=["text"])

    # Fixed typo: "based onstudy material" -> "based on study material".
    refine_template = ("""
    You are an expert at creating practice questions based on study material and documentation.
    Your goal is to help a student prepare for a test.
    We have received some practice questions to a certain extent: {existing_answer}.
    We have the option to refine the existing questions or add new ones.
    (only if necessary) with some more context below.
    ------------
    {text}
    ------------

    Given the new context, refine the original questions in English.
    If the context is not helpful, please provide the original questions.
    QUESTIONS:
    """
    )

    REFINE_PROMPT_QUESTIONS = PromptTemplate(
        input_variables=["existing_answer", "text"],
        template=refine_template,
    )

    # Refine chain: first chunk seeds the questions, later chunks refine them.
    ques_gen_chain = load_summarize_chain(llm = llm_ques_gen_pipeline,
                                            chain_type = "refine",
                                            verbose = False,
                                            question_prompt=PROMPT_QUESTIONS,
                                            refine_prompt=REFINE_PROMPT_QUESTIONS)

    ques = ques_gen_chain.run(document_ques_gen)

    embeddings = OpenAIEmbeddings()

    # In-memory vector store over the fine-grained answer chunks.
    vector_store = Chroma.from_documents(document_answer_gen, embeddings)

    # Near-deterministic model for answering.
    llm_answer_gen = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo")

    # Keep only lines that look like complete questions/sentences.
    ques_list = ques.split("\n")
    filtered_ques_list = [element for element in ques_list if element.endswith('?') or element.endswith('.')]

    answer_generation_chain = RetrievalQA.from_chain_type(llm=llm_answer_gen,
                                                chain_type="stuff",
                                                retriever=vector_store.as_retriever())

    return answer_generation_chain, filtered_ques_list




# Answer each question and save to a file
# for question in question_list:
#     print("Question: ", question)
#     answer = answer_gen_chain.run(question)
#     print("Answer: ", answer)
#     print("--------------------------------------------------\\n\\n")
#     # Save answer to file
#     with open("answers.txt", "a") as f:
#         f.write("Question: " + question + "\\n")
#         f.write("Answer: " + answer + "\\n")
#         f.write("--------------------------------------------------\\n\\n")


def qa_main(file_path, output_path):
    """Generate questions and answers for *file_path* and write them as JSON.

    The output file contains a flat JSON list of alternating question and
    answer strings: ``[q1, a1, q2, a2, ...]`` (format preserved for
    backward compatibility with existing consumers; the original comment
    claimed dictionaries, but the code has always written a flat list).

    Args:
        file_path: Source document passed to ``llm_pipeline``.
        output_path: Path of the JSON file to write.

    Returns:
        The path of the written JSON file (``output_path``).
    """
    answer_generation_chain, ques_list = llm_pipeline(file_path)

    # Flat list of alternating question/answer strings.
    qa_data = []

    for question in ques_list:
        print("Question: ", question)
        answer = answer_generation_chain.run(question)
        print("Answer: ", answer)
        print("--------------------------------------------------\n\n")

        qa_data.extend([
            question,
            answer
        ])

    # Write the questions and answers to the JSON file.
    with open(output_path, "w", encoding="utf-8") as jsonfile:
        json.dump(qa_data, jsonfile, indent=4, ensure_ascii=False)

    return output_path

if __name__ == "__main__":
    # Command-line entry point: parse the input/output paths and run the
    # full question/answer pipeline.
    arg_parser = argparse.ArgumentParser(description="StudyBuddy app for generating questions and answers")
    arg_parser.add_argument(
        "--file_path",
        required=True,
        help="Paths of the files"
    )
    arg_parser.add_argument(
        "--output_path",
        required=True,
        help="Output path"
    )

    cli_args = arg_parser.parse_args()
    qa_main(cli_args.file_path, cli_args.output_path)