
    bh                        d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dl mZ d dlmZ d d	lmZ d d
lmZ d dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dlmZ d dlmZ  e         ej8                  d      ej:                  d<   d Zd Zd Z d Z!e"dk(  rm ejF                  d      Z$e$jK                  ddd       e$jK                  ddd       e$jM                         Z'e'jP                  Z(e'jR                  Z) e!e(e)       yy)    )
ChatOpenAI)QAGenerationChain)TokenTextSplitter)Document)PyPDFLoader)PromptTemplate)OpenAIEmbeddings)Chroma)load_summarize_chain)RetrievalQAN)	PdfReader)datetime)load_dotenvOPENAI_API_KEYc                     	 t        |       }t        |j                        S # t        $ r}t	        d|       Y d }~y d }~ww xY w)NzError:)r   lenpages	Exceptionprint)pdf_pathpdfes      ./var/www/html/eduruby.in/utils/qa_generator.pycount_pdf_pagesr      s<    !399~ hs   " 	A<Ac                    | j                  d      d   }|dk(  r4t        |       }|j                         }d}|D ]  }||j                  z  } n@|dk(  r;t	        | d      5 }t        j                  |      }d d d        dj                        }t        dd	d
      }|j                        }|D 	cg c]  }	t        |	       }
}	t        ddd      }|j                  |
      }|
|fS # 1 sw Y   uxY wc c}	w )N.r    jsonr gpt-3.5-turboi'     )
model_name
chunk_sizechunk_overlap)page_contenti  d   )splitr   loadr'   openr   joinr   
split_textr   split_documents)	file_path	file_typeloaderdataquestion_genpagefilesplitter_ques_genchunks_ques_gentdocument_ques_gensplitter_ans_gendocument_answer_gens                r   file_processingr<   &   s    ooc"2&I%Y'{{} 	,D
$++
+,	,	V		3 4yyXXd^l)$ (22<@O;JKaq1KK($ +:: 1113  Ls   C)1C5)C2c                    t        |       \  }}t        dd      }d}t        |dg      }d}t        ddg|	      }t        |d
d||      }|j	                  |      }	t               }
t        j                  ||
      }t        dd      }|	j                  d      }|D cg c]'  }|j                  d      s|j                  d      s&|) }}t        j                  |d|j                               }||fS c c}w )Ng333333?r"   )temperaturemodela  
    You are an expert at creating questions based on coding materials and documentation.
    Your goal is to prepare a coder or programmer for their exam and coding tests.
    You do this by asking questions about the text below:

    ------------
    {text}
    ------------

    Create questions that will prepare the coders or programmers for their tests.
    Make sure not to lose any important information.

    QUESTIONS:
    text)templateinput_variablesa;  
    You are an expert at creating practice questions based onstudy material and documentation.
    Your goal is to help a student prepare for a test.
    We have received some practice questions to a certain extent: {existing_answer}.
    We have the option to refine the existing questions or add new ones.
    (only if necessary) with some more context below.
    ------------
    {text}
    ------------

    Given the new context, refine the original questions in English.
    If the context is not helpful, please provide the original questions.
    QUESTIONS:
    existing_answer)rB   rA   refineF)llm
chain_typeverbosequestion_promptrefine_promptg?
?r   stuff)rE   rF   	retriever)r<   r   r   r   runr	   r
   from_documentsr)   endswithr   from_chain_typeas_retriever)r/   r9   r;   llm_ques_gen_pipelineprompt_templatePROMPT_QUESTIONSrefine_templateREFINE_PROMPT_QUESTIONSques_gen_chainques
embeddingsvector_storellm_answer_gen	ques_listelementfiltered_ques_listanswer_generation_chains                    r   llm_pipelinera   L   s(   -<Y-G**&
O &QWPXYO  -*F3 
 *0E9A6;<L:Q	SN /0D!#J(()<jILCGN

4 I1:mgg>N>Ns>SW^WgWghkWl'mm)99n;B:F:S:S:UW #$666 ns    'C8C8c                 @   t        |       \  }}|}g }|D ]I  }t        d|       |j                  |      }t        d|       t        d       |j                  ||g       K t	        |dd      5 }t        j                  ||dd	       d d d        |S # 1 sw Y   |S xY w)
Nz
Question: zAnswer: z4--------------------------------------------------

wzutf-8)encoding   F)indentensure_ascii)ra   r   rN   extendr+   r   dump)	r/   output_pathr`   r]   base_folderqa_dataquestionanswerjsonfiles	            r   qa_mainrp      s    )5i)@&YK G 
lH%(,,X6j&!FG 	
 	
 
k3	1 CX		'8AEBC C s   /BB__main__z3StudyBuddy app for generating questions and answers)descriptionz--file_pathTzPaths of the files)requiredhelpz--output_pathzOutput path)*langchain_openair   langchain.chainsr   langchain.text_splitterr   %langchain_community.docstore.documentr   $langchain_community.document_loadersr   langchain.promptsr   r	    langchain_community.vectorstoresr
   langchain.chains.summarizer   r   osr   timePyPDF2r   csvargparser   dotenvr   getenvenvironr   r<   ra   rp   __name__ArgumentParserparseradd_argument
parse_argsargsr/   rj        r   <module>r      s   ' . 5 : < , - 3 ; ( 	    
     
  )ryy)9:

 
$2LD7l> z#H##0efF
  
    D~~I!!KI{## r   