from __future__ import annotations

import csv
import logging
import os
from typing import TYPE_CHECKING, Callable

import numpy as np
import torch
import tqdm
from sklearn.metrics import average_precision_score, ndcg_score

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.util import cos_sim

if TYPE_CHECKING:
    from torch import Tensor

    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class RerankingEvaluator(SentenceEvaluator):
    """
    This class evaluates a SentenceTransformer model for the task of re-ranking.

    Given a query and a list of documents, it computes the score [query, doc_i] for all possible
    documents and sorts them in decreasing order. Then, MRR@10, NDCG@10 and MAP are computed to measure the quality of the ranking.

    Args:
        samples (list): A list of dictionaries, where each dictionary represents a sample and has the following keys:

            - 'query': The search query.
            - 'positive': A list of positive (relevant) documents.
            - 'negative': A list of negative (irrelevant) documents.
        at_k (int, optional): Only consider the top k most similar documents to each query for the evaluation. Defaults to 10.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        similarity_fct (Callable[[torch.Tensor, torch.Tensor], torch.Tensor], optional): Similarity function between sentence embeddings. By default, cosine similarity. Defaults to cos_sim.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 64.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        use_batched_encoding (bool, optional): Whether or not to encode queries and documents in batches for greater speed, or 1-by-1 to save memory. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation dimension. Defaults to None.
        mrr_at_k (Optional[int], optional): Deprecated parameter. Please use `at_k` instead. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import RerankingEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer("all-MiniLM-L6-v2")

            # Load a dataset with queries, positives, and negatives
            eval_dataset = load_dataset("microsoft/ms_marco", "v1.1", split="validation")

            samples = [
                {
                    "query": sample["query"],
                    "positive": [text for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"]) if is_selected],
                    "negative": [text for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"]) if not is_selected],
                }
                for sample in eval_dataset
            ]

            # Initialize the evaluator
            reranking_evaluator = RerankingEvaluator(
                samples=samples,
                name="ms-marco-dev",
            )
            results = reranking_evaluator(model)
            '''
            RerankingEvaluator: Evaluating the model on the ms-marco-dev dataset:
            Queries: 9706      Positives: Min 1.0, Mean 1.1, Max 5.0   Negatives: Min 1.0, Mean 7.1, Max 9.0
            MAP: 56.07
            MRR@10: 56.70
            NDCG@10: 67.08
            '''
            print(reranking_evaluator.primary_metric)
            # => ms-marco-dev_ndcg@10
            print(results[reranking_evaluator.primary_metric])
            # => 0.6708042171399308
    """

    def __init__(
        self,
        samples: list[dict[str, str | list[str]]],
        at_k: int = 10,
        name: str = "",
        write_csv: bool = True,
        similarity_fct: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = cos_sim,
        batch_size: int = 64,
        show_progress_bar: bool = False,
        use_batched_encoding: bool = True,
        truncate_dim: int | None = None,
        mrr_at_k: int | None = None,
    ) -> None:
        super().__init__()
        self.samples = samples
        self.name = name
        if mrr_at_k is not None:
            logger.warning(f"The `mrr_at_k` parameter has been deprecated; please use `at_k={mrr_at_k}` instead.")
            self.at_k = mrr_at_k
        else:
            self.at_k = at_k
        self.similarity_fct = similarity_fct
        self.batch_size = batch_size
        self.show_progress_bar = show_progress_bar
        self.use_batched_encoding = use_batched_encoding
        self.truncate_dim = truncate_dim

        # Accept a dict of samples by flattening it to a list
        if isinstance(self.samples, dict):
            self.samples = list(self.samples.values())

        # Drop samples that lack at least one positive and one negative document
        self.samples = [
            sample for sample in self.samples if len(sample["positive"]) > 0 and len(sample["negative"]) > 0
        ]

        self.csv_file = "RerankingEvaluator" + ("_" + name if name else "") + f"_results_@{self.at_k}.csv"
        self.csv_headers = ["epoch", "steps", "MAP", f"MRR@{self.at_k}", f"NDCG@{self.at_k}"]
        self.write_csv = write_csv
        self.primary_metric = f"ndcg@{self.at_k}"

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1
    ) -> dict[str, float]:
        """
        Evaluates the model on the dataset and returns the evaluation metrics.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to evaluate.
            output_path (str, optional): The output path to write the results. Defaults to None.
            epoch (int, optional): The current epoch number. Defaults to -1.
            steps (int, optional): The current step number. Defaults to -1.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        logger.info(f"RerankingEvaluator: Evaluating the model on the {self.name} dataset{out_txt}:")

        scores = self.compute_metrices(model)
        mean_ap = scores["map"]
        mean_mrr = scores["mrr"]
        mean_ndcg = scores["ndcg"]

        # Log some statistics about the dataset
        num_positives = [len(sample["positive"]) for sample in self.samples]
        num_negatives = [len(sample["negative"]) for sample in self.samples]
        logger.info(
            f"Queries: {len(self.samples)} \t Positives: Min {np.min(num_positives):.1f}, "
            f"Mean {np.mean(num_positives):.1f}, Max {np.max(num_positives):.1f} \t "
            f"Negatives: Min {np.min(num_negatives):.1f}, Mean {np.mean(num_negatives):.1f}, "
            f"Max {np.max(num_negatives):.1f}"
        )
        logger.info(f"MAP: {mean_ap * 100:.2f}")
        logger.info(f"MRR@{self.at_k}: {mean_mrr * 100:.2f}")
        logger.info(f"NDCG@{self.at_k}: {mean_ndcg * 100:.2f}")

        # Write results to disk
        if output_path is not None and self.write_csv:
            os.makedirs(output_path, exist_ok=True)
            csv_path = os.path.join(output_path, self.csv_file)
            output_file_exists = os.path.isfile(csv_path)
            with open(csv_path, newline="", mode="a" if output_file_exists else "w", encoding="utf-8") as f:
                writer = csv.writer(f)
                if not output_file_exists:
                    writer.writerow(self.csv_headers)
                writer.writerow([epoch, steps, mean_ap, mean_mrr, mean_ndcg])

        metrics = {
            "map": mean_ap,
            f"mrr@{self.at_k}": mean_mrr,
            f"ndcg@{self.at_k}": mean_ndcg,
        }
        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics, epoch, steps)
        return metrics

    def compute_metrices(self, model: SentenceTransformer):
        """
        Computes the evaluation metrics for the given model.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        return (
            self.compute_metrices_batched(model)
            if self.use_batched_encoding
            else self.compute_metrices_individual(model)
        )

    def compute_metrices_batched(self, model: SentenceTransformer):
        """
        Computes the evaluation metrics in a batched way, by batching all queries and all documents together.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        all_mrr_scores = []
        all_ndcg_scores = []
        all_ap_scores = []

        all_query_embs = self.embed_inputs(
            model,
            [sample["query"] for sample in self.samples],
            encode_fn_name="query",
            show_progress_bar=self.show_progress_bar,
        )

        all_docs = []
        for sample in self.samples:
            all_docs.extend(sample["positive"])
            all_docs.extend(sample["negative"])

        all_docs_embs = self.embed_inputs(
            model, all_docs, encode_fn_name="document", show_progress_bar=self.show_progress_bar
        )

        # Compute scores per sample, walking through the flat array of document embeddings
        query_idx, docs_idx = 0, 0
        for instance in self.samples:
            query_emb = all_query_embs[query_idx]
            query_idx += 1

            num_pos = len(instance["positive"])
            num_neg = len(instance["negative"])
            docs_emb = all_docs_embs[docs_idx : docs_idx + num_pos + num_neg]
            docs_idx += num_pos + num_neg

            if num_pos == 0 or num_neg == 0:
                continue

            pred_scores = self.similarity_fct(query_emb, docs_emb)
            if len(pred_scores.shape) > 1:
                pred_scores = pred_scores[0]

            pred_scores_argsort = torch.argsort(-pred_scores)  # Sort in decreasing order
            pred_scores = pred_scores.cpu().tolist()

            is_relevant = [1] * num_pos + [0] * num_neg

            # MRR@k: reciprocal rank of the first relevant document within the top k
            mrr_score = 0
            for rank, index in enumerate(pred_scores_argsort[0 : self.at_k]):
                if is_relevant[index]:
                    mrr_score = 1 / (rank + 1)
                    break
            all_mrr_scores.append(mrr_score)

            # NDCG@k and MAP over the full candidate list
            all_ndcg_scores.append(ndcg_score([is_relevant], [pred_scores], k=self.at_k))
            all_ap_scores.append(average_precision_score(is_relevant, pred_scores))

        mean_ap = np.mean(all_ap_scores)
        mean_mrr = np.mean(all_mrr_scores)
        mean_ndcg = np.mean(all_ndcg_scores)

        return {"map": mean_ap, "mrr": mean_mrr, "ndcg": mean_ndcg}

    def compute_metrices_individual(self, model: SentenceTransformer):
        """
        Computes the evaluation metrics individually by embedding every (query, positive, negative) tuple individually.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        all_mrr_scores = []
        all_ndcg_scores = []
        all_ap_scores = []

        for instance in tqdm.tqdm(self.samples, disable=not self.show_progress_bar, desc="Samples"):
            query = instance["query"]
            positive = list(instance["positive"])
            negative = list(instance["negative"])

            if len(positive) == 0 or len(negative) == 0:
                continue

            docs = positive + negative
            is_relevant = [1] * len(positive) + [0] * len(negative)

            query_emb = self.embed_inputs(model, [query], encode_fn_name="query", show_progress_bar=False)
            docs_emb = self.embed_inputs(model, docs, encode_fn_name="document", show_progress_bar=False)

            pred_scores = self.similarity_fct(query_emb, docs_emb)
            if len(pred_scores.shape) > 1:
                pred_scores = pred_scores[0]

            pred_scores_argsort = torch.argsort(-pred_scores)  # Sort in decreasing order
            pred_scores = pred_scores.cpu().tolist()

            # MRR@k: reciprocal rank of the first relevant document within the top k
            mrr_score = 0
            for rank, index in enumerate(pred_scores_argsort[0 : self.at_k]):
                if is_relevant[index]:
                    mrr_score = 1 / (rank + 1)
                    break
            all_mrr_scores.append(mrr_score)

            # NDCG@k and MAP over the full candidate list
            all_ndcg_scores.append(ndcg_score([is_relevant], [pred_scores], k=self.at_k))
            all_ap_scores.append(average_precision_score(is_relevant, pred_scores))

        mean_ap = np.mean(all_ap_scores)
        mean_mrr = np.mean(all_mrr_scores)
        mean_ndcg = np.mean(all_ndcg_scores)

        return {"map": mean_ap, "mrr": mean_mrr, "ndcg": mean_ndcg}

    def embed_inputs(
        self,
        model: SentenceTransformer,
        sentences: str | list[str] | np.ndarray,
        encode_fn_name: str | None = None,
        show_progress_bar: bool | None = None,
        **kwargs,
    ) -> Tensor:
        if encode_fn_name is None:
            encode_fn = model.encode
        elif encode_fn_name == "query":
            encode_fn = model.encode_query
        elif encode_fn_name == "document":
            encode_fn = model.encode_document
        return encode_fn(
            sentences,
            batch_size=self.batch_size,
            show_progress_bar=show_progress_bar,
            convert_to_tensor=True,
            truncate_dim=self.truncate_dim,
            **kwargs,
        )

    def get_config_dict(self):
        config_dict = {"at_k": self.at_k}
        if self.truncate_dim is not None:
            config_dict["truncate_dim"] = self.truncate_dim
        return config_dict