# Source code for gptcache.similarity_evaluation.np

from typing import Dict, Tuple, Any

import numpy as np

from gptcache.similarity_evaluation import SimilarityEvaluation


class NumpyNormEvaluation(SimilarityEvaluation):
    """Using Numpy norm to evaluate sentences pair similarity.

    This evaluator calculates the L2 distance between two embeddings and
    returns ``range()[1] - distance``, so that the score is positively
    correlated with the similarity (identical embeddings score ``range()[1]``).

    If `enable_normal` is True, both the query embedding and the cache
    embedding are scaled to unit length before the distance is taken. The
    distance between two unit vectors is at most 2, so the score stays inside
    ``range()``. If `enable_normal` is False the distance is unbounded and the
    score may fall below ``range()[0]``.

    :param enable_normal: whether to normalize the embedding, defaults to True.
    :type enable_normal: bool
    :param question_embedding_function: optional, a function to generate question embedding.
        It is only used when an embedding is missing from one of the dictionaries
        passed to :meth:`evaluation`.
    :type question_embedding_function: function

    Example:
        .. code-block:: python

            from gptcache.similarity_evaluation import NumpyNormEvaluation
            import numpy as np

            evaluation = NumpyNormEvaluation()
            score = evaluation.evaluation(
                {
                    'question': 'What is color of sky?',
                    'embedding': np.array([-0.5, -0.5])
                },
                {
                    'question': 'What is the color of sky?',
                    'embedding': np.array([-0.49, -0.51])
                }
            )
    """

    def __init__(self, enable_normal: bool = True, question_embedding_function=None):
        self.enable_normal = enable_normal
        self.question_encoder = question_embedding_function

    @staticmethod
    def normalize(vec: np.ndarray):
        """Normalize the input vector.

        :param vec: numpy vector needs to normalize. Any array-like (list,
            tuple, ndarray) is accepted.
        :type vec: numpy.array

        :return: normalized vector. A vector whose norm is zero has no
            direction and is returned as an unchanged copy instead of being
            divided by zero.
        """
        # asarray is a no-op for ndarrays and lets plain sequences work too.
        arr = np.asarray(vec)
        magnitude = np.linalg.norm(arr)
        if magnitude == 0:
            # Dividing by a zero norm would yield NaN/inf and a RuntimeWarning.
            return arr.copy()
        return arr / magnitude

    def evaluation(
        self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **_
    ) -> float:
        """Evaluate the similarity score of pair.

        If both dictionaries carry a ``'question'`` and the two questions are
        equal ignoring case, the maximum score is returned without looking at
        the embeddings.

        If either dictionary has no ``'embedding'`` (missing key or ``None``),
        the embeddings of both dictionaries are (re)generated from their
        ``'question'`` with the configured embedding function and stored back
        into the dictionaries.

        :param src_dict: the query dictionary to evaluate with cache.
        :type src_dict: Dict
        :param cache_dict: the cache dictionary.
        :type cache_dict: Dict

        :return: evaluation score.
        :raises AssertionError: if an embedding is missing and no
            ``question_embedding_function`` was given to the evaluator.
        """
        if 'question' in src_dict and 'question' in cache_dict:
            if src_dict['question'].lower() == cache_dict['question'].lower():
                return self.range()[1]

        if src_dict.get('embedding') is None or cache_dict.get('embedding') is None:
            # Raised explicitly rather than via `assert` so the check is not
            # stripped when Python runs with -O; the exception type is unchanged.
            if not self.question_encoder:
                raise AssertionError(
                    'You need to a valid question_embedding_function to generate question embedding in the evaluator.'
                )
            src_dict['embedding'] = self.question_encoder(src_dict['question'])
            cache_dict['embedding'] = self.question_encoder(cache_dict['question'])

        # Coerce to arrays so list/tuple embeddings support the subtraction below.
        src_embedding = np.asarray(src_dict['embedding'])
        cache_embedding = np.asarray(cache_dict['embedding'])
        if self.enable_normal:
            src_embedding = self.normalize(src_embedding)
            cache_embedding = self.normalize(cache_embedding)

        # Subtract the distance from the maximum so a larger score means more similar.
        return self.range()[1] - np.linalg.norm(src_embedding - cache_embedding)

    def range(self) -> Tuple[float, float]:
        """Range of similarity score.

        :return: minimum and maximum of similarity score.
        """
        return 0.0, 2.0