Source code for gptcache.similarity_evaluation.np
from typing import Dict, Tuple, Any
import numpy as np
from gptcache.similarity_evaluation import SimilarityEvaluation
class NumpyNormEvaluation(SimilarityEvaluation):
    """Using Numpy norm to evaluate sentences pair similarity.

    This evaluator calculates the L2 distance between two embeddings for the similarity check.
    If `enable_normal` is True, both the query embedding and the cache embedding are scaled to
    unit length first. The distance is subtracted from the maximum score (``range()[1]``) so the
    returned score is positively correlated with similarity.

    The distance between two unit vectors is at most 2, so with normalization the score stays
    inside ``range()``. Without normalization the distance is unbounded and the score can drop
    below ``range()[0]``.

    :param enable_normal: whether to normalize the embedding, defaults to True.
    :type enable_normal: bool
    :param question_embedding_function: optional, a function to generate question embedding
    :type question_embedding_function: function

    Example:
        .. code-block:: python

            from gptcache.similarity_evaluation import NumpyNormEvaluation
            import numpy as np

            evaluation = NumpyNormEvaluation()
            score = evaluation.evaluation(
                {
                    'question': 'What is color of sky?',
                    'embedding': np.array([-0.5, -0.5]),
                },
                {
                    'question': 'What is the color of sky?',
                    'embedding': np.array([-0.49, -0.51]),
                },
            )
    """

    def __init__(self, enable_normal: bool = True, question_embedding_function=None):
        self.enable_normal = enable_normal
        # Only called by `evaluation` when one of the dicts has no usable 'embedding'.
        self.question_encoder = question_embedding_function

    @staticmethod
    def normalize(vec: np.ndarray):
        """Normalize the input vector to unit L2 length.

        :param vec: numpy vector needs to normalize.
        :type vec: numpy.array
        :return: normalized vector. A vector whose norm is zero is returned as an
            unchanged copy, because dividing by zero would fill it with NaN.
        """
        vec = np.asarray(vec)
        magnitude = np.linalg.norm(vec)
        if magnitude == 0:
            # No direction to preserve; avoid 0/0 -> NaN leaking into the score.
            return vec.copy()
        return vec / magnitude

    def evaluation(
        self, src_dict: Dict[str, Any], cache_dict: Dict[str, Any], **_
    ) -> float:
        """Evaluate the similarity score of pair.

        :param src_dict: the query dictionary to evaluate with cache.
        :type src_dict: Dict
        :param cache_dict: the cache dictionary.
        :type cache_dict: Dict
        :return: evaluation score, ``range()[1]`` minus the L2 distance of the embeddings.
        :raises AssertionError: if an embedding is missing and no
            question_embedding_function was supplied.
        """
        # Questions that match case-insensitively get the maximum score without any vector math.
        if 'question' in src_dict and 'question' in cache_dict:
            if src_dict['question'].lower() == cache_dict['question'].lower():
                return self.range()[1]

        # If either embedding is absent or None, regenerate both from the questions.
        # The results are written back into the caller's dictionaries.
        if src_dict.get('embedding') is None or cache_dict.get('embedding') is None:
            # Explicit raise instead of `assert` so the check survives `python -O`.
            if not self.question_encoder:
                raise AssertionError(
                    'You need to a valid question_embedding_function to generate question embedding in the evaluator.'
                )
            src_dict['embedding'] = self.question_encoder(src_dict['question'])
            cache_dict['embedding'] = self.question_encoder(cache_dict['question'])

        # Coerce to arrays so plain lists/tuples also work when normalization is disabled.
        src_embedding = np.asarray(src_dict['embedding'])
        cache_embedding = np.asarray(cache_dict['embedding'])
        if self.enable_normal:
            src_embedding = self.normalize(src_embedding)
            cache_embedding = self.normalize(cache_embedding)

        distance = np.linalg.norm(src_embedding - cache_embedding)
        # Convert the numpy scalar to a builtin float to match the annotated return type.
        return float(self.range()[1] - distance)

    def range(self) -> Tuple[float, float]:
        """Range of similarity score.

        :return: minimum and maximum of similarity score.
        """
        return 0.0, 2.0