Source code for gptcache.embedding.onnx

import numpy as np

from gptcache.embedding.base import BaseEmbedding
from gptcache.utils import (
    import_onnxruntime,
    import_huggingface_hub,
    import_huggingface,
)

import_huggingface()
import_onnxruntime()
import_huggingface_hub()

from transformers import AutoTokenizer, AutoConfig  # pylint: disable=C0413
from huggingface_hub import hf_hub_download  # pylint: disable=C0413
import onnxruntime  # pylint: disable=C0413


class Onnx(BaseEmbedding):
    """Generate text embedding for given text using ONNX Model.

    Example:
        .. code-block:: python

            from gptcache.embedding import Onnx

            test_sentence = 'Hello, world.'
            encoder = Onnx(model='GPTCache/paraphrase-albert-onnx')
            embed = encoder.to_embeddings(test_sentence)
    """

    def __init__(self, model="GPTCache/paraphrase-albert-onnx"):
        tokenizer_name = "GPTCache/paraphrase-albert-small-v2"
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        self.model = model
        onnx_model_path = hf_hub_download(repo_id=model, filename="model.onnx")
        self.ort_session = onnxruntime.InferenceSession(onnx_model_path)
        config = AutoConfig.from_pretrained(tokenizer_name)
        self.__dimension = config.hidden_size
    def to_embeddings(self, data, **_):
        """Generate embedding given text input.

        :param data: text in string.
        :type data: str

        :return: a text embedding in shape of (dim,).
        """
        encoded_text = self.tokenizer.encode_plus(data, padding="max_length")
        # ONNX Runtime expects int64 inputs with an explicit batch dimension.
        ort_inputs = {
            "input_ids": np.array(encoded_text["input_ids"]).astype("int64").reshape(1, -1),
            "attention_mask": np.array(encoded_text["attention_mask"]).astype("int64").reshape(1, -1),
            "token_type_ids": np.array(encoded_text["token_type_ids"]).astype("int64").reshape(1, -1),
        }
        ort_outputs = self.ort_session.run(None, ort_inputs)
        ort_feat = ort_outputs[0]
        # Pool the per-token features into a single sentence embedding.
        emb = self.post_proc(ort_feat, ort_inputs["attention_mask"])
        return emb.flatten()
    def post_proc(self, token_embeddings, attention_mask):
        # Mean pooling: average the token embeddings over the positions the
        # attention mask marks as real tokens, so padding does not dilute the
        # sentence embedding (see the standalone sketch after this class).
        input_mask_expanded = (
            np.expand_dims(attention_mask, -1)
            .repeat(token_embeddings.shape[-1], -1)
            .astype(float)
        )
        sentence_embs = np.sum(token_embeddings * input_mask_expanded, 1) / np.maximum(
            input_mask_expanded.sum(1), 1e-9
        )
        return sentence_embs
    @property
    def dimension(self):
        """Embedding dimension.

        :return: embedding dimension
        """
        return self.__dimension
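
post_proc is a standard mean-pooling step: the token embeddings are averaged over the positions the attention mask marks as valid, so padding tokens do not contribute to the sentence embedding. A minimal standalone sketch with toy arrays (illustrative only, not part of this module):

    import numpy as np

    # Toy batch: 1 sentence, 4 token positions, hidden size 3; the last two
    # positions are padding (mask == 0).
    token_embeddings = np.array([[[1.0, 2.0, 3.0],
                                  [3.0, 4.0, 5.0],
                                  [0.0, 0.0, 0.0],
                                  [0.0, 0.0, 0.0]]])
    attention_mask = np.array([[1, 1, 0, 0]])

    mask = np.expand_dims(attention_mask, -1).repeat(3, -1).astype(float)
    sentence_emb = np.sum(token_embeddings * mask, 1) / np.maximum(mask.sum(1), 1e-9)
    print(sentence_emb)  # [[2. 3. 4.]] -- the mean of the two real tokens only

In practice the encoder is usually handed to GPTCache as its embedding function, with the dimension property used to size the vector store. The following sketch follows the project's documented setup; the module paths and helpers used here (get_data_manager, CacheBase, VectorBase, SearchDistanceEvaluation) are assumptions and may differ across GPTCache versions:

    from gptcache import cache
    from gptcache.embedding import Onnx
    from gptcache.manager import CacheBase, VectorBase, get_data_manager
    from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation

    onnx = Onnx()
    # Size the vector index to match the embedding dimension reported by the model.
    data_manager = get_data_manager(
        CacheBase("sqlite"), VectorBase("faiss", dimension=onnx.dimension)
    )
    cache.init(
        embedding_func=onnx.to_embeddings,
        data_manager=data_manager,
        similarity_evaluation=SearchDistanceEvaluation(),
    )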