Source code for gptcache.manager.vector_data.docarray_index
from typing import List, Optional, Tuple
import numpy as np
from gptcache.manager.vector_data.base import VectorBase, VectorData
from gptcache.utils import import_docarray
import_docarray()
from docarray.typing import NdArray # pylint: disable=C0413
from docarray import BaseDoc, DocList # pylint: disable=C0413
from docarray.index import InMemoryExactNNIndex # pylint: disable=C0413
[docs]class DocarrayVectorData(BaseDoc):
"""Class representing a vector data element with an ID and associated data."""
id: int
data: NdArray
[docs]class DocArrayIndex(VectorBase):
"""
Class representing in-memory exact nearest neighbor index for vector search.
:param index_file_path: the path to docarray index, defaults to 'docarray_index.bin'.
:type index_file_path: str
:param top_k: the number of the vectors results to return, defaults to 1.
:type top_k: int
"""
def __init__(self, index_file_path: str, top_k: int):
self._index = InMemoryExactNNIndex[DocarrayVectorData](
index_file_path=index_file_path
)
self._index_file_path = index_file_path
self._top_k = top_k
[docs] def mul_add(self, datas: List[VectorData]) -> None:
"""
Add multiple vector data elements to the index.
:param datas: A list of vector data elements to be added.
"""
docs = DocList[DocarrayVectorData](
DocarrayVectorData(id=data.id, data=data.data) for data in datas
)
self._index.index(docs)
[docs] def search(
self, data: np.ndarray, top_k: int = -1
) -> Optional[List[Tuple[float, int]]]:
"""
Search for the nearest vector data elements in the index.
:param data: The query vector data.
:param top_k: The number of top matches to return.
:return: A list of tuples, each containing the match score and
the ID of the matched vector data element.
"""
if len(self._index) == 0:
return None
if top_k == -1:
top_k = self._top_k
docs, scores = self._index.find(data, search_field="data", limit=top_k)
return list(zip(scores, docs.id))
[docs] def rebuild(self, ids: Optional[List[int]] = None) -> bool:
"""
In the case of DocArrayIndex, the rebuild operation is not needed.
"""
return True
[docs] def delete(self, ids: Optional[List[str]]) -> None:
"""
Delete the specified vector data elements from the index.
:param ids: A list of IDs of the vector data elements to be deleted.
"""
if ids is not None:
del self._index[ids]
[docs] def flush(self) -> None:
self._index.persist(self._index_file_path)
[docs] def close(self) -> None:
self.flush()