GPTCache#

core#

class gptcache.core.Cache[source]#

Bases: object

GPTCache core object.

Example

from gptcache import cache
from gptcache.adapter import openai

cache.init()
cache.set_openai_key()
init(cache_enable_func=<function cache_all>, pre_embedding_func=<function last_content>, pre_func=None, embedding_func=<function to_embeddings>, data_manager: gptcache.manager.data_manager.DataManager = <gptcache.manager.data_manager.MapDataManager object>, similarity_evaluation=<gptcache.similarity_evaluation.exact_match.ExactMatchEvaluation object>, post_process_messages_func=<function temperature_softmax>, post_func=None, config=<gptcache.config.Config object>, next_cache=None)[source]#

Pass parameters to initialize GPTCache.

Parameters
  • cache_enable_func – a function to enable cache, defaults to cache_all

  • pre_embedding_func – a function to preprocess embedding, defaults to last_content

  • pre_func – a function to preprocess embedding, same as pre_embedding_func

  • embedding_func – a function to extract embeddings from requests for similarity search, defaults to string_embedding (to_embeddings)

  • data_manager – a DataManager module, defaults to get_data_manager()

  • similarity_evaluation – a module to calculate embedding similarity, defaults to ExactMatchEvaluation()

  • post_process_messages_func – a function to post-process messages, defaults to temperature_softmax with a default temperature of 0.0

  • post_func – a function to post-process messages, same as post_process_messages_func

  • config – a module to pass configurations, defaults to Config()

  • next_cache – customized method for next cache

import_data(questions: List[Any], answers: List[Any], session_ids: Optional[List[Optional[str]]] = None) None[source]#

Import data to GPTCache

Parameters
  • questions – preprocessed question data

  • answers – list of answers to questions

  • session_ids – optional list of session ids, one per question.

Returns

None

flush()[source]#

Flush cached data to persistent storage to prevent accidental loss of in-memory data. This is useful when using the map cache manager, or faiss/hnswlib vector storage.

static set_openai_key()[source]#
static set_azure_openai_key()[source]#

config#

class gptcache.config.Config(log_time_func: Optional[Callable[[str, float], None]] = None, similarity_threshold: float = 0.8, prompts: Optional[List[str]] = None, template: Optional[str] = None, auto_flush: int = 20, enable_token_counter: bool = True, input_summary_len: Optional[int] = None, context_len: Optional[int] = None, skip_list: Optional[List[str]] = None, data_check: bool = False)[source]#

Bases: object

Pass configuration.

Parameters
  • log_time_func (Optional[Callable[[str, float], None]]) – optional, customized log time function

  • similarity_threshold (float) – a threshold ranged from 0 to 1 to filter search results with similarity score higher than the threshold. When it is 0, there are no hits. When it is 1, all search results will be returned as hits.

  • prompts (Optional[List[str]]) – optional, if the request content will remove the prompt string when the request contains the prompt list

  • template (Optional[str]) – optional, if the request content will remove the template string and only keep the parameter value in the template

  • auto_flush (int) – the cache will be flushed automatically every time this many pieces of data are added, defaults to 20

  • enable_token_counter (bool) – enable the token counter, defaults to True

  • input_summary_len (Optional[int]) – optional, summarize input to specified length.

  • skip_list (Optional[List[str]]) – for sequence preprocessing, skip those sentences in skip_list.

  • context_len (Optional[int]) – optional, the length of context.

Example

from gptcache import Config

configs = Config(similarity_threshold=0.6)

session#

class gptcache.session.Session(name: Optional[str] = None, data_manager: Optional[gptcache.manager.data_manager.DataManager] = None, check_hit_func: Optional[Callable] = None)[source]#

Bases: object

Session for gptcache. Session can isolate the context of each connection, and can also filter the results after recall, and if not satisfied will re-request rather than return the cache results directly.

Parameters
  • name (str) – the name of the session, defaults to uuid.uuid4().hex.

  • data_manager (DataManager) – the DataManager of the session, defaults to cache.data_manager with the initialized cache.

  • check_hit_func (Callable) – a Callable to check the hit, defaults to processor.check_hit.check_hit_session, which will not return cached data if you ask the same or similar question in the same session.

Example

from gptcache import cache
from gptcache.session import Session
# init gptcache
cache.init()
cache.set_openai_key()
session = Session()

from gptcache.adapter import openai
# run ChatCompletion model with gptcache on session
response = openai.ChatCompletion.create(
              model='gpt-3.5-turbo',
              messages=[
                {
                    'role': 'user',
                    'content': "what's github"
                }],
              session=session
            )
response_content = response['choices'][0]['message']['content']
property name#
drop()[source]#

Drop the session and delete all data in the session

report#

class gptcache.report.Report[source]#

Bases: object

Get GPTCache report including time and counts for different operations.

pre(delta_time)[source]#

Pre-process counts and time.

Parameters

delta_time – additional runtime.

embedding(delta_time)[source]#

Embedding counts and time.

Parameters

delta_time – additional runtime.

search(delta_time)[source]#

Search counts and time.

Parameters

delta_time – additional runtime.

data(delta_time)[source]#

Get data counts and time.

Parameters

delta_time – additional runtime.

evaluation(delta_time)[source]#

Evaluation counts and time.

Parameters

delta_time – additional runtime.

post(delta_time)[source]#

Post-process counts and time.

Parameters

delta_time – additional runtime.

llm(delta_time)[source]#

LLM counts and time.

Parameters

delta_time – additional runtime.

save(delta_time)[source]#

Save counts and time.

Parameters

delta_time – additional runtime.

average_pre_time()[source]#

Average pre-process time.

average_embedding_time()[source]#

Average embedding time.

average_search_time()[source]#

Average search time.

average_data_time()[source]#

Average data time.

average_evaluation_time()[source]#

Average evaluation time.

average_post_time()[source]#

Average post-process time.

average_llm_time()[source]#

Average LLM time.

average_save_time()[source]#

Average save time.

hint_cache()[source]#

Hit cache count.

class gptcache.report.OpCounter[source]#

Bases: object

Operation counter.

count = 0#

Operation count.

total_time = 0#

Total time.

average()[source]#

Average time.

client#

class gptcache.client.Client(uri: str = 'http://localhost:8000')[source]#

Bases: object

GPTCache client to send requests to GPTCache server.

Parameters

uri (str) – the uri that leads to the server, defaults to "http://localhost:8000".

Example

from gptcache import client

client = Client(uri="http://localhost:8000")
client.put("Hi", "Hi back")
ans = client.get("Hi")
put(question: str, answer: str)[source]#
Parameters
  • question (str) – the question to be put.

  • answer (str) – the answer to the question to be put.

Returns

status code.

get(question: str)[source]#
Parameters

question (str) – the question to get an answer.

Returns

answer to the question.