Adapter#
openai#
- class gptcache.adapter.openai.ChatCompletion(engine: Optional[str] = None, **kwargs)[source]#
Bases: openai.api_resources.chat_completion.ChatCompletion, gptcache.adapter.base.BaseCacheLLM
OpenAI ChatCompletion Wrapper
Example
from gptcache import cache
from gptcache.processor.pre import get_prompt
# init gptcache
cache.init()
cache.set_openai_key()

from gptcache.adapter import openai

# run ChatCompletion model with gptcache
response = openai.ChatCompletion.create(
    model='gpt-3.5-turbo',
    messages=[{
        'role': 'user',
        'content': "what's github"
    }],
)
response_content = response['choices'][0]['message']['content']
- classmethod create(*args, **kwargs)[source]#
Creates a new chat completion for the provided messages and parameters.
See https://platform.openai.com/docs/api-reference/chat-completions/create for a list of valid parameters.
- class gptcache.adapter.openai.Completion(engine: Optional[str] = None, **kwargs)[source]#
Bases: openai.api_resources.completion.Completion, gptcache.adapter.base.BaseCacheLLM
OpenAI Completion Wrapper
Example
from gptcache import cache
from gptcache.processor.pre import get_prompt
# init gptcache
cache.init()
cache.set_openai_key()

from gptcache.adapter import openai

# run Completion model with gptcache
response = openai.Completion.create(model="text-davinci-003", prompt="Hello world.")
response_text = response["choices"][0]["text"]
- classmethod create(*args, **kwargs)[source]#
Creates a new completion for the provided prompt and parameters.
See https://platform.openai.com/docs/api-reference/completions/create for a list of valid parameters.
- class gptcache.adapter.openai.Audio(id=None, api_key=None, api_version=None, api_type=None, organization=None, response_ms: Optional[int] = None, api_base=None, engine=None, **params)[source]#
Bases:
openai.api_resources.audio.Audio
OpenAI Audio Wrapper
Example
from gptcache import cache
from gptcache.processor.pre import get_file_bytes
# init gptcache
cache.init(pre_embedding_func=get_file_bytes)
cache.set_openai_key()

from gptcache.adapter import openai

# run audio transcription model with gptcache
audio_file = open("/path/to/audio.mp3", "rb")
transcript = openai.Audio.transcribe("whisper-1", audio_file)

# run audio translation model with gptcache
audio_file = open("/path/to/audio.mp3", "rb")
transcript = openai.Audio.translate("whisper-1", audio_file)
- class gptcache.adapter.openai.Image(id=None, api_key=None, api_version=None, api_type=None, organization=None, response_ms: Optional[int] = None, api_base=None, engine=None, **params)[source]#
Bases:
openai.api_resources.image.Image
OpenAI Image Wrapper
Example
from gptcache import cache
from gptcache.processor.pre import get_prompt
# init gptcache
cache.init(pre_embedding_func=get_prompt)
cache.set_openai_key()

from gptcache.adapter import openai

# run image generation model with gptcache
response = openai.Image.create(
    prompt="a white siamese cat",
    n=1,
    size="256x256"
)
response_url = response['data'][0]['url']
- class gptcache.adapter.openai.Moderation(id=None, api_key=None, api_version=None, api_type=None, organization=None, response_ms: Optional[int] = None, api_base=None, engine=None, **params)[source]#
Bases: openai.api_resources.moderation.Moderation, gptcache.adapter.base.BaseCacheLLM
OpenAI Moderation Wrapper
Example
from gptcache.adapter import openai
from gptcache.adapter.api import init_similar_cache
from gptcache.processor.pre import get_openai_moderation_input

init_similar_cache(pre_func=get_openai_moderation_input)
openai.Moderation.create(
    input="I want to kill them.",
)
api#
- gptcache.adapter.api.put(prompt: str, data: Any, **kwargs) → None [source]#
put api: store question-answer pair information in GPTCache. Please make sure that the pre_embedding_func param is get_prompt when initializing the cache.
- Parameters
prompt (str) – the cache data key, usually the question text
data (Any) – the cache data value, usually the answer text
kwargs (Dict) – user-defined parameters
Example
from gptcache import cache
from gptcache.adapter.api import put
from gptcache.processor.pre import get_prompt

cache.init(pre_embedding_func=get_prompt)
put("hello", "foo")
- gptcache.adapter.api.get(prompt: str, **kwargs) → Any [source]#
get api: retrieve the cached data according to the prompt. Please make sure that the pre_embedding_func param is get_prompt when initializing the cache.
- Parameters
prompt (str) – the cache data key, usually the question text
kwargs (Dict) – user-defined parameters
Example
from gptcache import cache
from gptcache.adapter.api import put, get
from gptcache.processor.pre import get_prompt

cache.init(pre_embedding_func=get_prompt)
put("hello", "foo")
print(get("hello"))
- gptcache.adapter.api.init_similar_cache(data_dir: str = 'api_cache', cache_obj: typing.Optional[gptcache.core.Cache] = None, pre_func: typing.Callable = <function get_prompt>, embedding: typing.Optional[gptcache.embedding.base.BaseEmbedding] = None, data_manager: typing.Optional[gptcache.manager.data_manager.DataManager] = None, evaluation: typing.Optional[gptcache.similarity_evaluation.similarity_evaluation.SimilarityEvaluation] = None, post_func: typing.Callable = <function temperature_softmax>, config: gptcache.config.Config = <gptcache.config.Config object>)[source]#
Provide a quick way to initialize the cache for an api service
- Parameters
data_dir (str) – cache data storage directory
cache_obj (Optional[Cache]) – the Cache object to initialize; if not specified, the global cache object is initialized
pre_func (Callable) – pre-processing of the cache input text
embedding (BaseEmbedding) – embedding object
data_manager (DataManager) – data manager object
evaluation (SimilarityEvaluation) – similarity evaluation object
post_func (Callable[[List[Any]], Any]) – post-processing of the cached result list; by default the most similar result is taken
config (Config) – cache configuration; the core setting is the similarity threshold
- Returns
None
Example
from gptcache.adapter.api import put, get, init_similar_cache

init_similar_cache()
put("hello", "foo")
print(get("hello"))
- gptcache.adapter.api.init_similar_cache_from_config(config_dir: str, cache_obj: Optional[gptcache.core.Cache] = None)[source]#
Initialize a similar cache from the given configuration file; if cache_obj is not specified, the global cache object is initialized.
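The source gives no example for this function; the following is a minimal sketch, assuming gptcache_config.yml is an existing GPTCache YAML configuration file (the file name is a placeholder):

from gptcache.adapter.api import init_similar_cache_from_config, put, get

# Initialize the global similar cache from a YAML configuration file
# (the file name here is illustrative, not shipped with GPTCache).
init_similar_cache_from_config(config_dir="gptcache_config.yml")
put("hello", "foo")
print(get("hello"))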
base#
- class gptcache.adapter.base.BaseCacheLLM[source]#
Bases:
object
Base LLM. When you have an enhanced LLM that does not go through the original LLM API, you can use this class as a proxy so that it still gains the abilities of the cache.
NOTE: Please make sure that the custom LLM returns the same value as the original LLM.
For example, suppose you wrap the OpenAI client to collect latency statistics before sending each request; the wrapper is then a separate callable that differs from the openai package itself. As long as the wrapped function accepts the same request parameters and returns the same results as the original API, you can use this class to obtain the cache-related capabilities.
Example
import time

import openai

from gptcache import Cache
from gptcache.adapter import openai as cache_openai


def proxy_openai_chat_complete(*args, **kwargs):
    start_time = time.time()
    res = openai.ChatCompletion.create(*args, **kwargs)
    print("Consume Time Spent =", round((time.time() - start_time), 2))
    return res


llm_cache = Cache()

cache_openai.ChatCompletion.llm = proxy_openai_chat_complete
cache_openai.ChatCompletion.cache_args = {"cache_obj": llm_cache}

cache_openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "What's GitHub?",
        }
    ],
)
- llm: Optional[Callable] = None#
On a cache miss, if this attribute is set, it will be called; otherwise, the original LLM will be called.
dolly#
- class gptcache.adapter.dolly.Dolly(dolly_pipeline: Any)[source]#
Bases:
object
Wrapper for Dolly (databrickslabs/dolly.git).
- Example using from_model:
import torch

from gptcache import cache
from gptcache.processor.pre import get_inputs
cache.init(pre_embedding_func=get_inputs)

from gptcache.adapter.dolly import Dolly
dolly = Dolly.from_model(
    model="databricks/dolly-v2-12b",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device=0
)
- Example passing pipeline in directly:
import torch
from transformers import pipeline

from gptcache import cache
from gptcache.processor.pre import get_inputs
cache.init(pre_embedding_func=get_inputs)

from gptcache.adapter.dolly import Dolly
pipe = pipeline(
    model="databricks/dolly-v2-12b",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device=0
)
dolly = Dolly(pipe)
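In either case the wrapper is then invoked like the underlying pipeline; a brief hedged sketch (assuming the adapter forwards the call arguments to the wrapped pipeline and returns its output):

# Repeated calls with the same input are served from GPTCache.
answer = dolly("what's github")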
llama_cpp#
- class gptcache.adapter.llama_cpp.Llama(*args: Any, **kwargs: Any)[source]#
Bases:
llama_cpp.Llama
llama.cpp wrapper
You should have the llama-cpp-python library (abetlen/llama-cpp-python) installed.
Example
from gptcache import Cache
from gptcache.adapter.llama_cpp import Llama
from gptcache.embedding import Onnx
from gptcache.manager import manager_factory
from gptcache.processor.pre import get_prompt

onnx = Onnx()
m = manager_factory(
    'sqlite,faiss,local',
    data_dir=root,  # `root` is the cache storage directory, e.g. './llama_cache'
    vector_params={"dimension": onnx.dimension},
)
llm_cache = Cache()
llm_cache.init(
    pre_embedding_func=get_prompt,
    data_manager=m,
    embedding_func=onnx.to_embeddings
)
llm = Llama('./models/7B/ggml-model.bin')
answer = llm(prompt=question, cache_obj=llm_cache)  # `question` is the user's prompt string
stability_sdk#
- class gptcache.adapter.stability_sdk.StabilityInference(*args: Any, **kwargs: Any)[source]#
Bases:
stability_sdk.client.StabilityInference
client.StabilityInference Wrapper
Example
import os
import io

from PIL import Image

from gptcache import cache
from gptcache.processor.pre import get_prompt
from gptcache.adapter.stability_sdk import StabilityInference, generation

# init gptcache
cache.init(pre_embedding_func=get_prompt)

# run with gptcache
os.environ['STABILITY_KEY'] = 'key-goes-here'

stability_api = StabilityInference(
    key=os.environ['STABILITY_KEY'],  # API Key reference.
    verbose=False,  # Print debug messages.
    engine="stable-diffusion-xl-beta-v2-2-2",  # Set the engine to use for generation.
)

answers = stability_api.generate(
    prompt="a cat sitting besides a dog",
    width=256,
    height=256
)

for resp in answers:
    for artifact in resp.artifacts:
        if artifact.type == generation.ARTIFACT_IMAGE:
            img = Image.open(io.BytesIO(artifact.binary))
            img.save('path/to/save/image.png')
- class gptcache.adapter.stability_sdk.MockAnswer(artifacts: List[gptcache.adapter.stability_sdk.MockArtifact])[source]#
Bases:
object
- artifacts: List[gptcache.adapter.stability_sdk.MockArtifact]#
diffusers#
- class gptcache.adapter.diffusers.StableDiffusionPipeline(*args: Any, **kwargs: Any)[source]#
Bases:
diffusers.StableDiffusionPipeline
Diffuser StableDiffusionPipeline Wrapper
Example
import torch
from diffusers import DPMSolverMultistepScheduler

from gptcache import cache
from gptcache.processor.pre import get_prompt
from gptcache.adapter.diffusers import StableDiffusionPipeline

# init gptcache
cache.init(pre_embedding_func=get_prompt)

# run with gptcache
model_id = "stabilityai/stable-diffusion-2-1"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")

prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt=prompt).images[0]
minigpt4#
- class gptcache.adapter.minigpt4.MiniGPT4(chat, return_hit)[source]#
Bases:
object
MiniGPT4 Wrapper
Example
from gptcache import cache
from gptcache.processor.pre import get_image_question
from gptcache.adapter.minigpt4 import MiniGPT4

# init gptcache
cache.init(pre_embedding_func=get_image_question)

# run with gptcache
pipe = MiniGPT4.from_pretrained(cfg_path='eval_configs/minigpt4_eval.yaml', gpu_id=3, options=None)
question = "Which city is this photo taken?"
image = "./merlion.png"
answer = pipe(image, question)
adapter#
- gptcache.adapter.adapter.adapt(llm_handler, cache_data_convert, update_cache_callback, *args, **kwargs)[source]#
Adapt the cache workflow to different LLMs
- Parameters
llm_handler – the LLM calling method; when the cache misses, this function is called
cache_data_convert – when the cache hits, converts the cached answer into the format of the result returned by the LLM
update_cache_callback – when the cache misses, saves the result returned by the LLM to the cache
args – LLM args
kwargs – LLM kwargs
- Returns
llm result
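The docstring does not show the callbacks in action; below is a minimal hedged sketch of how an adapter might wire them together. The callback signatures and the plain-dict result shape are assumptions inferred from the parameter descriptions above, not the exact internal contract, and keying on `prompt` assumes the cache is initialized with get_prompt:

from gptcache import cache
from gptcache.adapter.adapter import adapt
from gptcache.processor.pre import get_prompt

cache.init(pre_embedding_func=get_prompt)

def llm_handler(*args, **kwargs):
    # Cache-miss path: forward the request to the real model.
    # The echo below is a placeholder for an actual LLM call.
    return {"answer": "echo: " + kwargs["prompt"]}

def cache_data_convert(cached_data, *args, **kwargs):
    # Cache-hit path: reshape the cached answer so callers always
    # see the same structure the LLM handler returns.
    return {"answer": cached_data}

def update_cache_callback(llm_data, update_cache_func, *args, **kwargs):
    # After a miss: persist the fresh answer, then pass the result through.
    update_cache_func(llm_data["answer"])
    return llm_data

result = adapt(llm_handler, cache_data_convert, update_cache_callback, prompt="what's github")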
- async gptcache.adapter.adapter.aadapt(llm_handler, cache_data_convert, update_cache_callback, *args, **kwargs)[source]#
Async version of the adapt method, for use with async LLM functions
- Parameters
llm_handler – the async LLM calling method; when the cache misses, this function is called
cache_data_convert – when the cache hits, converts the cached answer into the format of the result returned by the LLM
update_cache_callback – when the cache misses, saves the result returned by the LLM to the cache
args – LLM args
kwargs – LLM kwargs
- Returns
llm result
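The async variant is awaited in the same way; a brief hedged sketch under the same assumptions as the adapt example above, with the handler made asynchronous:

import asyncio

from gptcache import cache
from gptcache.adapter.adapter import aadapt
from gptcache.processor.pre import get_prompt

cache.init(pre_embedding_func=get_prompt)

async def async_llm_handler(*args, **kwargs):
    # Async cache-miss path; a real implementation would await an async LLM client.
    return {"answer": "echo: " + kwargs["prompt"]}

def cache_data_convert(cached_data, *args, **kwargs):
    # Cache-hit path (same assumed shape as in the adapt sketch).
    return {"answer": cached_data}

def update_cache_callback(llm_data, update_cache_func, *args, **kwargs):
    # Store the fresh answer, then return the LLM result unchanged.
    update_cache_func(llm_data["answer"])
    return llm_data

result = asyncio.run(
    aadapt(async_llm_handler, cache_data_convert, update_cache_callback, prompt="what's github")
)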
- gptcache.adapter.adapter.cache_health_check(vectordb, cache_dict)[source]#
This function checks whether the embedding from the vector store matches the one in the cache store. If the cache store and vector store are out of sync with each other, cache retrieval can be incorrect. If this happens, force the similarity score to the lowest possible value.
langchain_models#
replicate#
- class gptcache.adapter.replicate.Client(api_token=None)[source]#
Bases:
replicate.client.Client
replicate.client.Client Wrapper
Example
from gptcache import cache
from gptcache.adapter import replicate
from gptcache.embedding import Timm
from gptcache.manager import CacheBase, VectorBase, ObjectBase, get_data_manager
from gptcache.processor.pre import get_input_image_file_name
from gptcache.similarity_evaluation.onnx import OnnxModelEvaluation

# init gptcache
timm = Timm('resnet18')
cache_base = CacheBase('sqlite')
vector_base = VectorBase('faiss', dimension=timm.dimension)
object_base = ObjectBase('local', path='./objects')
data_manager = get_data_manager(cache_base, vector_base, object_base)
cache.init(
    pre_embedding_func=get_input_image_file_name,
    data_manager=data_manager,
    embedding_func=timm.to_embeddings,
    similarity_evaluation=OnnxModelEvaluation()
)

# run the replicate client with gptcache
output = replicate.run(
    "andreasjansson/blip-2:4b32258c42e9efd4288bb9910bc532a69727f9acd26aa08e175713a0a857a608",
    input={
        "image": open("/path/to/merlion.png", "rb"),
        "question": "Which city is this photo taken on?"
    }
)