Adapter#

openai#

class gptcache.adapter.openai.ChatCompletion(engine: Optional[str] = None, **kwargs)[source]#

Bases: openai.api_resources.chat_completion.ChatCompletion, gptcache.adapter.base.BaseCacheLLM

OpenAI ChatCompletion Wrapper

Example

from gptcache import cache
# init gptcache
cache.init()
cache.set_openai_key()

from gptcache.adapter import openai
# run ChatCompletion model with gptcache
response = openai.ChatCompletion.create(
              model='gpt-3.5-turbo',
              messages=[
                {
                    'role': 'user',
                    'content': "what's github"
                }],
            )
response_content = response['choices'][0]['message']['content']
classmethod create(*args, **kwargs)[source]#

Creates a new chat completion for the provided messages and parameters.

See https://platform.openai.com/docs/api-reference/chat-completions/create for a list of valid parameters.

class gptcache.adapter.openai.Completion(engine: Optional[str] = None, **kwargs)[source]#

Bases: openai.api_resources.completion.Completion, gptcache.adapter.base.BaseCacheLLM

OpenAI Completion Wrapper

Example

from gptcache import cache
from gptcache.processor.pre import get_prompt
# init gptcache
cache.init(pre_embedding_func=get_prompt)
cache.set_openai_key()

from gptcache.adapter import openai
# run Completion model with gptcache
response = openai.Completion.create(model="text-davinci-003",
                                    prompt="Hello world.")
response_text = response["choices"][0]["text"]
classmethod create(*args, **kwargs)[source]#

Creates a new completion for the provided prompt and parameters.

See https://platform.openai.com/docs/api-reference/completions/create for a list of valid parameters.

class gptcache.adapter.openai.Audio(id=None, api_key=None, api_version=None, api_type=None, organization=None, response_ms: Optional[int] = None, api_base=None, engine=None, **params)[source]#

Bases: openai.api_resources.audio.Audio

OpenAI Audio Wrapper

Example

from gptcache import cache
from gptcache.processor.pre import get_file_bytes
# init gptcache
cache.init(pre_embedding_func=get_file_bytes)
cache.set_openai_key()

from gptcache.adapter import openai
# run audio transcribe model with gptcache
audio_file = open("/path/to/audio.mp3", "rb")
transcript = openai.Audio.transcribe("whisper-1", audio_file)

# run audio translate model with gptcache
audio_file = open("/path/to/audio.mp3", "rb")
translation = openai.Audio.translate("whisper-1", audio_file)
classmethod transcribe(model: str, file: Any, *args, **kwargs)[source]#
classmethod translate(model: str, file: Any, *args, **kwargs)[source]#
class gptcache.adapter.openai.Image(id=None, api_key=None, api_version=None, api_type=None, organization=None, response_ms: Optional[int] = None, api_base=None, engine=None, **params)[source]#

Bases: openai.api_resources.image.Image

OpenAI Image Wrapper

Example

from gptcache import cache
from gptcache.processor.pre import get_prompt
# init gptcache
cache.init(pre_embedding_func=get_prompt)
cache.set_openai_key()

from gptcache.adapter import openai
# run image generation model with gptcache
response = openai.Image.create(
  prompt="a white siamese cat",
  n=1,
  size="256x256"
)
response_url = response['data'][0]['url']
classmethod create(*args, **kwargs)[source]#
class gptcache.adapter.openai.Moderation(id=None, api_key=None, api_version=None, api_type=None, organization=None, response_ms: Optional[int] = None, api_base=None, engine=None, **params)[source]#

Bases: openai.api_resources.moderation.Moderation, gptcache.adapter.base.BaseCacheLLM

OpenAI Moderation Wrapper

Example

from gptcache.adapter import openai
from gptcache.adapter.api import init_similar_cache
from gptcache.processor.pre import get_openai_moderation_input

init_similar_cache(pre_func=get_openai_moderation_input)
openai.Moderation.create(
    input="I want to kill them.",
)
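
The wrapped call returns the standard OpenAI moderation response, so the verdict can be read from it directly; a minimal sketch (the field names follow the OpenAI Moderation API response format):

moderation_result = openai.Moderation.create(
    input="I want to kill them.",
)
is_flagged = moderation_result["results"][0]["flagged"]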
classmethod create(*args, **kwargs)[source]#

api#

gptcache.adapter.api.put(prompt: str, data: Any, **kwargs) None[source]#

put api: store question-answer pair data in GPTCache. Please make sure that the pre_embedding_func param is get_prompt when initializing the cache.

Parameters
  • prompt (str) – the cache data key, usually question text

  • data (Any) – the cache data value, usually answer text

  • kwargs (Dict) – list of user-defined parameters

Example

from gptcache import cache
from gptcache.adapter.api import put
from gptcache.processor.pre import get_prompt

cache.init(pre_embedding_func=get_prompt)
put("hello", "foo")
gptcache.adapter.api.get(prompt: str, **kwargs) Any[source]#

get api: fetch the cached data for the given prompt. Please make sure that the pre_embedding_func param is get_prompt when initializing the cache.

Parameters
  • prompt (str) – the cache data key, usually question text

  • kwargs (Dict) – list of user-defined parameters

Example

from gptcache import cache
from gptcache.adapter.api import put, get
from gptcache.processor.pre import get_prompt

cache.init(pre_embedding_func=get_prompt)
put("hello", "foo")
print(get("hello"))
gptcache.adapter.api.init_similar_cache(data_dir: str = 'api_cache', cache_obj: typing.Optional[gptcache.core.Cache] = None, pre_func: typing.Callable = <function get_prompt>, embedding: typing.Optional[gptcache.embedding.base.BaseEmbedding] = None, data_manager: typing.Optional[gptcache.manager.data_manager.DataManager] = None, evaluation: typing.Optional[gptcache.similarity_evaluation.similarity_evaluation.SimilarityEvaluation] = None, post_func: typing.Callable = <function temperature_softmax>, config: gptcache.config.Config = <gptcache.config.Config object>)[source]#

Provide a quick way to initialize cache for api service

Parameters
  • data_dir (str) – cache data storage directory

  • cache_obj (Optional[Cache]) – specify to initialize the Cache object, if not specified, initialize the global object

  • pre_func (Callable) – pre-processing of the cache input text

  • embedding (BaseEmbedding) – embedding object

  • data_manager (DataManager) – data manager object

  • evaluation (SimilarityEvaluation) – similarity evaluation object

  • post_func (Callable[[List[Any]], Any]) – post-processing of the cached result list, the most similar result is taken by default

  • config (Config) – cache configuration, the core is similar threshold

Returns

None

Example

from gptcache.adapter.api import put, get, init_similar_cache

init_similar_cache()
put("hello", "foo")
print(get("hello"))
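
The documented parameters can also be supplied explicitly. The following is a hedged sketch, assuming the Onnx embedding, a sqlite/faiss data manager and the SearchDistanceEvaluation that ship with GPTCache; any of these components can be swapped for your own.

from gptcache.adapter.api import put, get, init_similar_cache
from gptcache.config import Config
from gptcache.embedding import Onnx
from gptcache.manager import manager_factory
from gptcache.similarity_evaluation import SearchDistanceEvaluation

onnx = Onnx()
data_manager = manager_factory(
    "sqlite,faiss", data_dir="api_cache", vector_params={"dimension": onnx.dimension}
)
init_similar_cache(
    data_dir="api_cache",
    embedding=onnx,
    data_manager=data_manager,
    evaluation=SearchDistanceEvaluation(),
    config=Config(similarity_threshold=0.8),
)
put("hello", "foo")
print(get("hello"))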
gptcache.adapter.api.init_similar_cache_from_config(config_dir: str, cache_obj: Optional[gptcache.core.Cache] = None)[source]#
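
No example is shipped for this helper; as a minimal, hedged sketch, assuming config_dir points at your own GPTCache YAML configuration (the file name below is hypothetical):

from gptcache.adapter.api import put, get, init_similar_cache_from_config

# "gptcache_config.yml" is a hypothetical path to your own configuration
init_similar_cache_from_config(config_dir="gptcache_config.yml")
put("hello", "foo")
print(get("hello"))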

base#

class gptcache.adapter.base.BaseCacheLLM[source]#

Bases: object

Base LLM. When you have an enhanced LLM that does not call the original LLM API directly, you can use this class as a proxy to gain the caching capability.

NOTE: Please make sure that the custom LLM returns values in the same format as the original LLM.

For example, suppose you wrap the OpenAI request in your own package so that you can record latency before sending it; that wrapper is a separate function rather than the openai API itself. As long as the request parameters and return values of your wrapper are the same as the original ones, you can use this class to obtain cache-related capabilities.

Example

import time

import openai

from gptcache import Cache
from gptcache.adapter import openai as cache_openai


def proxy_openai_chat_complete(*args, **kwargs):
    start_time = time.time()
    res = openai.ChatCompletion.create(*args, **kwargs)
    print("Consume Time Spent =", round((time.time() - start_time), 2))
    return res


llm_cache = Cache()

cache_openai.ChatCompletion.llm = proxy_openai_chat_complete
cache_openai.ChatCompletion.cache_args = {"cache_obj": llm_cache}

cache_openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "user",
            "content": "What's GitHub?",
        }
    ],
)
llm: Optional[Callable] = None#

On a cache miss, if this attribute is set, it will be called; otherwise the original LLM is called.

cache_args: Dict[str, Any] = {}#

Cache-related public parameters, such as cache_obj, that should apply to every call. Set them here if you do not want to pass the same parameters on each request.

classmethod fill_base_args(**kwargs)[source]#

Merge the shared cache_args into the keyword arguments of a call.
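
A hedged illustration, assuming fill_base_args copies every cache_args entry that is not already present into the call's keyword arguments:

from gptcache import Cache
from gptcache.adapter import openai as cache_openai

llm_cache = Cache()
cache_openai.ChatCompletion.cache_args = {"cache_obj": llm_cache}

# explicitly passed keyword arguments win; missing ones are filled from cache_args
merged = cache_openai.ChatCompletion.fill_base_args(temperature=0.0)
# merged is expected to contain both "temperature" and "cache_obj"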

dolly#

class gptcache.adapter.dolly.Dolly(dolly_pipeline: Any)[source]#

Bases: object

Wrapper for Dolly (databrickslabs/dolly.git).

Example using from_model:
import torch
from gptcache import cache
from gptcache.processor.pre import get_inputs
cache.init(pre_embedding_func=get_inputs)

from gptcache.adapter.dolly import Dolly
dolly = Dolly.from_model(
    model="databricks/dolly-v2-12b", torch_dtype=torch.bfloat16, trust_remote_code=True, device=0
)
Example passing pipeline in directly:
import torch
from transformers import pipeline
from gptcache import cache
from gptcache.processor.pre import get_inputs
cache.init(pre_embedding_func=get_inputs)
from gptcache.adapter.dolly import Dolly

pipe = pipeline(
    model="databricks/dolly-v2-12b", torch_dtype=torch.bfloat16, trust_remote_code=True, device=0
)
dolly = Dolly(pipe)
classmethod from_model(model: str, **kwargs)[source]#

llama_cpp#

class gptcache.adapter.llama_cpp.Llama(*args: Any, **kwargs: Any)[source]#

Bases: llama_cpp.Llama

llama.cpp wrapper

You should have the llama-cpp-python library installed (abetlen/llama-cpp-python).

Example

from gptcache import Cache
from gptcache.embedding import Onnx
from gptcache.manager import manager_factory
from gptcache.processor.pre import get_prompt
from gptcache.adapter.llama_cpp import Llama

onnx = Onnx()
m = manager_factory('sqlite,faiss,local', data_dir='./llama_cache', vector_params={"dimension": onnx.dimension})
llm_cache = Cache()
llm_cache.init(
    pre_embedding_func=get_prompt,
    data_manager=m,
    embedding_func=onnx.to_embeddings
)

question = "what's github?"
llm = Llama('./models/7B/ggml-model.bin')
answer = llm(prompt=question, cache_obj=llm_cache)

stability_sdk#

class gptcache.adapter.stability_sdk.StabilityInference(*args: Any, **kwargs: Any)[source]#

Bases: stability_sdk.client.StabilityInference

client.StabilityInference Wrapper

Example

import os
import io
from PIL import Image

from gptcache import cache
from gptcache.processor.pre import get_prompt
from gptcache.adapter.stability_sdk import StabilityInference, generation

# init gptcache
cache.init(pre_embedding_func=get_prompt)

# run with gptcache
os.environ['STABILITY_KEY'] = 'key-goes-here'

stability_api = StabilityInference(
    key=os.environ['STABILITY_KEY'], # API Key reference.
    verbose=False, # Print debug messages.
    engine="stable-diffusion-xl-beta-v2-2-2", # Set the engine to use for generation.
)

answers = stability_api.generate(
    prompt="a cat sitting besides a dog",
    width=256,
    height=256
    )

for resp in answers:
    for artifact in resp.artifacts:
        if artifact.type == generation.ARTIFACT_IMAGE:
            img = Image.open(io.BytesIO(artifact.binary))
            img.save('path/to/save/image.png')
generate(*args, **kwargs)[source]#
class gptcache.adapter.stability_sdk.MockArtifact(type: int, binary: bytes)[source]#

Bases: object

type: int#
binary: bytes#
class gptcache.adapter.stability_sdk.MockAnswer(artifacts: List[gptcache.adapter.stability_sdk.MockArtifact])[source]#

Bases: object

artifacts: List[gptcache.adapter.stability_sdk.MockArtifact]#

diffusers#

class gptcache.adapter.diffusers.StableDiffusionPipeline(*args: Any, **kwargs: Any)[source]#

Bases: diffusers.StableDiffusionPipeline

Diffuser StableDiffusionPipeline Wrapper

Example

import torch

from gptcache import cache
from gptcache.processor.pre import get_prompt
from gptcache.adapter.diffusers import StableDiffusionPipeline
from diffusers import DPMSolverMultistepScheduler

# init gptcache
cache.init(pre_embedding_func=get_prompt)

# run with gptcache
model_id = "stabilityai/stable-diffusion-2-1"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cuda")

prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt=prompt).images[0]

minigpt4#

class gptcache.adapter.minigpt4.MiniGPT4(chat, return_hit)[source]#

Bases: object

MiniGPT4 Wrapper

Example

from gptcache import cache
from gptcache.processor.pre import get_image_question
from gptcache.adapter.minigpt4 import MiniGPT4

# init gptcache
cache.init(pre_embedding_func=get_image_question)

# run with gptcache
pipe = MiniGPT4.from_pretrained(cfg_path='eval_configs/minigpt4_eval.yaml', gpu_id=3, options=None)
question = "Which city is this photo taken?"
image = "./merlion.png"
answer = pipe(image, question)
classmethod from_pretrained(cfg_path, gpu_id=0, options=None, return_hit=False)[source]#

adapter#

gptcache.adapter.adapter.adapt(llm_handler, cache_data_convert, update_cache_callback, *args, **kwargs)[source]#

Adapt to different llm

Parameters
  • llm_handler – LLM calling method, when the cache misses, this function will be called

  • cache_data_convert – When the cache hits, convert the answer in the cache to the format of the result returned by llm

  • update_cache_callback – If the cache misses, after getting the result returned by llm, save the result to the cache

  • args – llm args

  • kwargs – llm kwargs

Returns

llm result
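
A hedged sketch of the calling convention, mirroring the way gptcache.adapter.api.put and get drive adapt; the handler and both callbacks below are hypothetical stand-ins for a real adapter:

from gptcache import cache
from gptcache.adapter.adapter import adapt
from gptcache.processor.pre import get_prompt

cache.init(pre_embedding_func=get_prompt)

def llm_handler(*args, **kwargs):
    # only called on a cache miss; stands in for a real LLM request
    return "answer for: " + kwargs.get("prompt", "")

def cache_data_convert(cache_data):
    # on a cache hit, shape the cached answer like the llm result
    return cache_data

def update_cache_callback(llm_data, update_cache_func, *args, **kwargs):
    # on a cache miss, store the llm result, then hand it back to the caller
    update_cache_func(llm_data)
    return llm_data

# the first call misses and stores; an identical second call is served from the cache
answer = adapt(llm_handler, cache_data_convert, update_cache_callback, prompt="what's github")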

async gptcache.adapter.adapter.aadapt(llm_handler, cache_data_convert, update_cache_callback, *args, **kwargs)[source]#

Async counterpart of the 'adapt' method, for use with async LLM functions.

Parameters
  • llm_handler – Async LLM calling method, when the cache misses, this function will be called

  • cache_data_convert – When the cache hits, convert the answer in the cache to the format of the result returned by llm

  • update_cache_callback – If the cache misses, after getting the result returned by llm, save the result to the cache

  • args – llm args

  • kwargs – llm kwargs

Returns

llm result

gptcache.adapter.adapter.cache_health_check(vectordb, cache_dict)[source]#

This function checks whether the embedding from the vector store matches the one in the cache store. If the cache store and vector store are out of sync with each other, cache retrieval can be incorrect. If this happens, the similarity score is forced to the lowest possible value.

langchain_models#

replicate#

class gptcache.adapter.replicate.Client(api_token=None)[source]#

Bases: replicate.client.Client

replicate.client.Client Wrapper

Example

from gptcache import cache
from gptcache.adapter import replicate
from gptcache.processor.pre import get_input_image_file_name
from gptcache.embedding import Timm
from gptcache.similarity_evaluation.onnx import OnnxModelEvaluation
from gptcache.manager import CacheBase, VectorBase, ObjectBase, get_data_manager

# init gptcache
timm = Timm('resnet18')
cache_base = CacheBase('sqlite')
vector_base = VectorBase('faiss', dimension=timm.dimension)
object_base = ObjectBase('local', path='./objects')
data_manager = get_data_manager(cache_base, vector_base, object_base)

cache.init(
    pre_embedding_func=get_input_image_file_name,
    data_manager=data_manager,
    embedding_func=timm.to_embeddings,
    similarity_evaluation=OnnxModelEvaluation()
    )

# run replicate client with gptcache
output = replicate.run(
            "andreasjansson/blip-2:4b32258c42e9efd4288bb9910bc532a69727f9acd26aa08e175713a0a857a608",
            input={"image": open("/path/to/merlion.png", "rb"),
                   "question": "Which city is this photo taken on?"}
        )
run(model_version: str, **kwargs)[source]#

Run a model in the format owner/name:version.

gptcache.adapter.replicate.run(model_version: str, **kwargs)#

Run a model in the format owner/name:version.