Source code for gptcache.processor.pre

import re
import string
from typing import Dict, Any


[docs]def last_content(data: Dict[str, Any], **_: Dict[str, Any]) -> Any:
    """get the last content of the message list

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import last_content

            content = last_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]})
            # content = "foo2"
    """
    return data.get("messages")[-1]["content"]


[docs]def last_content_without_prompt(data: Dict[str, Any], **params: Dict[str, Any]) -> Any:
    """get the last content of the message list without prompts content

    :param data: the user llm request data
    :type data: Dict[str, Any]
    :param params: the special gptcache params, like prompts param in the cache object
    :type params: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import last_content_without_prompt

            content = last_content_without_prompt(
                    {"messages": [{"content": "foo1"}, {"content": "foo2"}]}, prompts=["foo"]
                )
            # content = "2"
    """

    last_content_str = data.get("messages")[-1]["content"]
    prompts = params.get("prompts", [])
    if prompts is None:
        return last_content_str
    pattern = "|".join(prompts)
    new_content_str = re.sub(pattern, "", last_content_str)
    return new_content_str


def _get_pattern_value(pattern_str: str, value_str: str):
    literal_text_arr = []
    field_name_arr = []
    for literal_text, field_name, _, _ in string.Formatter().parse(pattern_str):
        literal_text_arr.append(literal_text)
        if field_name is not None:
            field_name_arr.append(
                field_name if field_name else str(len(field_name_arr))
            )

    pattern_values = {}
    last_end = 0
    for i, literal_text in enumerate(literal_text_arr):
        start = value_str.find(literal_text, last_end)
        if i == len(literal_text_arr) - 1:
            end = len(value_str)
        else:
            end = value_str.find(literal_text_arr[i + 1], start + 1)
        if start == -1 or end == -1:
            break
        start += len(literal_text)
        pattern_values[field_name_arr[i]] = value_str[start:end]
        last_end = end
    return pattern_values


[docs]def last_content_without_template(data: Dict[str, Any], **params: Dict[str, Any]) -> Any:
    """get the last content's template values of the message list without template content.

    When considering a cache agent or chain, the majority of the content consists of template content,
    while the essential information is simply a list of parameters within the template.
    In this way, the cache key is composed of a string made up of all the parameter values in the list.

    WARNING: Two parameters without intervals cannot appear in the template,
    for example: template = "{foo}{hoo}" is not supported,
    but template = "{foo}:{hoo}" is supported

    :param data: the user llm request data
    :type data: Dict[str, Any]

    :Example with str template:
        .. code-block:: python

            from gptcache import Config
            from gptcache.processor.pre import last_content_without_template

            template_obj = "tell me a joke about {subject}"
            prompt = template_obj.format(subject="animal")
            value = last_content_without_template(
                data={"messages": [{"content": prompt}]}, cache_config=Config(template=template_obj)
            )
            print(value)
            # ['animal']

    :Example with langchain template:
        .. code-block:: python

            from langchain import PromptTemplate

            from gptcache import Config
            from gptcache.processor.pre import last_content_without_template

            template_obj = PromptTemplate.from_template("tell me a joke about {subject}")
            prompt = template_obj.format(subject="animal")

            value = last_content_without_template(
                data={"messages": [{"content": prompt}]},
                cache_config=Config(template=template_obj.template),
            )
            print(value)
            # ['animal']

    NOTE: At present, only the simple PromptTemplate in langchain is supported.
    For ChatPromptTemplate, it needs to be adjusted according to the template array.
    If you need to use it, you need to pass in the final dialog template yourself.
    The reason why it cannot be advanced is that ChatPromptTemplate
    does not provide a method to directly return the template string.
    """
    last_content_str = data.get("messages")[-1]["content"]
    cache_config = params.get("cache_config", None)
    if not (cache_config and cache_config.template):
        return last_content_str

    pattern_value = _get_pattern_value(cache_config.template, last_content_str)
    return str(list(pattern_value.values()))


[docs]def all_content(data: Dict[str, Any], **_: Dict[str, Any]) -> Any:
    """get all content of the message list

    :param data: the user llm request data
    :type data: Dict[str, Any]

    :Example:
        .. code-block:: python

            from gptcache.processor.pre import all_content

            content = all_content(
                {"messages": [{"content": "foo1"}, {"content": "foo2"}]}
            )
            # content = "foo1\\nfoo2"
    """
    s = ""
    messages = data.get("messages")
    for i, message in enumerate(messages):
        if i == len(messages) - 1:
            s += message["content"]
        else:
            s += message["content"] + "\n"
    return s


[docs]def nop(data: Dict[str, Any], **_: Dict[str, Any]) -> Any:
    """do nothing of the llm request params

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import nop

            content = nop({"str": "hello"})
            # {"str": "hello"}
    """
    return data


[docs]def get_prompt(data: Dict[str, Any], **_: Dict[str, Any]) -> Any:
    """get the prompt of the llm request params

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_prompt

            content = get_prompt({"prompt": "foo"})
            # "foo"
    """
    return data.get("prompt")


[docs]def get_file_name(data: Dict[str, Any], **_: Dict[str, Any]) -> str:
    """get the file name of the llm request params

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_file_name

            file = open("test.txt", "a")
            content = get_file_name({"file": file})
            # "test.txt"
    """
    return data.get("file").name


[docs]def get_file_bytes(data: Dict[str, Any], **_: Dict[str, Any]) -> bytes:
    """get the file bytes of the llm request params

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_file_bytes

            content = get_file_bytes({"file": open("test.txt", "rb")})
    """
    return data.get("file").peek()


[docs]def get_input_str(data: Dict[str, Any], **_: Dict[str, Any]) -> str:
    """get the image and question str of the llm request params

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_input_str

            content = get_input_str({"input": {"image": open("test.png", "rb"), "question": "foo"}})
    """
    input_data = data.get("input")
    return str(input_data["image"].peek()) + input_data["question"]


[docs]def get_input_image_file_name(data: Dict[str, Any], **_: Dict[str, Any]) -> str:
    """get the image file name of the llm request params

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_input_image_file_name

            content = get_input_image_file_name({"input": {"image": open("test.png", "rb")}})
            # "test.png"
    """
    input_data = data.get("input")
    return input_data["image"].name


[docs]def get_image_question(data: Dict[str, Any], **_: Dict[str, Any]) -> str:  # pragma: no cover
    """get the image and question str of the llm request params

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_image_question

            content = get_image_question({"image": open("test.png", "rb"), "question": "foo"})
    """
    img = data.get("image")
    data_img = str(open(img, "rb").peek()) if isinstance(img, str) else str(img)  # pylint: disable=consider-using-with
    return data_img + data.get("question")


[docs]def get_image(data: Dict[str, Any], **_: Dict[str, Any]) -> str:  # pragma: no cover
    """get the image of the llm request params

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_image

            content = get_image({"image": open("test.png", "rb")})
            # "test.png"
    """
    return data.get("image")


[docs]def get_inputs(data: Dict[str, Any], **_: Dict[str, Any]):
    """get the inputs of the llm request params

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_inputs

            content = get_inputs({"inputs": "hello"})
            # "hello"
    """
    return data.get("inputs")


[docs]def get_messages_last_content(data: Dict[str, Any], **_: Any) -> str:
    """ get the last content of the llm request messages array

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_messages_last_content

            content = get_messages_last_content({"messages": [{"content": "hello"}, {"content": "world"}]})
            # "world"
    """
    return data.get("messages")[-1].content


[docs]def get_openai_moderation_input(data: Dict[str, Any], **_: Dict[str, Any]) -> str:
    """get the input param of the openai moderation request params

    :param data: the user openai moderation request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_openai_moderation_input

            content = get_openai_moderation_input({"input": ["hello", "world"]})
            # "['hello', 'world']"
    """

    return str(data.get("input"))


[docs]def concat_all_queries(data: Dict[str, Any], **params: Dict[str, Any]) -> Any:
    """

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import concat_all_queries

            content = concat_all_queries({"messages": [{"role": "system", "content": "hello"},
                {"role": "user", "content": "world"},
                {"role": "assistant", "content": "alice"}]})

    """
    cache_config = params.get("cache_config", None)
    skip_list = cache_config.skip_list
    context_len = cache_config.context_len
    context_len = context_len * 2
    s = ""
    messages = data.get("messages")
    length = min(context_len, len(messages))
    messages = messages[len(messages) - length:]
    for i, message in enumerate(messages):
        if message["role"] in skip_list:
            continue
        if i == len(messages) - 1:
            s += f'{message["role"].upper()}: {message["content"]}'
        else:
            s += f'{message["role"].upper()}: {message["content"]}\n'
    return s