# Local

## dbally.llms.local.LocalLLM

```python
LocalLLM(model_name: str, default_options: Optional[LocalLLMOptions] = None, *, api_key: Optional[str] = None)
```

Bases: `LLM[LocalLLMOptions]`

Class for interacting with any LLM available on Hugging Face.

Constructs a new local LLM instance.
| PARAMETER | DESCRIPTION |
|---|---|
| `model_name` | Name of the model to use. This should be a model from the CausalLM class.<br>**TYPE:** `str` |
| `default_options` | Default options for the LLM.<br>**TYPE:** `Optional[LocalLLMOptions]` |
| `api_key` | The API key for Hugging Face authentication.<br>**TYPE:** `Optional[str]` |

Source code in `src/dbally/llms/local.py`
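For orientation, a minimal construction sketch; the model id and option values are illustrative, not library defaults:

```python
from dbally.llms.local import LocalLLM
from dbally.llms.clients.local import LocalLLMOptions

llm = LocalLLM(
    model_name="HuggingFaceH4/zephyr-7b-beta",  # any causal LM hosted on Hugging Face
    default_options=LocalLLMOptions(temperature=0.7, max_new_tokens=250),
    api_key="hf_...",  # placeholder; only needed for gated or private models
)
```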
### client `cached property`

```python
client: LocalLLMClient
```

Client for the LLM.

| RETURNS | DESCRIPTION |
|---|---|
| `LocalLLMClient` | The client used to interact with the LLM. |
### generate_text `async`

```python
generate_text(prompt: PromptTemplate, *, event_tracker: Optional[EventTracker] = None, options: Optional[LLMOptions] = None) -> str
```

Prepares and sends a prompt to the LLM and returns the response.

| PARAMETER | DESCRIPTION |
|---|---|
| `prompt` | Formatted prompt template with conversation and response parsing configuration.<br>**TYPE:** `PromptTemplate` |
| `event_tracker` | Event store used to audit the generation process.<br>**TYPE:** `Optional[EventTracker]` |
| `options` | Options to use for the LLM client.<br>**TYPE:** `Optional[LLMOptions]` |

| RETURNS | DESCRIPTION |
|---|---|
| `str` | Text response from the LLM. |

| RAISES | DESCRIPTION |
|---|---|
| `LLMError` | If LLM text generation fails. |

Source code in `src/dbally/llms/base.py`
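A hedged usage sketch: `generate_text` matches the signature above, but the `PromptTemplate` import path and constructor shown here are assumptions, so check the prompt documentation for the exact chat-template API.

```python
import asyncio

from dbally.llms.local import LocalLLM
from dbally.llms.clients.local import LocalLLMOptions
from dbally.prompts import PromptTemplate  # import path assumed

llm = LocalLLM(model_name="HuggingFaceH4/zephyr-7b-beta")

# Chat-style template; the exact PromptTemplate constructor is assumed here.
prompt = PromptTemplate((
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "List three SQL aggregate functions."},
))

async def main() -> None:
    response = await llm.generate_text(
        prompt,
        options=LocalLLMOptions(max_new_tokens=100),  # per-call options
    )
    print(response)

asyncio.run(main())
```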
### count_tokens

```python
count_tokens(prompt: PromptTemplate) -> int
```

Counts tokens in the messages.

| PARAMETER | DESCRIPTION |
|---|---|
| `prompt` | Messages to count tokens for.<br>**TYPE:** `PromptTemplate` |

| RETURNS | DESCRIPTION |
|---|---|
| `int` | Number of tokens in the messages. |

Source code in `src/dbally/llms/local.py`
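Token counting is synchronous; continuing the sketch above:

```python
n_tokens = llm.count_tokens(prompt)  # same PromptTemplate as in the sketch above
print(f"Prompt length: {n_tokens} tokens")
```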
## dbally.llms.clients.local.LocalLLMClient

Bases: `LLMClient[LocalLLMOptions]`

Client for the local LLM that supports Hugging Face models.

Constructs a new local LLMClient instance.

| PARAMETER | DESCRIPTION |
|---|---|
| `model_name` | Name of the model to use.<br>**TYPE:** `str` |
| `hf_api_key` | The Hugging Face API key for authentication.<br>**TYPE:** `Optional[str]` |

Source code in `src/dbally/llms/clients/local.py`
### model `instance-attribute`
### call `async`

```python
call(conversation: ChatFormat, options: LocalLLMOptions, event: LLMEvent, json_mode: bool = False) -> str
```

Makes a call to the local LLM with the provided prompt and options.

| PARAMETER | DESCRIPTION |
|---|---|
| `conversation` | List of dicts with "role" and "content" keys, representing the chat history so far.<br>**TYPE:** `ChatFormat` |
| `options` | Additional settings used by the LLM.<br>**TYPE:** `LocalLLMOptions` |
| `event` | Container with the prompt, LLM response, and call metrics.<br>**TYPE:** `LLMEvent` |
| `json_mode` | Force the response to be in JSON format.<br>**TYPE:** `bool` **DEFAULT:** `False` |

| RETURNS | DESCRIPTION |
|---|---|
| `str` | Response string from the LLM. |

Source code in `src/dbally/llms/clients/local.py`
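This method is normally reached indirectly, through `LocalLLM.generate_text`; the sketch below only illustrates the `ChatFormat` shape the `conversation` parameter expects (content strings are illustrative):

```python
from typing import Dict, List

# ChatFormat, per the parameter table above: a list of role/content dicts
# representing the chat history so far.
conversation: List[Dict[str, str]] = [
    {"role": "system", "content": "You are a terse assistant."},
    {"role": "user", "content": "What is db-ally?"},
]
```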
## dbally.llms.clients.local.LocalLLMOptions `dataclass`

```python
LocalLLMOptions(repetition_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN, do_sample: Union[Optional[bool], NotGiven] = NOT_GIVEN, best_of: Union[Optional[int], NotGiven] = NOT_GIVEN, max_new_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN, top_k: Union[Optional[int], NotGiven] = NOT_GIVEN, top_p: Union[Optional[float], NotGiven] = NOT_GIVEN, seed: Union[Optional[int], NotGiven] = NOT_GIVEN, stop_sequences: Union[Optional[List[str]], NotGiven] = NOT_GIVEN, temperature: Union[Optional[float], NotGiven] = NOT_GIVEN)
```

Bases: `LLMOptions`

Dataclass that represents all available LLM call options for the local LLM client. Each option is described in the [Hugging Face documentation](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation).
### repetition_penalty `class-attribute` `instance-attribute`

### do_sample `class-attribute` `instance-attribute`

### max_new_tokens `class-attribute` `instance-attribute`

### stop_sequences `class-attribute` `instance-attribute`

### temperature `class-attribute` `instance-attribute`
### dict

```python
dict() -> Dict[str, Any]
```

Creates a dictionary representation of the LLMOptions instance. If a value is None, it is replaced with a provider-specific not-given sentinel.

| RETURNS | DESCRIPTION |
|---|---|
| `Dict[str, Any]` | A dictionary representation of the LLMOptions instance. |
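A small sketch of the sentinel behaviour described above (values are illustrative):

```python
from dbally.llms.clients.local import LocalLLMOptions

options = LocalLLMOptions(temperature=0.5, max_new_tokens=128)
payload = options.dict()

print(payload["temperature"])     # 0.5
print(payload["max_new_tokens"])  # 128
# Per the description above, a field explicitly set to None is replaced in
# this dictionary by the provider-specific not-given sentinel, not by None.
```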