Local#

dbally.llms.local.LocalLLM #

LocalLLM(model_name: str, default_options: Optional[LocalLLMOptions] = None, *, api_key: Optional[str] = None)

Bases: LLM[LocalLLMOptions]

Class for interacting with any LLM available on Hugging Face.

Constructs a new local LLM instance.

PARAMETER DESCRIPTION
model_name

Name of the model to use. This should be a causal language model that can be loaded with AutoModelForCausalLM.

TYPE: str

default_options

Default options for the LLM.

TYPE: Optional[LocalLLMOptions] DEFAULT: None

api_key

The API key for Hugging Face authentication.

TYPE: Optional[str] DEFAULT: None

Source code in src/dbally/llms/local.py
def __init__(
    self,
    model_name: str,
    default_options: Optional[LocalLLMOptions] = None,
    *,
    api_key: Optional[str] = None,
) -> None:
    """
    Constructs a new local LLM instance.

    Args:
        model_name: Name of the model to use. This should be a model from the CausalLM class.
        default_options: Default options for the LLM.
        api_key: The API key for Hugging Face authentication.
    """

    super().__init__(model_name, default_options)
    self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_key)
    self.api_key = api_key
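
Example usage (a minimal sketch; the model name, the HF_TOKEN environment variable, and the option values are illustrative, not defaults):

import os

from dbally.llms.clients.local import LocalLLMOptions
from dbally.llms.local import LocalLLM

llm = LocalLLM(
    model_name="HuggingFaceH4/zephyr-7b-beta",  # any causal LM with a chat template
    default_options=LocalLLMOptions(temperature=0.0, max_new_tokens=128),
    api_key=os.environ.get("HF_TOKEN"),  # only needed for gated or private models
)

The heavyweight LocalLLMClient (which loads the model weights) is created lazily through the client cached property described below.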

model_name instance-attribute #

model_name = model_name

default_options instance-attribute #

default_options = default_options or _options_cls()

tokenizer instance-attribute #

tokenizer = from_pretrained(model_name, token=api_key)

api_key instance-attribute #

api_key = api_key

client cached property #

Client for the LLM.

RETURNS DESCRIPTION
LocalLLMClient

The client used to interact with the LLM.

generate_text async #

generate_text(prompt: PromptTemplate, *, event_tracker: Optional[EventTracker] = None, options: Optional[LLMOptions] = None) -> str

Prepares and sends a prompt to the LLM and returns the response.

PARAMETER DESCRIPTION
prompt

Formatted prompt template with conversation and response parsing configuration.

TYPE: PromptTemplate

event_tracker

Event store used to audit the generation process.

TYPE: Optional[EventTracker] DEFAULT: None

options

Options to use for the LLM client.

TYPE: Optional[LLMOptions] DEFAULT: None

RETURNS DESCRIPTION
str

Text response from LLM.

RAISES DESCRIPTION
LLMError

If LLM text generation fails.

Source code in src/dbally/llms/base.py
async def generate_text(
    self,
    prompt: PromptTemplate,
    *,
    event_tracker: Optional[EventTracker] = None,
    options: Optional[LLMOptions] = None,
) -> str:
    """
    Prepares and sends a prompt to the LLM and returns the response.

    Args:
        prompt: Formatted prompt template with conversation and response parsing configuration.
        event_tracker: Event store used to audit the generation process.
        options: Options to use for the LLM client.

    Returns:
        Text response from LLM.

    Raises:
        LLMError: If LLM text generation fails.
    """
    options = (self.default_options | options) if options else self.default_options
    event = LLMEvent(prompt=prompt.chat, type=type(prompt).__name__)
    event_tracker = event_tracker or EventTracker()

    async with event_tracker.track_event(event) as span:
        event.response = await self.client.call(
            conversation=prompt.chat,
            options=options,
            event=event,
            json_mode=prompt.json_mode,
        )
        span(event)

    return event.response
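
A hedged sketch of a call: llm is a LocalLLM as constructed above and prompt stands for an already formatted PromptTemplate (both assumed to exist; PromptTemplate construction is documented elsewhere). Per-call options are merged over default_options with the | operator shown in the source, so presumably only the fields set here differ from the defaults:

import asyncio

from dbally.llms.clients.local import LocalLLMOptions

async def main() -> None:
    response = await llm.generate_text(
        prompt,
        options=LocalLLMOptions(temperature=0.7, max_new_tokens=256),
    )
    print(response)

asyncio.run(main())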

count_tokens #

count_tokens(prompt: PromptTemplate) -> int

Counts tokens in the messages.

PARAMETER DESCRIPTION
prompt

Messages to count tokens for.

TYPE: PromptTemplate

RETURNS DESCRIPTION
int

Number of tokens in the messages.

Source code in src/dbally/llms/local.py
def count_tokens(self, prompt: PromptTemplate) -> int:
    """
    Counts tokens in the messages.

    Args:
        prompt: Messages to count tokens for.

    Returns:
        Number of tokens in the messages.
    """

    input_ids = self.tokenizer.apply_chat_template(prompt.chat)
    return len(input_ids)
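
The snippet below mirrors what count_tokens does, as a standalone sketch with a plain chat list in place of a PromptTemplate (the model name and messages are illustrative):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
chat = [
    {"role": "system", "content": "You answer questions about a database."},
    {"role": "user", "content": "How many users signed up last week?"},
]
# With the default tokenize=True, apply_chat_template returns the token ids of the
# fully formatted conversation, so their count is the prompt's token count.
input_ids = tokenizer.apply_chat_template(chat)
print(len(input_ids))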

dbally.llms.clients.local.LocalLLMClient #

LocalLLMClient(model_name: str, *, hf_api_key: Optional[str] = None)

Bases: LLMClient[LocalLLMOptions]

Client for the local LLM that supports Hugging Face models.

Constructs a new local LLMClient instance.

PARAMETER DESCRIPTION
model_name

Name of the model to use.

TYPE: str

hf_api_key

The Hugging Face API key for authentication.

TYPE: Optional[str] DEFAULT: None

Source code in src/dbally/llms/clients/local.py
def __init__(
    self,
    model_name: str,
    *,
    hf_api_key: Optional[str] = None,
) -> None:
    """
    Constructs a new local LLMClient instance.

    Args:
        model_name: Name of the model to use.
        hf_api_key: The Hugging Face API key for authentication.
    """

    super().__init__(model_name)

    self.model = AutoModelForCausalLM.from_pretrained(
        model_name, device_map="auto", torch_dtype=torch.bfloat16, token=hf_api_key
    )
    self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_api_key)
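
In normal use the client is created for you through the client cached property of LocalLLM; a direct construction, sketched here with an illustrative model name and an HF_TOKEN environment variable as assumptions, mirrors the signature above:

import os

from dbally.llms.clients.local import LocalLLMClient

# Loads the model weights across the available devices (device_map="auto") in
# bfloat16, so expect a sizeable download and memory footprint for larger models.
client = LocalLLMClient(
    "HuggingFaceH4/zephyr-7b-beta",
    hf_api_key=os.environ.get("HF_TOKEN"),
)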

model_name instance-attribute #

model_name = model_name

model instance-attribute #

model = from_pretrained(model_name, device_map='auto', torch_dtype=bfloat16, token=hf_api_key)

tokenizer instance-attribute #

tokenizer = from_pretrained(model_name, token=hf_api_key)

call async #

call(conversation: ChatFormat, options: LocalLLMOptions, event: LLMEvent, json_mode: bool = False) -> str

Makes a call to the local LLM with the provided prompt and options.

PARAMETER DESCRIPTION
conversation

List of dicts with "role" and "content" keys, representing the chat history so far.

TYPE: ChatFormat

options

Additional settings used by the LLM.

TYPE: LocalLLMOptions

event

Container with the prompt, LLM response, and call metrics.

TYPE: LLMEvent

json_mode

Force the response to be in JSON format.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
str

Response string from LLM.

Source code in src/dbally/llms/clients/local.py
async def call(
    self,
    conversation: ChatFormat,
    options: LocalLLMOptions,
    event: LLMEvent,
    json_mode: bool = False,
) -> str:
    """
    Makes a call to the local LLM with the provided prompt and options.

    Args:
        conversation: List of dicts with "role" and "content" keys, representing the chat history so far.
        options: Additional settings used by the LLM.
        event: Container with the prompt, LLM response, and call metrics.
        json_mode: Force the response to be in JSON format.

    Returns:
        Response string from LLM.
    """

    input_ids = self.tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(self.model.device)

    outputs = self.model.generate(
        input_ids,
        eos_token_id=self.tokenizer.eos_token_id,
        **options.dict(),
    )
    response = outputs[0][input_ids.shape[-1] :]
    event.completion_tokens = len(outputs[0][input_ids.shape[-1] :])
    event.prompt_tokens = len(outputs[0][: input_ids.shape[-1]])
    event.total_tokens = input_ids.shape[-1]
    decoded_response = self.tokenizer.decode(response, skip_special_tokens=True)
    return decoded_response

dbally.llms.clients.local.LocalLLMOptions dataclass #

LocalLLMOptions(repetition_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN, do_sample: Union[Optional[bool], NotGiven] = NOT_GIVEN, best_of: Union[Optional[int], NotGiven] = NOT_GIVEN, max_new_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN, top_k: Union[Optional[int], NotGiven] = NOT_GIVEN, top_p: Union[Optional[float], NotGiven] = NOT_GIVEN, seed: Union[Optional[int], NotGiven] = NOT_GIVEN, stop_sequences: Union[Optional[List[str]], NotGiven] = NOT_GIVEN, temperature: Union[Optional[float], NotGiven] = NOT_GIVEN)

Bases: LLMOptions

Dataclass that represents all available LLM call options for the local LLM client. Each of them is described in the [Hugging Face documentation](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation).

repetition_penalty class-attribute instance-attribute #

repetition_penalty: Union[Optional[float], NotGiven] = NOT_GIVEN

do_sample class-attribute instance-attribute #

do_sample: Union[Optional[bool], NotGiven] = NOT_GIVEN

best_of class-attribute instance-attribute #

best_of: Union[Optional[int], NotGiven] = NOT_GIVEN

max_new_tokens class-attribute instance-attribute #

max_new_tokens: Union[Optional[int], NotGiven] = NOT_GIVEN

top_k class-attribute instance-attribute #

top_k: Union[Optional[int], NotGiven] = NOT_GIVEN

top_p class-attribute instance-attribute #

top_p: Union[Optional[float], NotGiven] = NOT_GIVEN

seed class-attribute instance-attribute #

seed: Union[Optional[int], NotGiven] = NOT_GIVEN

stop_sequences class-attribute instance-attribute #

stop_sequences: Union[Optional[List[str]], NotGiven] = NOT_GIVEN

temperature class-attribute instance-attribute #

temperature: Union[Optional[float], NotGiven] = NOT_GIVEN

dict #

dict() -> Dict[str, Any]

Creates a dictionary representation of the LLMOptions instance. If a value is None, it will be replaced with a provider-specific not-given sentinel.

RETURNS DESCRIPTION
Dict[str, Any]

A dictionary representation of the LLMOptions instance.

Source code in src/dbally/llms/clients/base.py
def dict(self) -> Dict[str, Any]:
    """
    Creates a dictionary representation of the LLMOptions instance.
    If a value is None, it will be replaced with a provider-specific not-given sentinel.

    Returns:
        A dictionary representation of the LLMOptions instance.
    """
    options = asdict(self)
    return {
        key: self._not_given if value is None or isinstance(value, NotGiven) else value
        for key, value in options.items()
    }
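
A short sketch of how options are serialized before being unpacked into model.generate inside LocalLLMClient.call; the field values are illustrative:

from dbally.llms.clients.local import LocalLLMOptions

options = LocalLLMOptions(
    do_sample=True,
    temperature=0.7,
    max_new_tokens=256,
)
# Fields left as NOT_GIVEN (and explicit None values) are replaced with the
# provider-specific not-given sentinel rather than passed through as None.
generation_kwargs = options.dict()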