Blame: llama_cpp/_llava.py - abetlen/llama-cpp-python

Python bindings for llama.cpp

0 0 0 Python

Add paligemma server support 2024-10-02 04:50:18 -04:00			`from __future__ import annotations`

			`import os`
			`import ctypes`
			`import typing`
			`import contextlib`

			`import numpy as np`

			`import llama_cpp`
			`import llama_cpp.llava_cpp as llava_cpp`


			`class LlavaEmbedding:`
			`def __init__(self, embedding: ctypes._Pointer[llava_cpp.llava_image_embed]):`
			`self._embedding = embedding`
			`self._exit_stack = contextlib.ExitStack()`

			`def llava_image_embed_free():`
			`llava_cpp.llava_image_embed_free(self._embedding)`

			`self._exit_stack.callback(llava_image_embed_free)`

			`@property`
			`def n_image_pos(self) -> int:`
			`return self._embedding.contents.n_image_pos`

			`def embed(`
			`self, llama_ctx: llama_cpp.llama_context_p, n_tokens: int, n_batch: int`
			`) -> int:`
			`n_past = ctypes.c_int(n_tokens)`
			`n_past_p = ctypes.pointer(n_past)`
			`llava_cpp.llava_eval_image_embed(`
			`llama_ctx,`
			`self._embedding,`
			`n_batch,`
			`n_past_p,`
			`)`
			`return n_past.value`

			`def numpy_view(self, shape: typing.Tuple[int, int]) -> np.ndarray:`
			`return np.ctypeslib.as_array(`
			`self._embedding.contents.embed, shape=shape`
			`)`


			`class LlavaModel:`
			`def __init__(self, path: str, n_threads: int = 1):`
			`self._path = path`
			`self._n_threads = n_threads`
			`self._exit_stack = contextlib.ExitStack()`

			`if not os.path.exists(self._path):`
			`raise ValueError(f"Clip model path does not exist: {self._path}")`

			`clip_ctx = llava_cpp.clip_model_load(self._path.encode(), 0)`

			`if clip_ctx is None:`
			`raise ValueError(f"Failed to load clip model: {self._path}")`

			`self._clip_ctx = clip_ctx`

			`def clip_free():`
			`llava_cpp.clip_free(self._clip_ctx)`
			`print("Clip model freed")`

			`self._exit_stack.callback(clip_free)`

			`def embed_bytes(self, image_bytes: bytes):`
			`embed = llava_cpp.llava_image_embed_make_with_bytes(`
			`self._clip_ctx,`
			`self._n_threads,`
			`(ctypes.c_uint8 * len(image_bytes)).from_buffer(bytearray(image_bytes)),`
			`len(image_bytes),`
			`)`
			`return LlavaEmbedding(embed)`