examples - ggml-org/llama.cpp

LLM inference in C/C++

	batched
	batched-bench
	batched.swift
	convert-llama2c-to-ggml
	cvector-generator
	deprecation-warning
	embedding
	eval-callback
	export-lora
	gbnf-validator
	gen-docs
	gguf
	gguf-hash
	gguf-split
	gritlm
	imatrix
	infill
	jeopardy
	llama-bench
	llama.android
	llama.swiftui
	llava
	lookahead
	lookup
	main
	main-cmake-pkg
	parallel
	passkey
	perplexity
	quantize
	quantize-stats
	retrieval
	rpc
	run
	save-load-state
	server
	simple
	simple-chat
	speculative
	speculative-simple
	sycl
	tokenize
	chat-13B.bat	2.4 KB
	chat-13B.sh	1.3 KB
	chat-persistent.sh	4.8 KB
	chat-vicuna.sh	1.3 KB
	chat.sh	349 B
	CMakeLists.txt	1.6 KB
	convert_legacy_llama.py	59.6 KB
	json_schema_pydantic_example.py	3.1 KB
	json_schema_to_grammar.py	32.9 KB
	llama.vim	25.7 KB
	llm.vim	921 B
	Miku.sh	2.6 KB
	pydantic_models_to_grammar_examples.py	13.4 KB
	pydantic_models_to_grammar.py	54.9 KB
	reason-act.sh	355 B
	regex_to_grammar.py	431 B
	server_embd.py	971 B
	server-llama2-13B.sh	790 B
	ts-type-to-grammar.sh	920 B