Commit 59af4df

**Add llama-cli example to llama.cpp snippets (#1889)**

Add `llama-cli` command alongside `llama-server` for all installation methods.

## Changes

- Add `llama-cli` snippet for direct terminal inference
- Update the `llama-server` comment to mention the web UI
- Build from source now compiles both targets

Parent commit: 3f8ca9a
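
At the API surface, the change means each llama.cpp `LocalAppSnippet` now carries two command blocks in its `content` field instead of one. A minimal sketch of the resulting value for a conversational GGUF model, with the strings copied verbatim from the updated test expectations below:

```ts
// Sketch: the llama.cpp snippet `content` after this commit.
// Both strings are taken from the spec expectations in this diff.
const content: string[] = [
	"# Start a local OpenAI-compatible server with a web UI:\n" +
		"llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}",
	"# Run inference directly in the terminal:\n" +
		"llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}",
];
```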

2 files changed: +26 −11 lines

**packages/tasks/src/local-apps.spec.ts** (12 additions, 4 deletions)

```diff
@@ -12,8 +12,12 @@ describe("local-apps", () => {
 		};
 		const snippet = snippetFunc(model);
 
-		expect(snippet[0].content).toEqual(`# Load and run the model:
-llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`);
+		expect(snippet[0].content).toEqual([
+			`# Start a local OpenAI-compatible server with a web UI:
+llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`,
+			`# Run inference directly in the terminal:
+llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`,
+		]);
 	});
 
 	it("llama.cpp non-conversational", async () => {
@@ -25,8 +29,12 @@ llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:{{QUANT_TAG}}`);
 		};
 		const snippet = snippetFunc(model);
 
-		expect(snippet[0].content).toEqual(`# Load and run the model:
-llama-server -hf mlabonne/gemma-2b-GGUF:{{QUANT_TAG}}`);
+		expect(snippet[0].content).toEqual([
+			`# Start a local OpenAI-compatible server with a web UI:
+llama-server -hf mlabonne/gemma-2b-GGUF:{{QUANT_TAG}}`,
+			`# Run inference directly in the terminal:
+llama-cli -hf mlabonne/gemma-2b-GGUF:{{QUANT_TAG}}`,
+		]);
 	});
 
 	it("vLLM conversational llm", async () => {
```

**packages/tasks/src/local-apps.ts** (14 additions, 7 deletions)

```diff
@@ -110,20 +110,27 @@ function getQuantTag(filepath?: string): string {
 }
 
 const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
-	const command = (binary: string) => {
-		const snippet = ["# Load and run the model:", `${binary} -hf ${model.id}${getQuantTag(filepath)}`];
+	const serverCommand = (binary: string) => {
+		const snippet = [
+			"# Start a local OpenAI-compatible server with a web UI:",
+			`${binary} -hf ${model.id}${getQuantTag(filepath)}`,
+		];
+		return snippet.join("\n");
+	};
+	const cliCommand = (binary: string) => {
+		const snippet = ["# Run inference directly in the terminal:", `${binary} -hf ${model.id}${getQuantTag(filepath)}`];
 		return snippet.join("\n");
 	};
 	return [
 		{
 			title: "Install from brew",
 			setup: "brew install llama.cpp",
-			content: command("llama-server"),
+			content: [serverCommand("llama-server"), cliCommand("llama-cli")],
 		},
 		{
 			title: "Install from WinGet (Windows)",
 			setup: "winget install llama.cpp",
-			content: command("llama-server"),
+			content: [serverCommand("llama-server"), cliCommand("llama-cli")],
 		},
 		{
 			title: "Use pre-built binary",
@@ -132,17 +139,17 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
 				"# Download pre-built binary from:",
 				"# https://github.com/ggerganov/llama.cpp/releases",
 			].join("\n"),
-			content: command("./llama-server"),
+			content: [serverCommand("./llama-server"), cliCommand("./llama-cli")],
 		},
 		{
 			title: "Build from source code",
 			setup: [
 				"git clone https://github.com/ggerganov/llama.cpp.git",
 				"cd llama.cpp",
 				"cmake -B build",
-				"cmake --build build -j --target llama-server",
+				"cmake --build build -j --target llama-server llama-cli",
 			].join("\n"),
-			content: command("./build/bin/llama-server"),
+			content: [serverCommand("./build/bin/llama-server"), cliCommand("./build/bin/llama-cli")],
 		},
 	];
 };
```
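
One practical consequence for consumers: `content` on a `LocalAppSnippet` can now be an array of command blocks rather than a single string, so rendering code has to normalize both shapes. A hedged helper sketch (this function is hypothetical, not part of the commit; it only assumes `content` is typed `string | string[]`):

```ts
import type { LocalAppSnippet } from "./local-apps";

// Hypothetical helper: normalize `content` into an array of command
// blocks so each one can be rendered as its own copyable code box.
function contentBlocks(snippet: LocalAppSnippet): string[] {
	return Array.isArray(snippet.content) ? snippet.content : [snippet.content];
}

// Usage sketch: print the server and CLI blocks separated by a blank line.
// contentBlocks(someSnippet).forEach((block) => console.log(block + "\n"));
```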
