
Commit b9ce635

org and llms

1 parent d1a0e1c commit b9ce635

2 files changed: +78 -87 lines changed

scripts/release.sh

Lines changed: 6 additions & 56 deletions
@@ -1,35 +1,6 @@
 #!/bin/bash
 set -e
 
-# Check if version type is provided
-if [ -z "$1" ]; then
-    echo "Please provide version type: major, minor, or patch"
-    exit 1
-fi
-
-VERSION_TYPE=$1
-
-# Store the original version before any changes
-ORIGINAL_VERSION=$(poetry version -s)
-
-# Function to rollback changes
-rollback() {
-    echo "Error occurred. Rolling back changes..."
-    # Reset version in pyproject.toml
-    poetry version "$ORIGINAL_VERSION"
-    # Remove local tag if it exists
-    if git tag | grep -q "v${NEW_VERSION}"; then
-        git tag -d "v${NEW_VERSION}"
-    fi
-    # Reset any changes in git
-    git reset --hard HEAD^
-    echo "Rollback complete. Version restored to ${ORIGINAL_VERSION}"
-    exit 1
-}
-
-# Set up trap to catch errors
-trap rollback ERR
-
 # Ensure we're on main branch
 current_branch=$(git branch --show-current)
 if [ "$current_branch" != "main" ]; then
@@ -43,34 +14,13 @@ if [ -n "$(git status --porcelain)" ]; then
     exit 1
 fi
 
-# Pull latest changes
-git pull origin main
-
-# Bump version using poetry
-echo "Bumping $VERSION_TYPE version..."
-poetry version $VERSION_TYPE
-NEW_VERSION=$(poetry version -s)
-
-# Update files
-git add pyproject.toml
-
-# Commit version bump
-git commit -m "chore: bump version to ${NEW_VERSION}"
-
-# Push commit
-git push origin main
-
-# Create and push only the new tag
-git tag "v${NEW_VERSION}"
-git push origin "v${NEW_VERSION}"
+# Get the latest tag
+LATEST_TAG=$(git describe --tags --abbrev=0)
 
 # Create GitHub release
-echo "Creating GitHub release..."
-gh release create "v${NEW_VERSION}" \
-    --title "Release v${NEW_VERSION}" \
+echo "Creating GitHub release for ${LATEST_TAG}..."
+gh release create "${LATEST_TAG}" \
+    --title "Release ${LATEST_TAG}" \
     --generate-notes
 
-echo "Released v${NEW_VERSION} successfully!"
-
-# Remove the trap since we succeeded
-trap - ERR
+echo "Released ${LATEST_TAG} successfully!"

src/inferencesh/models/llm.py

Lines changed: 72 additions & 31 deletions
@@ -139,13 +139,13 @@ def stats(self):
 
 def build_messages(
     input_data: LLMInput,
-    message_modifier: Optional[Callable[[List[Dict[str, Any]]], List[Dict[str, Any]]]] = None
+    transform_user_message: Optional[Callable[[str], str]] = None
 ) -> List[Dict[str, Any]]:
     """Build messages for LLaMA.cpp chat completion.
 
     Args:
         input_data: The input data
-        message_modifier: Optional function to modify messages before returning (e.g. add /think)
+        transform_user_message: Optional function to transform user message text before building messages
     """
     messages = [
         {
@@ -157,8 +157,11 @@ def build_messages(
     # Add context messages
     for msg in input_data.context:
         message_content = []
-        if msg.text:
-            message_content.append({"type": "text", "text": msg.text})
+        text = msg.text
+        if transform_user_message and msg.role == ContextMessageRole.USER:
+            text = transform_user_message(text)
+        if text:
+            message_content.append({"type": "text", "text": text})
         if hasattr(msg, 'image') and msg.image:
             if msg.image.path:
                 message_content.append({"type": "image_url", "image_url": {"url": msg.image.path}})
@@ -171,18 +174,18 @@ def build_messages(
 
     # Add user message
     user_content = []
-    if input_data.text:
-        user_content.append({"type": "text", "text": input_data.text})
+    text = input_data.text
+    if transform_user_message:
+        text = transform_user_message(text)
+    if text:
+        user_content.append({"type": "text", "text": text})
     if hasattr(input_data, 'image') and input_data.image:
         if input_data.image.path:
             user_content.append({"type": "image_url", "image_url": {"url": input_data.image.path}})
         elif input_data.image.uri:
             user_content.append({"type": "image_url", "image_url": {"url": input_data.image.uri}})
     messages.append({"role": "user", "content": user_content})
 
-    if message_modifier:
-        messages = message_modifier(messages)
-
     return messages
 
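For reference, the new hook receives raw user text instead of the finished message list that message_modifier saw. A minimal caller sketch (the /think suffix is illustrative, echoing the old docstring's example; input_data construction is elided):

    def add_think_suffix(text: str) -> str:
        # Illustrative transform: append a /think directive to the user's text
        return f"{text} /think" if text else text

    messages = build_messages(input_data, transform_user_message=add_think_suffix)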

@@ -195,8 +198,21 @@ def stream_generate(
     max_tokens: int = 4096,
     stop: Optional[List[str]] = None,
     handle_thinking: bool = False,
+    transform_response: Optional[Callable[[str, str], tuple[str, LLMOutput]]] = None,
 ) -> Generator[LLMOutput, None, None]:
-    """Stream generate from LLaMA.cpp model with timing and usage tracking."""
+    """Stream generate from LLaMA.cpp model with timing and usage tracking.
+
+    Args:
+        model: The LLaMA.cpp model instance
+        messages: List of messages to send to the model
+        output_cls: Output class type to use for responses
+        temperature: Sampling temperature
+        top_p: Top-p sampling threshold
+        max_tokens: Maximum tokens to generate
+        stop: Optional list of stop sequences
+        handle_thinking: Whether to handle thinking tags
+        transform_response: Optional function to transform responses, takes (piece, buffer) and returns (new_buffer, output)
+    """
     response_queue: Queue[Optional[tuple[str, dict]]] = Queue()
     thread_exception = None
     usage_stats = {
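The (piece, buffer) -> (new_buffer, output) contract moves accumulation and parsing into the caller. A minimal sketch of a conforming callback (the <answer> marker is hypothetical; the LLMOutput keywords follow the output_cls(...) calls in this diff):

    def strip_answer_marker(piece: str, buffer: str) -> tuple[str, LLMOutput]:
        # Accumulate the new piece, drop the hypothetical <answer> marker, and
        # emit the running response. stream_generate attaches usage to the
        # final chunk itself, so it is not set here.
        buffer += piece
        return buffer, LLMOutput(response=buffer.replace("<answer>", "").strip())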
@@ -279,32 +295,57 @@ def generation_thread():
                     completion_tokens=usage_stats["completion_tokens"],
                     total_tokens=usage_stats["total_tokens"]
                 )
-                yield output_cls(
-                    response=buffer.strip(),
-                    thinking_content=thinking_content.strip() if thinking_content else None,
-                    usage=usage
-                )
+
+                if transform_response:
+                    buffer, output = transform_response(piece or "", buffer)
+                    output.usage = usage
+                    yield output
+                else:
+                    # Handle thinking vs response content if enabled
+                    if handle_thinking and "</think>" in piece:
+                        parts = piece.split("</think>")
+                        if in_thinking:
+                            thinking_content += parts[0].replace("<think>", "")
+                            buffer = parts[1] if len(parts) > 1 else ""
+                            in_thinking = False
+                        else:
+                            buffer += piece
+                    else:
+                        if in_thinking:
+                            thinking_content += piece.replace("<think>", "")
+                        else:
+                            buffer += piece
+
+                    yield output_cls(
+                        response=buffer.strip(),
+                        thinking_content=thinking_content.strip() if thinking_content else None,
+                        usage=usage
+                    )
                 break
 
-            # Handle thinking vs response content if enabled
-            if handle_thinking and "</think>" in piece:
-                parts = piece.split("</think>")
-                if in_thinking:
-                    thinking_content += parts[0].replace("<think>", "")
-                    buffer = parts[1] if len(parts) > 1 else ""
-                    in_thinking = False
-                else:
-                    buffer += piece
+            if transform_response:
+                buffer, output = transform_response(piece, buffer)
+                yield output
             else:
-                if in_thinking:
-                    thinking_content += piece.replace("<think>", "")
+                # Handle thinking vs response content if enabled
+                if handle_thinking and "</think>" in piece:
+                    parts = piece.split("</think>")
+                    if in_thinking:
+                        thinking_content += parts[0].replace("<think>", "")
+                        buffer = parts[1] if len(parts) > 1 else ""
+                        in_thinking = False
+                    else:
+                        buffer += piece
                 else:
-                    buffer += piece
+                    if in_thinking:
+                        thinking_content += piece.replace("<think>", "")
+                    else:
+                        buffer += piece
 
-            yield output_cls(
-                response=buffer.strip(),
-                thinking_content=thinking_content.strip() if thinking_content else None
-            )
+                yield output_cls(
+                    response=buffer.strip(),
+                    thinking_content=thinking_content.strip() if thinking_content else None
+                )
 
     except Exception as e:
         if thread_exception and isinstance(e, thread_exception.__class__):
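Taken together, a caller might stream like this (a sketch: model loading and LLMInput construction are elided; per the buffer.strip() calls above, each chunk carries the cumulative response so far):

    messages = build_messages(input_data, transform_user_message=add_think_suffix)
    latest = None
    for latest in stream_generate(
        model,
        messages,
        output_cls=LLMOutput,
        handle_thinking=True,  # route <think>...</think> into thinking_content
    ):
        pass  # or render latest.response incrementally
    if latest is not None:
        print(latest.response)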
