Skip to content

Commit 52d089e

Browse files
committed
Add Markdown-KV output format and bump version to 3.8.2
This commit adds a new output format optimized for LLM understanding and updates the package version to 3.8.2.

New Feature: Markdown-KV Output Format
- Add 'markdownkv' as a new output format option
- Optimized for LLM understanding (60.7% accuracy vs 44.3% for CSV)
- Based on research: https://www.empiricalagents.com/blog/which-table-format-do-llms-understand-best
- Hierarchical structure with markdown headers and code blocks
- Ideal for RAG pipelines and AI systems processing tabular data

Implementation:
- Update OutputFormatter class to support markdownkv
  - Add _format_markdownkv() for query results
  - Add _format_markdownkv_error() for error formatting
  - Add _format_markdownkv_statement() for statement results
  - Format: "# Query Results" + "## Record N" + code blocks with key: value pairs
- Update StackQL class for server mode support
  - Handle markdownkv in execute() for queries
  - Handle markdownkv in executeStmt() for statements
- Add comprehensive test suite
  - tests/test_markdownkv_format.py
  - Tests for simple data, null values, errors, statements
  - Tests for LLM-friendly structure validation
  - Tests for server mode compatibility

Version & Documentation:
- Bump version from 3.8.1 to 3.8.2 in pyproject.toml
- Update CHANGELOG.md with:
  - Centralized error detection feature
  - Markdown-KV output format feature
  - New dependencies (PyYAML)
  - New test suites

This release includes both the error detection feature (previous commits) and the new Markdown-KV format, making pystackql more powerful for AI/LLM use cases and production deployments.
1 parent 25c8041 commit 52d089e

File tree

5 files changed

+354
-8
lines changed

5 files changed

+354
-8
lines changed

CHANGELOG.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,35 @@
11
# Changelog
22

3+
## v3.8.2 (2025-11-09)
4+
5+
### New Features
6+
7+
- **Centralized Error Detection**: Added centralized error detection system with configurable patterns
8+
- New `errors.yaml` configuration file with error patterns
9+
- Supports three pattern types: fuzzy matches, exact matches, and regex matches
10+
- Automatically detects errors in stdout and moves them to error field
11+
- Eliminates need for external applications to parse error messages
12+
- Includes patterns for HTTP 4xx/5xx errors, DNS failures, connection errors, and timeouts
13+
- Added `ErrorDetector` class for pattern-based error detection
14+
15+
- **Markdown-KV Output Format**: Added `markdownkv` output format optimized for LLM understanding
16+
- Based on research showing 60.7% LLM accuracy vs 44.3% for CSV
17+
- Ideal for RAG pipelines and AI-based systems processing tabular data
18+
- Hierarchical structure with markdown headers and code blocks
19+
- Supported in both local and server modes
20+
- Reference: [Which Table Format Do LLMs Understand Best?](https://www.empiricalagents.com/blog/which-table-format-do-llms-understand-best)
21+
22+
### Dependencies
23+
24+
- Added `PyYAML>=5.4.0` for error pattern configuration
25+
26+
### Testing
27+
28+
- Added comprehensive test suite for error detection (`tests/test_error_detection.py`)
29+
- Added test suite for Markdown-KV format (`tests/test_markdownkv_format.py`)
30+
- Tests for regex pattern matching, DNS errors, connection errors, and timeouts
31+
- Tests for LLM-friendly data formatting
32+
333
## v3.8.1 (2025-06-25)
434

535
### Updates

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "pystackql"
7-
version = "3.8.1"
7+
version = "3.8.2"
88
description = "A Python interface for StackQL"
99
readme = "README.rst"
1010
authors = [

pystackql/core/output.py

Lines changed: 97 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def __init__(self, output_format='dict'):
2727
Raises:
2828
ValueError: If an invalid output format is specified
2929
"""
30-
ALLOWED_OUTPUTS = {'dict', 'pandas', 'csv'}
30+
ALLOWED_OUTPUTS = {'dict', 'pandas', 'csv', 'markdownkv'}
3131
if output_format.lower() not in ALLOWED_OUTPUTS:
3232
raise ValueError(f"Invalid output format. Expected one of {ALLOWED_OUTPUTS}, got {output_format}.")
3333
self.output_format = output_format.lower()
@@ -63,10 +63,10 @@ def format_query_result(self, result, suppress_errors=True):
6363

6464
def _format_exception(self, exception_msg):
6565
"""Format an exception message.
66-
66+
6767
Args:
6868
exception_msg (str): The exception message
69-
69+
7070
Returns:
7171
The formatted exception in the specified output format
7272
"""
@@ -75,15 +75,17 @@ def _format_exception(self, exception_msg):
7575
return pd.DataFrame({'error': [exception_msg]}) if exception_msg else pd.DataFrame({'error': []})
7676
elif self.output_format == 'csv':
7777
return exception_msg
78+
elif self.output_format == 'markdownkv':
79+
return self._format_markdownkv_error(exception_msg)
7880
else: # dict
7981
return [{"error": exception_msg}]
8082

8183
def _format_error(self, error_msg):
8284
"""Format an error message.
83-
85+
8486
Args:
8587
error_msg (str): The error message
86-
88+
8789
Returns:
8890
The formatted error in the specified output format
8991
"""
@@ -92,6 +94,8 @@ def _format_error(self, error_msg):
9294
return pd.DataFrame({'error': [error_msg]}) if error_msg else pd.DataFrame({'error': []})
9395
elif self.output_format == 'csv':
9496
return error_msg
97+
elif self.output_format == 'markdownkv':
98+
return self._format_markdownkv_error(error_msg)
9599
else: # dict
96100
return [{"error": error_msg}]
97101

@@ -118,6 +122,11 @@ def _format_data(self, data):
118122
return data # Return the error message as-is for CSV
119123
return data
120124

125+
if self.output_format == 'markdownkv':
126+
# For markdownkv, check for errors before parsing
127+
if isinstance(data, str) and self.error_detector.is_error(data):
128+
return self._format_markdownkv_error(data)
129+
121130
# Check if the raw data string itself is an error message (before JSON parsing)
122131
if isinstance(data, str) and self.error_detector.is_error(data):
123132
# The entire response is an error message
@@ -157,6 +166,8 @@ def _format_data(self, data):
157166
import pandas as pd
158167
# Convert the preprocessed JSON data to a DataFrame
159168
return pd.DataFrame(processed_json_data)
169+
elif self.output_format == 'markdownkv':
170+
return self._format_markdownkv(processed_json_data)
160171

161172
# Return the preprocessed dictionary data
162173
return processed_json_data
@@ -245,7 +256,7 @@ def _process_sql_types(self, data):
245256

246257
def _format_empty(self):
247258
"""Format an empty result.
248-
259+
249260
Returns:
250261
An empty result in the specified output format
251262
"""
@@ -254,8 +265,64 @@ def _format_empty(self):
254265
return pd.DataFrame()
255266
elif self.output_format == 'csv':
256267
return ""
268+
elif self.output_format == 'markdownkv':
269+
return "# Query Results\n\nNo records found.\n"
257270
else: # dict
258271
return []
272+
273+
def _format_markdownkv(self, data):
    """Format data as Markdown Key-Value pairs.

    Each record becomes a ``## Record N`` section whose fenced code block
    holds one ``key: value`` line per field. The format is intended for
    LLM consumption; the cited research reports 60.7% accuracy for this
    layout vs 44.3% for CSV when LLMs process tabular data.

    Args:
        data: The processed data. Normally a list of dicts; a single dict
            (or a single bare string) is treated as one record.

    Returns:
        str: Markdown-formatted key-value representation, or a
        "No records found." document when ``data`` is empty.
    """
    if not data:
        return "# Query Results\n\nNo records found.\n"

    # A lone dict is one record. A lone string must also be wrapped,
    # otherwise the loop below would iterate it character by character.
    if isinstance(data, (dict, str)):
        data = [data]

    output = ["# Query Results\n"]

    for idx, record in enumerate(data, 1):
        # Duck-type on ``items`` so any mapping works; a non-mapping row
        # is rendered as a single ``value`` pair instead of raising
        # AttributeError.
        items = getattr(record, "items", None)
        pairs = items() if callable(items) else [("value", record)]

        # None is rendered as the literal ``null``.
        lines = [
            f"{key}: {'null' if value is None else value}"
            for key, value in pairs
        ]

        # A value that itself contains the fence would terminate the code
        # block early; lengthen the fence until it no longer occurs in the
        # body. Ordinary data keeps the plain three-backtick fence.
        body = "\n".join(lines)
        fence = "```"
        while fence in body:
            fence += "`"

        output.append(f"## Record {idx}\n")
        output.append(fence)
        output.extend(lines)
        output.append(f"{fence}\n")

    return "\n".join(output)
308+
309+
def _format_markdownkv_error(self, error_msg):
    """Format an error message in Markdown-KV style.

    Args:
        error_msg (str): The error message. ``None`` is rendered as
            ``null``, matching how null record values are rendered.

    Returns:
        str: Markdown document with an ``## Error`` section whose fenced
        code block contains a single ``error: <message>`` line.
    """
    # Strip trailing whitespace so a message ending in a newline does not
    # leave a blank line inside the code block.
    text = "null" if error_msg is None else str(error_msg).rstrip()

    # If the message itself contains the fence, lengthen the fence so the
    # code block is not terminated early.
    fence = "```"
    while fence in text:
        fence += "`"

    return f"# Query Results\n\n## Error\n\n{fence}\nerror: {text}\n{fence}\n"
259326

260327
def format_statement_result(self, result):
261328
"""Format a statement result.
@@ -284,5 +351,28 @@ def format_statement_result(self, result):
284351
return pd.DataFrame({'message': [message]}) if message else pd.DataFrame({'message': []})
285352
elif self.output_format == 'csv':
286353
return message
354+
elif self.output_format == 'markdownkv':
355+
return self._format_markdownkv_statement(message)
287356
else: # dict
288-
return {'message': message.rstrip('\n')}
357+
return {'message': message.rstrip('\n')}
358+
359+
def _format_markdownkv_statement(self, message):
    """Format a statement result message in Markdown-KV style.

    Args:
        message (str): The statement result message.

    Returns:
        str: Markdown document with a ``## Result`` section whose fenced
        code block contains a single ``message: <text>`` line, or a
        "No message returned." document when the message is empty or
        consists only of whitespace.
    """
    no_message = "# Statement Result\n\nNo message returned.\n"
    if not message:
        return no_message

    # str() guards against a non-string message; rstrip drops the trailing
    # newline(s) so the code block has no blank last line.
    text = str(message).rstrip()
    if not text:
        # Whitespace-only output carries no information; report it the
        # same way as an empty message instead of emitting "message: ".
        return no_message

    # If the message itself contains the fence, lengthen the fence so the
    # code block is not terminated early.
    fence = "```"
    while fence in text:
        fence += "`"

    return f"# Statement Result\n\n## Result\n\n{fence}\nmessage: {text}\n{fence}\n"

pystackql/core/stackql.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,12 @@ def executeStmt(self, query, custom_auth=None, env_vars=None, **kwargs):
310310
elif output_format == 'csv':
311311
# Return the string representation of the result
312312
return result[0]['message']
313+
elif output_format == 'markdownkv':
314+
from .output import OutputFormatter
315+
temp_formatter = OutputFormatter('markdownkv')
316+
# Extract message from result
317+
message = result[0].get('message', '') if result else ''
318+
return temp_formatter._format_markdownkv_statement(message)
313319
else:
314320
return result
315321
else:
@@ -392,6 +398,10 @@ def execute(self, query, suppress_errors=True, custom_auth=None, env_vars=None,
392398
return pd.read_json(StringIO(json_str))
393399
elif output_format == 'csv':
394400
raise ValueError("CSV output is not supported in server_mode.")
401+
elif output_format == 'markdownkv':
402+
from .output import OutputFormatter
403+
temp_formatter = OutputFormatter('markdownkv')
404+
return temp_formatter._format_markdownkv(result)
395405
else: # Assume 'dict' output
396406
return result
397407
else:

0 commit comments

Comments
 (0)