Skip to content

Commit dba09f5

Browse files
committed
Centralize error detection logic in pystackql
This commit implements centralized error detection to move error handling logic from external applications (like stackql-deploy) into pystackql itself. Changes: - Add errors.yaml configuration file with error patterns - Fuzzy matches for HTTP 4xx/5xx status codes - Exact matches for error prefixes - StackQL-specific error patterns (disparity, missing operations) - Implement ErrorDetector class (pystackql/core/error_detector.py) - Loads error patterns from errors.yaml at initialization - Supports fuzzy (case-insensitive substring) matching - Supports exact (prefix) matching - Provides is_error() and extract_error_info() methods - Integrate error detection into OutputFormatter - Check raw data strings for error patterns - Check parsed JSON data recursively for errors - Move detected errors to 'error' field instead of 'data' - Return empty list for data when error is detected - Apply detection to both query and statement results - Add PyYAML>=5.4.0 dependency - Updated requirements.txt - Updated pyproject.toml dependencies - Add MANIFEST.in to include errors.yaml in package distribution - Add comprehensive test suite (tests/test_error_detection.py) - Tests for ErrorDetector class - Tests for OutputFormatter integration - Tests for specific homebrew provider 404 error scenario This centralizes error detection so external applications no longer need to parse stdout messages to identify error conditions. When StackQL returns error messages in stdout (instead of stderr), they are now automatically detected and properly formatted as errors.
1 parent ac55a60 commit dba09f5

File tree

7 files changed

+539
-15
lines changed

7 files changed

+539
-15
lines changed

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
include pystackql/errors.yaml

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ dependencies = [
3131
"nest-asyncio>=1.5.5",
3232
"termcolor>=1.1.0",
3333
"tqdm>=4.61.0",
34+
"PyYAML>=5.4.0",
3435
]
3536

3637
[tool.setuptools.packages.find]

pystackql/core/error_detector.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
# pystackql/core/error_detector.py
2+
3+
"""
4+
Error detection module for PyStackQL.
5+
6+
This module provides centralized error detection logic that checks messages
7+
against predefined error patterns loaded from errors.yaml.
8+
"""
9+
10+
import os
11+
import yaml
12+
13+
14+
class ErrorDetector:
    """Detect error messages in query output using configurable patterns.

    Patterns are read from ``errors.yaml``, looked up in the parent directory
    of the directory that contains this module. Two kinds are supported:

    * fuzzy patterns: case-insensitive substring matches
    * exact patterns: case-sensitive matches against the start of the
      message (a message equal to the pattern also matches)
    """

    def __init__(self):
        """Initialize the ErrorDetector by loading error patterns from errors.yaml."""
        # Lower-cased substrings; compared against the lower-cased message.
        self.fuzzy_patterns = []
        # Case-sensitive prefixes; compared against the message as given.
        self.exact_patterns = []
        self._load_error_patterns()

    @staticmethod
    def _clean_patterns(raw_patterns, lowercase=False):
        """Normalize one pattern section of the config into a list of strings.

        Args:
            raw_patterns: The value found under ``fuzzy_matches`` or
                ``exact_matches``. May be a list/tuple, a single string, or
                anything else (including None for an empty YAML section).
            lowercase (bool, optional): Lower-case every pattern. Defaults
                to False.

        Returns:
            list: Non-empty string patterns in their original order.
        """
        if isinstance(raw_patterns, str):
            # A scalar string is one pattern; iterating it would yield
            # single characters that match almost every message.
            raw_patterns = [raw_patterns]
        elif not isinstance(raw_patterns, (list, tuple)):
            return []

        cleaned = []
        for entry in raw_patterns:
            # Drop empty/null entries, and booleans produced by YAML words
            # such as "yes"/"no" that cannot be meaningful patterns.
            if not entry or isinstance(entry, bool):
                continue
            # YAML parses unquoted numbers (e.g. "- 404") as int/float;
            # coerce those instead of failing the whole load.
            if not isinstance(entry, (str, int, float)):
                continue
            text = str(entry)
            cleaned.append(text.lower() if lowercase else text)
        return cleaned

    def _load_error_patterns(self):
        """Load error patterns from the errors.yaml file.

        A missing file, an empty file, or a file without a usable ``errors``
        mapping leaves both pattern lists empty. Any other failure while
        reading or parsing is reported with a printed warning and likewise
        leaves the lists empty, so a bad config never breaks the caller.
        """
        # errors.yaml lives one directory above this module's directory
        current_dir = os.path.dirname(os.path.abspath(__file__))
        package_dir = os.path.dirname(current_dir)
        errors_file = os.path.join(package_dir, 'errors.yaml')

        try:
            # Explicit encoding so parsing does not depend on the locale
            with open(errors_file, 'r', encoding='utf-8') as f:
                error_config = yaml.safe_load(f)
        except FileNotFoundError:
            # No config file: run with no patterns, silently
            return
        except Exception as e:
            # Deliberate best-effort boundary: keep empty pattern lists so
            # the module doesn't break existing functionality
            print(f"Warning: Could not load error patterns from {errors_file}: {e}")
            return

        errors = error_config.get('errors') if isinstance(error_config, dict) else None
        if not isinstance(errors, dict):
            return

        # Each section is normalized independently, so one malformed entry
        # cannot discard the remaining patterns.
        self.fuzzy_patterns = self._clean_patterns(errors.get('fuzzy_matches'), lowercase=True)
        self.exact_patterns = self._clean_patterns(errors.get('exact_matches'))

    def _find_pattern(self, message):
        """Return the first pattern matched by ``message``, or None.

        Fuzzy patterns are tried before exact patterns, each in list order.

        Args:
            message: The value to test; anything that is not a non-empty
                string never matches.

        Returns:
            str or None: The matching pattern as stored on this instance.
        """
        if not message or not isinstance(message, str):
            return None

        message_lower = message.lower()
        for pattern in self.fuzzy_patterns:
            if pattern in message_lower:
                return pattern

        # startswith() also covers the case where message == pattern
        for pattern in self.exact_patterns:
            if message.startswith(pattern):
                return pattern

        return None

    def is_error(self, message):
        """Check if a message contains any error patterns.

        Args:
            message (str): The message to check for error patterns

        Returns:
            bool: True if the message matches any error pattern, False otherwise
                (including when message is empty or not a string)
        """
        return self._find_pattern(message) is not None

    def extract_error_info(self, message):
        """Extract error information from a message.

        Args:
            message (str): The error message

        Returns:
            dict or None: ``{"error": message, "detected_pattern": pattern}``
                when the message matches a pattern, None when it does not.
        """
        detected_pattern = self._find_pattern(message)
        if detected_pattern is None:
            return None

        return {
            "error": message,
            "detected_pattern": detected_pattern
        }

pystackql/core/output.py

Lines changed: 76 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import json
1010
from io import StringIO
11+
from .error_detector import ErrorDetector
1112

1213
class OutputFormatter:
1314
"""Formats query results into different output formats.
@@ -18,18 +19,19 @@ class OutputFormatter:
1819

1920
def __init__(self, output_format='dict'):
2021
"""Initialize the OutputFormatter.
21-
22+
2223
Args:
2324
output_format (str, optional): The output format. Defaults to 'dict'.
2425
Allowed values: 'dict', 'pandas', 'csv'
25-
26+
2627
Raises:
2728
ValueError: If an invalid output format is specified
2829
"""
2930
ALLOWED_OUTPUTS = {'dict', 'pandas', 'csv'}
3031
if output_format.lower() not in ALLOWED_OUTPUTS:
3132
raise ValueError(f"Invalid output format. Expected one of {ALLOWED_OUTPUTS}, got {output_format}.")
3233
self.output_format = output_format.lower()
34+
self.error_detector = ErrorDetector()
3335

3436
def format_query_result(self, result, suppress_errors=True):
3537
"""Format a query result.
@@ -95,21 +97,32 @@ def _format_error(self, error_msg):
9597

9698
def _format_data(self, data):
9799
"""Format data.
98-
100+
99101
This method processes SQL type objects from StackQL:
100102
- SQL NULL values: {'String': '', 'Valid': False} → None
101103
- Regular values: {'String': 'value', 'Valid': True} → 'value'
102104
- Empty strings: {'String': '', 'Valid': True} → '' (preserved as empty string)
103-
105+
106+
Additionally, this method checks for error patterns in the data and
107+
converts them to proper error responses.
108+
104109
Args:
105110
data (str): The data string
106-
111+
107112
Returns:
108113
The formatted data in the specified output format
109114
"""
110115
if self.output_format == 'csv':
116+
# For CSV, check if the raw data contains error patterns
117+
if self.error_detector.is_error(data):
118+
return data # Return the error message as-is for CSV
111119
return data
112-
120+
121+
# Check if the raw data string itself is an error message (before JSON parsing)
122+
if isinstance(data, str) and self.error_detector.is_error(data):
123+
# The entire response is an error message
124+
return self._format_error(data)
125+
113126
try:
114127
# Attempt to parse JSON first
115128
raw_json_data = json.loads(data)
@@ -129,27 +142,70 @@ def _format_data(self, data):
129142
try:
130143
# Process the JSON data to clean up SQL type objects
131144
processed_json_data = self._process_sql_types(raw_json_data)
132-
145+
133146
# Handle empty data
134147
if not processed_json_data:
135148
return pd.DataFrame() if self.output_format == 'pandas' else []
136-
149+
150+
# Check if the processed data contains error patterns
151+
# This handles cases where StackQL returns error messages in structured data
152+
detected_error = self._check_data_for_errors(processed_json_data)
153+
if detected_error:
154+
return self._format_error(detected_error)
155+
137156
if self.output_format == 'pandas':
138157
import pandas as pd
139158
# Convert the preprocessed JSON data to a DataFrame
140159
return pd.DataFrame(processed_json_data)
141-
160+
142161
# Return the preprocessed dictionary data
143162
return processed_json_data
144-
163+
145164
except Exception as e:
146165
# Handle any errors during processing
147166
error_msg = f"Error processing data: {str(e)}"
148167
if self.output_format == 'pandas':
149168
import pandas as pd
150169
return pd.DataFrame([{"error": error_msg}])
151170
return [{"error": error_msg}]
152-
171+
172+
def _check_data_for_errors(self, data):
173+
"""Check if processed data contains error patterns.
174+
175+
This method recursively checks all string values in the data structure
176+
to detect error patterns that might have been returned as valid data.
177+
178+
Args:
179+
data: The processed data (list, dict, or primitive type)
180+
181+
Returns:
182+
str: The error message if an error pattern is detected, None otherwise
183+
"""
184+
if isinstance(data, list):
185+
# Check each item in the list
186+
for item in data:
187+
error = self._check_data_for_errors(item)
188+
if error:
189+
return error
190+
191+
elif isinstance(data, dict):
192+
# Check each value in the dictionary
193+
for key, value in data.items():
194+
# Check string values for error patterns
195+
if isinstance(value, str) and self.error_detector.is_error(value):
196+
return value
197+
# Recursively check nested structures
198+
error = self._check_data_for_errors(value)
199+
if error:
200+
return error
201+
202+
elif isinstance(data, str):
203+
# Check if the string itself is an error
204+
if self.error_detector.is_error(data):
205+
return data
206+
207+
return None
208+
153209
def _process_sql_types(self, data):
154210
"""Process SQL type objects in the data.
155211
@@ -203,21 +259,26 @@ def _format_empty(self):
203259

204260
def format_statement_result(self, result):
205261
"""Format a statement result.
206-
262+
207263
Args:
208264
result (dict): The raw statement result from the executor
209-
265+
210266
Returns:
211267
The formatted result in the specified output format
212268
"""
213269
# Handle exceptions
214270
if "exception" in result:
215271
exception_msg = result["exception"]
216272
return self._format_exception(exception_msg)
217-
273+
218274
# Message on stderr or empty message
219275
message = result.get("error", "")
220-
276+
277+
# Check if the message contains error patterns
278+
if message and self.error_detector.is_error(message):
279+
# Return as error instead of as a regular message
280+
return self._format_error(message)
281+
221282
if self.output_format == 'pandas':
222283
import pandas as pd
223284
return pd.DataFrame({'message': [message]}) if message else pd.DataFrame({'message': []})

pystackql/errors.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Error patterns for centralized error detection in PyStackQL
2+
#
3+
# This file defines patterns that should be detected as errors when they appear
4+
# in query results. These patterns are checked against messages returned in stdout
5+
# to identify error conditions that would otherwise be treated as valid data.
6+
#
7+
# Pattern Types:
8+
# - fuzzy_matches: Substring matching (case-insensitive)
9+
# - exact_matches: Exact or prefix matching (case-sensitive) - the message equals the pattern or starts with it
10+
11+
errors:
12+
# Fuzzy matches - will match if the pattern appears anywhere in the message
13+
fuzzy_matches:
14+
# HTTP error status codes (4xx client errors, 5xx server errors)
15+
- "http response status code: 4"
16+
- "http response status code: 5"
17+
18+
# StackQL-specific error patterns from stackql-deploy
19+
- "disparity in fields"
20+
- "cannot find matching operation"
21+
22+
# Additional StackQL error patterns
23+
- "invalid query"
24+
- "syntax error"
25+
26+
# Exact matches - must match the entire message or start with this prefix
27+
exact_matches:
28+
- "error:"
29+
- "ERROR:"
30+
- "Error:"
31+
- "FAILED"
32+
- "FAILURE"

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ pandas>=1.3.0
33
requests>=2.25.0
44
IPython>=7.0.0
55
termcolor>=1.1.0
6+
PyYAML>=5.4.0
67

78
# Documentation
89
sphinx>=4.0.0

0 commit comments

Comments
 (0)