Skip to content

Commit 4f129aa

Browse files
SNOW-295953: DML stats (#2625)
1 parent 465e2c3 commit 4f129aa

File tree

5 files changed

+1400
-1
lines changed

5 files changed

+1400
-1
lines changed

DESCRIPTION.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
99
# Release Notes
1010
- v4.2.0(TBD)
1111
- Added support for async I/O. Asynchronous version of connector is available via `snowflake.connector.aio` module.
12+
- Added `SnowflakeCursor.stats` property to expose granular DML statistics (rows inserted, deleted, updated, and duplicates) for operations like CTAS where `rowcount` is insufficient.
13+
1214
- v4.1.1(TBD)
1315
- Relaxed pandas dependency requirements for Python below 3.12.
1416
- Changed CRL cache cleanup background task to daemon to avoid blocking main thread.

src/snowflake/connector/aio/_cursor.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,10 @@ async def _init_result_and_meta(self, data: dict[Any, Any]) -> None:
378378
self._rownumber = -1
379379
self._result_state = ResultState.VALID
380380

381+
# Extract stats object if available (for DML operations like CTAS, INSERT, UPDATE, DELETE)
382+
self._stats_data = data.get("stats", None)
383+
logger.debug("Execution DML stats: %s", self.stats)
384+
381385
# don't update the row count when the result is returned from `describe` method
382386
if is_dml and "rowset" in data and len(data["rowset"]) > 0:
383387
updated_rows = 0

src/snowflake/connector/cursor.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,10 @@ def __init__(
418418
self._log_max_query_length = connection.log_max_query_length
419419
self._inner_cursor: SnowflakeCursorBase | None = None
420420
self._prefetch_hook = None
421+
self._stats_data: dict[str, int] | None = (
422+
None # Stores stats from response for DML operations
423+
)
424+
421425
self._rownumber: int | None = None
422426

423427
self.reset()
@@ -454,6 +458,23 @@ def _description_internal(self) -> list[ResultMetadataV2]:
454458
def rowcount(self) -> int | None:
455459
return self._total_rowcount if self._total_rowcount >= 0 else None
456460

461+
@property
462+
def stats(self) -> QueryResultStats | None:
463+
"""Returns detailed rows affected statistics for DML operations.
464+
465+
Returns a NamedTuple with fields:
466+
- num_rows_inserted: Number of rows inserted
467+
- num_rows_deleted: Number of rows deleted
468+
- num_rows_updated: Number of rows updated
469+
- num_dml_duplicates: Number of duplicates in DML statement
470+
471+
Every field is None if no DML stats are available - this includes DML operations where no rows were
472+
affected as well as other types of SQL statements (e.g. DDL, DQL).
473+
"""
474+
if self._stats_data is None:
475+
return QueryResultStats(None, None, None, None)
476+
return QueryResultStats.from_dict(self._stats_data)
477+
457478
@property
458479
def rownumber(self) -> int | None:
459480
return self._rownumber if self._rownumber >= 0 else None
@@ -1201,6 +1222,10 @@ def _init_result_and_meta(self, data: dict[Any, Any]) -> None:
12011222
self._rownumber = -1
12021223
self._result_state = ResultState.VALID
12031224

1225+
# Extract stats object if available (for DML operations like CTAS, INSERT, UPDATE, DELETE)
1226+
self._stats_data = data.get("stats", None)
1227+
logger.debug("Execution DML stats: %s", self.stats)
1228+
12041229
# don't update the row count when the result is returned from `describe` method
12051230
if is_dml and "rowset" in data and len(data["rowset"]) > 0:
12061231
updated_rows = 0
@@ -2007,3 +2032,26 @@ def __getattr__(name):
20072032
)
20082033
return None
20092034
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
2035+
2036+
2037+
class QueryResultStats(NamedTuple):
2038+
"""
2039+
Statistics for rows affected by a DML operation.
2040+
A None value means that the particular statistic is unknown, i.e. it was not returned by the backend service.
2041+
2042+
Originally added to expose DML data of CTAS statements - SNOW-295953
2043+
"""
2044+
2045+
num_rows_inserted: int | None = None
2046+
num_rows_deleted: int | None = None
2047+
num_rows_updated: int | None = None
2048+
num_dml_duplicates: int | None = None
2049+
2050+
@classmethod
2051+
def from_dict(cls, stats_dict: dict[str, int]) -> QueryResultStats:
2052+
return cls(
2053+
num_rows_inserted=stats_dict.get("numRowsInserted", None),
2054+
num_rows_deleted=stats_dict.get("numRowsDeleted", None),
2055+
num_rows_updated=stats_dict.get("numRowsUpdated", None),
2056+
num_dml_duplicates=stats_dict.get("numDmlDuplicates", None),
2057+
)

0 commit comments

Comments
 (0)