From 72af8a5512981c59c962d4e96298eefcba08b284 Mon Sep 17 00:00:00 2001 From: Istvan Toth Date: Thu, 4 Dec 2025 09:30:31 +0100 Subject: [PATCH] Clear block progress for compaction scanners --- .../hbase/mob/DefaultMobStoreCompactor.java | 2 +- .../hbase/regionserver/ScannerContext.java | 32 ++++++++++++++++--- .../regionserver/compactions/Compactor.java | 2 +- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java index f0beea647611..07bc8431d0b7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java @@ -348,7 +348,7 @@ protected boolean performCompaction(FileDetails fd, InternalScanner scanner, Cel ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax) .setSizeLimit(ScannerContext.LimitScope.BETWEEN_CELLS, Long.MAX_VALUE, Long.MAX_VALUE, compactScannerSizeLimit) - .build(); + .setKeepBlockProgress(false).build(); throughputController.start(compactionName); KeyValueScanner kvs = (scanner instanceof KeyValueScanner) ? (KeyValueScanner) scanner : null; long shippedCallSizeLimit = diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScannerContext.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScannerContext.java index c681e91c615e..4932fceb61aa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScannerContext.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScannerContext.java @@ -103,6 +103,14 @@ public class ScannerContext { boolean keepProgress; private static boolean DEFAULT_KEEP_PROGRESS = false; + /** + * For some chores (specifically Compaction) we use the block size limit to avoid too long next() + * calls. 
In those cases the entire region is handled with a single ScannerContext, and we do want + * to reset the block progress. + */ + boolean keepBlockProgress; + private static boolean DEFAULT_KEEP_BLOCK_PROGRESS = true; + /** * Allows temporarily ignoring limits and skipping tracking of batch and size progress. Used when * skipping to the next row, in which case all processed cells are thrown away so should not count @@ -129,6 +137,11 @@ public class ScannerContext { ScannerContext(boolean keepProgress, LimitFields limitsToCopy, boolean trackMetrics, ServerSideScanMetrics scanMetrics) { + this(keepProgress, DEFAULT_KEEP_BLOCK_PROGRESS, limitsToCopy, trackMetrics, scanMetrics); + } + + ScannerContext(boolean keepProgress, boolean keepBlockProgress, LimitFields limitsToCopy, + boolean trackMetrics, ServerSideScanMetrics scanMetrics) { this.limits = new LimitFields(); if (limitsToCopy != null) { this.limits.copy(limitsToCopy); @@ -138,6 +151,7 @@ public class ScannerContext { progress = new ProgressFields(0, 0, 0, 0); this.keepProgress = keepProgress; + this.keepBlockProgress = keepBlockProgress; this.scannerState = DEFAULT_STATE; this.metrics = trackMetrics ? (scanMetrics != null ? scanMetrics : new ServerSideScanMetrics()) : null; @@ -259,11 +273,15 @@ void setBatchProgress(int batchProgress) { /** * Clear away any progress that has been made so far. All progress fields are reset to initial * values. Only clears progress that should reset between rows. {@link #getBlockSizeProgress()} is - * not reset because it increments for all blocks scanned whether the result is included or - * filtered. + * not reset by default because it increments for all blocks scanned whether the result is + * included or filtered. 
*/ void clearProgress() { - progress.setFields(0, 0, 0, getBlockSizeProgress()); + if (keepBlockProgress) { + progress.setFields(0, 0, 0, getBlockSizeProgress()); + } else { + progress.setFields(0, 0, 0, 0); + } } /** @@ -421,6 +439,7 @@ public static Builder newBuilder(boolean keepProgress) { public static final class Builder { boolean keepProgress = DEFAULT_KEEP_PROGRESS; + boolean keepBlockProgress = DEFAULT_KEEP_BLOCK_PROGRESS; boolean trackMetrics = false; LimitFields limits = new LimitFields(); ServerSideScanMetrics scanMetrics = null; @@ -437,6 +456,11 @@ public Builder setKeepProgress(boolean keepProgress) { return this; } + public Builder setKeepBlockProgress(boolean keepBlockProgress) { + this.keepBlockProgress = keepBlockProgress; + return this; + } + public Builder setTrackMetrics(boolean trackMetrics) { this.trackMetrics = trackMetrics; return this; @@ -468,7 +492,7 @@ public Builder setScanMetrics(ServerSideScanMetrics scanMetrics) { } public ScannerContext build() { - return new ScannerContext(keepProgress, limits, trackMetrics, scanMetrics); + return new ScannerContext(keepProgress, keepBlockProgress, limits, trackMetrics, scanMetrics); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java index 069968294b84..8acbcf234da5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/compactions/Compactor.java @@ -441,7 +441,7 @@ protected boolean performCompaction(FileDetails fd, InternalScanner scanner, Cel ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax) .setSizeLimit(ScannerContext.LimitScope.BETWEEN_CELLS, Long.MAX_VALUE, Long.MAX_VALUE, compactScannerSizeLimit) - .build(); + .setKeepBlockProgress(false).build();
throughputController.start(compactionName); Shipper shipper = (scanner instanceof Shipper) ? (Shipper) scanner : null;