@@ -47,8 +47,6 @@
import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
import org.apache.hadoop.hbase.replication.BaseReplicationEndpoint;
import org.apache.hadoop.hbase.replication.EmptyEntriesPolicy;
import org.apache.hadoop.hbase.replication.ReplicationResult;
import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.wal.FSHLogProvider;
@@ -83,7 +81,6 @@ public class ContinuousBackupReplicationEndpoint extends BaseReplicationEndpoint
private final Map<Long, FSHLogProvider.Writer> walWriters = new ConcurrentHashMap<>();
private final ReentrantLock lock = new ReentrantLock();

private ReplicationSourceInterface replicationSource;
private Configuration conf;
private BackupFileSystemManager backupFileSystemManager;
private UUID peerUUID;
@@ -98,7 +95,6 @@ public class ContinuousBackupReplicationEndpoint extends BaseReplicationEndpoint
@Override
public void init(Context context) throws IOException {
super.init(context);
this.replicationSource = context.getReplicationSource();
this.peerId = context.getPeerId();
this.conf = HBaseConfiguration.create(context.getConfiguration());

@@ -155,7 +151,7 @@ private void flushAndBackupSafely() {
try {
LOG.info("{} Periodic WAL flush triggered", Utils.logPeerId(peerId));
flushWriters();
replicationSource.persistOffsets();
getReplicationSource().persistOffsets();
LOG.info("{} Periodic WAL flush and offset persistence completed successfully",
Utils.logPeerId(peerId));
} catch (IOException e) {
@@ -220,11 +216,11 @@ public EmptyEntriesPolicy getEmptyEntriesPolicy() {
}

@Override
public ReplicationResult replicate(ReplicateContext replicateContext) {
public boolean replicate(ReplicateContext replicateContext) {
final List<WAL.Entry> entries = replicateContext.getEntries();
if (entries.isEmpty()) {
LOG.debug("{} No WAL entries to replicate", Utils.logPeerId(peerId));
return ReplicationResult.SUBMITTED;
return true;
}

LOG.debug("{} Received {} WAL entries for replication", Utils.logPeerId(peerId),
@@ -253,15 +249,16 @@ public ReplicationResult replicate(ReplicateContext replicateContext) {
Utils.logPeerId(peerId));
flushWriters();
LOG.debug("{} Replication committed after WAL flush", Utils.logPeerId(peerId));
return ReplicationResult.COMMITTED;
getReplicationSource().cleanupHFileRefsAndPersistOffsets(entries);
return true;
}

LOG.debug("{} Replication submitted successfully", Utils.logPeerId(peerId));
return ReplicationResult.SUBMITTED;
return true;
} catch (IOException e) {
LOG.error("{} Replication failed. Error details: {}", Utils.logPeerId(peerId), e.getMessage(),
e);
return ReplicationResult.FAILED;
return false;
} finally {
lock.unlock();
}
@@ -277,8 +274,8 @@ public ReplicationResult replicate(ReplicateContext replicateContext) {
private void updateLastReplicatedTimestampForContinuousBackup() throws IOException {
try (final Connection conn = ConnectionFactory.createConnection(conf);
BackupSystemTable backupSystemTable = new BackupSystemTable(conn)) {
backupSystemTable.updateBackupCheckpointTimestamp(replicationSource.getServerWALsBelongTo(),
latestWALEntryTimestamp);
backupSystemTable.updateBackupCheckpointTimestamp(
getReplicationSource().getServerWALsBelongTo(), latestWALEntryTimestamp);
}
}

@@ -379,7 +376,7 @@ private void close() {
lock.lock();
try {
flushWriters();
replicationSource.persistOffsets();
getReplicationSource().persistOffsets();
} catch (IOException e) {
LOG.error("{} Failed to Flush Open Wal Writers: {}", Utils.logPeerId(peerId), e.getMessage(),
e);
---
@@ -19,6 +19,7 @@

import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -40,13 +41,15 @@ public abstract class BaseReplicationEndpoint extends AbstractService
public static final String REPLICATION_WALENTRYFILTER_CONFIG_KEY =
"hbase.replication.source.custom.walentryfilters";
protected Context ctx;
private ReplicationSourceInterface replicationSource;

@Override
public void init(Context context) throws IOException {
this.ctx = context;

if (this.ctx != null) {
ReplicationPeer peer = this.ctx.getReplicationPeer();
this.replicationSource = context.getReplicationSource();
if (peer != null) {
peer.registerPeerConfigListener(this);
} else {
@@ -120,4 +123,8 @@ public boolean canReplicateToSameCluster() {
public boolean isStarting() {
return state() == State.STARTING;
}

public ReplicationSourceInterface getReplicationSource() {
return replicationSource;
}
}
---
@@ -216,7 +216,7 @@ public int getTimeout() {
* the context are assumed to be persisted in the target cluster.
Contributor:

What I mean is that we should add a method, maybe called beforePersistingReplicationOffset, and call it before we call updateLogPosition in the ReplicationSourceShipper. For the old implementations we just do nothing, as we can make sure that everything is persisted, and for the S3-based endpoint we close the writer to persist the data on S3.
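A minimal sketch of the hook being proposed here, assuming only the method name from this comment; the surrounding interface and the S3-style override are illustrative assumptions, not code from this PR:

```java
import java.io.IOException;

// Illustrative sketch: every name except beforePersistingReplicationOffset is an assumption.
interface OffsetPersistenceHook {
  /**
   * Called by the shipper right before it records the replication offset.
   * Endpoints whose data is already durable after every shipment keep this no-op default.
   */
  default void beforePersistingReplicationOffset() throws IOException {
    // nothing to do: everything shipped so far is already persisted
  }
}

// An S3-backed endpoint would override the hook to make buffered WAL data durable
// before the offset is recorded, e.g. by closing (rolling) its open writers.
class S3BackedEndpointSketch implements OffsetPersistenceHook {
  @Override
  public void beforePersistingReplicationOffset() throws IOException {
    closeOpenWalWriters(); // roll the writers so all entries up to the offset are on S3
  }

  private void closeOpenWalWriters() throws IOException {
    // endpoint-specific flush/close logic omitted in this sketch
  }
}
```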

Contributor:

You cannot close the writer every time something is shipped, because closing and re-opening the same stream is a costly operation, if it is even supported. We have to wait for enough data to be shipped (file size limit) or for the configured time to elapse (time limit) before closing the current stream and opening a new one. This is controlled by the replication endpoint itself.

Contributor:

So you also need to change the logic in ReplicationSourceShipper so that it does not always record the offset after shipping. And I do not think this can only be controlled by the replication endpoint: in ReplicationSourceShipper you know the size of the WAL entries and you also know how much time has elapsed since the last recording, so it should be easy to implement the logic there?

Contributor:

That's an interesting idea. @vinayakphegde @ankitsol wdyt?

@Apache9 Let's say the ReplicationSourceShipper controls when to record the offset. How would it know which kind of replication endpoint it is working with? Does it need to record the offset after every shipment, or should it use a time/size limit? Shall we make it a new attribute of the endpoints?

Contributor:

I prefer we control it by time/size limit.

Even if the endpoint can persist the data after every shipment, we do not need to record the offset every time, right? We just need to make sure that once the ReplicationSourceShipper wants to record the offset, all the data before that offset has been persisted. So we can introduce a beforePersistingReplicationOffset method on the replication endpoint: if you persist the data after every shipment, you just do nothing; if it is the S3-based endpoint, we close the output file to persist the data.

In this way, the ReplicationSourceShipper does not need to know whether the endpoint persists the data after every shipment. And in the future, for HBaseInterClusterReplicationEndpoint, we could also introduce some asynchronous mechanism to increase performance.

Contributor:

> Even if the endpoint can persist the data after every shipment, we do not need to record the offset every time, right?

Are we talking about modifying the behaviour of existing replication endpoints?
Currently both data and offsets are persisted at every shipment. Would you like to change this to be controlled by time and size limits generally?

Author:

@Apache9 This seems like a good approach. We also want it to be both time and size based.

I have two questions regarding the time-based approach:

  1. Should this time-based count run on a separate thread? Currently in ContinuousBackupReplicationEndpoint we implemented it as a separate thread.
  2. Where should we save the time/size-based context: ReplicationSourceShipper or ReplicationSource, considering ReplicationSourceShipper is itself a thread?

CC @anmolnar @vinayakphegde

Contributor:

I think changing the default behavior to be size/time based is OK, and we can make size = 0 mean no size limit and size = -1 mean persisting after every flush; this way we can make the default size limit -1 to keep the old behavior.

Contributor:

Sounds good to me.

Contributor:

> @Apache9 This seems like a good approach. We also want it to be both time and size based.
>
> I have two questions regarding the time-based approach:
>
>   1. Should this time-based count run on a separate thread? Currently in ContinuousBackupReplicationEndpoint we implemented it as a separate thread.
>   2. Where should we save the time/size-based context: ReplicationSourceShipper or ReplicationSource, considering ReplicationSourceShipper is itself a thread?
>
> CC @anmolnar @vinayakphegde

I think we can put the logic in ReplicationSourceShipper. There is a while loop in the thread; after every shipment we calculate the size and the elapsed time and determine whether we should persist the offset.
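A rough sketch of how that per-shipment check could work; the class, field, and method names below are illustrative assumptions, not the actual ReplicationSourceShipper code:

```java
// Illustrative sketch: size = -1 persists after every shipment (old behavior),
// size = 0 disables the size trigger, and a positive value is a byte threshold.
final class OffsetPersistDecision {
  private final long sizeLimitBytes;
  private final long timeLimitMillis; // 0 disables the time trigger
  private long bytesSinceLastPersist = 0;
  private long lastPersistTime = System.currentTimeMillis();

  OffsetPersistDecision(long sizeLimitBytes, long timeLimitMillis) {
    this.sizeLimitBytes = sizeLimitBytes;
    this.timeLimitMillis = timeLimitMillis;
  }

  /** Called once per shipment with the size of the WAL entries just shipped. */
  boolean shouldPersistAfterShipment(long shippedBytes) {
    bytesSinceLastPersist += shippedBytes;
    long elapsed = System.currentTimeMillis() - lastPersistTime;
    boolean bySize = sizeLimitBytes == -1
      || (sizeLimitBytes > 0 && bytesSinceLastPersist >= sizeLimitBytes);
    boolean byTime = timeLimitMillis > 0 && elapsed >= timeLimitMillis;
    if (bySize || byTime) {
      // the shipper would call endpoint.beforePersistingReplicationOffset()
      // and then updateLogPosition(...) before resetting these counters
      bytesSinceLastPersist = 0;
      lastPersistTime = System.currentTimeMillis();
      return true;
    }
    return false;
  }
}
```

The shipper's existing loop would call shouldPersistAfterShipment(...) after each successful replicate() and persist the offset only when it returns true.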

* @param replicateContext a context where WAL entries and other parameters can be obtained.
*/
ReplicationResult replicate(ReplicateContext replicateContext);
boolean replicate(ReplicateContext replicateContext) throws IOException;

// The below methods are inspired by Guava Service. See
// https://github.com/google/guava/wiki/ServiceExplained for overview of Guava Service.

This file was deleted.

---
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.replication;

import java.io.IOException;
import java.util.UUID;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
@@ -59,10 +60,11 @@ private void checkCell(Cell cell) {
}

@Override
public ReplicationResult replicate(ReplicateContext replicateContext) {
public boolean replicate(ReplicateContext replicateContext) throws IOException {
replicateContext.entries.stream().map(WAL.Entry::getEdit).flatMap(e -> e.getCells().stream())
.forEach(this::checkCell);
return ReplicationResult.COMMITTED;
getReplicationSource().cleanupHFileRefsAndPersistOffsets(replicateContext.getEntries());
Contributor:

Why do we need to call this here?

return true;
}

@Override
---
@@ -48,7 +48,6 @@
import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
import org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint;
import org.apache.hadoop.hbase.replication.ReplicationResult;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
@@ -425,7 +424,7 @@ private long parallelReplicate(ReplicateContext replicateContext, List<List<Entr
* Do the shipping logic
*/
@Override
public ReplicationResult replicate(ReplicateContext replicateContext) {
public boolean replicate(ReplicateContext replicateContext) throws IOException {
int sleepMultiplier = 1;
int initialTimeout = replicateContext.getTimeout();

@@ -445,7 +444,7 @@ public ReplicationResult replicate(ReplicateContext replicateContext) {
lastSinkFetchTime = EnvironmentEdgeManager.currentTime();
}
sleepForRetries("No sinks available at peer", sleepMultiplier);
return ReplicationResult.FAILED;
return false;
}

List<List<Entry>> batches = createBatches(replicateContext.getEntries());
@@ -459,7 +458,8 @@ public ReplicationResult replicate(ReplicateContext replicateContext) {
try {
// replicate the batches to sink side.
parallelReplicate(replicateContext, batches);
return ReplicationResult.COMMITTED;
getReplicationSource().cleanupHFileRefsAndPersistOffsets(replicateContext.getEntries());
Contributor:

OK, I think I get why you call this method here: since here we can make sure that the WAL entries have been persisted, it is OK for us to persist the offset. But I would prefer we follow the old way and call this in ReplicationSourceShipper.

Contributor:

> But I would prefer we follow the old way and call this in ReplicationSourceShipper.

I think it's doable. @ankitsol ?

return true;
} catch (IOException ioe) {
if (ioe instanceof RemoteException) {
if (dropOnDeletedTables && isTableNotFoundException(ioe)) {
@@ -468,14 +468,18 @@ public ReplicationResult replicate(ReplicateContext replicateContext) {
batches = filterNotExistTableEdits(batches);
if (batches.isEmpty()) {
LOG.warn("After filter not exist table's edits, 0 edits to replicate, just return");
return ReplicationResult.COMMITTED;
getReplicationSource()
.cleanupHFileRefsAndPersistOffsets(replicateContext.getEntries());
return true;
}
} else if (dropOnDeletedColumnFamilies && isNoSuchColumnFamilyException(ioe)) {
batches = filterNotExistColumnFamilyEdits(batches);
if (batches.isEmpty()) {
LOG.warn("After filter not exist column family's edits, 0 edits to replicate, "
+ "just return");
return ReplicationResult.COMMITTED;
getReplicationSource()
.cleanupHFileRefsAndPersistOffsets(replicateContext.getEntries());
return true;
}
} else {
LOG.warn("{} Peer encountered RemoteException, rechecking all sinks: ", logPeerId(),
Expand Down Expand Up @@ -507,7 +511,7 @@ public ReplicationResult replicate(ReplicateContext replicateContext) {
}
}
}
return ReplicationResult.FAILED; // in case we exited before replicating
return false; // in case we exited before replicating
}

protected boolean isPeerEnabled() {
---
@@ -41,6 +41,8 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
@@ -58,7 +60,6 @@
import org.apache.hadoop.hbase.replication.ReplicationQueueData;
import org.apache.hadoop.hbase.replication.ReplicationQueueId;
import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
import org.apache.hadoop.hbase.replication.ReplicationResult;
import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
import org.apache.hadoop.hbase.replication.WALEntryFilter;
import org.apache.hadoop.hbase.util.Bytes;
@@ -67,13 +68,16 @@
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL.Entry;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;

/**
* Class that handles the source of a replication stream. Currently does not handle more than 1
* slave cluster. For each slave cluster it selects a random number of peers using a replication
@@ -867,16 +871,12 @@ public long getTotalReplicatedEdits() {
}

@Override
public void logPositionAndCleanOldLogs(WALEntryBatch entryBatch, ReplicationResult replicated) {
public void logPositionAndCleanOldLogs(WALEntryBatch entryBatch) {
String walName = entryBatch.getLastWalPath().getName();
String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(walName);

synchronized (lastEntryBatch) { // Synchronize addition and processing
lastEntryBatch.put(walPrefix, entryBatch);

if (replicated == ReplicationResult.COMMITTED) {
processAndClearEntries();
}
}
}

@@ -893,4 +893,38 @@ private void processAndClearEntries() {
// Clear all processed entries
lastEntryBatch.clear();
}

@Override
public void cleanupHFileRefsAndPersistOffsets(List<Entry> entries) throws IOException {
// Clean up hfile references
for (Entry entry : entries) {
cleanUpHFileRefs(entry.getEdit());
LOG.trace("shipped entry {}: ", entry);
}
persistOffsets();
}

private void cleanUpHFileRefs(WALEdit edit) throws IOException {
String peerId = getPeerId();
if (peerId.contains("-")) {
// peerClusterZnode will be in the form peerId + "-" + rsZNode.
// A peerId will not have "-" in its name, see HBASE-11394
peerId = peerId.split("-")[0];
}
List<Cell> cells = edit.getCells();
int totalCells = cells.size();
for (int i = 0; i < totalCells; i++) {
Cell cell = cells.get(i);
if (CellUtil.matchingQualifier(cell, WALEdit.BULK_LOAD)) {
WALProtos.BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cell);
List<WALProtos.StoreDescriptor> stores = bld.getStoresList();
int totalStores = stores.size();
for (int j = 0; j < totalStores; j++) {
List<String> storeFileList = stores.get(j).getStoreFileList();
getSourceManager().cleanUpHFileRefs(peerId, storeFileList);
getSourceMetrics().decrSizeOfHFileRefsQueue(storeFileList.size());
}
}
}
}
}
---
@@ -34,7 +34,6 @@
import org.apache.hadoop.hbase.replication.ReplicationQueueData;
import org.apache.hadoop.hbase.replication.ReplicationQueueId;
import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
import org.apache.hadoop.hbase.replication.ReplicationResult;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.WAL.Entry;
import org.apache.yetus.audience.InterfaceAudience;
@@ -208,11 +207,15 @@ default boolean isRecovered() {
* @param entryBatch the wal entry batch we just shipped
* @return The instance of queueStorage used by this ReplicationSource.
*/
default void logPositionAndCleanOldLogs(WALEntryBatch entryBatch, ReplicationResult replicated) {
default void logPositionAndCleanOldLogs(WALEntryBatch entryBatch) {

}

default public void persistOffsets() {

}

default public void cleanupHFileRefsAndPersistOffsets(List<Entry> entries) throws IOException {

}
}