diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java index c1f98edd75ab..2f3bee39eda3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionMover.java @@ -115,6 +115,7 @@ public class RegionMover extends AbstractHBaseTool implements Closeable { private Connection conn; private Admin admin; private RackManager rackManager; + private int maxRetries; private RegionMover(RegionMoverBuilder builder) throws IOException { this.hostname = builder.hostname; @@ -146,6 +147,8 @@ private RegionMover(RegionMoverBuilder builder) throws IOException { // provided as @InterfaceAudience.Private and it is commented that this is just // to be used by unit test. rackManager = builder.rackManager == null ? new RackManager(conf) : builder.rackManager; + this.maxRetries = admin.getConfiguration().getInt(RegionMover.MOVE_RETRIES_MAX_KEY, + RegionMover.DEFAULT_MOVE_RETRIES_MAX); } private RegionMover() { @@ -396,8 +399,13 @@ private void loadRegions(List regionsToMove) throws Exception { } moveRegionsPool.shutdown(); + // Calculate timeout based on acknowledge mode. In ack mode, account for retry attempts since + // MoveWithAck retries failed moves up to MOVE_RETRIES_MAX_KEY times. In no-ack mode, each + // region move is attempted only once. Timeout = regions * wait_per_region * retries + int retries = (ack) ? maxRetries : 1; long timeoutInSeconds = regionsToMove.size() - * admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX); + * admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX) * retries; + waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds); } @@ -682,8 +690,13 @@ private void submitRegionMovesWhileUnloading(ServerName server, List serverIndex = (serverIndex + 1) % regionServers.size(); } moveRegionsPool.shutdown(); + // Calculate timeout based on acknowledge mode. In ack mode, account for retry attempts since + // MoveWithAck retries failed moves up to MOVE_RETRIES_MAX_KEY times. In no-ack mode, each + // region move is attempted only once. Timeout = regions * wait_per_region * retries + int retries = (ack || forceMoveRegionByAck) ? maxRetries : 1; long timeoutInSeconds = regionsToMove.size() - * admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX); + * admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX) * retries; + waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds); }