From b53a8cf2fc84fc6db18c4528407762f2f909196a Mon Sep 17 00:00:00 2001 From: zhenghuan Date: Fri, 9 Jan 2026 18:28:02 +0800 Subject: [PATCH] [CELEBORN-2248] Implement lazy loading for columnar shuffle classes and skew shuffle method using static holder pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR converts the static initialization of columnar shuffle class constructors and skew shuffle method to lazy initialization using the initialization-on-demand holder idiom (static inner class pattern) in SparkUtils.java. Specifically, the following changes were made: 1. Introduced `ColumnarHashBasedShuffleWriterConstructorHolder` static inner class to lazily initialize the constructor for ColumnarHashBasedShuffleWriter 2. Introduced `ColumnarShuffleReaderConstructorHolder` static inner class to lazily initialize the constructor for CelebornColumnarShuffleReader 3. Introduced `CelebornSkewShuffleMethodHolder` static inner class to lazily initialize the `isCelebornSkewedShuffle` method reference 4. Modified `createColumnarHashBasedShuffleWriter()`, `createColumnarShuffleReader()`, and `isCelebornSkewShuffleOrChildShuffle()` methods to use the holder pattern for lazy initialization 5. Added JavaDoc comments explaining the lazy loading mechanism ### Why are the changes needed? The current implementation statically initializes columnar shuffle class constructors and the skew shuffle method at SparkUtils class loading time, which means these classes/methods are loaded regardless of whether they are actually used. This lazy loading approach ensures that: - Columnar shuffle classes are only loaded when actually needed (when `celeborn.columnarShuffle.enabled` is true and the create methods are called) - CelebornShuffleState class is only loaded when skew shuffle detection is needed - Reduces unnecessary class loading overhead for users not using these features - Improves startup performance and memory footprint - Aligns with the conditional usage pattern already present in SparkShuffleManager The static holder pattern (initialization-on-demand holder idiom) provides several advantages: - Thread-safe without explicit synchronization (guaranteed by JVM class loading mechanism) - No synchronization overhead at runtime (no volatile reads or lock acquisition) - Simpler and more concise code compared to double-checked locking - Recommended by Effective Java (Item 83) for lazy initialization ### Does this PR resolve a correctness bug? No, this is a performance optimization. ### Does this PR introduce any user-facing change? No. This change only affects when certain classes are loaded internally. The functionality and API remain unchanged. ### How was this patch tested? - Code review to verify correct implementation of the initialization-on-demand holder pattern - Verified that JVM class loading guarantees thread safety (JLS §12.4.2) - Analyzed existing columnar shuffle and skew shuffle test coverage in the codebase - The changes are backward compatible and don't alter functionality, only initialization timing 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- .../spark/shuffle/celeborn/SparkUtils.java | 86 ++++++++++++------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/client-spark/spark-3/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java b/client-spark/spark-3/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java index b50d3546e4d..6ec67252207 100644 --- a/client-spark/spark-3/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java +++ b/client-spark/spark-3/src/main/java/org/apache/spark/shuffle/celeborn/SparkUtils.java @@ -228,17 +228,24 @@ public static ShuffleReader getReader( public static final String COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CLASS = "org.apache.spark.shuffle.celeborn.ColumnarHashBasedShuffleWriter"; - static final DynConstructors.Builder COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CONSTRUCTOR_BUILDER = - DynConstructors.builder() - .impl( - COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CLASS, - int.class, - CelebornShuffleHandle.class, - TaskContext.class, - CelebornConf.class, - ShuffleClient.class, - ShuffleWriteMetricsReporter.class, - SendBufferPool.class); + + /** + * Lazy holder for ColumnarHashBasedShuffleWriter constructor builder. The builder is initialized + * only when this class is first accessed, ensuring lazy loading without explicit synchronization. + */ + private static class ColumnarHashBasedShuffleWriterConstructorBuilderHolder { + private static final DynConstructors.Builder INSTANCE = + DynConstructors.builder() + .impl( + COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CLASS, + int.class, + CelebornShuffleHandle.class, + TaskContext.class, + CelebornConf.class, + ShuffleClient.class, + ShuffleWriteMetricsReporter.class, + SendBufferPool.class); + } public static HashBasedShuffleWriter createColumnarHashBasedShuffleWriter( int shuffleId, @@ -248,26 +255,33 @@ public static HashBasedShuffleWriter createColumnarHashBasedS ShuffleClient client, ShuffleWriteMetricsReporter metrics, SendBufferPool sendBufferPool) { - return COLUMNAR_HASH_BASED_SHUFFLE_WRITER_CONSTRUCTOR_BUILDER + return ColumnarHashBasedShuffleWriterConstructorBuilderHolder.INSTANCE .build() .invoke(null, shuffleId, handle, taskContext, conf, client, metrics, sendBufferPool); } public static final String COLUMNAR_SHUFFLE_READER_CLASS = "org.apache.spark.shuffle.celeborn.CelebornColumnarShuffleReader"; - static final DynConstructors.Builder COLUMNAR_SHUFFLE_READER_CONSTRUCTOR_BUILDER = - DynConstructors.builder() - .impl( - COLUMNAR_SHUFFLE_READER_CLASS, - CelebornShuffleHandle.class, - int.class, - int.class, - int.class, - int.class, - TaskContext.class, - CelebornConf.class, - ShuffleReadMetricsReporter.class, - ExecutorShuffleIdTracker.class); + + /** + * Lazy holder for CelebornColumnarShuffleReader constructor builder. The builder is initialized + * only when this class is first accessed, ensuring lazy loading without explicit synchronization. + */ + private static class ColumnarShuffleReaderConstructorBuilderHolder { + private static final DynConstructors.Builder INSTANCE = + DynConstructors.builder() + .impl( + COLUMNAR_SHUFFLE_READER_CLASS, + CelebornShuffleHandle.class, + int.class, + int.class, + int.class, + int.class, + TaskContext.class, + CelebornConf.class, + ShuffleReadMetricsReporter.class, + ExecutorShuffleIdTracker.class); + } public static CelebornShuffleReader createColumnarShuffleReader( CelebornShuffleHandle handle, @@ -279,7 +293,7 @@ public static CelebornShuffleReader createColumnarShuffleReader( CelebornConf conf, ShuffleReadMetricsReporter metrics, ExecutorShuffleIdTracker shuffleIdTracker) { - return COLUMNAR_SHUFFLE_READER_CONSTRUCTOR_BUILDER + return ColumnarShuffleReaderConstructorBuilderHolder.INSTANCE .build() .invoke( null, @@ -541,15 +555,21 @@ public static void addSparkListener(SparkListener listener) { } } - private static final DynMethods.UnboundMethod isCelebornSkewShuffle_METHOD = - DynMethods.builder("isCelebornSkewedShuffle") - .hiddenImpl("org.apache.spark.celeborn.CelebornShuffleState", Integer.TYPE) - .orNoop() - .build(); + /** + * Lazy holder for isCelebornSkewedShuffle method. The method is initialized only when this class + * is first accessed, ensuring lazy loading without explicit synchronization. + */ + private static class CelebornSkewShuffleMethodHolder { + private static final DynMethods.UnboundMethod INSTANCE = + DynMethods.builder("isCelebornSkewedShuffle") + .hiddenImpl("org.apache.spark.celeborn.CelebornShuffleState", Integer.TYPE) + .orNoop() + .build(); + } public static boolean isCelebornSkewShuffleOrChildShuffle(int appShuffleId) { - if (!isCelebornSkewShuffle_METHOD.isNoop()) { - return isCelebornSkewShuffle_METHOD.asStatic().invoke(appShuffleId); + if (!CelebornSkewShuffleMethodHolder.INSTANCE.isNoop()) { + return CelebornSkewShuffleMethodHolder.INSTANCE.asStatic().invoke(appShuffleId); } else { return false; }