107 changes: 94 additions & 13 deletions Makefile
@@ -1,5 +1,5 @@
CXX := g++
CXXFLAGS := -std=c++17 -O3 -fPIC -march=native -fopenmp
BASE_CXXFLAGS := -std=c++17 -O3 -fPIC

# Python / pybind11 include flags
PYBIND11_INCLUDES := $(shell python3 -m pybind11 --includes)
@@ -28,35 +28,116 @@ SOURCES := \

# Extension suffix (.so or .cpython-XYm-x86_64-linux-gnu.so, etc.)
EXT_SUFFIX := $(shell python3-config --extension-suffix)
TARGET := build/zenann$(EXT_SUFFIX)

# Platform‐specific linker flags
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
# on macOS use dynamic_lookup
LDFLAGS := -undefined dynamic_lookup
BASE_LDFLAGS := -undefined dynamic_lookup
else
# on Linux embed rpath to pick up our extern/faiss libfaiss.so
LDFLAGS := -Wl,-rpath,$$ORIGIN/../extern/faiss/build/install/lib
BASE_LDFLAGS := -Wl,-rpath,$$ORIGIN/../extern/faiss/build/install/lib
endif

# Add OpenMP linking
LDFLAGS += -fopenmp
# ============================================================================
# Version-specific build configurations
# ============================================================================

# Output target (all versions output to the same file)
TARGET := build/zenann$(EXT_SUFFIX)

# NAIVE: No parallelization (baseline)
NAIVE_CXXFLAGS := $(BASE_CXXFLAGS)
NAIVE_LDFLAGS := $(BASE_LDFLAGS)

# OPENMP: Only OpenMP parallelization
OPENMP_CXXFLAGS := $(BASE_CXXFLAGS) -fopenmp -DENABLE_OPENMP
OPENMP_LDFLAGS := $(BASE_LDFLAGS) -fopenmp

# SIMD: Only SIMD vectorization (AVX2)
SIMD_CXXFLAGS := $(BASE_CXXFLAGS) -march=native -DENABLE_SIMD
SIMD_LDFLAGS := $(BASE_LDFLAGS)

.PHONY: all clean prepare
# FULL: OpenMP + SIMD (fully optimized)
FULL_CXXFLAGS := $(BASE_CXXFLAGS) -march=native -fopenmp -DENABLE_OPENMP -DENABLE_SIMD
FULL_LDFLAGS := $(BASE_LDFLAGS) -fopenmp

all: prepare $(TARGET)
# CUDA: CUDA acceleration (placeholder for future)
CUDA_CXXFLAGS := $(BASE_CXXFLAGS) -DENABLE_CUDA
CUDA_LDFLAGS := $(BASE_LDFLAGS) -lcuda -lcudart

# ============================================================================
# Targets
# ============================================================================

.PHONY: all clean prepare naive openmp simd full cuda help

# Default target: build full version
all: full

prepare:
mkdir -p build
@mkdir -p build

# Build the Python extension, linking against our Faiss
$(TARGET): $(SOURCES)
$(CXX) $(CXXFLAGS) $(ALL_INCLUDES) -shared -o $@ \
# Build naive version (no parallelization)
naive: prepare
$(CXX) $(NAIVE_CXXFLAGS) $(ALL_INCLUDES) -shared -o $(TARGET) \
$(SOURCES) \
-L$(FAISS_ROOT)/lib -lfaiss \
$(ALL_LIBS) \
$(LDFLAGS)
$(NAIVE_LDFLAGS)
@echo "✓ Built NAIVE version: $(TARGET)"

# Build OpenMP-only version
openmp: prepare
$(CXX) $(OPENMP_CXXFLAGS) $(ALL_INCLUDES) -shared -o $(TARGET) \
$(SOURCES) \
-L$(FAISS_ROOT)/lib -lfaiss \
$(ALL_LIBS) \
$(OPENMP_LDFLAGS)
@echo "✓ Built OPENMP version: $(TARGET)"

# Build SIMD-only version
simd: prepare
$(CXX) $(SIMD_CXXFLAGS) $(ALL_INCLUDES) -shared -o $(TARGET) \
$(SOURCES) \
-L$(FAISS_ROOT)/lib -lfaiss \
$(ALL_LIBS) \
$(SIMD_LDFLAGS)
@echo "✓ Built SIMD version: $(TARGET)"

# Build full version (OpenMP + SIMD)
full: prepare
$(CXX) $(FULL_CXXFLAGS) $(ALL_INCLUDES) -shared -o $(TARGET) \
$(SOURCES) \
-L$(FAISS_ROOT)/lib -lfaiss \
$(ALL_LIBS) \
$(FULL_LDFLAGS)
@echo "✓ Built FULL version: $(TARGET)"

# Build CUDA version (placeholder)
cuda: prepare
@echo "CUDA version not yet implemented"
@echo "Will output to: $(TARGET)"

# Clean all builds
clean:
rm -rf build

# Help message
help:
@echo "ZenANN Build System - Multiple Optimization Versions"
@echo ""
@echo "Available targets:"
@echo " make naive - Build naive version (no parallelization)"
@echo " make openmp - Build OpenMP-only version"
@echo " make simd - Build SIMD-only version (AVX2)"
@echo " make full - Build fully optimized version (OpenMP + SIMD)"
@echo " make cuda - Build CUDA version (not yet implemented)"
@echo " make all - Build full version (default)"
@echo " make clean - Remove all built files"
@echo ""
@echo "Note: All versions output to build/zenann.so"
@echo "Each build will overwrite the previous one."
@echo ""
@echo "Usage:"
@echo " import build.zenann as zenann # Always works regardless of version"
Comment on lines +127 to +143 (Owner Author):

make help
Explains the make options.
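
Note: all five variants above overwrite the same build/zenann$(EXT_SUFFIX), so the only record of which flag set produced a given binary is the set of -DENABLE_* macros baked into it. A minimal, hypothetical helper (not part of this PR; the function name is illustrative) shows how those macros are meant to gate code at compile time and could report the variant of the currently built module:

#include <string>

// Hypothetical helper, not in the PR: maps the -DENABLE_* flags passed by each
// Makefile target back to the variant name they correspond to.
inline std::string zenann_build_variant() {
#if defined(ENABLE_OPENMP) && defined(ENABLE_SIMD)
  return "full";    // make full:   -march=native -fopenmp -DENABLE_OPENMP -DENABLE_SIMD
#elif defined(ENABLE_OPENMP)
  return "openmp";  // make openmp: -fopenmp -DENABLE_OPENMP
#elif defined(ENABLE_SIMD)
  return "simd";    // make simd:   -march=native -DENABLE_SIMD
#else
  return "naive";   // make naive:  BASE_CXXFLAGS only
#endif
}

Exposing something like this through the pybind11 module would let a user confirm which version of build/zenann.so is currently loaded.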

13 changes: 9 additions & 4 deletions include/zenann/SimdUtils.h
@@ -1,12 +1,16 @@
#pragma once
#include <cstddef>

#if defined(ENABLE_SIMD)
#include <immintrin.h>
#endif

namespace zenann {
inline float l2_simd(const float* __restrict a,
const float* __restrict b,
size_t dim) {
#if defined(__AVX2__)

// L2 distance calculation with optional SIMD optimization
inline float l2_distance(const float* a, const float* b, size_t dim) {
#if defined(ENABLE_SIMD) && defined(__AVX2__)
// SIMD version using AVX2
const size_t step = 8; // 8 × 32-bit floats
__m256 acc = _mm256_setzero_ps();
size_t i = 0;
@@ -27,6 +31,7 @@ inline float l2_simd(const float* __restrict a,
}
return d;
#else
// Naive version (no SIMD or AVX2 not available)
float d = 0.f;
for (size_t i = 0; i < dim; ++i) {
float diff = a[i] - b[i];
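The hunk above collapses the middle of the AVX2 path, so only the loop setup and the scalar fallback are visible. A self-contained sketch of what the complete l2_distance plausibly looks like after this change — the vector loop body and the horizontal sum are reconstructions under the assumptions of unaligned loads and a scalar tail for dimensions not divisible by 8, not the PR's exact code:

#pragma once
#include <cstddef>

#if defined(ENABLE_SIMD)
#include <immintrin.h>
#endif

namespace zenann {

// L2 distance with optional AVX2 acceleration; returns the sum of squared
// differences, matching the visible naive branch. Falls back to the plain loop
// when ENABLE_SIMD is off or AVX2 is unavailable.
inline float l2_distance(const float* a, const float* b, size_t dim) {
#if defined(ENABLE_SIMD) && defined(__AVX2__)
  const size_t step = 8;                    // 8 x 32-bit floats per __m256
  __m256 acc = _mm256_setzero_ps();
  size_t i = 0;
  for (; i + step <= dim; i += step) {
    __m256 va = _mm256_loadu_ps(a + i);     // unaligned loads: callers give no alignment guarantee
    __m256 vb = _mm256_loadu_ps(b + i);
    __m256 diff = _mm256_sub_ps(va, vb);
    acc = _mm256_add_ps(acc, _mm256_mul_ps(diff, diff));
  }
  float buf[8];
  _mm256_storeu_ps(buf, acc);               // horizontal sum of the 8 partial sums
  float d = buf[0] + buf[1] + buf[2] + buf[3] + buf[4] + buf[5] + buf[6] + buf[7];
  for (; i < dim; ++i) {                    // scalar tail when dim % 8 != 0
    float diff = a[i] - b[i];
    d += diff * diff;
  }
  return d;
#else
  // Naive version (no SIMD or AVX2 not available)
  float d = 0.f;
  for (size_t i = 0; i < dim; ++i) {
    float diff = a[i] - b[i];
    d += diff * diff;
  }
  return d;
#endif
}

}  // namespace zenann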
36 changes: 30 additions & 6 deletions src/IVFFlatIndex.cpp
@@ -1,6 +1,8 @@
#include "IVFFlatIndex.h"
#include "SimdUtils.h"
#ifdef ENABLE_OPENMP
#include <omp.h>
#endif
#include <limits>
#include <random>
#include <algorithm>
@@ -48,10 +50,12 @@ SearchResult IVFFlatIndex::search(const Vector& query, size_t k) const {
std::vector<Pair> cdist(nlist_);
std::vector<Pair> heap;

// Calculate distance from query to all centroids (parallelized)
// Calculate distance from query to all centroids
#ifdef ENABLE_OPENMP
#pragma omp parallel for schedule(static)
#endif
for (size_t c = 0; c < nlist_; ++c) {
float d = l2_simd(query.data(), centroids_[c].data(), dimension_);
float d = l2_distance(query.data(), centroids_[c].data(), dimension_);
cdist[c] = {d, c};
}

@@ -62,23 +66,27 @@ SearchResult IVFFlatIndex::search(const Vector& query, size_t k) const {
}
);

// Probe nprobe nearest lists in parallel
// Each thread maintains a local heap, then merges into global heap
// Probe nprobe nearest lists
heap.reserve(k);
const auto& data = datastore_->getAll();

#ifdef ENABLE_OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
for (size_t pi = 0; pi < nprobe_; ++pi) {
size_t c = cdist[pi].second;

#ifdef ENABLE_OPENMP
// Thread-local heap for this cluster
std::vector<Pair> local;
local.reserve(k);
#endif

// Search within this cluster's inverted list
for (size_t id : lists_[c]) {
float dist = l2_simd(query.data(), data[id].data(), dimension_);
float dist = l2_distance(query.data(), data[id].data(), dimension_);

#ifdef ENABLE_OPENMP
if (local.size() < k) {
local.emplace_back(dist, id);
if (local.size() == k) {
@@ -89,8 +97,21 @@ SearchResult IVFFlatIndex::search(const Vector& query, size_t k) const {
local.back() = {dist, id};
std::push_heap(local.begin(), local.end());
}
#else
if (heap.size() < k) {
heap.emplace_back(dist, id);
if (heap.size() == k) {
std::make_heap(heap.begin(), heap.end());
}
} else if (dist < heap.front().first) {
std::pop_heap(heap.begin(), heap.end());
heap.back() = {dist, id};
std::push_heap(heap.begin(), heap.end());
}
#endif
}

#ifdef ENABLE_OPENMP
// Merge local results into global heap (thread-safe)
#pragma omp critical
{
@@ -107,6 +128,7 @@ SearchResult IVFFlatIndex::search(const Vector& query, size_t k) const {
}
}
}
#endif
}

std::sort(heap.begin(), heap.end(),
@@ -134,8 +156,10 @@ std::vector<SearchResult> IVFFlatIndex::search_batch(const Dataset& queries, siz
const size_t nq = queries.size();
std::vector<SearchResult> results(nq);

// Parallel batch search with dynamic scheduling
// Batch search with optional parallelization
#ifdef ENABLE_OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
for (size_t i = 0; i < nq; ++i) {
results[i] = search(queries[i], k);
}
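Both the per-cluster scan and the non-OpenMP fallback above use the same top-k pattern: a max-heap of (distance, id) pairs capped at k elements, heapified once the buffer fills, after which a new candidate only replaces the current worst (heap.front()) when it is strictly closer. With ENABLE_OPENMP defined, each thread applies this pattern to a thread-local heap and merges the result into the shared heap inside the #pragma omp critical block. A standalone sketch of the heap pattern itself — Pair, push_topk, and topk are illustrative names, not identifiers from the PR:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

using Pair = std::pair<float, std::size_t>;  // (distance, id); std::pair compares by distance first

// Insert one candidate into a max-heap capped at k elements.
// heap.front() holds the worst (largest) distance currently kept.
inline void push_topk(std::vector<Pair>& heap, std::size_t k, float dist, std::size_t id) {
  if (heap.size() < k) {
    heap.emplace_back(dist, id);
    if (heap.size() == k) {
      std::make_heap(heap.begin(), heap.end());   // heapify once the buffer is full
    }
  } else if (dist < heap.front().first) {
    std::pop_heap(heap.begin(), heap.end());      // evict the current worst
    heap.back() = {dist, id};
    std::push_heap(heap.begin(), heap.end());
  }
}

// Collect the k nearest candidates, sorted ascending by distance.
inline std::vector<Pair> topk(const std::vector<Pair>& candidates, std::size_t k) {
  std::vector<Pair> heap;
  heap.reserve(k);
  for (const auto& c : candidates) {
    push_topk(heap, k, c.first, c.second);
  }
  std::sort(heap.begin(), heap.end());
  return heap;
}

Because std::pair compares by its first element, the default std::make_heap/std::push_heap comparator already places the largest distance at the root — exactly the element to evict — so the capped max-heap needs no custom comparator.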