Changes from all commits
65 commits
4a9d6a7
Implement Kernel Optimization and Custom Operators for Inference (#412)
claude Nov 8, 2025
828b1fc
merge: integrate master into inference optimization branch
ooples Dec 14, 2025
781ac66
refactor: move simdkernels and platformdetector to aidotnet.tensors
ooples Dec 14, 2025
9128f40
refactor: move optimization utilities to aidotnet.tensors
ooples Dec 15, 2025
510b57d
fix: update tensor api from dimensions to shape in kernels
ooples Dec 15, 2025
337def0
chore: remove outdated examples from inferenceoptimization
ooples Dec 15, 2025
41cc4b6
fix: correct simd api usage for runtime intrinsics compatibility
ooples Dec 15, 2025
e45fcae
feat: add data property for direct array access in tensor and vector
ooples Dec 15, 2025
e586b54
fix: resolve nullable reference type warnings in inference optimization
ooples Dec 15, 2025
8852012
fix: enable ilgpu algorithms extension for roundtoeven support
ooples Dec 15, 2025
810481a
fix: correct gpu stress test performance assertion and update readme …
ooples Dec 15, 2025
1c14ccd
Merge branch 'master' into claude/fix-issue-412-011CUvkJr1v1wzQk6GydfWbN
ooples Dec 15, 2025
c1c4628
fix: address pr review comments for code quality improvements
ooples Dec 15, 2025
5c5a1aa
fix: address pr review comments for inference optimization
ooples Dec 15, 2025
646dd21
Merge branch 'claude/fix-issue-412-011CUvkJr1v1wzQk6GydfWbN' of https…
ooples Dec 15, 2025
6f8589e
fix: remove unused scope stack from performanceprofiler
ooples Dec 15, 2025
257da21
fix: remove stubs and fix net471 compatibility issues
ooples Dec 15, 2025
41c0c77
refactor: use mathhelper.clamp for net471 compatibility in inferenceo…
ooples Dec 15, 2025
836a9fa
Update src/AiDotNet.Tensors/Engines/Optimization/PerformanceProfiler.cs
ooples Dec 15, 2025
982ae25
Update src/InferenceOptimization/CustomOperatorRegistry.cs
ooples Dec 15, 2025
b1d45eb
Update src/InferenceOptimization/Kernels/ConvolutionKernel.cs
ooples Dec 15, 2025
769f7de
Merge branch 'master' into claude/fix-issue-412-011CUvkJr1v1wzQk6GydfWbN
ooples Dec 15, 2025
ce511e6
fix: integrate PR433 inference optimizations + address review
ooples Dec 16, 2025
e1014cf
feat: add speculation policy + continuous batcher support
ooples Dec 16, 2025
8eb096e
fix: add inference diagnostics and stability guardrails
ooples Dec 16, 2025
301ebed
feat: optimize self-attention via cached attention rewrite
ooples Dec 16, 2025
f657130
feat: add kv-cache fp16 option
ooples Dec 16, 2025
e1a43c0
fix: make cloning preserve layer parameters
ooples Dec 16, 2025
b4354e4
fix: make speculative decoding draft selection non-throwing
ooples Dec 16, 2025
90c15e9
feat: add dynamic speculative decoding backoff
ooples Dec 16, 2025
f9b8a54
feat: add int8 kv-cache quantization option
ooples Dec 16, 2025
26cd7ba
feat: route serving requests via adapter header
ooples Dec 16, 2025
10b3c1c
docs: update inference MVP plan with implemented hooks
ooples Dec 16, 2025
8cb39c6
test: tighten speculative draft fallback assertion
ooples Dec 16, 2025
babcbc3
bench: group SIMD benchmarks by category
ooples Dec 16, 2025
f11b222
docs: add phase mapping table for MVP sequencing
ooples Dec 16, 2025
9e81f92
fix: improve adapter model lookup error
ooples Dec 16, 2025
09a0141
fix: guard large unbatched predict requests
ooples Dec 16, 2025
74d7432
fix: bound paged kv-cache sequence allocation retries
ooples Dec 16, 2025
9d32e89
fix: rescale int8 kv-cache across all batches
ooples Dec 16, 2025
391b7c9
fix: mark paged cached attention as inference-only
ooples Dec 16, 2025
1b4f39b
docs: document paged cached attention batch limitation
ooples Dec 16, 2025
2cdc0ff
perf: cache paged attention weights and reuse buffers
ooples Dec 16, 2025
8c7c6cb
perf: use optimized output projection in paged attention fallback
ooples Dec 16, 2025
94ff07f
perf: use matmul for paged attention qkv
ooples Dec 16, 2025
4812a7d
fix: harden attention kernel shape validation
ooples Dec 16, 2025
ffb1e60
fix: validate conv2d kernel in-channels
ooples Dec 16, 2025
e84d0d7
fix: round-trip inference optimization config
ooples Dec 16, 2025
70b42d0
fix: stabilize paged attention allocation and tests
ooples Dec 16, 2025
a5eb3d7
feat: add speculation policies and method hooks
ooples Dec 16, 2025
25fd49f
feat: add weight-only int8 dense quantization
ooples Dec 16, 2025
20292e1
docs: address PR433 review feedback
ooples Dec 16, 2025
a7bb3b9
feat: support Multi-LoRA deep clone and session isolation
ooples Dec 16, 2025
d23342f
test: cover int8 KV-cache quantization
ooples Dec 16, 2025
3f3e887
docs: add strict PR433 phase audit and gap plan
ooples Dec 16, 2025
4e66820
fix: avoid swallowing unexpected deserialization errors
ooples Dec 16, 2025
f529b6a
feat: integrate tree speculation and paged attention WOQ
ooples Dec 17, 2025
2e8b8e3
test: add phase 5/7/8 coverage
ooples Dec 17, 2025
5df1c72
fix: make inference diagnostics runtime-toggleable
ooples Dec 17, 2025
9e49323
test: close remaining PR433 mvp gaps
ooples Dec 17, 2025
47478c9
docs: update PR433 phase audit
ooples Dec 17, 2025
40247bb
fix: address PR433 review feedback
ooples Dec 17, 2025
205caa4
test: serialize diagnostics env var tests
ooples Dec 17, 2025
5ab4bca
fix: tighten deserialization and speculation safety
ooples Dec 17, 2025
3d5ff71
fix: satisfy CodeQL unused-collection
ooples Dec 17, 2025
1 change: 1 addition & 0 deletions AiDotNetBenchmarkTests/AiDotNetBenchmarkTests.csproj
@@ -6,6 +6,7 @@
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
     <LangVersion>latest</LangVersion>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
     <!-- CA1822: BenchmarkDotNet requires instance methods for benchmarks -->
     <NoWarn>$(NoWarn);CA1822</NoWarn>
   </PropertyGroup>
135 changes: 135 additions & 0 deletions AiDotNetBenchmarkTests/InferenceOptimization/AttentionBenchmark.cs
@@ -0,0 +1,135 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;
using AiDotNet.InferenceOptimization;
using AiDotNet.InferenceOptimization.Kernels;
using AiDotNet.LinearAlgebra;
using System;

namespace AiDotNetBenchmarkTests.InferenceOptimization
{
    /// <summary>
    /// Benchmarks for fused attention kernel
    /// </summary>
    [SimpleJob(RuntimeMoniker.Net80)]
    [MemoryDiagnoser]
    [CsvExporter]
    [HtmlExporter]
    public class AttentionBenchmark
    {
        private Tensor<float> _q;
        private Tensor<float> _k;
        private Tensor<float> _v;
        private AttentionKernel _attentionKernel;

        [Params(64, 128, 256)]
        public int SequenceLength { get; set; }

        [Params(32, 64)]
        public int FeatureDim { get; set; }

        [GlobalSetup]
        public void Setup()
        {
            OptimizationInitializer.Initialize(enableProfiling: false);

            _attentionKernel = new AttentionKernel();

            // Initialize Q, K, V tensors
            var random = new Random(42);
            _q = new Tensor<float>(new[] { 1, SequenceLength, FeatureDim });
            _k = new Tensor<float>(new[] { 1, SequenceLength, FeatureDim });
            _v = new Tensor<float>(new[] { 1, SequenceLength, FeatureDim });

            for (int i = 0; i < _q.Data.Length; i++)
            {
                _q.Data[i] = (float)random.NextDouble();
            }

            for (int i = 0; i < _k.Data.Length; i++)
            {
                _k.Data[i] = (float)random.NextDouble();
            }

            for (int i = 0; i < _v.Data.Length; i++)
            {
                _v.Data[i] = (float)random.NextDouble();
            }
        }

        [Benchmark(Baseline = true)]
        public Tensor<float> NaiveAttention()
        {
            // Naive implementation: QK^T, softmax, multiply by V
            float scale = 1.0f / MathF.Sqrt(FeatureDim);

            // Compute attention scores
            var scores = new float[SequenceLength * SequenceLength];

            for (int i = 0; i < SequenceLength; i++)
            {
                for (int j = 0; j < SequenceLength; j++)
                {
                    float score = 0.0f;
                    for (int k = 0; k < FeatureDim; k++)
                    {
                        score += _q.Data[i * FeatureDim + k] * _k.Data[j * FeatureDim + k];
                    }
                    scores[i * SequenceLength + j] = score * scale;
                }
            }

            // Apply softmax
            for (int i = 0; i < SequenceLength; i++)
            {
                float maxVal = float.NegativeInfinity;
                for (int j = 0; j < SequenceLength; j++)
                {
                    if (scores[i * SequenceLength + j] > maxVal)
                        maxVal = scores[i * SequenceLength + j];
                }

                float sum = 0.0f;
                for (int j = 0; j < SequenceLength; j++)
                {
                    scores[i * SequenceLength + j] = MathF.Exp(scores[i * SequenceLength + j] - maxVal);
                    sum += scores[i * SequenceLength + j];
                }

                for (int j = 0; j < SequenceLength; j++)
                {
                    scores[i * SequenceLength + j] /= sum;
                }
            }

            // Multiply by V
            var result = new Tensor<float>(new[] { 1, SequenceLength, FeatureDim });

            for (int i = 0; i < SequenceLength; i++)
            {
                for (int j = 0; j < FeatureDim; j++)
                {
                    float sum = 0.0f;
                    for (int k = 0; k < SequenceLength; k++)
                    {
                        sum += scores[i * SequenceLength + k] * _v.Data[k * FeatureDim + j];
                    }
                    result.Data[i * FeatureDim + j] = sum;
                }
            }

            return result;
        }

        [Benchmark]
        public Tensor<float> OptimizedAttention()
        {
            return _attentionKernel.Execute(_q, _k, _v);
        }

        [Benchmark]
        public Tensor<float> MultiHeadAttention()
        {
            return _attentionKernel.MultiHeadAttention(_q, _k, _v, numHeads: 8);
        }
    }
}
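The benchmark above only measures speed; it does not assert that the fused kernel reproduces the naive baseline. A minimal sanity-check sketch follows. It assumes only what is shown in this PR (the public Setup, NaiveAttention, and OptimizedAttention members and the Tensor<float>.Data array); the tolerance value and the check itself are illustrative and not part of the change.

using System;
using AiDotNetBenchmarkTests.InferenceOptimization;

// Hypothetical sanity check (not part of this PR): confirm the fused kernel and
// the naive baseline agree numerically before comparing their timings.
var bench = new AttentionBenchmark { SequenceLength = 64, FeatureDim = 32 };
bench.Setup();

var expected = bench.NaiveAttention();   // reference result
var actual = bench.OptimizedAttention(); // fused kernel result

for (int i = 0; i < expected.Data.Length; i++)
{
    // float32 reassociation in a fused kernel justifies a loose tolerance
    if (Math.Abs(expected.Data[i] - actual.Data[i]) > 1e-3f)
        throw new InvalidOperationException($"Mismatch at index {i}: {expected.Data[i]} vs {actual.Data[i]}");
}

Console.WriteLine("Fused attention matches the naive baseline within tolerance.");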
84 changes: 84 additions & 0 deletions AiDotNetBenchmarkTests/InferenceOptimization/GemmBenchmark.cs
@@ -0,0 +1,84 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;
using AiDotNet.InferenceOptimization;
using AiDotNet.InferenceOptimization.Kernels;
using AiDotNet.LinearAlgebra;
using System;

namespace AiDotNetBenchmarkTests.InferenceOptimization
{
    /// <summary>
    /// Benchmarks for GEMM (General Matrix Multiplication) kernel.
    /// Tests optimized implementation against naive implementation.
    /// </summary>
    [SimpleJob(RuntimeMoniker.Net80)]
    [MemoryDiagnoser]
    [CsvExporter]
    [HtmlExporter]
    public class GemmBenchmark
    {
        private Tensor<float> _matrixA;
        private Tensor<float> _matrixB;
        private GemmKernel _gemmKernel;

        [Params(64, 128, 256, 512, 1024)]
        public int MatrixSize { get; set; }

        [GlobalSetup]
        public void Setup()
        {
            OptimizationInitializer.Initialize(enableProfiling: false);

            _gemmKernel = new GemmKernel();

            // Initialize matrices with random data
            var random = new Random(42);
            _matrixA = new Tensor<float>(new[] { MatrixSize, MatrixSize });
            _matrixB = new Tensor<float>(new[] { MatrixSize, MatrixSize });

            for (int i = 0; i < _matrixA.Data.Length; i++)
            {
                _matrixA.Data[i] = (float)random.NextDouble();
            }

            for (int i = 0; i < _matrixB.Data.Length; i++)
            {
                _matrixB.Data[i] = (float)random.NextDouble();
            }
        }

        [Benchmark(Baseline = true)]
        public Tensor<float> NaiveGemm()
        {
            // Naive triple-nested loop implementation
            var result = new Tensor<float>(new[] { MatrixSize, MatrixSize });

            for (int i = 0; i < MatrixSize; i++)
            {
                for (int j = 0; j < MatrixSize; j++)
                {
                    float sum = 0.0f;
                    for (int k = 0; k < MatrixSize; k++)
                    {
                        sum += _matrixA.Data[i * MatrixSize + k] * _matrixB.Data[k * MatrixSize + j];
                    }
                    result.Data[i * MatrixSize + j] = sum;
                }
            }

            return result;
        }

        [Benchmark]
        public Tensor<float> OptimizedGemm()
        {
            return _gemmKernel.Execute(_matrixA, _matrixB);
        }

        [Benchmark]
        public Tensor<float> OptimizedGemmTranspose()
        {
            return _gemmKernel.GemmTransposeB(_matrixA, _matrixB);
        }
    }
}
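Neither new file includes an entry point, and the portion of the PR shown here does not add one. A minimal sketch of how these classes are typically launched with BenchmarkDotNet follows; the Program class, its location, and the command-line example are assumptions for illustration, not part of this change.

using BenchmarkDotNet.Running;
using AiDotNetBenchmarkTests.InferenceOptimization;

// Hypothetical runner (not part of this PR). BenchmarkSwitcher lets a single
// benchmark class be selected from the command line, e.g.:
//   dotnet run -c Release -- --filter *GemmBenchmark*
public static class Program
{
    public static void Main(string[] args)
    {
        BenchmarkSwitcher
            .FromTypes(new[] { typeof(AttentionBenchmark), typeof(GemmBenchmark) })
            .Run(args);
    }
}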