diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/Constants.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/Constants.h
index cc3e4d258a4c2..69b627e3b9e9d 100644
--- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/Constants.h
+++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/Constants.h
@@ -33,6 +33,9 @@ class Constants
 #else
   static constexpr int MAXGLOBALPADROW = 152; // Correct number of pad rows in Run 3
 #endif
+
+  // number of LHC bunch crossings per TPC time bin (40 MHz / 5 MHz)
+  static constexpr int LHCBCPERTIMEBIN = 8;
 };
 } // namespace tpc
 } // namespace o2
diff --git a/Detectors/Raw/CMakeLists.txt b/Detectors/Raw/CMakeLists.txt
index c11821ac551da..7d05ca8c4c81c 100644
--- a/Detectors/Raw/CMakeLists.txt
+++ b/Detectors/Raw/CMakeLists.txt
@@ -11,49 +11,45 @@
 o2_add_library(DetectorsRaw
                SOURCES src/RawFileReader.cxx
                        src/RawFileWriter.cxx
-	       	       src/SimpleRawReader.cxx
-	               src/HBFUtils.cxx
-	               src/RDHUtils.cxx
+                       src/SimpleRawReader.cxx
+                       src/HBFUtils.cxx
+                       src/RDHUtils.cxx
                PUBLIC_LINK_LIBRARIES FairRoot::Base
-		                     O2::Headers
-		                     O2::CommonDataFormat  
+                                     O2::Headers
+                                     O2::CommonDataFormat
                                      O2::DetectorsCommonDataFormats
-				     O2::Framework
-				     FairMQ::FairMQ)
+                                     O2::Framework
+                                     FairMQ::FairMQ)
 
 o2_target_root_dictionary(DetectorsRaw
                           HEADERS include/DetectorsRaw/RawFileReader.h
-                                  include/DetectorsRaw/RawFileWriter.h
-			          include/DetectorsRaw/SimpleRawReader.h
-				  include/DetectorsRaw/HBFUtils.h
-				  include/DetectorsRaw/RDHUtils.h)
+                          include/DetectorsRaw/RawFileWriter.h
+                          include/DetectorsRaw/SimpleRawReader.h
+                          include/DetectorsRaw/HBFUtils.h
+                          include/DetectorsRaw/RDHUtils.h)
 
-
-			
-o2_add_executable(file-check 
+o2_add_executable(file-check
                   COMPONENT_NAME raw
                   SOURCES src/rawfileCheck.cxx
                   PUBLIC_LINK_LIBRARIES O2::DetectorsRaw
-                                        Boost::program_options)
+                  Boost::program_options)
 
 o2_add_executable(file-reader-workflow
                   COMPONENT_NAME raw
                   SOURCES src/rawfile-reader-workflow.cxx
-     		          src/RawFileReaderWorkflow.cxx
+                  src/RawFileReaderWorkflow.cxx
                   PUBLIC_LINK_LIBRARIES O2::DetectorsRaw)
 
-				      
 o2_add_test(HBFUtils
             PUBLIC_LINK_LIBRARIES O2::DetectorsRaw
                                   O2::Steer
             SOURCES test/testHBFUtils.cxx
             COMPONENT_NAME raw
             LABELS raw)
-	  
+
 o2_add_test(RawReaderWriter
             PUBLIC_LINK_LIBRARIES O2::DetectorsRaw
                                   O2::Steer
             SOURCES test/testRawReaderWriter.cxx
             COMPONENT_NAME raw
             LABELS raw)
-	  
diff --git a/Detectors/TPC/simulation/CMakeLists.txt b/Detectors/TPC/simulation/CMakeLists.txt
index 32b5821e05be0..1420c3ce08cce 100644
--- a/Detectors/TPC/simulation/CMakeLists.txt
+++ b/Detectors/TPC/simulation/CMakeLists.txt
@@ -41,6 +41,11 @@ o2_target_root_dictionary(TPCSimulation
                                   include/TPCSimulation/SAMPAProcessing.h
                                   include/TPCSimulation/SpaceCharge.h)
 
+o2_add_executable(digits-to-rawzs
+                  COMPONENT_NAME tpc
+                  PUBLIC_LINK_LIBRARIES O2::TPCBase O2::SimulationDataFormat O2::GPUTracking O2::DetectorsRaw
+                  SOURCES run/convertDigitsToRawZS.cxx)
+
 o2_data_file(COPY files DESTINATION Detectors/TPC)
 o2_data_file(COPY data  DESTINATION Detectors/TPC/simulation)
 
diff --git a/Detectors/TPC/simulation/run/convertDigitsToRawZS.cxx b/Detectors/TPC/simulation/run/convertDigitsToRawZS.cxx
new file mode 100644
index 0000000000000..07920770d9831
--- /dev/null
+++ b/Detectors/TPC/simulation/run/convertDigitsToRawZS.cxx
@@ -0,0 +1,200 @@
+// Copyright CERN and copyright holders of ALICE O2. This software is
+// distributed under the terms of the GNU General Public License v3 (GPL
+// Version 3), copied verbatim in the file "COPYING".
+//
+// See http://alice-o2.web.cern.ch/license for full licensing information.
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// \file convertDigitsToRawZS.cxx
+/// \author Jens Wiechula (Jens.Wiechula@ikf.uni-frankfurt.de)
+
+#include <boost/program_options.hpp>
+
+#include <string_view>
+#include <memory>
+#include <vector>
+#include <fmt/format.h>
+
+#include "TFile.h"
+#include "TTree.h"
+#include "TROOT.h"
+
+#include "GPUO2Interface.h"
+#include "GPUReconstructionConvert.h"
+#include "GPUHostDataTypes.h"
+#include "GPUParam.h"
+#include "Digit.h"
+
+#include "DetectorsRaw/RawFileWriter.h"
+#include "SimulationDataFormat/MCCompLabel.h"
+#include "TPCBase/Digit.h"
+#include "TPCBase/Sector.h"
+#include "DataFormatsTPC/ZeroSuppression.h"
+#include "DataFormatsTPC/Helpers.h"
+#include "DetectorsRaw/HBFUtils.h"
+
+namespace bpo = boost::program_options;
+
+using namespace o2::tpc;
+using namespace o2::gpu;
+using o2::MCCompLabel;
+
+constexpr static size_t NSectors = o2::tpc::Sector::MAXSECTOR;
+constexpr static size_t NEndpoints = o2::gpu::GPUTrackingInOutZS::NENDPOINTS;
+using DigitArray = std::array<gsl::span<const o2::tpc::Digit>, Sector::MAXSECTOR>;
+using MCLabelContainer = o2::dataformats::MCTruthContainer<MCCompLabel>;
+
+struct ProcessAttributes {
+  std::unique_ptr<unsigned long long int[]> zsoutput;
+  std::vector<unsigned int> sizes;
+  MCLabelContainer mctruthArray;
+  std::unique_ptr<o2::gpu::GPUReconstructionConvert> zsEncoder;
+  std::vector<int> inputIds;
+  bool zs12bit = true;
+  bool verify = false;
+  int verbosity = 1;
+};
+
+void convert(DigitArray& inputDigits, ProcessAttributes* processAttributes, o2::raw::RawFileWriter& writer);
+#include "DetectorsRaw/HBFUtils.h"
+void convertDigitsToZSfinal(std::string_view digitsFile, std::string_view outputPath)
+{
+
+  // ===| open file and get tree |==============================================
+  std::unique_ptr<TFile> o2simDigits(TFile::Open(digitsFile.data()));
+  auto treeSim = (TTree*)o2simDigits->Get("o2sim");
+
+  gROOT->cd();
+
+  // ===| set up branch addresses |=============================================
+  MCLabelContainer* vLabelContainers[Sector::MAXSECTOR];             // label container per sector
+  std::vector<Digit>* vDigitsPerSectorCollection[Sector::MAXSECTOR]; // container that keeps Digits per sector
+
+  for (int iSec = 0; iSec < Sector::MAXSECTOR; ++iSec) {
+    vDigitsPerSectorCollection[iSec] = nullptr;
+    treeSim->SetBranchAddress(TString::Format("TPCDigit_%d", iSec), &vDigitsPerSectorCollection[iSec]);
+
+    vLabelContainers[iSec] = nullptr;
+    treeSim->SetBranchAddress(TString::Format("TPCDigitMCTruth_%d", iSec), &vLabelContainers[iSec]);
+  }
+
+  DigitArray inputDigits;
+  ProcessAttributes attr;
+
+  // raw data output
+  o2::raw::RawFileWriter writer;
+
+  const unsigned int defaultLink = 15;
+
+  // set up raw writer
+  std::string outDir{outputPath};
+  if (outDir.empty()) {
+    outDir = "./";
+  }
+  if (outDir.back() != '/') {
+    outDir += '/';
+  }
+  for (unsigned int i = 0; i < NSectors; i++) {
+    for (unsigned int j = 0; j < NEndpoints; j++) {
+      const unsigned int cruInSector = j / 2;
+      const unsigned int cruID = i * 10 + cruInSector;
+      const unsigned int feeid = (cruID << 7) | ((j & 1) << 6) | (defaultLink & 0x3F);
+      writer.registerLink(feeid, cruID, defaultLink, j % 2, fmt::format("{}cru{}.raw", outDir, cruID));
+    }
+  }
+  for (Long64_t ievent = 0; ievent < treeSim->GetEntries(); ++ievent) {
+    treeSim->GetEntry(ievent);
+
+    for (int iSec = 0; iSec < Sector::MAXSECTOR; ++iSec) {
+      inputDigits[iSec] = *vDigitsPerSectorCollection[iSec]; //????
+    }
+    convert(inputDigits, &attr, writer);
+  }
+  // for further use we write the configuration file for the output
+  writer.writeConfFile("TPC", "RAWDATA", fmt::format("{}tpcraw.cfg", outDir));
+}
+
+void convert(DigitArray& inputDigits, ProcessAttributes* processAttributes, o2::raw::RawFileWriter& writer)
+{
+  auto& zsEncoder = processAttributes->zsEncoder;
+  const auto verify = processAttributes->verify;
+  const auto zs12bit = processAttributes->zs12bit;
+  GPUParam _GPUParam;
+  _GPUParam.SetDefaults(5.00668);
+  const GPUParam mGPUParam = _GPUParam;
+
+  std::vector<deprecated::PackedDigit> gpuDigits[NSectors];
+  GPUTrackingInOutDigits gpuDigitsMap;
+
+  //convert to GPU digits
+  const float zsThreshold = 0;
+  for (int i = 0; i < NSectors; i++) {
+    const auto& d = inputDigits[i];
+    gpuDigits[i].reserve(d.size());
+    for (int j = 0; j < d.size(); j++) {
+      if (d[j].getChargeFloat() >= zsThreshold) {
+        gpuDigits[i].emplace_back(
+          deprecated::PackedDigit{
+            d[j].getChargeFloat(),
+            (Timestamp)d[j].getTimeStamp(),
+            (Pad)d[j].getPad(),
+            (Row)d[j].getRow()});
+      }
+    }
+
+    gpuDigitsMap.tpcDigits[i] = gpuDigits[i].data();
+    gpuDigitsMap.nTPCDigits[i] = gpuDigits[i].size();
+  }
+
+  const GPUTrackingInOutDigits gpuDigitsMap2 = std::move(gpuDigitsMap);
+  o2::InteractionRecord ir = o2::raw::HBFUtils::Instance().getFirstIR();
+
+  zsEncoder->RunZSEncoder(&gpuDigitsMap, nullptr, nullptr, &writer, &ir, mGPUParam, zs12bit, verify);
+}
+
+int main(int argc, char** argv)
+{
+  bpo::variables_map vm;
+  bpo::options_description opt_general("Usage:\n  " + std::string(argv[0]) +
+                                       " <cmds/options>\n"
+                                       "  Tool will convert simulation digits to raw zero suppressed data\n"
+                                       "Commands / Options");
+  bpo::options_description opt_hidden("");
+  bpo::options_description opt_all;
+  bpo::positional_options_description opt_pos;
+
+  try {
+    auto add_option = opt_general.add_options();
+    add_option("help,h", "Print this help message");
+    add_option("verbose,v", bpo::value<uint32_t>()->default_value(0), "Select verbosity level [0 = no output]");
+    add_option("input-file,i", bpo::value<std::string>()->required(), "Specifies input file.");
+    add_option("output-dir,o", bpo::value<std::string>()->default_value("./"), "Specify output directory");
+
+    opt_all.add(opt_general).add(opt_hidden);
+    bpo::store(bpo::command_line_parser(argc, argv).options(opt_all).positional(opt_pos).run(), vm);
+
+    if (vm.count("help") || argc == 1) {
+      std::cout << opt_general << std::endl;
+      exit(0);
+    }
+
+    bpo::notify(vm);
+  } catch (bpo::error& e) {
+    std::cerr << "ERROR: " << e.what() << std::endl
+              << std::endl;
+    std::cerr << opt_general << std::endl;
+    exit(1);
+  } catch (std::exception& e) {
+    std::cerr << e.what() << ", application will now exit" << std::endl;
+    exit(2);
+  }
+
+  convertDigitsToZSfinal(
+    vm["input-file"].as<std::string>(),
+    vm["output-dir"].as<std::string>());
+
+  return 0;
+}
diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/CATrackerSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/CATrackerSpec.h
index d93d2bcf522d3..732a3ad1cfb11 100644
--- a/Detectors/TPC/workflow/include/TPCWorkflow/CATrackerSpec.h
+++ b/Detectors/TPC/workflow/include/TPCWorkflow/CATrackerSpec.h
@@ -22,7 +22,7 @@ namespace tpc
 
 /// create a processor spec
 /// read simulated TPC clusters from file and publish
-framework::DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector<int> const& inputIds);
+framework::DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, bool zsDecoder, std::vector<int> const& inputIds);
 
 } // end namespace tpc
 } // end namespace o2
diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/RecoWorkflow.h b/Detectors/TPC/workflow/include/TPCWorkflow/RecoWorkflow.h
index d94998f1c53d9..cbbbea43dc277 100644
--- a/Detectors/TPC/workflow/include/TPCWorkflow/RecoWorkflow.h
+++ b/Detectors/TPC/workflow/include/TPCWorkflow/RecoWorkflow.h
@@ -28,13 +28,14 @@ namespace tpc
 namespace reco_workflow
 {
 /// define input and output types of the workflow
-enum struct InputType { Digitizer, // directly read digits from channel {TPC:DIGITS}
-                        Digits,    // read digits from file
-                        Raw,       // read hardware clusters in raw page format from file
-                        Clusters,  // read native clusters from file
+enum struct InputType { Digitizer,        // directly read digits from channel {TPC:DIGITS}
+                        Digits,           // read digits from file
+                        ClustersHardware, // read hardware clusters in raw page format from file
+                        Clusters,         // read native clusters from file
+                        ZSRaw,
 };
 enum struct OutputType { Digits,
-                         Raw,
+                         ClustersHardware,
                          Clusters,
                          Tracks,
                          DisableWriter,
diff --git a/Detectors/TPC/workflow/src/CATrackerSpec.cxx b/Detectors/TPC/workflow/src/CATrackerSpec.cxx
index 4229ddf634582..fdf8f1c5feee0 100644
--- a/Detectors/TPC/workflow/src/CATrackerSpec.cxx
+++ b/Detectors/TPC/workflow/src/CATrackerSpec.cxx
@@ -26,11 +26,14 @@
 #include "DataFormatsTPC/ClusterNative.h"
 #include "DataFormatsTPC/ClusterNativeHelper.h"
 #include "DataFormatsTPC/Helpers.h"
+#include "DataFormatsTPC/ZeroSuppression.h"
 #include "TPCReconstruction/GPUCATracking.h"
 #include "TPCReconstruction/TPCFastTransformHelperO2.h"
 #include "TPCBase/Digit.h"
 #include "TPCFastTransform.h"
+#include "DPLUtils/DPLRawParser.h"
 #include "DetectorsBase/MatLayerCylSet.h"
+#include "DetectorsRaw/HBFUtils.h"
 #include "GPUO2InterfaceConfiguration.h"
 #include "GPUDisplayBackend.h"
 #ifdef GPUCA_BUILD_EVENT_DISPLAY
@@ -57,7 +60,7 @@ namespace o2
 namespace tpc
 {
 
-DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector<int> const& inputIds)
+DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, bool zsDecoder, std::vector<int> const& inputIds)
 {
   constexpr static size_t NSectors = o2::tpc::Sector::MAXSECTOR;
   using ClusterGroupParser = o2::algorithm::ForwardParser<o2::tpc::ClusterGroupHeader>;
@@ -73,12 +76,19 @@ DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector
     std::unique_ptr<o2::tpc::GPUCATracking> tracker;
     std::unique_ptr<o2::gpu::GPUDisplayBackend> displayBackend;
     std::unique_ptr<TPCFastTransform> fastTransform;
+    std::vector<std::unique_ptr<unsigned char[]>> bufferCache;
+    unsigned int tpcZSmessagesReceived = 0;
+    o2::gpu::GPUTrackingInOutZS tpcZS;
+    std::vector<const void*> tpcZSmetaPointers[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS];
+    std::vector<unsigned int> tpcZSmetaSizes[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS];
+    const void** tpcZSmetaPointers2[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS];
+    const unsigned int* tpcZSmetaSizes2[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS];
     int verbosity = 1;
     std::vector<int> inputIds;
     bool readyToQuit = false;
   };
 
-  auto initFunction = [processMC, caClusterer, inputIds](InitContext& ic) {
+  auto initFunction = [processMC, caClusterer, zsDecoder, inputIds](InitContext& ic) {
     auto options = ic.options().get<std::string>("tracker-options");
 
     auto processAttributes = std::make_shared<ProcessAttributes>();
@@ -255,7 +265,7 @@ DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector
       processAttributes->validMcInputs.reset();
     }
 
-    auto processingFct = [processAttributes, processMC, caClusterer](ProcessingContext& pc) {
+    auto processingFct = [processAttributes, processMC, caClusterer, zsDecoder](ProcessingContext& pc) {
       if (processAttributes->readyToQuit) {
         return;
       }
@@ -267,6 +277,7 @@ DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector
       // FIXME cleanup almost duplicated code
       auto& validMcInputs = processAttributes->validMcInputs;
       auto& mcInputs = processAttributes->mcInputs;
+      std::array<gsl::span<const char>, NSectors> inputs;
       std::array<gsl::span<const o2::tpc::Digit>, NSectors> inputDigits;
       std::array<std::unique_ptr<const MCLabelContainer>, NSectors> inputDigitsMC;
       if (processMC) {
@@ -365,92 +376,171 @@ DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector
         processAttributes->readyToQuit = true;
         return;
       }
-      auto printInputLog = [&verbosity, &validInputs, &activeSectors](auto& r, const char* comment, auto& s) {
-        if (verbosity > 1) {
-          LOG(INFO) << comment << " " << *(r.spec) << ", size " << DataRefUtils::getPayloadSize(r) //
-                    << " for sector " << s                                                         //
-                    << std::endl                                                                   //
-                    << "  input status:   " << validInputs                                         //
-                    << std::endl                                                                   //
-                    << "  active sectors: " << std::bitset<NSectors>(activeSectors);               //
+
+      if (zsDecoder) {
+        std::vector<InputSpec> filter = {{"check", ConcreteDataTypeMatcher{gDataOriginTPC, "RAWDATA"}, Lifetime::Timeframe}};
+        for (auto const& ref : InputRecordWalker(pc.inputs(), filter)) {
+          const o2::header::DataHeader* dh = DataRefUtils::getHeader<o2::header::DataHeader*>(ref);
+          const gsl::span<const char> raw = pc.inputs().get<gsl::span<char>>(ref);
+          o2::framework::RawParser parser(raw.data(), raw.size());
+
+          const unsigned char* ptr = nullptr;
+          int count = 0;
+          int lastFEE = -1;
+          int rawcru = 0;
+          int rawendpoint = 0;
+          size_t totalSize = 0;
+          for (auto it = parser.begin(); it != parser.end(); it++) {
+            const unsigned char* current = it.raw();
+            const o2::header::RAWDataHeader* rdh = (const o2::header::RAWDataHeader*)current;
+            if (current == nullptr || it.size() == 0 || (current - ptr) % TPCZSHDR::TPC_ZS_PAGE_SIZE || rdh->feeId != lastFEE) {
+              if (count) {
+                unsigned char* cache = processAttributes->bufferCache.emplace_back(new unsigned char[totalSize]).get();
+                memcpy(cache, ptr, totalSize);
+                processAttributes->tpcZSmetaPointers[rawcru / 10][(rawcru % 10) * 2 + rawendpoint].emplace_back(cache);
+                processAttributes->tpcZSmetaSizes[rawcru / 10][(rawcru % 10) * 2 + rawendpoint].emplace_back(count);
+              }
+              count = 0;
+              if (it.size() == 0) {
+                ptr = nullptr;
+                continue;
+              }
+              lastFEE = rdh->feeId;
+              rawcru = lastFEE >> 7;
+              rawendpoint = (lastFEE & 64) >> 6;
+              ptr = current;
+            }
+            totalSize = current - ptr + sizeof(o2::header::RAWDataHeader) + it.size();
+            count++;
+          }
+          if (count) {
+            unsigned char* cache = processAttributes->bufferCache.emplace_back(new unsigned char[totalSize]).get();
+            memcpy(cache, ptr, totalSize);
+            processAttributes->tpcZSmetaPointers[rawcru / 10][(rawcru % 10) * 2 + rawendpoint].emplace_back(cache);
+            processAttributes->tpcZSmetaSizes[rawcru / 10][(rawcru % 10) * 2 + rawendpoint].emplace_back(count);
+          }
         }
-      };
-      auto& bufferedInputs = processAttributes->bufferedInputs;
-      if (activeSectors == 0 || (activeSectors & validInputs.to_ulong()) != activeSectors ||
-          (processMC && (activeSectors & validMcInputs.to_ulong()) != activeSectors)) {
-        // not all sectors available, we have to buffer the inputs
-        if (caClusterer) {
-          throw std::runtime_error("Buffering not possible with digits");
+        if (++(processAttributes->tpcZSmessagesReceived) != GPUTrackingInOutZS::NSLICES * GPUTrackingInOutZS::NENDPOINTS) {
+          return; // Didn't receive the full TF and all links yet, continue caching
         }
-        for (auto const& refentry : datarefs) {
-          auto& sector = refentry.first;
-          auto& ref = refentry.second;
-          auto payloadSize = DataRefUtils::getPayloadSize(ref);
-          bufferedInputs[sector].resize(payloadSize);
-          std::copy(ref.payload, ref.payload + payloadSize, bufferedInputs[sector].begin());
-          printInputLog(ref, "buffering", sector);
+        int totalCount = 0;
+        for (unsigned int i = 0; i < GPUTrackingInOutZS::NSLICES; i++) {
+          for (unsigned int j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
+            processAttributes->tpcZSmetaPointers2[i][j] = processAttributes->tpcZSmetaPointers[i][j].data();
+            processAttributes->tpcZSmetaSizes2[i][j] = processAttributes->tpcZSmetaSizes[i][j].data();
+            processAttributes->tpcZS.slice[i].zsPtr[j] = processAttributes->tpcZSmetaPointers2[i][j];
+            processAttributes->tpcZS.slice[i].nZSPtr[j] = processAttributes->tpcZSmetaSizes2[i][j];
+            processAttributes->tpcZS.slice[i].count[j] = processAttributes->tpcZSmetaPointers[i][j].size();
+            totalCount += processAttributes->tpcZSmetaPointers[i][j].size();
+          }
         }
 
-        // not needed to send something, DPL will simply drop this timeslice, whenever the
-        // data for all sectors is available, the output is sent in that time slice
-        return;
-      }
-      assert(processMC == false || validMcInputs == validInputs);
-      std::array<gsl::span<const char>, NSectors> inputs;
-      auto inputStatus = validInputs;
-      for (auto const& refentry : datarefs) {
-        auto& sector = refentry.first;
-        auto& ref = refentry.second;
-        inputs[sector] = gsl::span(ref.payload, DataRefUtils::getPayloadSize(ref));
-        inputStatus.reset(sector);
-        printInputLog(ref, "received", sector);
-      }
-      if (inputStatus.any()) {
-        // some of the inputs have been buffered
-        for (size_t sector = 0; sector < inputStatus.size(); ++sector) {
-          if (inputStatus.test(sector)) {
-            inputs[sector] = gsl::span(&bufferedInputs[sector].front(), bufferedInputs[sector].size());
+        /*DPLRawParser parser(pc.inputs(), filter);
+        for (auto it = parser.begin(), end = parser.end(); it != end; ++it) {
+          // retrieving RDH v4
+          auto const* rdh = it.get_if<o2::header::RAWDataHeaderV4>();
+          // retrieving the raw pointer of the page
+          auto const* raw = it.raw();
+          // retrieving payload pointer of the page
+          auto const* payload = it.data();
+          // size of payload
+          size_t payloadSize = it.size();
+          // offset of payload in the raw page
+          size_t offset = it.offset();
+          const auto* dh = it.o2DataHeader();
+          unsigned long subspec = dh->subSpecification;
+          printf("Test: rdh %p, raw %p, payload %p, payloadSize %lld, offset %lld, %s %s %lld\n", rdh, raw, payload, (long long int)payloadSize, (long long int)offset, dh->dataOrigin.as<std::string>().c_str(), dh->dataDescription.as<std::string>().c_str(), (long long int)dh->subSpecification);
+        }*/
+
+      } else {
+        // FIXME: We can have digits input in zs decoder mode for MC labels
+        // This code path should run optionally also for the zs decoder version
+        auto printInputLog = [&verbosity, &validInputs, &activeSectors](auto& r, const char* comment, auto& s) {
+          if (verbosity > 1) {
+            LOG(INFO) << comment << " " << *(r.spec) << ", size " << DataRefUtils::getPayloadSize(r) //
+                      << " for sector " << s                                                         //
+                      << std::endl                                                                   //
+                      << "  input status:   " << validInputs                                         //
+                      << std::endl                                                                   //
+                      << "  active sectors: " << std::bitset<NSectors>(activeSectors);               //
+          }
+        };
+        auto& bufferedInputs = processAttributes->bufferedInputs;
+        if (activeSectors == 0 || (activeSectors & validInputs.to_ulong()) != activeSectors ||
+            (processMC && (activeSectors & validMcInputs.to_ulong()) != activeSectors)) {
+          // not all sectors available, we have to buffer the inputs
+          if (caClusterer) {
+            throw std::runtime_error("Buffering not possible with digits");
+          }
+          for (auto const& refentry : datarefs) {
+            auto& sector = refentry.first;
+            auto& ref = refentry.second;
+            auto payloadSize = DataRefUtils::getPayloadSize(ref);
+            bufferedInputs[sector].resize(payloadSize);
+            std::copy(ref.payload, ref.payload + payloadSize, bufferedInputs[sector].begin());
+            printInputLog(ref, "buffering", sector);
           }
+
+          // not needed to send something, DPL will simply drop this timeslice, whenever the
+          // data for all sectors is available, the output is sent in that time slice
+          return;
+        }
+        assert(processMC == false || validMcInputs == validInputs);
+        auto inputStatus = validInputs;
+        for (auto const& refentry : datarefs) {
+          auto& sector = refentry.first;
+          auto& ref = refentry.second;
+          inputs[sector] = gsl::span(ref.payload, DataRefUtils::getPayloadSize(ref));
+          inputStatus.reset(sector);
+          printInputLog(ref, "received", sector);
         }
-      }
-      if (verbosity > 0) {
         if (inputStatus.any()) {
-          LOG(INFO) << "using buffered data for " << inputStatus.count() << " sector(s)";
+          // some of the inputs have been buffered
+          for (size_t sector = 0; sector < inputStatus.size(); ++sector) {
+            if (inputStatus.test(sector)) {
+              inputs[sector] = gsl::span(&bufferedInputs[sector].front(), bufferedInputs[sector].size());
+            }
+          }
         }
-        // make human readable information from the bitfield
-        std::string bitInfo;
-        auto nActiveBits = validInputs.count();
-        if (((uint64_t)0x1 << nActiveBits) == validInputs.to_ulong() + 1) {
-          // sectors 0 to some upper bound are active
-          bitInfo = "0-" + std::to_string(nActiveBits - 1);
-        } else {
-          int rangeStart = -1;
-          int rangeEnd = -1;
-          for (size_t sector = 0; sector < validInputs.size(); sector++) {
-            if (validInputs.test(sector)) {
-              if (rangeStart < 0) {
-                if (rangeEnd >= 0) {
-                  bitInfo += ",";
+        if (verbosity > 0) {
+          if (inputStatus.any()) {
+            LOG(INFO) << "using buffered data for " << inputStatus.count() << " sector(s)";
+          }
+          // make human readable information from the bitfield
+          std::string bitInfo;
+          auto nActiveBits = validInputs.count();
+          if (((uint64_t)0x1 << nActiveBits) == validInputs.to_ulong() + 1) {
+            // sectors 0 to some upper bound are active
+            bitInfo = "0-" + std::to_string(nActiveBits - 1);
+          } else {
+            int rangeStart = -1;
+            int rangeEnd = -1;
+            for (size_t sector = 0; sector < validInputs.size(); sector++) {
+              if (validInputs.test(sector)) {
+                if (rangeStart < 0) {
+                  if (rangeEnd >= 0) {
+                    bitInfo += ",";
+                  }
+                  bitInfo += std::to_string(sector);
+                  if (nActiveBits == 1) {
+                    break;
+                  }
+                  rangeStart = sector;
                 }
-                bitInfo += std::to_string(sector);
-                if (nActiveBits == 1) {
-                  break;
+                rangeEnd = sector;
+              } else {
+                if (rangeStart >= 0 && rangeEnd > rangeStart) {
+                  bitInfo += "-" + std::to_string(rangeEnd);
                 }
-                rangeStart = sector;
+                rangeStart = -1;
               }
-              rangeEnd = sector;
-            } else {
-              if (rangeStart >= 0 && rangeEnd > rangeStart) {
-                bitInfo += "-" + std::to_string(rangeEnd);
-              }
-              rangeStart = -1;
+            }
+            if (rangeStart >= 0 && rangeEnd > rangeStart) {
+              bitInfo += "-" + std::to_string(rangeEnd);
             }
           }
-          if (rangeStart >= 0 && rangeEnd > rangeStart) {
-            bitInfo += "-" + std::to_string(rangeEnd);
-          }
+          LOG(INFO) << "running tracking for sector(s) " << bitInfo;
         }
-        LOG(INFO) << "running tracking for sector(s) " << bitInfo;
       }
 
       std::vector<TrackTPC> tracks;
@@ -463,11 +553,19 @@ DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector
       ptrs.outputTracks = &tracks;
       ptrs.outputClusRefs = &clusRefs;
       ptrs.outputTracksMCTruth = (processMC ? &tracksMCTruth : nullptr);
+      o2::InteractionRecord ir = raw::HBFUtils::Instance().getFirstIR();
       if (caClusterer) {
-        // Todo: If we have zero-suppressed input, we have to fill this pointer instead: ptrs.tpcZS
-        ptrs.o2Digits = &inputDigits; // TODO: We will also create ClusterNative as output stored in ptrs. Should be added to the output
-        if (processMC) {
-          ptrs.o2DigitsMC = &inputDigitsMC;
+        if (zsDecoder) {
+          processAttributes->tpcZS.ir = &ir;
+          ptrs.tpcZS = &processAttributes->tpcZS;
+          if (processMC) {
+            throw std::runtime_error("Cannot process MC information, none available"); // In fact, passing in MC data with ZS TPC Raw is not yet available
+          }
+        } else {
+          ptrs.o2Digits = &inputDigits; // TODO: We will also create ClusterNative as output stored in ptrs. Should be added to the output
+          if (processMC) {
+            ptrs.o2DigitsMC = &inputDigitsMC;
+          }
         }
       } else {
         memset(&clusterIndex, 0, sizeof(clusterIndex));
@@ -495,6 +593,14 @@ DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector
       //std::vector<o2::tpc::ClusterNative> clusterBuffer; // std::vector that will hold the actual clusters, clustersNativeDecoded will point inside here
       //mDecoder.decompress(clustersCompressed, clustersNativeDecoded, clusterBuffer, param); // Run decompressor
 
+      processAttributes->bufferCache.clear();
+      processAttributes->tpcZSmessagesReceived = 0;
+      for (unsigned int i = 0; i < GPUTrackingInOutZS::NSLICES; i++) {
+        for (unsigned int j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
+          processAttributes->tpcZSmetaPointers[i][j].clear();
+          processAttributes->tpcZSmetaSizes[i][j].clear();
+        }
+      }
       validInputs.reset();
       if (processMC) {
         validMcInputs.reset();
@@ -511,29 +617,39 @@ DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector
   // changing the binding name of the input in order to identify inputs by unique labels
   // in the processing. Think about how the processing can be made agnostic of input size,
   // e.g. by providing a span of inputs under a certain label
-  auto createInputSpecs = [inputIds](bool makeMcInput, bool caClusterer) {
+  auto createInputSpecs = [inputIds](bool makeMcInput, bool caClusterer, bool zsDecoder) {
     Inputs inputs;
     if (caClusterer) {
-      inputs.emplace_back(InputSpec{"input", gDataOriginTPC, "DIGITS", 0, Lifetime::Timeframe});
+      // We accept digits and MC labels also if we run on ZS Raw data, since they are needed for MC label propagation
+      if (!zsDecoder) { // FIXME: We can have digits input in zs decoder mode for MC labels, to be made optional
+        inputs.emplace_back(InputSpec{"input", gDataOriginTPC, "DIGITS", 0, Lifetime::Timeframe});
+      }
     } else {
       inputs.emplace_back(InputSpec{"input", gDataOriginTPC, "CLUSTERNATIVE", 0, Lifetime::Timeframe});
     }
     if (makeMcInput) {
       if (caClusterer) {
         constexpr o2::header::DataDescription datadesc("DIGITSMCTR");
-        inputs.emplace_back(InputSpec{"mclblin", gDataOriginTPC, datadesc, 0, Lifetime::Timeframe});
+        if (!zsDecoder) { // FIXME: We can have digits input in zs decoder mode for MC labels, to be made optional
+          inputs.emplace_back(InputSpec{"mclblin", gDataOriginTPC, datadesc, 0, Lifetime::Timeframe});
+        }
       } else {
         inputs.emplace_back(InputSpec{"mclblin", gDataOriginTPC, "CLNATIVEMCLBL", 0, Lifetime::Timeframe});
       }
     }
 
-    return std::move(mergeInputs(inputs, inputIds.size(),
-                                 [inputIds](InputSpec& input, size_t index) {
-                                   // using unique input names for the moment but want to find
-                                   // an input-multiplicity-agnostic way of processing
-                                   input.binding += std::to_string(inputIds[index]);
-                                   DataSpecUtils::updateMatchingSubspec(input, inputIds[index]);
-                                 }));
+    auto tmp = std::move(mergeInputs(inputs, inputIds.size(),
+                                     [inputIds](InputSpec& input, size_t index) {
+                                       // using unique input names for the moment but want to find
+                                       // an input-multiplicity-agnostic way of processing
+                                       input.binding += std::to_string(inputIds[index]);
+                                       DataSpecUtils::updateMatchingSubspec(input, inputIds[index]);
+                                     }));
+    if (zsDecoder) {
+      // We add this after the mergeInputs, since we need to keep the subspecification
+      tmp.emplace_back(InputSpec{"zsraw", ConcreteDataTypeMatcher{"TPC", "RAWDATA"}, Lifetime::Timeframe});
+    }
+    return tmp;
   };
 
   auto createOutputSpecs = [](bool makeMcOutput) {
@@ -550,7 +666,7 @@ DataProcessorSpec getCATrackerSpec(bool processMC, bool caClusterer, std::vector
   };
 
   return DataProcessorSpec{"tpc-tracker", // process id
-                           {createInputSpecs(processMC, caClusterer)},
+                           {createInputSpecs(processMC, caClusterer, zsDecoder)},
                            {createOutputSpecs(processMC)},
                            AlgorithmSpec(initFunction),
                            Options{
diff --git a/Detectors/TPC/workflow/src/RecoWorkflow.cxx b/Detectors/TPC/workflow/src/RecoWorkflow.cxx
index 75df021ae63d8..f890f3b5828a1 100644
--- a/Detectors/TPC/workflow/src/RecoWorkflow.cxx
+++ b/Detectors/TPC/workflow/src/RecoWorkflow.cxx
@@ -56,13 +56,14 @@ using BranchDefinition = MakeRootTreeWriterSpec::BranchDefinition<T>;
 const std::unordered_map<std::string, InputType> InputMap{
   {"digitizer", InputType::Digitizer},
   {"digits", InputType::Digits},
-  {"raw", InputType::Raw},
+  {"clustershardware", InputType::ClustersHardware},
   {"clusters", InputType::Clusters},
+  {"zsraw", InputType::ZSRaw},
 };
 
 const std::unordered_map<std::string, OutputType> OutputMap{
   {"digits", OutputType::Digits},
-  {"raw", OutputType::Raw},
+  {"clustershardware", OutputType::ClustersHardware},
   {"clusters", OutputType::Clusters},
   {"tracks", OutputType::Tracks},
   {"disable-writer", OutputType::DisableWriter},
@@ -89,18 +90,24 @@ framework::WorkflowSpec getWorkflow(std::vector<int> const& tpcSectors, std::vec
     return std::find(outputTypes.begin(), outputTypes.end(), type) != outputTypes.end();
   };
 
-  if (inputType == InputType::Raw && isEnabled(OutputType::Digits)) { // TODO: We should rename Raw to
-    throw std::invalid_argument("input/output type mismatch, can not produce 'digits' from 'raw'");
+  if (inputType == InputType::ClustersHardware && isEnabled(OutputType::Digits)) {
+    throw std::invalid_argument("input/output type mismatch, can not produce 'digits' from 'clustershardware'");
   }
-  if (inputType == InputType::Clusters && (isEnabled(OutputType::Digits) || isEnabled(OutputType::Raw))) {
-    throw std::invalid_argument("input/output type mismatch, can not produce 'digits', nor 'raw' from 'clusters'");
+  if (inputType == InputType::Clusters && (isEnabled(OutputType::Digits) || isEnabled(OutputType::ClustersHardware))) {
+    throw std::invalid_argument("input/output type mismatch, can not produce 'digits', nor 'clustershardware' from 'clusters'");
+  }
+  if (inputType == InputType::ZSRaw && (isEnabled(OutputType::Clusters) || isEnabled(OutputType::Digits) || isEnabled(OutputType::ClustersHardware))) {
+    throw std::invalid_argument("input/output type mismatch, can not produce 'digits', 'clusters' nor 'clustershardware' from 'zsraw'");
   }
 
-  if (caClusterer && (inputType == InputType::Clusters || inputType == InputType::Raw)) {
+  if (inputType == InputType::ZSRaw && !caClusterer) {
+    throw std::invalid_argument("zsraw input needs caclusterer");
+  }
+  if (caClusterer && (inputType == InputType::Clusters || inputType == InputType::ClustersHardware)) {
     throw std::invalid_argument("ca-clusterer requires digits as input");
   }
-  if (caClusterer && (isEnabled(OutputType::Clusters) || isEnabled(OutputType::Raw))) {
-    throw std::invalid_argument("ca-clusterer cannot produce Clusters or Raw output");
+  if (caClusterer && (isEnabled(OutputType::Clusters) || isEnabled(OutputType::ClustersHardware))) {
+    throw std::invalid_argument("ca-clusterer cannot produce clusters or clustershardware output");
   }
 
   WorkflowSpec specs;
@@ -122,11 +129,11 @@ framework::WorkflowSpec getWorkflow(std::vector<int> const& tpcSectors, std::vec
                                                          laneConfiguration,
                                                        },
                                                        propagateMC));
-  } else if (inputType == InputType::Raw) {
+  } else if (inputType == InputType::ClustersHardware) {
     specs.emplace_back(o2::tpc::getPublisherSpec(PublisherConf{
-                                                   "tpc-raw-cluster-reader",
-                                                   "tpcraw",
-                                                   {"databranch", "TPCClusterHw", "Branch with TPC raw clusters"},
+                                                   "tpc-clusterhardware-reader",
+                                                   "tpcclustershardware",
+                                                   {"databranch", "TPCClusterHw", "Branch with TPC ClustersHardware"},
                                                    {"mcbranch", "TPCClusterHwMCTruth", "MC label branch"},
                                                    OutputSpec{"TPC", "CLUSTERHW"},
                                                    OutputSpec{"TPC", "CLUSTERHWMCLBL"},
@@ -151,11 +158,12 @@ framework::WorkflowSpec getWorkflow(std::vector<int> const& tpcSectors, std::vec
   // output matrix
   bool runTracker = isEnabled(OutputType::Tracks);
   bool runDecoder = !caClusterer && (runTracker || isEnabled(OutputType::Clusters));
-  bool runClusterer = !caClusterer && (runDecoder || isEnabled(OutputType::Raw));
+  bool runClusterer = !caClusterer && (runDecoder || isEnabled(OutputType::ClustersHardware));
+  bool zsDecoder = inputType == InputType::ZSRaw;
 
   // input matrix
   runClusterer &= inputType == InputType::Digitizer || inputType == InputType::Digits;
-  runDecoder &= runClusterer || inputType == InputType::Raw;
+  runDecoder &= runClusterer || inputType == InputType::ClustersHardware;
   runTracker &= caClusterer || (runDecoder || inputType == InputType::Clusters);
 
   WorkflowSpec parallelProcessors;
@@ -299,14 +307,14 @@ framework::WorkflowSpec getWorkflow(std::vector<int> const& tpcSectors, std::vec
 
   //////////////////////////////////////////////////////////////////////////////////////////////
   //
-  // a writer process for raw hardware clusters
+  // a writer process for hardware clusters
   //
-  // selected by output type 'raw'
-  if (isEnabled(OutputType::Raw) && !isEnabled(OutputType::DisableWriter)) {
+  // selected by output type 'clustershardware'
+  if (isEnabled(OutputType::ClustersHardware) && !isEnabled(OutputType::DisableWriter)) {
     using MCLabelContainer = o2::dataformats::MCTruthContainer<o2::MCCompLabel>;
-    specs.push_back(makeWriterSpec("tpc-raw-cluster-writer",
-                                   inputType == InputType::Raw ? "tpc-filtered-raw-clusters.root" : "tpc-raw-clusters.root",
-                                   "tpcraw",
+    specs.push_back(makeWriterSpec("tpc-clusterhardware-writer",
+                                   inputType == InputType::ClustersHardware ? "tpc-filtered-clustershardware.root" : "tpc-clustershardware.root",
+                                   "tpcclustershardware",
                                    BranchDefinition<const char*>{InputSpec{"data", "TPC", "CLUSTERHW", 0},
                                                                  "TPCClusterHw",
                                                                  "databranch"},
@@ -339,7 +347,7 @@ framework::WorkflowSpec getWorkflow(std::vector<int> const& tpcSectors, std::vec
   //
   // selected by output type 'tracks'
   if (runTracker) {
-    specs.emplace_back(o2::tpc::getCATrackerSpec(propagateMC, caClusterer, laneConfiguration));
+    specs.emplace_back(o2::tpc::getCATrackerSpec(propagateMC, caClusterer, zsDecoder, laneConfiguration));
   }
 
   //////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/Detectors/TPC/workflow/src/tpc-reco-workflow.cxx b/Detectors/TPC/workflow/src/tpc-reco-workflow.cxx
index f6fe8af433ed6..5c5c533a7956b 100644
--- a/Detectors/TPC/workflow/src/tpc-reco-workflow.cxx
+++ b/Detectors/TPC/workflow/src/tpc-reco-workflow.cxx
@@ -40,8 +40,8 @@ void customize(std::vector<o2::framework::ConfigParamSpec>& workflowOptions)
   using namespace o2::framework;
 
   std::vector<ConfigParamSpec> options{
-    {"input-type", VariantType::String, "digits", {"digitizer, digits, raw, clusters"}},
-    {"output-type", VariantType::String, "tracks", {"digits, raw, clusters, tracks, disable-writer"}},
+    {"input-type", VariantType::String, "digits", {"digitizer, digits, clustershw, clustersnative, zsraw"}},
+    {"output-type", VariantType::String, "tracks", {"digits, clustershw, clustersnative, tracks, disable-writer"}},
     {"ca-clusterer", VariantType::Bool, false, {"Use clusterer of GPUCATracking"}},
     {"disable-mc", VariantType::Bool, false, {"disable sending of MC information"}},
     {"tpc-sectors", VariantType::String, "0-35", {"TPC sector range, e.g. 5-7,8,9"}},
@@ -90,11 +90,11 @@ using namespace o2::framework;
 /// and contains the following default processors
 /// - digit reader
 /// - clusterer
-/// - cluster raw decoder
+/// - ClusterHardware Decoder
 /// - CA tracker
 ///
 /// The default workflow can be customized by specifying input and output types
-/// e.g. digits, raw, tracks.
+/// e.g. digits, clustershw, tracks.
 ///
 /// MC info is processed by default, disabled by using command line option `--disable-mc`
 ///
@@ -123,10 +123,12 @@ WorkflowSpec defineDataProcessing(ConfigContext const& cfgc)
     // trigger and all messages will be sent out together at end of computation
   } else if (inputType == "digits") {
     gDispatchTrigger = o2::framework::Output{"TPC", "DIGITS"};
-  } else if (inputType == "raw") {
+  } else if (inputType == "clustershw") {
     gDispatchTrigger = o2::framework::Output{"TPC", "CLUSTERHW"};
-  } else if (inputType == "clusters") {
+  } else if (inputType == "clustersnative") {
     gDispatchTrigger = o2::framework::Output{"TPC", "CLUSTERNATIVE"};
+  } else if (inputType == "zsraw") {
+    gDispatchTrigger = o2::framework::Output{"TPC", "RAWDATA"};
   }
   // set up configuration
   o2::conf::ConfigurableParam::updateFromFile(cfgc.options().get<std::string>("configFile"));
diff --git a/GPU/Common/GPUDefGPUParameters.h b/GPU/Common/GPUDefGPUParameters.h
index 84ca7bc2bdc6a..a0b2da2a39dc6 100644
--- a/GPU/Common/GPUDefGPUParameters.h
+++ b/GPU/Common/GPUDefGPUParameters.h
@@ -43,6 +43,7 @@
   #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 6
   #define GPUCA_CONSTRUCTOR_IN_PIPELINE 0
   #define GPUCA_SELECTOR_IN_PIPELINE 0
+  #define GPUCA_NO_ATOMIC_PRECHECK 1
 #elif defined(GPUCA_GPUTYPE_TURING)
   #define GPUCA_WARP_SIZE 32
   #define GPUCA_MINBLOCK_COUNT_CONSTRUCTOR 1
@@ -50,23 +51,25 @@
   #define GPUCA_MINBLOCK_COUNT_HITSSORTER 1
   #define GPUCA_MINBLOCK_COUNT_FINDER 1
   #define GPUCA_MINBLOCK_COUNT_DECODE 4
+  #define GPUCA_MINBLOCK_COUNT_FIT 1
   #define GPUCA_THREAD_COUNT 512
   #define GPUCA_THREAD_COUNT_HITSSORTER 512
   #define GPUCA_THREAD_COUNT_HITSFINDER 512
-  #define GPUCA_THREAD_COUNT_CONSTRUCTOR 512
+  #define GPUCA_THREAD_COUNT_CONSTRUCTOR 384
   #define GPUCA_THREAD_COUNT_SELECTOR 512
-  #define GPUCA_THREAD_COUNT_FINDER 512
+  #define GPUCA_THREAD_COUNT_FINDER 640
   #define GPUCA_THREAD_COUNT_CLEANER 512
   #define GPUCA_THREAD_COUNT_CFDECODE 96
-  #define GPUCA_THREAD_COUNT_FIT 512
+  #define GPUCA_THREAD_COUNT_FIT 256
   #define GPUCA_THREAD_COUNT_COMPRESSION1 128
   #define GPUCA_THREAD_COUNT_COMPRESSION2 448
   #define GPUCA_THREAD_COUNT_CLUSTERER 512
-  #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6
+  #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4
   #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20
-  #define GPUCA_CONSTRUCTOR_IN_PIPELINE 0
+  #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1
   #define GPUCA_SELECTOR_IN_PIPELINE 0
   #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1
+  #define GPUCA_NO_ATOMIC_PRECHECK 1
   // #define GPUCA_USE_TEXTURES
 #elif defined(GPUCA_GPUTYPE_OPENCL)
 #elif defined(GPUCA_GPUCODE)
@@ -90,6 +93,9 @@
   #ifndef GPUCA_MINBLOCK_COUNT_HITSSORTER
   #define GPUCA_MINBLOCK_COUNT_HITSSORTER 1
   #endif
+  #ifndef GPUCA_MINBLOCK_COUNT_FIT
+  #define GPUCA_MINBLOCK_COUNT_FIT 1
+  #endif
   #ifndef GPUCA_THREAD_COUNT
   #define GPUCA_THREAD_COUNT 256
   #endif
diff --git a/GPU/GPUTracking/Base/GPUDataTypes.h b/GPU/GPUTracking/Base/GPUDataTypes.h
index af265592a3c4c..9f8f0eace7b31 100644
--- a/GPU/GPUTracking/Base/GPUDataTypes.h
+++ b/GPU/GPUTracking/Base/GPUDataTypes.h
@@ -44,6 +44,7 @@ using CompressedClusters = CompressedClustersPtrs_helper<CompressedClustersCount
 
 namespace o2
 {
+struct InteractionRecord;
 class MCCompLabel;
 namespace base
 {
@@ -191,6 +192,7 @@ struct GPUTrackingInOutZS {
     unsigned int n[NSLICES][NENDPOINTS];
   };
   GPUTrackingInOutZSSlice slice[NSLICES];
+  o2::InteractionRecord* ir;
 };
 
 struct GPUTrackingInOutDigits {
diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx
index c3fd87161c0a0..4f4059969fd44 100644
--- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx
+++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx
@@ -11,6 +11,11 @@
 /// \file GPUReconstructionConvert.cxx
 /// \author David Rohr
 
+#ifdef GPUCA_O2_LIB
+#include "DetectorsRaw/HBFUtils.h"
+#include "DetectorsRaw/RawFileWriter.h"
+#endif
+
 #include "GPUReconstructionConvert.h"
 #include "TPCFastTransform.h"
 #include "GPUTPCClusterData.h"
@@ -24,7 +29,9 @@
 #ifdef HAVE_O2HEADERS
 #include "Digit.h"
 #include "DataFormatsTPC/ZeroSuppression.h"
+#include "DataFormatsTPC/Constants.h"
 #include "Headers/RAWDataHeader.h"
+#include "CommonConstants/LHCConstants.h"
 #endif
 
 using namespace GPUCA_NAMESPACE::gpu;
@@ -180,8 +187,13 @@ void GPUReconstructionConvert::ZSstreamOut(unsigned short* bufIn, unsigned int&
   lenIn = 0;
 }
 
-void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, std::unique_ptr<unsigned long long int[]>& outBuffer, unsigned int* outSizes, const GPUParam& param, bool zs12bit, bool verify)
+void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, std::unique_ptr<unsigned long long int[]>* outBuffer, unsigned int* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, bool zs12bit, bool verify)
 {
+  // Pass in either outBuffer / outSizes, to fill standalone output buffers, or raw / ir to use RawFileWriter
+  // ir is the interaction record for time bin 0
+  if (((outBuffer == nullptr) ^ (outSizes == nullptr)) || ((raw == nullptr) ^ (ir == nullptr)) || !((outBuffer == nullptr) ^ (raw == nullptr)) || (raw && verify)) {
+    throw std::runtime_error("Invalid parameters");
+  }
 #ifdef GPUCA_TPC_GEOMETRY_O2
   std::vector<std::array<long long int, TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(long long int)>> buffer[NSLICES][GPUTrackingInOutZS::NENDPOINTS];
   unsigned int totalPages = 0;
@@ -192,6 +204,17 @@ void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, st
 #pragma omp parallel for reduction(+ : totalPages) reduction(+ : nErrors)
   // clang-format on
   for (unsigned int i = 0; i < NSLICES; i++) {
+    std::array<long long int, TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(long long int)> singleBuffer;
+#ifdef GPUCA_O2_LIB
+    int rawlnk = 15;
+    int bcShiftInFirstHBF = ir ? ir->bc : 0;
+#else
+    int bcShiftInFirstHBF = 0;
+#endif
+    int rawcru = 0;
+    int rawendpoint = 0;
+    (void)(rawcru + rawendpoint); // avoid compiler warning
+
     std::vector<deprecated::PackedDigit> tmpBuffer;
     std::array<unsigned short, TPCZSHDR::TPC_ZS_PAGE_SIZE> streamBuffer;
     std::array<unsigned char, TPCZSHDR::TPC_ZS_PAGE_SIZE> streamBuffer8;
@@ -214,6 +237,7 @@ void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, st
       return a.pad <= b.pad;
     });
     int lastEndpoint = -1, lastRow = GPUCA_ROW_COUNT, lastTime = -1;
+    long long int hbf = -1, nexthbf = 0;
     std::array<long long int, TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(long long int)>* page = nullptr;
     TPCZSHDR* hdr = nullptr;
     TPCZSTBHDR* tbHdr = nullptr;
@@ -259,6 +283,12 @@ void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, st
         }
         //sizeChk += ((seqLen + streamSizeChk) * encodeBits + 7) / 8;
         //printf("Endpoint %d (%d), Pos %d, Chk %d, Len %d, rows %d, StreamSize %d %d, time %d (%d), row %d (%d), pad %d\n", endpoint, lastEndpoint, (int) (pagePtr - reinterpret_cast<unsigned char*>(page)), sizeChk, seqLen, nRowsInTB, streamSize8, streamSize, (int) tmpBuffer[k].time, lastTime, (int) tmpBuffer[k].row, lastRow, tmpBuffer[k].pad);
+        if (tmpBuffer[k].time != lastTime) {
+          nexthbf = (bcShiftInFirstHBF + tmpBuffer[k].time * Constants::LHCBCPERTIMEBIN) / o2::constants::lhc::LHCMaxBunches;
+          if (hbf != nexthbf) {
+            lastEndpoint = -1;
+          }
+        }
       }
       if (k >= tmpBuffer.size() || endpoint != lastEndpoint || tmpBuffer[k].time != lastTime) {
         if (pagePtr != reinterpret_cast<unsigned char*>(page)) {
@@ -273,14 +303,31 @@ void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, st
             tbHdr->rowAddr1()[l - 1] += 2 * nRowsInTB;
           }
         }
+        if (page && (k >= tmpBuffer.size() || endpoint != lastEndpoint)) {
+#ifdef GPUCA_O2_LIB
+          if (raw) {
+            const int rawfeeid = (rawcru << 7) | (rawendpoint << 6) | rawlnk;
+            raw->addData(rawfeeid, rawcru, rawlnk, rawendpoint, *ir + hbf * o2::constants::lhc::LHCMaxBunches, gsl::span<char>((char*)page + sizeof(o2::header::RAWDataHeader), (char*)page + TPCZSHDR::TPC_ZS_PAGE_SIZE), true);
+          } else
+#endif
+          {
+            o2::header::RAWDataHeader* rdh = (o2::header::RAWDataHeader*)page;
+            rdh->heartbeatBC = bcShiftInFirstHBF;
+            rdh->heartbeatOrbit = hbf;
+          }
+        }
         if (k >= tmpBuffer.size()) {
           break;
         }
       }
       if (endpoint != lastEndpoint) {
-        buffer[i][endpoint].emplace_back();
-        totalPages++;
-        page = &buffer[i][endpoint].back();
+        if (raw) {
+          page = &singleBuffer;
+        } else {
+          buffer[i][endpoint].emplace_back();
+          page = &buffer[i][endpoint].back();
+        }
+        hbf = nexthbf;
         pagePtr = reinterpret_cast<unsigned char*>(page);
         std::fill(page->begin(), page->end(), 0);
         pagePtr += sizeof(o2::header::RAWDataHeader);
@@ -288,10 +335,13 @@ void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, st
         pagePtr += sizeof(*hdr);
         hdr->version = zs12bit ? 2 : 1;
         hdr->cruID = i * 10 + region;
-        hdr->timeOffset = tmpBuffer[k].time;
+        rawcru = i * 10 + region;
+        rawendpoint = endpoint & 1;
+        hdr->timeOffset = tmpBuffer[k].time - (hbf * o2::constants::lhc::LHCMaxBunches + Constants::LHCBCPERTIMEBIN - 1 - bcShiftInFirstHBF) / Constants::LHCBCPERTIMEBIN;
         lastTime = -1;
         tbHdr = nullptr;
         lastEndpoint = endpoint;
+        totalPages++;
       }
       if (tmpBuffer[k].time != lastTime) {
         if (lastTime != -1) {
@@ -337,6 +387,7 @@ void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, st
         for (unsigned int k = 0; k < buffer[i][j].size(); k++) {
           page = &buffer[i][j][k];
           pagePtr = reinterpret_cast<unsigned char*>(page);
+          const o2::header::RAWDataHeader* rdh = (const o2::header::RAWDataHeader*)pagePtr;
           pagePtr += sizeof(o2::header::RAWDataHeader);
           hdr = reinterpret_cast<TPCZSHDR*>(pagePtr);
           pagePtr += sizeof(*hdr);
@@ -359,6 +410,7 @@ void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, st
           int nRowsRegion = param.tpcGeometry.GetRegionRows(region);
 
           int timeBin = hdr->timeOffset;
+          timeBin += (rdh->heartbeatOrbit * o2::constants::lhc::LHCMaxBunches + Constants::LHCBCPERTIMEBIN - 1 - bcShiftInFirstHBF) / Constants::LHCBCPERTIMEBIN;
           for (int l = 0; l < hdr->nTimeBins; l++) {
             if ((pagePtr - reinterpret_cast<unsigned char*>(page)) & 1) {
               pagePtr++;
@@ -419,13 +471,15 @@ void GPUReconstructionConvert::RunZSEncoder(const GPUTrackingInOutDigits* in, st
     }
   }
 
-  outBuffer.reset(new unsigned long long int[totalPages * TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(unsigned long long int)]);
-  unsigned long long int offset = 0;
-  for (unsigned int i = 0; i < NSLICES; i++) {
-    for (unsigned int j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
-      memcpy((char*)outBuffer.get() + offset, buffer[i][j].data(), buffer[i][j].size() * TPCZSHDR::TPC_ZS_PAGE_SIZE);
-      offset += buffer[i][j].size() * TPCZSHDR::TPC_ZS_PAGE_SIZE;
-      outSizes[i * GPUTrackingInOutZS::NENDPOINTS + j] = buffer[i][j].size();
+  if (outBuffer) {
+    outBuffer->reset(new unsigned long long int[totalPages * TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(unsigned long long int)]);
+    unsigned long long int offset = 0;
+    for (unsigned int i = 0; i < NSLICES; i++) {
+      for (unsigned int j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) {
+        memcpy((char*)outBuffer->get() + offset, buffer[i][j].data(), buffer[i][j].size() * TPCZSHDR::TPC_ZS_PAGE_SIZE);
+        offset += buffer[i][j].size() * TPCZSHDR::TPC_ZS_PAGE_SIZE;
+        outSizes[i * GPUTrackingInOutZS::NENDPOINTS + j] = buffer[i][j].size();
+      }
     }
   }
   if (nErrors) {
diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.h b/GPU/GPUTracking/Base/GPUReconstructionConvert.h
index ddf04687a1dd2..7f5bd7b120830 100644
--- a/GPU/GPUTracking/Base/GPUReconstructionConvert.h
+++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.h
@@ -19,11 +19,16 @@
 
 namespace o2
 {
+struct InteractionRecord;
 namespace tpc
 {
 struct ClusterNative;
 struct ClusterNativeAccess;
 } // namespace tpc
+namespace raw
+{
+class RawFileWriter;
+} // namespace raw
 } // namespace o2
 
 class AliHLTTPCRawCluster;
@@ -48,7 +53,7 @@ class GPUReconstructionConvert
   constexpr static unsigned int NSLICES = GPUCA_NSLICES;
   static void ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr<GPUTPCClusterData[]>* clusters, unsigned int* nClusters, const TPCFastTransform* transform, int continuousMaxTimeBin = 0);
   static void ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr<o2::tpc::ClusterNative[]>& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, unsigned int* nRawClusters);
-  static void RunZSEncoder(const GPUTrackingInOutDigits* in, std::unique_ptr<unsigned long long int[]>& outBuffer, unsigned int* outSizes, const GPUParam& param, bool zs12bit, bool verify);
+  static void RunZSEncoder(const GPUTrackingInOutDigits* in, std::unique_ptr<unsigned long long int[]>* outBuffer, unsigned int* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, bool zs12bit, bool verify);
   static void RunZSEncoderCreateMeta(const unsigned long long int* buffer, const unsigned int* sizes, void** ptrs, GPUTrackingInOutZS* out);
   static void RunZSFilter(std::unique_ptr<deprecated::PackedDigit[]>* buffers, const deprecated::PackedDigit* const* ptrs, size_t* nsb, const size_t* ns, const GPUParam& param, bool zs12bit);
   static int GetMaxTimeBin(const o2::tpc::ClusterNativeAccess& native);
diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h
index 8926ab21a9ed2..57b3865a189e8 100644
--- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h
+++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h
@@ -34,11 +34,16 @@
 #ifndef GPUCA_KRNL_REG
 #define GPUCA_KRNL_REG(...)
 #endif
+#ifndef GPUCA_KRNL_CUSTOM
+#define GPUCA_KRNL_CUSTOM(...)
+#endif
 #ifndef GPUCA_KRNL_BACKEND_XARGS
 #define GPUCA_KRNL_BACKEND_XARGS
 #endif
 #define GPUCA_ATTRRES_REG(reg, num, ...) GPUCA_KRNL_REG(num) GPUCA_ATTRRES2(__VA_ARGS__)
 #define GPUCA_ATTRRES2_REG(reg, num, ...) GPUCA_KRNL_REG(num) GPUCA_ATTRRES3(__VA_ARGS__)
+#define GPUCA_ATTRRES_CUSTOM(custom, args, ...) GPUCA_KRNL_CUSTOM(args) GPUCA_ATTRRES2(__VA_ARGS__)
+#define GPUCA_ATTRRES2_CUSTOM(custom, args, ...) GPUCA_KRNL_CUSTOM(args) GPUCA_ATTRRES3(__VA_ARGS__)
 #define GPUCA_ATTRRES_NONE(...)
 #define GPUCA_ATTRRES2_NONE(...)
 #define GPUCA_ATTRRES_(...)
diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h
index 0002038a71c33..7460e3981821a 100644
--- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h
+++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h
@@ -24,7 +24,7 @@ GPUCA_KRNL((GPUTPCTrackletConstructor, allSlices         ), (single, REG, (GPUCA
 GPUCA_KRNL((GPUTPCTrackletSelector                       ), (both, REG, (GPUCA_THREAD_COUNT_SELECTOR, GPUCA_MINBLOCK_COUNT_SELECTOR)), (), ())
 GPUCA_KRNL((GPUMemClean16                                ), (simple, REG, (GPUCA_THREAD_COUNT, 1)), (, GPUPtr1(void*, ptr), unsigned long size), (, GPUPtr2(void*, ptr), size))
 #if !defined(GPUCA_OPENCL1) && (!defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE))
-GPUCA_KRNL((GPUTPCGMMergerTrackFit                       ), (simple, REG, (GPUCA_THREAD_COUNT_FIT, 1)), (, int mode), (, mode))
+GPUCA_KRNL((GPUTPCGMMergerTrackFit                       ), (simple, REG, (GPUCA_THREAD_COUNT_FIT, GPUCA_MINBLOCK_COUNT_FIT)), (, int mode), (, mode))
 #ifdef HAVE_O2HEADERS
 GPUCA_KRNL((GPUTRDTrackerKernels                         ), (simple, REG, (GPUCA_THREAD_COUNT_TRD, 1)), (), ())
 GPUCA_KRNL((GPUITSFitterKernel                           ), (simple, REG, (GPUCA_THREAD_COUNT_ITS, 1)), (), ())
@@ -46,7 +46,7 @@ GPUCA_KRNL((GPUTPCCFStreamCompaction,   nativeScanUp     ), (single, REG, (GPUCA
 GPUCA_KRNL((GPUTPCCFStreamCompaction,   nativeScanTop    ), (single, REG, (GPUCA_THREAD_COUNT_SCAN, 1)), (, int iBuf, int nElems), (, iBuf, nElems))
 GPUCA_KRNL((GPUTPCCFStreamCompaction,   nativeScanDown   ), (single, REG, (GPUCA_THREAD_COUNT_SCAN, 1)), (, int iBuf, unsigned int offset, int nElems), (, iBuf, offset, nElems))
 GPUCA_KRNL((GPUTPCCFStreamCompaction,   compact          ), (single, REG, (GPUCA_THREAD_COUNT_SCAN, 1)), (, int iBuf, int stage, GPUPtr1(ChargePos*, in), GPUPtr1(ChargePos*, out)), (, iBuf, stage, GPUPtr2(ChargePos*, in), GPUPtr2(ChargePos*, out)))
-GPUCA_KRNL((GPUTPCCFDecodeZS                             ), (single, REG, (GPUCA_THREAD_COUNT_CFDECODE, GPUCA_MINBLOCK_COUNT_DECODE)), (), ())
+GPUCA_KRNL((GPUTPCCFDecodeZS                             ), (single, REG, (GPUCA_THREAD_COUNT_CFDECODE, GPUCA_MINBLOCK_COUNT_DECODE)), (, int bcShiftInFirstHBF), (, bcShiftInFirstHBF))
 #endif
 #endif
 // clang-format on
diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIP.hip.cxx b/GPU/GPUTracking/Base/hip/GPUReconstructionHIP.hip.cxx
index d3b796f37664c..27d3fe0c5bdb3 100644
--- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIP.hip.cxx
+++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIP.hip.cxx
@@ -91,6 +91,8 @@ GPUg() void runKernelHIP(GPUCA_CONSMEM_PTR int iSlice, Args... args)
 
 #undef GPUCA_KRNL_REG
 #define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_STRIP(args))
+#undef GPUCA_KRNL_CUSTOM
+#define GPUCA_KRNL_CUSTOM(args) GPUCA_M_STRIP(args)
 #undef GPUCA_KRNL_BACKEND_XARGS
 #define GPUCA_KRNL_BACKEND_XARGS hipEvent_t *start, hipEvent_t *stop,
 #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward) GPUCA_KRNL_WRAP(GPUCA_KRNL_, x_class, x_attributes, x_arguments, x_forward)
diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt
index 8eff814b5e8ef..37d2e105f86d2 100644
--- a/GPU/GPUTracking/CMakeLists.txt
+++ b/GPU/GPUTracking/CMakeLists.txt
@@ -242,6 +242,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2")
                                        O2::TRDBase
                                        O2::ITStracking
                                        O2::TPCFastTransformation
+                                       O2::DetectorsRaw
                                        ${DEBUGGUI_TARGET}
                  PUBLIC_INCLUDE_DIRECTORIES SliceTracker
                                             Base
diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx
index 3bcd0906b1420..b2c00b4826c50 100644
--- a/GPU/GPUTracking/Global/GPUChainTracking.cxx
+++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx
@@ -11,6 +11,9 @@
 /// \file GPUChainTracking.cxx
 /// \author David Rohr
 
+#ifdef GPUCA_O2_LIB
+#include "CommonDataFormat/InteractionRecord.h"
+#endif
 #ifdef HAVE_O2HEADERS
 #include "SimulationDataFormat/MCCompLabel.h"
 #include "SimulationDataFormat/MCTruthContainer.h"
@@ -38,9 +41,9 @@
 #include "GPUReconstructionConvert.h"
 #include "GPUMemorySizeScalers.h"
 #include "GPUTrackingInputProvider.h"
-#include "Digit.h"
 
 #ifdef HAVE_O2HEADERS
+#include "Digit.h"
 #include "GPUTPCClusterStatistics.h"
 #include "DataFormatsTPC/ZeroSuppression.h"
 #include "Headers/RAWDataHeader.h"
@@ -61,6 +64,7 @@ using namespace o2::trd;
 
 GPUChainTracking::GPUChainTracking(GPUReconstruction* rec, unsigned int maxTPCHits, unsigned int maxTRDTracklets) : GPUChain(rec), mIOPtrs(processors()->ioPtrs), mInputsHost(new GPUTrackingInputProvider), mInputsShadow(new GPUTrackingInputProvider), mClusterNativeAccess(new ClusterNativeAccess), mMaxTPCHits(maxTPCHits), mMaxTRDTracklets(maxTRDTracklets)
 {
+  ClearIOPointers();
   mFlatObjectsShadow.mChainTracking = this;
   mFlatObjectsDevice.mChainTracking = this;
 }
@@ -465,7 +469,7 @@ void GPUChainTracking::AllocateIOMemory()
     AllocateIOMemoryHelper(mIOPtrs.nSliceOutClusters[i], mIOPtrs.sliceOutClusters[i], mIOMem.sliceOutClusters[i]);
   }
   AllocateIOMemoryHelper(mClusterNativeAccess->nClustersTotal, mClusterNativeAccess->clustersLinear, mIOMem.clustersNative);
-  mIOPtrs.clustersNative = mClusterNativeAccess.get();
+  mIOPtrs.clustersNative = mClusterNativeAccess->nClustersTotal ? mClusterNativeAccess.get() : nullptr;
   AllocateIOMemoryHelper(mIOPtrs.nMCLabelsTPC, mIOPtrs.mcLabelsTPC, mIOMem.mcLabelsTPC);
   AllocateIOMemoryHelper(mIOPtrs.nMCInfosTPC, mIOPtrs.mcInfosTPC, mIOMem.mcInfosTPC);
   AllocateIOMemoryHelper(mIOPtrs.nMergedTracks, mIOPtrs.mergedTracks, mIOMem.mergedTracks);
@@ -571,7 +575,7 @@ void GPUChainTracking::ConvertZSEncoder(bool zs12bit)
   mTPCZSSizes.reset(new unsigned int[NSLICES * GPUTrackingInOutZS::NENDPOINTS]);
   mTPCZSPtrs.reset(new void*[NSLICES * GPUTrackingInOutZS::NENDPOINTS]);
   mTPCZS.reset(new GPUTrackingInOutZS);
-  GPUReconstructionConvert::RunZSEncoder(mIOPtrs.tpcPackedDigits, mTPCZSBuffer, mTPCZSSizes.get(), param(), zs12bit, true);
+  GPUReconstructionConvert::RunZSEncoder(mIOPtrs.tpcPackedDigits, &mTPCZSBuffer, mTPCZSSizes.get(), nullptr, nullptr, param(), zs12bit, true);
   GPUReconstructionConvert::RunZSEncoderCreateMeta(mTPCZSBuffer.get(), mTPCZSSizes.get(), mTPCZSPtrs.get(), mTPCZS.get());
   mIOPtrs.tpcZS = mTPCZS.get();
   if (GetDeviceProcessingSettings().registerStandaloneInputMemory) {
@@ -870,7 +874,12 @@ int GPUChainTracking::RunTPCClusterizer()
       }
 
       if (mIOPtrs.tpcZS) {
-        runKernel<GPUTPCCFDecodeZS, GPUTPCCFDecodeZS::decodeZS>({doGPU ? clusterer.mPmemory->counters.nPages : GPUTrackingInOutZS::NENDPOINTS, CFDecodeThreadCount(), lane}, {iSlice}, {});
+#ifdef GPUCA_O2_LIB
+        int bcShiftInFirstHBF = mIOPtrs.tpcZS->ir ? mIOPtrs.tpcZS->ir->bc : 0;
+#else
+        int bcShiftInFirstHBF = 0;
+#endif
+        runKernel<GPUTPCCFDecodeZS, GPUTPCCFDecodeZS::decodeZS>({doGPU ? clusterer.mPmemory->counters.nPages : GPUTrackingInOutZS::NENDPOINTS, CFDecodeThreadCount(), lane}, {iSlice}, {}, bcShiftInFirstHBF);
         TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane);
         SynchronizeStream(lane);
       } else {
diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h
index 6eb886b7c2e5f..0c96f77cef113 100644
--- a/GPU/GPUTracking/Global/GPUChainTracking.h
+++ b/GPU/GPUTracking/Global/GPUChainTracking.h
@@ -208,7 +208,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega
   std::unique_ptr<unsigned long long int[]> mTPCZSBuffer;             // Memory to store TPC ZS pages
   std::unique_ptr<unsigned int[]> mTPCZSSizes;                        // Array with TPC ZS numbers of pages
   std::unique_ptr<void*[]> mTPCZSPtrs;                                // Array with pointers to TPC ZS pages
-  std::unique_ptr<o2::gpu::GPUTrackingInOutZS> mTPCZS;                // TPC ZS Data Structure
+  std::unique_ptr<GPUTrackingInOutZS> mTPCZS;                         // TPC ZS Data Structure
 
   // Upper bounds for memory allocation
   unsigned int mMaxTPCHits;
diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
index 93e153970d69d..2644b5953ddfc 100644
--- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
+++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx
@@ -472,28 +472,34 @@ GPUd() void GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict
   row.Grid().GetBinArea(Y, Z + zOffset, tube, tube, bin, ny, nz);
   float sy2 = tube * tube, sz2 = tube * tube;
 
+  const int nBinsY = row.Grid().Ny();
+  const int idOffset = tracker.Data().ClusterIdOffset();
+  const int* ids = &(tracker.Data().ClusterDataIndex()[row.HitNumberOffset()]);
+  unsigned int myWeight = Merger->TrackOrderAttach()[iTrack] | GPUTPCGMMerger::attachAttached | GPUTPCGMMerger::attachTube;
+  GPUAtomic(unsigned int)* const weights = Merger->ClusterAttachment();
+  if (goodLeg) {
+    myWeight |= GPUTPCGMMerger::attachGoodLeg;
+  }
   for (int k = 0; k <= nz; k++) {
-    int nBinsY = row.Grid().Ny();
-    int mybin = bin + k * nBinsY;
-    unsigned int hitFst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin);
-    unsigned int hitLst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin + ny + 1);
+    const int mybin = bin + k * nBinsY;
+    const unsigned int hitFst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin);
+    const unsigned int hitLst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin + ny + 1);
     for (unsigned int ih = hitFst; ih < hitLst; ih++) {
-      cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, ih);
-      int id = tracker.Data().ClusterIdOffset() + tracker.Data().ClusterDataIndex(row, ih);
-      GPUAtomic(unsigned int) * GPUrestrict() weight = &Merger->ClusterAttachment()[id];
-      if (*weight & GPUTPCGMMerger::attachGood) {
+      int id = idOffset + ids[ih];
+      GPUAtomic(unsigned int) * GPUrestrict() const weight = weights + id;
+      ;
+#if !defined(GPUCA_NO_ATOMIC_PRECHECK) && GPUCA_NO_ATOMIC_PRECHECK < 1
+      if (myWeight <= *weight) {
         continue;
       }
-      float y = y0 + hh.x * stepY;
-      float z = z0 + hh.y * stepZ;
-      float dy = y - Y;
-      float dz = z - Z;
+#endif
+      const cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, ih);
+      const float y = y0 + hh.x * stepY;
+      const float z = z0 + hh.y * stepZ;
+      const float dy = y - Y;
+      const float dz = z - Z;
       if (dy * dy < sy2 && dz * dz < sz2) {
         // CADEBUG(printf("Found Y %f Z %f\n", y, z));
-        int myWeight = Merger->TrackOrderAttach()[iTrack] | GPUTPCGMMerger::attachAttached | GPUTPCGMMerger::attachTube;
-        if (goodLeg) {
-          myWeight |= GPUTPCGMMerger::attachGoodLeg;
-        }
         CAMath::AtomicMax(weight, myWeight);
       }
     }
diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h
index 096fdb4fa7f33..66734b77d31c3 100644
--- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h
+++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h
@@ -124,6 +124,7 @@ class GPUTPCSliceData
  */
   MEM_TEMPLATE()
   GPUhd() int ClusterDataIndex(const MEM_TYPE(GPUTPCRow) & row, unsigned int hitIndex) const;
+  GPUd() GPUglobalref() const int* ClusterDataIndex() const { return mClusterDataIndex; }
 
   /**
  * Return the row object for the given row index.
diff --git a/GPU/GPUTracking/Standalone/display/GPUDisplayKeys.cxx b/GPU/GPUTracking/Standalone/display/GPUDisplayKeys.cxx
index bcc9c6b9e0dfa..b2d3b9cc7fe53 100644
--- a/GPU/GPUTracking/Standalone/display/GPUDisplayKeys.cxx
+++ b/GPU/GPUTracking/Standalone/display/GPUDisplayKeys.cxx
@@ -359,7 +359,7 @@ void GPUDisplay::HandleKeyRelease(unsigned char key)
     FILE* ftmp = fopen("glpos.tmp", "w+b");
     if (ftmp) {
       int retval = fwrite(&mViewMatrix, sizeof(mViewMatrix), 1, ftmp);
-      if (retval != 16) {
+      if (retval != 1) {
         GPUError("Error writing position to file");
       } else {
         GPUInfo("Position stored to file");
diff --git a/GPU/GPUTracking/Standalone/standalone.cxx b/GPU/GPUTracking/Standalone/standalone.cxx
index 0ad660c001da7..4bd56545a4f45 100644
--- a/GPU/GPUTracking/Standalone/standalone.cxx
+++ b/GPU/GPUTracking/Standalone/standalone.cxx
@@ -567,7 +567,7 @@ int main(int argc, char** argv)
           }
         }
 
-        if (configStandalone.overrideMaxTimebin && (chainTracking->mIOPtrs.clustersNative || chainTracking->mIOPtrs.tpcPackedDigits)) {
+        if (configStandalone.overrideMaxTimebin && (chainTracking->mIOPtrs.clustersNative || chainTracking->mIOPtrs.tpcPackedDigits || chainTracking->mIOPtrs.tpcZS)) {
           GPUSettingsEvent ev = rec->GetEventSettings();
           ev.continuousMaxTimeBin = chainTracking->mIOPtrs.tpcZS ? GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.tpcZS) : chainTracking->mIOPtrs.tpcPackedDigits ? GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.tpcPackedDigits) : GPUReconstructionConvert::GetMaxTimeBin(*chainTracking->mIOPtrs.clustersNative);
           rec->UpdateEventSettings(&ev);
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx
index c1f32bdfdff81..b498b2a06a4ab 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx
@@ -33,15 +33,15 @@ GPUdii() void GPUTPCCFClusterizer::Thread<GPUTPCCFClusterizer::computeClusters>(
   GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, chargeMap, clusterer.mPfilteredPeakPositions, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterer.mPclusterByRow);
 }
 
-GPUd() void GPUTPCCFClusterizer::computeClustersImpl(int nBlocks, int nThreads, int iBlock, int iThread,
-                                                     GPUSharedMemory& smem,
-                                                     const Array2D<PackedCharge>& chargeMap,
-                                                     const ChargePos* filteredPeakPositions,
-                                                     MCLabelAccumulator* labelAcc,
-                                                     uint clusternum,
-                                                     uint maxClusterPerRow,
-                                                     uint* clusterInRow,
-                                                     tpc::ClusterNative* clusterByRow)
+GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int nBlocks, int nThreads, int iBlock, int iThread,
+                                                       GPUSharedMemory& smem,
+                                                       const Array2D<PackedCharge>& chargeMap,
+                                                       const ChargePos* filteredPeakPositions,
+                                                       MCLabelAccumulator* labelAcc,
+                                                       uint clusternum,
+                                                       uint maxClusterPerRow,
+                                                       uint* clusterInRow,
+                                                       tpc::ClusterNative* clusterByRow)
 {
   uint idx = get_global_id(0);
 
@@ -148,7 +148,7 @@ GPUd() void GPUTPCCFClusterizer::addLine(
   }
 }
 
-GPUd() void GPUTPCCFClusterizer::updateClusterScratchpadInner(
+GPUdii() void GPUTPCCFClusterizer::updateClusterScratchpadInner(
   ushort lid,
   ushort N,
   const PackedCharge* buf,
@@ -178,7 +178,7 @@ GPUd() void GPUTPCCFClusterizer::updateClusterScratchpadInner(
   GPUbarrier();
 }
 
-GPUd() void GPUTPCCFClusterizer::updateClusterScratchpadOuter(
+GPUdii() void GPUTPCCFClusterizer::updateClusterScratchpadOuter(
   ushort lid,
   ushort N,
   ushort M,
@@ -202,7 +202,7 @@ GPUd() void GPUTPCCFClusterizer::updateClusterScratchpadOuter(
   }
 }
 
-GPUd() void GPUTPCCFClusterizer::buildClusterScratchPad(
+GPUdii() void GPUTPCCFClusterizer::buildClusterScratchPad(
   const Array2D<PackedCharge>& chargeMap,
   ChargePos pos,
   ChargePos* posBcast,
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx
index 3cc7d724d59c9..c5bd140ca1423 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx
@@ -17,6 +17,7 @@
 #include "Array2D.h"
 #include "PackedCharge.h"
 #include "DataFormatsTPC/ZeroSuppression.h"
+#include "CommonConstants/LHCConstants.h"
 
 #ifndef __OPENCL__
 #include "Headers/RAWDataHeader.h"
@@ -26,7 +27,10 @@ namespace o2
 namespace header
 {
 struct RAWDataHeader {
-  unsigned int words[16];
+  union {
+    unsigned int words[16];
+    int heartbeatOrbit;
+  };
 };
 } // namespace header
 } // namespace o2
@@ -37,12 +41,12 @@ using namespace GPUCA_NAMESPACE::gpu;
 using namespace o2::tpc;
 
 template <>
-GPUdii() void GPUTPCCFDecodeZS::Thread<GPUTPCCFDecodeZS::decodeZS>(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, processorType& clusterer)
+GPUdii() void GPUTPCCFDecodeZS::Thread<GPUTPCCFDecodeZS::decodeZS>(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, processorType& clusterer, int bcShiftInFirstHBF)
 {
-  GPUTPCCFDecodeZS::decode(clusterer, smem, nBlocks, nThreads, iBlock, iThread);
+  GPUTPCCFDecodeZS::decode(clusterer, smem, nBlocks, nThreads, iBlock, iThread, bcShiftInFirstHBF);
 }
 
-GPUd() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int nBlocks, int nThreads, int iBlock, int iThread)
+GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int nBlocks, int nThreads, int iBlock, int iThread, int bcShiftInFirstHBF)
 {
   const unsigned int slice = clusterer.mISlice;
 #ifdef GPUCA_GPUCODE
@@ -87,11 +91,12 @@ GPUd() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUSharedMe
       CA_SHARED_CACHE_REF(&s.ZSPage[0], pageSrc, TPCZSHDR::TPC_ZS_PAGE_SIZE, unsigned int, pageCache);
       GPUbarrier();
       const unsigned char* page = (const unsigned char*)pageCache;
+      const o2::header::RAWDataHeader* rdh = (const o2::header::RAWDataHeader*)page;
       const unsigned char* pagePtr = page + sizeof(o2::header::RAWDataHeader);
       const TPCZSHDR* hdr = reinterpret_cast<const TPCZSHDR*>(pagePtr);
       pagePtr += sizeof(*hdr);
       unsigned int mask = (1 << s.decodeBits) - 1;
-      int timeBin = hdr->timeOffset;
+      int timeBin = hdr->timeOffset + (rdh->heartbeatOrbit * o2::constants::lhc::LHCMaxBunches + Constants::LHCBCPERTIMEBIN - 1 - bcShiftInFirstHBF) / Constants::LHCBCPERTIMEBIN;
       for (int l = 0; l < hdr->nTimeBins; l++) { // TODO: Parallelize over time bins
         pagePtr += (pagePtr - page) & 1; //Ensure 16 bit alignment
         const TPCZSTBHDR* tbHdr = reinterpret_cast<const TPCZSTBHDR*>(pagePtr);
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h
index cce07b9c61ccb..ef49cb3222956 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h
@@ -44,7 +44,7 @@ class GPUTPCCFDecodeZS : public GPUKernelTemplate
     decodeZS,
   };
 
-  static GPUd() void decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int nBlocks, int nThreads, int iBlock, int iThread);
+  static GPUd() void decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int nBlocks, int nThreads, int iBlock, int iThread, int bcShiftInFirstHBF);
 
 #ifdef HAVE_O2HEADERS
   typedef GPUTPCClusterFinder processorType;
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx
index 77f3a6bed4c16..b949ab221d90e 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx
@@ -27,11 +27,11 @@ GPUdii() void GPUTPCCFDeconvolution::Thread<GPUTPCCFDeconvolution::countPeaks>(i
   GPUTPCCFDeconvolution::countPeaksImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, isPeakMap, chargeMap, clusterer.mPpositions, clusterer.mPmemory->counters.nDigits);
 }
 
-GPUd() void GPUTPCCFDeconvolution::countPeaksImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
-                                                  const Array2D<uchar>& peakMap,
-                                                  Array2D<PackedCharge>& chargeMap,
-                                                  const ChargePos* positions,
-                                                  const uint digitnum)
+GPUdii() void GPUTPCCFDeconvolution::countPeaksImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
+                                                    const Array2D<uchar>& peakMap,
+                                                    Array2D<PackedCharge>& chargeMap,
+                                                    const ChargePos* positions,
+                                                    const uint digitnum)
 {
   size_t idx = get_global_id(0);
 
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx
index e2c9e1a9c9de9..1df4c101cba28 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx
@@ -35,12 +35,12 @@ GPUdii() void GPUTPCCFNoiseSuppression::Thread<GPUTPCCFNoiseSuppression::updateP
   GPUTPCCFNoiseSuppression::updatePeaksImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer.mPpeakPositions, clusterer.mPisPeak, clusterer.mPmemory->counters.nPeaks, isPeakMap);
 }
 
-GPUd() void GPUTPCCFNoiseSuppression::noiseSuppressionImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
-                                                           const Array2D<PackedCharge>& chargeMap,
-                                                           const Array2D<uchar>& peakMap,
-                                                           const ChargePos* peakPositions,
-                                                           const uint peaknum,
-                                                           uchar* isPeakPredicate)
+GPUdii() void GPUTPCCFNoiseSuppression::noiseSuppressionImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
+                                                             const Array2D<PackedCharge>& chargeMap,
+                                                             const Array2D<uchar>& peakMap,
+                                                             const ChargePos* peakPositions,
+                                                             const uint peaknum,
+                                                             uchar* isPeakPredicate)
 {
   size_t idx = get_global_id(0);
 
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx
index 1b0ea11ac7284..b8c937d3c0e99 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx
@@ -29,7 +29,7 @@ GPUdii() void GPUTPCCFPeakFinder::Thread<GPUTPCCFPeakFinder::findPeaks>(int nBlo
   findPeaksImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, chargeMap, clusterer.mPpositions, clusterer.mPmemory->counters.nDigits, clusterer.mPisPeak, isPeakMap);
 }
 
-GPUd() bool GPUTPCCFPeakFinder::isPeakScratchPad(
+GPUdii() bool GPUTPCCFPeakFinder::isPeakScratchPad(
   GPUSharedMemory& smem,
   Charge q,
   const ChargePos& pos,
@@ -88,7 +88,7 @@ GPUd() bool GPUTPCCFPeakFinder::isPeakScratchPad(
   return peak;
 }
 
-GPUd() bool GPUTPCCFPeakFinder::isPeak(
+GPUdii() bool GPUTPCCFPeakFinder::isPeak(
   Charge myCharge,
   const ChargePos& pos,
   const Array2D<PackedCharge>& chargeMap)
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx
index 5a11e2420f194..c19c83a5b8815 100644
--- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx
+++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx
@@ -30,10 +30,10 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::nativeS
   nativeScanUpStartImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, clusterer.mPisPeak, clusterer.mPbuf + (iBuf - 1) * clusterer.mBufSize, clusterer.mPbuf + iBuf * clusterer.mBufSize, nElems);
 }
 
-GPUd() void GPUTPCCFStreamCompaction::nativeScanUpStartImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
-                                                            const uchar* predicate,
-                                                            int* sums,
-                                                            int* incr, int nElems)
+GPUdii() void GPUTPCCFStreamCompaction::nativeScanUpStartImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
+                                                              const uchar* predicate,
+                                                              int* sums,
+                                                              int* incr, int nElems)
 {
   int idx = get_global_id(0);
   int pred = predicate[idx];
@@ -61,9 +61,9 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::nativeS
   nativeScanUpImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, clusterer.mPbuf + (iBuf - 1) * clusterer.mBufSize, clusterer.mPbuf + iBuf * clusterer.mBufSize, nElems);
 }
 
-GPUd() void GPUTPCCFStreamCompaction::nativeScanUpImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
-                                                       int* sums,
-                                                       int* incr, int nElems)
+GPUdii() void GPUTPCCFStreamCompaction::nativeScanUpImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
+                                                         int* sums,
+                                                         int* incr, int nElems)
 {
   int idx = get_global_id(0);
   int scanRes = work_group_scan_inclusive_add((idx < nElems) ? sums[idx] : 0);
@@ -89,7 +89,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::nativeS
   nativeScanTopImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, clusterer.mPbuf + (iBuf - 1) * clusterer.mBufSize, nElems);
 }
 
-GPUd() void GPUTPCCFStreamCompaction::nativeScanTopImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, int* incr, int nElems)
+GPUdii() void GPUTPCCFStreamCompaction::nativeScanTopImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, int* incr, int nElems)
 {
   int idx = get_global_id(0);
 
@@ -105,10 +105,10 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::nativeS
   nativeScanDownImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, clusterer.mPbuf + (iBuf - 1) * clusterer.mBufSize, clusterer.mPbuf + iBuf * clusterer.mBufSize, offset, nElems);
 }
 
-GPUd() void GPUTPCCFStreamCompaction::nativeScanDownImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
-                                                         int* sums,
-                                                         const int* incr,
-                                                         unsigned int offset, int nElems)
+GPUdii() void GPUTPCCFStreamCompaction::nativeScanDownImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
+                                                           int* sums,
+                                                           const int* incr,
+                                                           unsigned int offset, int nElems)
 {
   int gid = get_group_id(0);
   int idx = get_global_id(0) + offset;
@@ -136,14 +136,14 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::compact
   }
 }
 
-GPUd() void GPUTPCCFStreamCompaction::compactImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
-                                                  const ChargePos* in,
-                                                  ChargePos* out,
-                                                  const uchar* predicate,
-                                                  int* newIdx,
-                                                  const int* incr,
-                                                  int nElems,
-                                                  size_t bufferSize)
+GPUdii() void GPUTPCCFStreamCompaction::compactImpl(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem,
+                                                    const ChargePos* in,
+                                                    ChargePos* out,
+                                                    const uchar* predicate,
+                                                    int* newIdx,
+                                                    const int* incr,
+                                                    int nElems,
+                                                    size_t bufferSize)
 {
   int gid = get_group_id(0);
   int idx = get_global_id(0);
@@ -170,7 +170,7 @@ GPUd() void GPUTPCCFStreamCompaction::compactImpl(int nBlocks, int nThreads, int
   }
 }
 
-GPUd() int GPUTPCCFStreamCompaction::compactionElems(processorType& clusterer, int stage)
+GPUdii() int GPUTPCCFStreamCompaction::compactionElems(processorType& clusterer, int stage)
 {
   return (stage) ? clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nDigits;
 }
diff --git a/cmake/O2AddTestWrapper.cmake b/cmake/O2AddTestWrapper.cmake
index ca2b6d9b27326..df37179f07fb5 100644
--- a/cmake/O2AddTestWrapper.cmake
+++ b/cmake/O2AddTestWrapper.cmake
@@ -81,11 +81,11 @@ function(o2_add_test_wrapper)
     endif()
   endif()
 
-  if("${A_MAX_ATTEMPTS}" GREATER 1)
-    # Warn only for tests where retry has been requested
-    message(
-      WARNING "Test ${testName} will be retried max ${A_MAX_ATTEMPTS} times")
-  endif()
+#  if("${A_MAX_ATTEMPTS}" GREATER 1)
+#    # Warn only for tests where retry has been requested
+#    message(
+#      WARNING "Test ${testName} will be retried max ${A_MAX_ATTEMPTS} times")
+#  endif()
   if(A_NON_FATAL)
     message(WARNING "Failure of test ${testName} will not be fatal")
   endif()