diff --git a/.gitignore b/.gitignore index 0cce01a..994c73d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +# cpp build/ *out* *.o @@ -5,3 +6,8 @@ build/ *run_* *optim.dat main + +# Ignore IDE configurations +.idea +.vscode +tags diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..102b81d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog +All notable high-level changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [1.0.1] - 2019.07.09 +### Added +- This CHANGELOG file. +- Format project using `clang-format` +- README contains WIP elements, which will be removed after completion + +## [1.0.0] - 2019.07.09 +### Added +- This project as of the state of the following publication: TODO diff --git a/Readme.md b/README.md similarity index 68% rename from Readme.md rename to README.md index 58c4b2b..e7765df 100644 --- a/Readme.md +++ b/README.md @@ -1,19 +1,26 @@ -# Layer-parallel training of deep residual neural networks +# Layer-parallel training of deep residual neural networks This code performs layer-parallel training of deep neural networks of residual type. It utilizes the parallel-in-time software library [XBraid](https://github.com/XBraid/xbraid) to distribute layers of the network to different compute units. Instead of sequential forward and backward propagation through the network, iterative multigrid updates are performed in parallel to solve for the network propagation and the training simultaneously. See the paper [Guenther et al.](https://arxiv.org/pdf/1812.04352.pdf) for a description of the method and all details. ## Build -The repository includes XBraid as a submodule. To clone both, use either `git clone --recurse-submodules [...]` for Git version >= 2.13, or `git clone [...]` followed by `cd xbraid`, `git submodule init` and `git submodule update` for older Git versions. +The repository includes XBraid as a submodule. To clone both, use either `git clone --recurse-submodules [...]` for Git version \>= 2.13, or `git clone [...]` followed by `cd xbraid`, `git submodule init` and `git submodule update` for older Git versions. -Type `make` in the main directory to build both the code and the XBraid library. +Type `make` in the main directory to build both the code and the XBraid library. ## Run -Test cases are located in the 'examples/' subfolder. Each example contains a `*.cfg` that holds configuration options for the current example dataset, the layer-parallelization with XBraid, and the optimization method and parameters. +Test cases are located in the 'examples/' subfolder. Each example contains a `*.cfg` that holds configuration options for the current example dataset, the layer-parallelization with XBraid, and the optimization method and parameters. -Run the test cases by callying './main' with the corresponding configuration file, e.g. `./main examples/peaks/peaks.cfg` +Run the test cases by calling './main' with the corresponding configuration file, e.g. `./main examples/peaks/peaks.cfg` ## Output -An optimization history file 'optim.dat' will be flushed to the examples subfolder. +An optimization history file 'optim.dat' will be written to the examples subfolder. + +## Contributors + +* Stefanie Guenther +* Eric C. Cyr +* J.B. Schroder +* Roland A. Siegbert
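As a quick illustration of the Build and Run steps described in the README above, here is a minimal end-to-end sketch; the repository URL and the local directory name `lp-dnn` are placeholders, and the `peaks` test case is used as the example:

```sh
# Clone the code together with the XBraid submodule (requires Git >= 2.13)
git clone --recurse-submodules <repository-url> lp-dnn
cd lp-dnn

# Build the XBraid library and the main executable
make

# Run the 'peaks' example with its configuration file;
# the optimization history 'optim.dat' is written to the example subfolder
./main examples/peaks/peaks.cfg
```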
diff --git a/examples/peaks/peaks.cfg b/examples/peaks/peaks.cfg index fb0af31..542c003 100644 --- a/examples/peaks/peaks.cfg +++ b/examples/peaks/peaks.cfg @@ -3,7 +3,7 @@ ################################ # relative data folder location -datafolder = examples/peaks +datafolder = ./ # filename of training data feature vectors ftrain_ex = features_training.dat # filename of training data labels/classes @@ -35,7 +35,7 @@ nchannels = 8 # number of layers (including opening layer and classification layer) (nlayer >= 3 !) nlayers = 32 # final time -T = 5.0 +T = 1.0 # Activation function ("tanh" or "ReLu" or "SmoothReLu") activation = SmoothReLu # Type of network ("dense" the default, or "convolutional") @@ -47,7 +47,7 @@ type_openlayer = activate # factor for scaling initial opening layer weights and bias weights_open_init = 1e-3 # factor for scaling initial weights and bias of intermediate layers -weights_init = 0e-3 +weights_init = 1e-3 # factor for scaling initial classification weights and bias weights_class_init = 1e-3 @@ -66,7 +66,7 @@ braid_maxlevels = 10 # minimum allowed coarse time grid size (values in 10-30 are usually best) braid_mincoarse = 10 # maximum number of iterations -braid_maxiter = 15 +braid_maxiter = 2 # absolute tolerance braid_abstol = 1e-15 # absolute adjoint tolerance @@ -88,13 +88,19 @@ braid_nrelax0 = 0 # Optimization #################################### # Type of batch selection ("deterministic" or "stochastic") +# deterministic: +# the batch is fixed => training always uses this one batch +# +# stochastic: draws the batch from a pool of available example IDs +# batch elements are randomly chosen in each iteration during training +# a smaller batch size makes sense here batch_type = deterministic # Batch size nbatch = 5000 # relaxation param for tikhonov term gamma_tik = 1e-7 # relaxation param for time-derivative term -gamma_ddt = 1e-7 +gamma_ddt = 1e-5 # relaxation param for tikhonov term of classification weights gamma_class = 1e-7 # stepsize selection type ("fixed" or "backtrackingLS" or "oneoverk") @@ -106,19 +112,19 @@ stepsize_type = backtrackingLS # initial stepsize stepsize = 1.0 # maximum number of optimization iterations -optim_maxiter = 10 +optim_maxiter = 130 # absolute stopping criterion for the gradient norm gtol = 1e-4 # maximum number of linesearch iterations -ls_maxiter = 20 +ls_maxiter = 15 # factor for modifying the stepsize within a linesearch iteration ls_factor = 0.5 # Hessian Approximation ("BFGS", "L-BFGS" or "Identity") hessian_approx = L-BFGS # number of stages for l-bfgs method -lbfgs_stages = 20 +lbfgs_stages = 10 # level for validation computation: # -1 = never validate # 0 = validate only after optimization finishes. # 1 = validate in each optimization iteration -validationlevel = 0 +validationlevel = 1 diff --git a/include/braid_wrapper.hpp b/include/braid_wrapper.hpp index f0465de..5af32a3 100644 --- a/include/braid_wrapper.hpp +++ b/include/braid_wrapper.hpp @@ -1,213 +1,179 @@ -#include #include +#include -#include "defs.hpp" #include "braid.hpp" +#include "defs.hpp" // #include "_braid.h" -#include "network.hpp" -#include "layer.hpp" #include "dataset.hpp" +#include "layer.hpp" +#include "network.hpp" #pragma once -/** - * Define the state vector at one time-step +/** + * Define the network state at one layer. It contains the transformed data batch in the vector **state, and a pointer to the actual layer.
+ */ +class myBraidVector { + protected: + int nbatch; /* Number of examples */ + int nchannels; /* Number of channels */ + + MyReal **state; /* Network state at one layer, dimensions: nbatch * nchannels */ + Layer *layer; /* Pointer to layer information */ + + /* Flag that determines if the layer and state have just been received and + * thus should be free'd after usage (flag > 0) */ + MyReal sendflag; + + public: + /* Get dimensions */ + int getnBatch(); + int getnChannels(); + + /* Get Pointer to the state at example exampleID */ + MyReal *getState(int exampleID); + + /* Get pointer to the full state matrix */ + MyReal **getState(); + + /* Get and set pointer to the layer */ + Layer *getLayer(); + void setLayer(Layer *layer); + + /* Get and set the sendflag */ + MyReal getSendflag(); + void setSendflag(MyReal value); + + /* Constructor */ + myBraidVector(int nChannels, int nBatch); + /* Destructor */ + ~myBraidVector(); +}; + +/** + * Wrapper for the primal braid app. Most important routines are the Step function, which applies the layer transformations (and hence steps forward to the next layer), the SetInitialCondition, which applies the opening layer and the EvaluateObjective function, which (surprise!) evaluates the loss function and adds the regularization terms to get the objective function value. + * + * The adjoint braid app inherits from this class, and overwrites those with the corresponding derivatives. */ -class myBraidVector -{ - protected: - int nbatch; /* Number of examples */ - int nchannels; /* Number of channels */ +class myBraidApp : public BraidApp { + protected: + // BraidApp defines tstart, tstop, ntime and comm_t + int myid; /* Processor rank*/ + Network *network; /* Pointer to the DNN Network Block (local layer storage) */ + DataSet *data; /* Pointer to the Data set */ + + BraidCore *core; /* Braid core for running PinT simulation */ + + /* Output */ + MyReal objective; /* Objective function */ + + public: + /* Constructor */ + myBraidApp(DataSet *Data, Network *Network, Config *Config, MPI_Comm Comm); + + /* Destructor */ + ~myBraidApp(); + + /* Return objective function */ + MyReal getObjective(); + + /* Return the core */ + BraidCore *getCore(); + + /* Get xbraid's grid distribution */ + void GetGridDistribution(int *ilower_ptr, int *iupper_ptr); + + /* Return the time step index of current time t */ + braid_Int GetTimeStepIndex(MyReal t); + + /* Apply one time step */ + virtual braid_Int Step(braid_Vector u_, braid_Vector ustop_, + braid_Vector fstop_, BraidStepStatus &pstatus); - MyReal **state; /* Network state at one layer, dimensions: nbatch * nchannels */ - Layer* layer; /* Pointer to layer information */ + /* Compute residual: Does nothing. */ + braid_Int Residual(braid_Vector u_, braid_Vector r_, + BraidStepStatus &pstatus); - /* Flag that determines if the layer and state have just been received and thus should be free'd after usage (flag > 0) */ - MyReal sendflag; + /* Allocate a new vector in *v_ptr, which is a deep copy of u_. */ + braid_Int Clone(braid_Vector u_, braid_Vector *v_ptr); - public: - /* Get dimensions */ - int getnBatch(); - int getnChannels(); + /* Allocate a new vector in *u_ptr and initialize it with an + initial guess appropriate for time t. */ + virtual braid_Int Init(braid_Real t, braid_Vector *u_ptr); - /* Get Pointer to the state at example exampleID */ - MyReal* getState(int exampleID); + /* De-allocate the vector @a u_. 
*/ + braid_Int Free(braid_Vector u_); - /* Get pointer to the full state matrix */ - MyReal** getState(); + /* Perform the operation: y_ = alpha * x_ + beta * @a y_. */ + braid_Int Sum(braid_Real alpha, braid_Vector x_, braid_Real beta, + braid_Vector y_); - /* Get and set pointer to the layer */ - Layer* getLayer(); - void setLayer(Layer* layer); + /* Compute in @a *norm_ptr an appropriate spatial norm of @a u_. */ + braid_Int SpatialNorm(braid_Vector u_, braid_Real *norm_ptr); - /* Get and set the sendflag */ - MyReal getSendflag(); - void setSendflag(MyReal value); + /* @see braid_PtFcnAccess. */ + braid_Int Access(braid_Vector u_, BraidAccessStatus &astatus); - /* Constructor */ - myBraidVector(int nChannels, - int nBatch); - /* Destructor */ - ~myBraidVector(); -}; + /* @see braid_PtFcnBufSize. */ + virtual braid_Int BufSize(braid_Int *size_ptr, BraidBufferStatus &bstatus); + /* @see braid_PtFcnBufPack. */ + virtual braid_Int BufPack(braid_Vector u_, void *buffer, + BraidBufferStatus &bstatus); + + /* @see braid_PtFcnBufUnpack. */ + virtual braid_Int BufUnpack(void *buffer, braid_Vector *u_ptr, + BraidBufferStatus &bstatus); + /* Set the initial condition */ + virtual braid_Int SetInitialCondition(); + /* evaluate objective function */ + virtual braid_Int EvaluateObjective(); + + /* Run Braid drive, return norm */ + MyReal run(); +}; /** - * Wrapper for the primal braid app. - * virtual function are overwritten from the adjoint app class + * Adjoint braid App for solving adjoint eqations with xbraid. */ -class myBraidApp : public BraidApp -{ - protected: - // BraidApp defines tstart, tstop, ntime and comm_t - int myid; /* Processor rank*/ - Network* network; /* Pointer to the DNN Network Block (local layer storage) */ - DataSet* data; /* Pointer to the Data set */ - - BraidCore* core; /* Braid core for running PinT simulation */ - - /* Output */ - MyReal objective; /* Objective function */ - - public: - - /* Constructor */ - myBraidApp(DataSet* Data, - Network* Network, - Config* Config, - MPI_Comm Comm); - - - /* Destructor */ - ~myBraidApp(); - - /* Return objective function */ - MyReal getObjective(); - - /* Return the core */ - BraidCore* getCore(); - - /* Get xbraid's grid distribution */ - void GetGridDistribution(int *ilower_ptr, - int *iupper_ptr); - - /* Return the time step index of current time t */ - braid_Int GetTimeStepIndex(MyReal t); - - /* Apply one time step */ - virtual braid_Int Step(braid_Vector u_, - braid_Vector ustop_, - braid_Vector fstop_, - BraidStepStatus &pstatus); - - /* Compute residual: Does nothing. */ - braid_Int Residual(braid_Vector u_, - braid_Vector r_, - BraidStepStatus &pstatus); - - /* Allocate a new vector in *v_ptr, which is a deep copy of u_. */ - braid_Int Clone(braid_Vector u_, - braid_Vector *v_ptr); - - /* Allocate a new vector in *u_ptr and initialize it with an - initial guess appropriate for time t. */ - virtual braid_Int Init(braid_Real t, - braid_Vector *u_ptr); - - /* De-allocate the vector @a u_. */ - braid_Int Free(braid_Vector u_); - - /* Perform the operation: y_ = alpha * x_ + beta * @a y_. */ - braid_Int Sum(braid_Real alpha, - braid_Vector x_, - braid_Real beta, - braid_Vector y_); - - /* Compute in @a *norm_ptr an appropriate spatial norm of @a u_. */ - braid_Int SpatialNorm(braid_Vector u_, - braid_Real *norm_ptr); - - /* @see braid_PtFcnAccess. */ - braid_Int Access(braid_Vector u_, - BraidAccessStatus &astatus); - - /* @see braid_PtFcnBufSize. 
*/ - virtual braid_Int BufSize(braid_Int *size_ptr, - BraidBufferStatus &bstatus); - - /* @see braid_PtFcnBufPack. */ - virtual braid_Int BufPack(braid_Vector u_, - void *buffer, - BraidBufferStatus &bstatus); - - /* @see braid_PtFcnBufUnpack. */ - virtual braid_Int BufUnpack(void *buffer, - braid_Vector *u_ptr, - BraidBufferStatus &bstatus); - - /* Set the initial condition */ - virtual braid_Int SetInitialCondition(); - - /* evaluate objective function */ - virtual braid_Int EvaluateObjective(); - - /* Run Braid drive, return norm */ - MyReal run(); -}; +class myAdjointBraidApp : public myBraidApp { + protected: + BraidCore + *primalcore; /* pointer to primal core for accessing primal states */ + public: + myAdjointBraidApp(DataSet *Data, Network *Network, Config *config, + BraidCore *Primalcoreptr, MPI_Comm comm); + ~myAdjointBraidApp(); + /* Get the storage index of primal (reversed) */ + int GetPrimalIndex(int ts); + /* Apply one time step */ + braid_Int Step(braid_Vector u_, braid_Vector ustop_, braid_Vector fstop_, + BraidStepStatus &pstatus); -/** - * Adjoint braid App for solving adjoint eqations with xbraid. - */ -class myAdjointBraidApp : public myBraidApp -{ - protected: - BraidCore* primalcore; /* pointer to primal core for accessing primal states */ - - public: - myAdjointBraidApp(DataSet* Data, - Network* Network, - Config* config, - BraidCore* Primalcoreptr, - MPI_Comm comm); - - ~myAdjointBraidApp(); - - /* Get the storage index of primal (reversed) */ - int GetPrimalIndex(int ts); - - /* Apply one time step */ - braid_Int Step(braid_Vector u_, - braid_Vector ustop_, - braid_Vector fstop_, - BraidStepStatus &pstatus); - - /* Allocate a new vector in *u_ptr and initialize it with an - initial guess appropriate for time t. */ - braid_Int Init(braid_Real t, - braid_Vector *u_ptr); - - /* @see braid_PtFcnBufSize. */ - braid_Int BufSize(braid_Int *size_ptr, - BraidBufferStatus &bstatus); - - /* @see braid_PtFcnBufPack. */ - braid_Int BufPack(braid_Vector u_, - void *buffer, - BraidBufferStatus &bstatus); - - /* @see braid_PtFcnBufUnpack. */ - braid_Int BufUnpack(void *buffer, - braid_Vector *u_ptr, - BraidBufferStatus &bstatus); + /* Allocate a new vector in *u_ptr and initialize it with an + initial guess appropriate for time t. */ + braid_Int Init(braid_Real t, braid_Vector *u_ptr); + + /* @see braid_PtFcnBufSize. */ + braid_Int BufSize(braid_Int *size_ptr, BraidBufferStatus &bstatus); + + /* @see braid_PtFcnBufPack. */ + braid_Int BufPack(braid_Vector u_, void *buffer, BraidBufferStatus &bstatus); + + /* @see braid_PtFcnBufUnpack. 
*/ + braid_Int BufUnpack(void *buffer, braid_Vector *u_ptr, + BraidBufferStatus &bstatus); - /* Set the adjoint initial condition (derivative of primal objective function) */ - braid_Int SetInitialCondition(); + /* Set the adjoint initial condition (derivative of primal objective function) + */ + braid_Int SetInitialCondition(); - /* evaluate objective function (being just the derivative of the opening layer) */ - braid_Int EvaluateObjective(); -}; \ No newline at end of file + /* evaluate objective function (being just the derivative of the opening + * layer) */ + braid_Int EvaluateObjective(); +}; \ No newline at end of file diff --git a/include/config.hpp b/include/config.hpp index 01447d3..9eec554 100644 --- a/include/config.hpp +++ b/include/config.hpp @@ -7,105 +7,103 @@ #define CONFIG_ARG_MAX_BYTES 128 /* Available activation functions */ -enum activation{TANH, RELU, SMRELU}; +enum activation { TANH, RELU, SMRELU }; /* Available network types */ -enum networkType{DENSE, CONVOLUTIONAL}; +enum networkType { DENSE, CONVOLUTIONAL }; /* Available batch types */ -enum batchtype{DETERMINISTIC, STOCHASTIC}; +enum batchtype { DETERMINISTIC, STOCHASTIC }; /* Available hessian approximation types */ -enum hessiantype{BFGS_SERIAL, LBFGS, IDENTITY}; +enum hessiantype { BFGS_SERIAL, LBFGS, IDENTITY }; /* Available stepsize selection methods */ -enum stepsizetype{FIXED, BACKTRACKINGLS, ONEOVERK}; +enum stepsizetype { FIXED, BACKTRACKINGLS, ONEOVERK }; class Config { - - private: - - /* Linked list for reading config options */ - struct config_option { - struct config_option* prev; - // config_option_t prev; - char key[CONFIG_ARG_MAX_BYTES]; - char value[CONFIG_ARG_MAX_BYTES]; - }; - - /* Helper function: Parse the config file */ - config_option* parsefile(char* path); - - public: /* List all configuration options here */ - /* Data set */ - const char* datafolder; - const char* ftrain_ex; - const char* ftrain_labels; - const char* fval_ex; - const char* fval_labels; - const char* weightsopenfile; - const char* weightsclassificationfile; - - int ntraining; - int nvalidation; - int nfeatures; - int nclasses; - - /* Neural Network */ - int nchannels; - int nlayers; - MyReal T; - int activation; - int network_type; - int openlayer_type; - MyReal weights_open_init; - MyReal weights_init; - MyReal weights_class_init; - - /* XBraid */ - int braid_cfactor0; - int braid_cfactor; - int braid_maxlevels; - int braid_mincoarse; - int braid_maxiter; - MyReal braid_abstol; - MyReal braid_abstoladj; - int braid_printlevel; - int braid_accesslevel; - int braid_setskip; - int braid_fmg; - int braid_nrelax; - int braid_nrelax0; - - /* Optimization */ - int batch_type; - int nbatch; - MyReal gamma_tik; - MyReal gamma_ddt; - MyReal gamma_class; - int stepsize_type; - MyReal stepsize_init; - int maxoptimiter; - MyReal gtol; - int ls_maxiter; - MyReal ls_factor; - int hessianapprox_type; - int lbfgs_stages; - int validationlevel; - - - /* Constructor sets default values */ - Config(); - - /* Destructor */ - ~Config(); - - /* Reads the config options from file */ - int readFromFile(char* configfilename); - - /* Writes config options to the file (File must be open!) 
*/ - int writeToFile(FILE* outfile); - - /* Returns a stepsize, depending on the selected stepsize type and current optimization iteration */ - MyReal getStepsize(int optimiter); + private: + /* Linked list for reading config options */ + struct config_option { + struct config_option *prev; + // config_option_t prev; + char key[CONFIG_ARG_MAX_BYTES]; + char value[CONFIG_ARG_MAX_BYTES]; + }; + + /* Helper function: Parse the config file */ + config_option *parsefile(char *path); + + public: /* List all configuration options here */ + /* Data set */ + const char *datafolder; + const char *ftrain_ex; + const char *ftrain_labels; + const char *fval_ex; + const char *fval_labels; + const char *weightsopenfile; + const char *weightsclassificationfile; + + int ntraining; + int nvalidation; + int nfeatures; + int nclasses; + + /* Neural Network */ + int nchannels; + int nlayers; + MyReal T; + int activation; + int network_type; + int openlayer_type; + MyReal weights_open_init; + MyReal weights_init; + MyReal weights_class_init; + + /* XBraid */ + int braid_cfactor0; + int braid_cfactor; + int braid_maxlevels; + int braid_mincoarse; + int braid_maxiter; + MyReal braid_abstol; + MyReal braid_abstoladj; + int braid_printlevel; + int braid_accesslevel; + int braid_setskip; + int braid_fmg; + int braid_nrelax; + int braid_nrelax0; + + /* Optimization */ + int batch_type; + int nbatch; + MyReal gamma_tik; + MyReal gamma_ddt; + MyReal gamma_class; + int stepsize_type; + MyReal stepsize_init; + int maxoptimiter; + MyReal gtol; + int ls_maxiter; + MyReal ls_factor; + int hessianapprox_type; + int lbfgs_stages; + int validationlevel; + + /* Constructor sets default values */ + Config(); + + /* Destructor */ + ~Config(); + + /* Reads the config options from file */ + int readFromFile(char *configfilename); + + /* Writes config options to the file (File must be open!) */ + int writeToFile(FILE *outfile); + + /* Returns a stepsize, depending on the selected stepsize type and current + * optimization iteration */ + MyReal getStepsize(int optimiter); }; diff --git a/include/dataset.hpp b/include/dataset.hpp index 686eca6..e2008c2 100644 --- a/include/dataset.hpp +++ b/include/dataset.hpp @@ -1,65 +1,58 @@ #include -#include "util.hpp" -#include "defs.hpp" -#include "config.hpp" #include +#include "config.hpp" +#include "defs.hpp" +#include "util.hpp" #pragma once class DataSet { + protected: + int nelements; /* Number of data elements */ + int nfeatures; /* Number of features per element */ + int nlabels; /* Number of different labels (i.e. classes) per element */ + MyReal **examples; /* Array of Feature vectors (dim: nelements x nfeatures) */ + MyReal **labels; /* Array of Label vectors (dim: nelements x nlabels) */ - protected: - - int nelements; /* Number of data elements */ - int nfeatures; /* Number of features per element */ - int nlabels; /* Number of different labels (i.e. 
classes) per element */ - - MyReal **examples; /* Array of Feature vectors (dim: nelements x nfeatures) */ - MyReal **labels; /* Array of Label vectors (dim: nelements x nlabels) */ - - int nbatch; /* Size of the batch */ - int *batchIDs; /* Array of batch indicees */ - - int MPIsize; /* Size of the global communicator */ - int MPIrank; /* Processors rank */ - - int* availIDs; /* Auxilliary: holding available batchIDs when generating a batch */ - int navail; /* Auxilliary: holding number of currently available batchIDs */ - - public: + int nbatch; /* Size of the batch */ + int *batchIDs; /* Array of batch indicees */ - /* Default constructor */ - DataSet(); + int MPIsize; /* Size of the global communicator */ + int MPIrank; /* Processors rank */ - /* Destructor */ - ~DataSet(); + int *availIDs; /* Auxilliary: holding available batchIDs when generating a + batch */ + int navail; /* Auxilliary: holding number of currently available batchIDs */ - void initialize(int nElements, - int nFeatures, - int nLabels, - int nBatch, - MPI_Comm Comm); + public: + /* Default constructor */ + DataSet(); + /* Destructor */ + ~DataSet(); - /* Return the batch size*/ - int getnBatch(); + void initialize(int nElements, int nFeatures, int nLabels, int nBatch, + MPI_Comm Comm); - /* Return the feature vector of a certain batchID. If not stored on this processor, return NULL */ - MyReal* getExample(int id); + /* Return the batch size*/ + int getnBatch(); - /* Return the label vector of a certain batchID. If not stored on this processor, return NULL */ - MyReal* getLabel(int id); + /* Return the feature vector of a certain batchID. If not stored on this + * processor, return NULL */ + MyReal *getExample(int id); - /* Read data from file */ - void readData(const char* datafolder, - const char* examplefile, - const char* labelfile); + /* Return the label vector of a certain batchID. If not stored on this + * processor, return NULL */ + MyReal *getLabel(int id); - /* Select the current batch from all available IDs, either deterministic or stochastic */ - void selectBatch(int batch_type, - MPI_Comm comm); + /* Read data from file */ + void readData(const char *datafolder, const char *examplefile, + const char *labelfile); + /* Select the current batch from all available IDs, either deterministic or + * stochastic */ + void selectBatch(int batch_type, MPI_Comm comm); - /* print current batch to screen */ - void printBatch(); + /* print current batch to screen */ + void printBatch(); }; \ No newline at end of file diff --git a/include/defs.hpp b/include/defs.hpp index d5e9b31..bdd6900 100644 --- a/include/defs.hpp +++ b/include/defs.hpp @@ -1,8 +1,9 @@ #include #pragma once -/* - * Switch between single (float) and double precision by un-/commenting the corresponding lines. +/* + * Switch between single (float) and double precision by un-/commenting the + * corresponding lines. 
*/ // typedef float MyReal; diff --git a/include/hessianApprox.hpp b/include/hessianApprox.hpp index bd1dce6..356bb61 100644 --- a/include/hessianApprox.hpp +++ b/include/hessianApprox.hpp @@ -1,114 +1,85 @@ #include -#include "linalg.hpp" #include "defs.hpp" +#include "linalg.hpp" #pragma once class HessianApprox { - - protected: - int dimN; /* Dimension of the gradient vector */ - MPI_Comm MPIcomm; /* MPI communicator for parallel L-BFGS updates */ - - public: - - HessianApprox(MPI_Comm comm); - virtual ~HessianApprox(); - - /** - * Compute the BFGS descent direction - */ - virtual void computeAscentDir(int k, - MyReal* gradient, - MyReal* ascentdir) = 0; - - /** - * Update the BFGS memory (like s, y, rho, H0...) - */ - virtual void updateMemory(int k, - MyReal* design, - MyReal* gradient) = 0; - + protected: + int dimN; /* Dimension of the gradient vector */ + MPI_Comm MPIcomm; /* MPI communicator for parallel L-BFGS updates */ + + public: + HessianApprox(MPI_Comm comm); + virtual ~HessianApprox(); + + /** + * Compute the BFGS descent direction + */ + virtual void computeAscentDir(int k, MyReal *gradient, MyReal *ascentdir) = 0; + + /** + * Update the BFGS memory (like s, y, rho, H0...) + */ + virtual void updateMemory(int k, MyReal *design, MyReal *gradient) = 0; }; - class L_BFGS : public HessianApprox { + protected: + int M; /* Length of the l-bfgs memory (stages) */ - protected: - int M; /* Length of the l-bfgs memory (stages) */ - - /* L-BFGS memory */ - MyReal** s; /* storing M (x_{k+1} - x_k) vectors */ - MyReal** y; /* storing M (\nabla f_{k+1} - \nabla f_k) vectors */ - MyReal* rho; /* storing M 1/y^Ts values */ - MyReal H0; /* Initial Hessian scaling factor */ - MyReal* design_old; /* Design at previous iteration */ - MyReal* gradient_old; /* Gradient at previous iteration */ + /* L-BFGS memory */ + MyReal **s; /* storing M (x_{k+1} - x_k) vectors */ + MyReal **y; /* storing M (\nabla f_{k+1} - \nabla f_k) vectors */ + MyReal *rho; /* storing M 1/y^Ts values */ + MyReal H0; /* Initial Hessian scaling factor */ + MyReal *design_old; /* Design at previous iteration */ + MyReal *gradient_old; /* Gradient at previous iteration */ - public: - L_BFGS(MPI_Comm comm, - int dimN, /* Local design dimension */ - int stage); - ~L_BFGS(); + public: + L_BFGS(MPI_Comm comm, int dimN, /* Local design dimension */ + int stage); + ~L_BFGS(); - void computeAscentDir(int k, - MyReal* gradient, - MyReal* ascentdir); + void computeAscentDir(int k, MyReal *gradient, MyReal *ascentdir); - void updateMemory(int k, - MyReal* design, - MyReal* gradient); - - + void updateMemory(int k, MyReal *design, MyReal *gradient); }; - class BFGS : public HessianApprox { + private: + MyReal *A; + MyReal *B; + MyReal *Hy; - private: - MyReal* A; - MyReal* B; - MyReal* Hy; - - protected: - MyReal* s; - MyReal* y; - MyReal* Hessian; /* Storing the Hessian approximation (flattened: dimN*dimN) */ - MyReal* design_old; /* Design at previous iteration */ - MyReal* gradient_old; /* Gradient at previous iteration */ - - public: - BFGS(MPI_Comm comm, int N); - ~BFGS(); - - void setIdentity(); - - void computeAscentDir(int k, - MyReal* gradient, - MyReal* ascentdir); - - void updateMemory(int k, - MyReal* design, - MyReal* gradient); -}; + protected: + MyReal *s; + MyReal *y; + MyReal + *Hessian; /* Storing the Hessian approximation (flattened: dimN*dimN) */ + MyReal *design_old; /* Design at previous iteration */ + MyReal *gradient_old; /* Gradient at previous iteration */ + public: + BFGS(MPI_Comm comm, int N); + ~BFGS(); + 
void setIdentity(); -/** - * No second order: Use Identity for Hessian Approximation - */ -class Identity : public HessianApprox{ + void computeAscentDir(int k, MyReal *gradient, MyReal *ascentdir); - public: - Identity(MPI_Comm comm, int N); - ~Identity(); + void updateMemory(int k, MyReal *design, MyReal *gradient); +}; - void computeAscentDir(int k, - MyReal* currgrad, - MyReal* ascentdir); +/** + * No second order: Use Identity for Hessian Approximation + */ +class Identity : public HessianApprox { + public: + Identity(MPI_Comm comm, int N); + ~Identity(); - void updateMemory(int k, - MyReal* design, - MyReal* gradient); + void computeAscentDir(int k, MyReal *currgrad, MyReal *ascentdir); + void updateMemory(int k, MyReal *design, MyReal *gradient); }; diff --git a/include/layer.hpp b/include/layer.hpp index b33bcca..bc0ea6c 100644 --- a/include/layer.hpp +++ b/include/layer.hpp @@ -1,194 +1,184 @@ -#include +#include #include +#include #include -#include -#include "linalg.hpp" -#include "defs.hpp" #include "config.hpp" - +#include "defs.hpp" +#include "linalg.hpp" #pragma once /** - * Abstract base class for the network layers + * Abstract base class for the network layers * Subclasses implement - * - applyFWD: Forward propagation of data - * - applyBWD: Backward propagation of data + * - applyFWD: Forward propagation of data + * - applyBWD: Backward propagation of data */ -class Layer -{ - protected: - int dim_In; /* Dimension of incoming data */ - int dim_Out; /* Dimension of outgoing data */ - int dim_Bias; /* Dimension of the bias vector */ - int nweights; /* Number of weights */ - int ndesign; /* Total number of design variables */ - - int nconv; - int csize; - - int index; /* Number of the layer */ - MyReal dt; /* Step size for Layer update */ - MyReal* weights; /* Weight matrix, flattened as a vector */ - MyReal* weights_bar; /* Derivative of the Weight matrix*/ - MyReal* bias; /* Bias */ - MyReal* bias_bar; /* Derivative of bias */ - MyReal gamma_tik; /* Parameter for Tikhonov regularization of weights and bias */ - MyReal gamma_ddt; /* Parameter for DDT regularization of weights and bias */ - int activ; /* Activaation function (enum element) */ - int type; /* Type of the layer (enum element) */ - - MyReal *update; /* Auxilliary for computing fwd update */ - MyReal *update_bar; /* Auxilliary for computing bwd update */ - - public: - - /* Available layer types */ - enum layertype{OPENZERO=0, OPENDENSE=1, DENSE=2, CLASSIFICATION=3, OPENCONV=4, OPENCONVMNIST=5, CONVOLUTION=6}; - - Layer(); - Layer(int idx, - int Type, - int dimI, - int dimO, - int dimB, - int dimW, // number of weights - MyReal deltaT, - int Activ, - MyReal gammatik, - MyReal gammaddt); - - virtual ~Layer(); - - /* Set time step size */ - void setDt(MyReal DT); - - /* Set design and gradient memory location */ - void setMemory(MyReal* design_memloc, - MyReal* gradient_memloc); - - /* Some Get..() functions */ - MyReal getDt(); - MyReal getGammaTik(); - MyReal getGammaDDT(); - int getActivation(); - int getType(); - - - /* Get pointer to the weights bias*/ - MyReal* getWeights(); - MyReal* getBias(); - - /* Get pointer to the weights bias bar */ - MyReal* getWeightsBar(); - MyReal* getBiasBar(); - - /* Get the dimensions */ - int getDimIn(); - int getDimOut(); - int getDimBias(); - int getnWeights(); - int getnDesign(); - - int getnConv(); - int getCSize(); - - /* Get the layer index (i.e. 
the time step) */ - int getIndex(); - - /* Prints to screen */ - void print_data(MyReal* data_Out); - - /* Activation function and derivative */ - MyReal activation(MyReal x); - MyReal dactivation(MyReal x); - - - /** - * Pack weights and bias into a buffer - */ - void packDesign(MyReal* buffer, - int size); - - /** - * Unpack weights and bias from a buffer - */ - void unpackDesign(MyReal* buffer); - - - /* Scales the weights by a factor and resets the gradient to zero. */ - void scaleDesign(MyReal factor); - - /** - * Sets the bar variables to zero - */ - void resetBar(); - - /** - * Evaluate Tikhonov Regularization - * Returns 1/2 * \|weights||^2 + 1/2 * \|bias\|^2 - */ - MyReal evalTikh(); - - /** - * Derivative of Tikhonov Regularization - */ - void evalTikh_diff(MyReal regul_bar); - - - /** - * Regularization for the time-derivative of the layer weights - */ - MyReal evalRegulDDT(Layer* layer_prev, - MyReal deltat); - - /** - * Derivative of ddt-regularization term - */ - void evalRegulDDT_diff(Layer* layer_prev, - Layer* layer_next, - MyReal deltat); - - - /** - * In opening layers: set pointer to the current example - */ - virtual void setExample(MyReal* example_ptr); - - /** - * In classification layers: set pointer to the current label - */ - virtual void setLabel(MyReal* label_ptr); - - /** - * Forward propagation of an example - * In/Out: vector holding the current propagated example - */ - virtual void applyFWD(MyReal* state) = 0; - - - /** - * Backward propagation of an example - * In: data - current example data - * In/Out: data_bar - adjoint example data that is to be propagated backwards - * In: compute_gradient - flag to determin if gradient should be computed (i.e. if weights_bar,bias_bar should be updated or not. In general, update is only done on the finest layer-grid.) 
- */ - virtual void applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient) = 0; - - /* ReLu Activation and derivative */ - MyReal ReLu_act(MyReal x); - MyReal dReLu_act(MyReal x); - - /* Smooth ReLu activation: Uses a quadratic approximation around zero (range: default 0.1) */ - MyReal SmoothReLu_act(MyReal x); - MyReal dSmoothReLu_act(MyReal x); - - /* tanh Activation and derivative */ - MyReal tanh_act(MyReal x); - MyReal dtanh_act(MyReal x); - +class Layer { + protected: + int dim_In; /* Dimension of incoming data */ + int dim_Out; /* Dimension of outgoing data */ + int dim_Bias; /* Dimension of the bias vector */ + int nweights; /* Number of weights */ + int ndesign; /* Total number of design variables */ + + int nconv; + int csize; + + int index; /* Number of the layer */ + MyReal dt; /* Step size for Layer update */ + MyReal *weights; /* Weight matrix, flattened as a vector */ + MyReal *weights_bar; /* Derivative of the Weight matrix*/ + MyReal *bias; /* Bias */ + MyReal *bias_bar; /* Derivative of bias */ + MyReal + gamma_tik; /* Parameter for Tikhonov regularization of weights and bias */ + MyReal gamma_ddt; /* Parameter for DDT regularization of weights and bias */ + int activ; /* Activaation function (enum element) */ + int type; /* Type of the layer (enum element) */ + + MyReal *update; /* Auxilliary for computing fwd update */ + MyReal *update_bar; /* Auxilliary for computing bwd update */ + + public: + /* Available layer types */ + enum layertype { + OPENZERO = 0, + OPENDENSE = 1, + DENSE = 2, + CLASSIFICATION = 3, + OPENCONV = 4, + OPENCONVMNIST = 5, + CONVOLUTION = 6 + }; + + Layer(); + Layer(int idx, int Type, int dimI, int dimO, int dimB, + int dimW, // number of weights + MyReal deltaT, int Activ, MyReal gammatik, MyReal gammaddt); + + virtual ~Layer(); + + /* Set time step size */ + void setDt(MyReal DT); + + /* Set design and gradient memory location. + * The design vector is allocated within the Network block. For each layer in the block, the local memory location within the network's design vector is passed here to the layer, and stored as *weights and *bias (and their derivatives weights_bar and bias_bar). */ + void setMemory(MyReal *design_memloc, MyReal *gradient_memloc); + + /* Some Get..() functions */ + MyReal getDt(); + MyReal getGammaTik(); + MyReal getGammaDDT(); + int getActivation(); + int getType(); + + /* Get pointer to the weights bias*/ + MyReal *getWeights(); + MyReal *getBias(); + + /* Get pointer to the weights bias bar */ + MyReal *getWeightsBar(); + MyReal *getBiasBar(); + + /* Get the dimensions */ + int getDimIn(); + int getDimOut(); + int getDimBias(); + int getnWeights(); + int getnDesign(); + + int getnConv(); + int getCSize(); + + /* Get the layers ID (i.e. the time step number) */ + int getIndex(); + + /* Prints to screen */ + void print_data(MyReal *data_Out); + + /* Applies the activation function and derivative */ + MyReal activation(MyReal x); + MyReal dactivation(MyReal x); + + /** + * Pack weights and bias into a buffer + */ + void packDesign(MyReal *buffer, int size); + + /** + * Unpack weights and bias from a buffer + */ + void unpackDesign(MyReal *buffer); + + /* Scales the weights by a factor and resets the gradient to zero. 
*/ + void scaleDesign(MyReal factor); + + /** + * Sets the bar variables to zero + */ + void resetBar(); + + /** + * Evaluate Tikhonov Regularization + * Returns 1/2 * \|weights||^2 + 1/2 * \|bias\|^2 + */ + MyReal evalTikh(); + + /** + * Derivative of Tikhonov Regularization + */ + void evalTikh_diff(MyReal regul_bar); + + /** + * Regularization for the time-derivative of the layer weights + */ + MyReal evalRegulDDT(Layer *layer_prev, MyReal deltat); + + /** + * Derivative of ddt-regularization term + */ + void evalRegulDDT_diff(Layer *layer_prev, Layer *layer_next, MyReal deltat); + + /** + * In opening layers: set pointer to the current example + */ + virtual void setExample(MyReal *example_ptr); + + /** + * In classification layers: set pointer to the current label + */ + virtual void setLabel(MyReal *label_ptr); + + /** + * Forward propagation of an example + * In/Out: vector holding the current propagated example + */ + virtual void applyFWD(MyReal *state) = 0; + + /** + * Backward propagation of an example + * In: data - current example data + * In/Out: data_bar - adjoint example data that is to be propagated backwards + * In: compute_gradient - flag to determin if gradient should be computed + * (i.e. if weights_bar,bias_bar should be updated or not. In general, update + * is only done on the finest layer-grid.) + */ + virtual void applyBWD(MyReal *state, MyReal *state_bar, + int compute_gradient) = 0; + + /* ReLu Activation and derivative */ + MyReal ReLu_act(MyReal x); + MyReal dReLu_act(MyReal x); + + /* Smooth ReLu activation: Uses a quadratic approximation around zero (range: + * default 0.1) */ + MyReal SmoothReLu_act(MyReal x); + MyReal dSmoothReLu_act(MyReal x); + + /* tanh Activation and derivative */ + MyReal tanh_act(MyReal x); + MyReal dtanh_act(MyReal x); }; /** @@ -197,226 +187,182 @@ class Layer * if not openlayer: requires dimI = dimO ! 
*/ class DenseLayer : public Layer { + public: + DenseLayer(int idx, int dimI, int dimO, MyReal deltaT, int activation, + MyReal gammatik, MyReal gammaddt); + ~DenseLayer(); - public: - DenseLayer(int idx, - int dimI, - int dimO, - MyReal deltaT, - int activation, - MyReal gammatik, - MyReal gammaddt); - ~DenseLayer(); - - void applyFWD(MyReal* state); - - void applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient); -}; + void applyFWD(MyReal *state); + void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient); +}; /** * Opening Layer using dense weight matrix K \in R^{nxn} * Layer transformation: y = sigma(W*y_ex + b) for examples y_ex \in \R^dimI */ class OpenDenseLayer : public DenseLayer { + protected: + MyReal *example; /* Pointer to the current example data */ - protected: - MyReal* example; /* Pointer to the current example data */ - - public: - OpenDenseLayer(int dimI, - int dimO, - int activation, - MyReal gammatik); - ~OpenDenseLayer(); + public: + OpenDenseLayer(int dimI, int dimO, int activation, MyReal gammatik); + ~OpenDenseLayer(); - void setExample(MyReal* example_ptr); + void setExample(MyReal *example_ptr); - void applyFWD(MyReal* state); + void applyFWD(MyReal *state); - void applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient); + void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient); }; - - /* * Opening layer that expands the data by zeros */ -class OpenExpandZero : public Layer -{ - protected: - MyReal* example; /* Pointer to the current example data */ - public: - OpenExpandZero(int dimI, - int dimO); - ~OpenExpandZero(); - - void setExample(MyReal* example_ptr); - - void applyFWD(MyReal* state); - - void applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient); -}; +class OpenExpandZero : public Layer { + protected: + MyReal *example; /* Pointer to the current example data */ + public: + OpenExpandZero(int dimI, int dimO); + ~OpenExpandZero(); + void setExample(MyReal *example_ptr); + + void applyFWD(MyReal *state); + + void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient); +}; /** * Classification layer */ -class ClassificationLayer : public Layer -{ - protected: - MyReal* label; /* Pointer to the current label vector */ - - MyReal* probability; /* vector of pedicted class probabilities */ - - public: - ClassificationLayer(int idx, - int dimI, - int dimO, - MyReal gammatik); - ~ClassificationLayer(); - - void setLabel(MyReal* label_ptr); - - void applyFWD(MyReal* state); - - void applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient); - - /** - * Evaluate the cross entropy function - */ - MyReal crossEntropy(MyReal *finalstate); - - /** - * Algorithmic derivative of evaluating cross entropy loss - */ - void crossEntropy_diff(MyReal *data_Out, - MyReal *data_Out_bar, - MyReal loss_bar); - - /** - * Compute the class probabilities - * return 1 if predicted class was correct, 0 else. 
- * out: *class_id_ptr holding the predicted class - */ - int prediction(MyReal* data_out, - int* class_id_ptr); - - /** - * Translate the data: - * Substracts the maximum value from all entries - */ - void normalize(MyReal* data); - - /** - * Algorithmic derivative of the normalize funciton - */ - void normalize_diff(MyReal* data, - MyReal* data_bar); - +class ClassificationLayer : public Layer { + protected: + MyReal *label; /* Pointer to the current label vector */ + + MyReal *probability; /* vector of pedicted class probabilities */ + + public: + ClassificationLayer(int idx, int dimI, int dimO, MyReal gammatik); + ~ClassificationLayer(); + + void setLabel(MyReal *label_ptr); + + void applyFWD(MyReal *state); + + void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient); + + /** + * Evaluate the cross entropy function + */ + MyReal crossEntropy(MyReal *finalstate); + + /** + * Algorithmic derivative of evaluating cross entropy loss + */ + void crossEntropy_diff(MyReal *data_Out, MyReal *data_Out_bar, + MyReal loss_bar); + + /** + * Compute the class probabilities + * return 1 if predicted class was correct, 0 else. + * out: *class_id_ptr holding the predicted class + */ + int prediction(MyReal *data_out, int *class_id_ptr); + + /** + * Translate the data: + * Substracts the maximum value from all entries + */ + void normalize(MyReal *data); + + /** + * Algorithmic derivative of the normalize funciton + */ + void normalize_diff(MyReal *data, MyReal *data_bar); }; - /** - * Layer using a convolution C of size csize X csize, - * with nconv total convolutions. + * Layer using a convolution C of size csize X csize, + * with nconv total convolutions. * Layer transformation: y = y + dt * sigma(W(C) y + b) * if not openlayer: requires dimI = dimO ! */ class ConvLayer : public Layer { - - int csize2; - int fcsize; - - int img_size; - int img_size_sqrt; - - public: - ConvLayer(int idx, - int dimI, - int dimO, - int csize_in, - int nconv_in, - MyReal deltaT, - int Activ, - MyReal Gammatik, - MyReal Gammaddt); - ~ConvLayer(); - - void applyFWD(MyReal* state); - - void applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient); - - inline MyReal apply_conv(MyReal* state, // state vector to apply convolution to - int output_conv, // output convolution - int j, // row index - int k); // column index - - inline MyReal apply_conv_trans(MyReal* state, // state vector to apply convolution to - int output_conv, // output convolution - int j, // row index - int k); // column index - - /** - * This method is designed to be used only in the applyBWD. It computes the - * derivative of the objective with respect to the weights. In particular - * if you objective is $g$ and your kernel operator has value tau at index - * a,b then - * - * weights_bar[magic_index] = d_tau [ g] = \sum_{image j,k} tau state_{j+a,k+b} * update_bar_{j,k} - * - * Note that we assume that update_bar is - * - * update_bar = dt * dactivation * state_bar - * - * Where state_bar _must_ be at the old time. Note that the adjoint variable - * state_bar carries withit all the information of the objective derivative. 
- * - * On exit this method modifies weights_bar - */ - inline MyReal updateWeightDerivative( - MyReal* state, // state vector - MyReal * update_bar, // combines derivative and adjoint info (see comments) - int output_conv, // output convolution - int j, // row index - int k); // column index + int csize2; + int fcsize; + + int img_size; + int img_size_sqrt; + + public: + ConvLayer(int idx, int dimI, int dimO, int csize_in, int nconv_in, + MyReal deltaT, int Activ, MyReal Gammatik, MyReal Gammaddt); + ~ConvLayer(); + + void applyFWD(MyReal *state); + + void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient); + + inline MyReal apply_conv( + MyReal *state, // state vector to apply convolution to + int output_conv, // output convolution + int j, // row index + int k); // column index + + inline MyReal apply_conv_trans( + MyReal *state, // state vector to apply convolution to + int output_conv, // output convolution + int j, // row index + int k); // column index + + /** + * This method is designed to be used only in the applyBWD. It computes the + * derivative of the objective with respect to the weights. In particular + * if you objective is $g$ and your kernel operator has value tau at index + * a,b then + * + * weights_bar[magic_index] = d_tau [ g] = \sum_{image j,k} tau + * state_{j+a,k+b} * update_bar_{j,k} + * + * Note that we assume that update_bar is + * + * update_bar = dt * dactivation * state_bar + * + * Where state_bar _must_ be at the old time. Note that the adjoint variable + * state_bar carries withit all the information of the objective derivative. + * + * On exit this method modifies weights_bar + */ + inline MyReal updateWeightDerivative( + MyReal *state, // state vector + MyReal + *update_bar, // combines derivative and adjoint info (see comments) + int output_conv, // output convolution + int j, // row index + int k); // column index }; - /** * Opening Layer for use with convolutional layers. Examples are replicated. * Layer transformation: y = ([I; I; ... I] y_ex) */ class OpenConvLayer : public Layer { + protected: + MyReal *example; /* Pointer to the current example data */ - protected: - MyReal* example; /* Pointer to the current example data */ + public: + OpenConvLayer(int dimI, int dimO); + ~OpenConvLayer(); - public: - OpenConvLayer(int dimI, - int dimO); - ~OpenConvLayer(); + void setExample(MyReal *example_ptr); - void setExample(MyReal* example_ptr); + void applyFWD(MyReal *state); - void applyFWD(MyReal* state); - - void applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient); + void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient); }; -/** +/** * Opening Layer for use with convolutional layers. Examples are replicated * and then have an activation function applied. 
* @@ -426,17 +372,11 @@ class OpenConvLayer : public Layer { */ class OpenConvLayerMNIST : public OpenConvLayer { + public: + OpenConvLayerMNIST(int dimI, int dimO); + ~OpenConvLayerMNIST(); - public: - OpenConvLayerMNIST(int dimI, - int dimO); - ~OpenConvLayerMNIST(); + void applyFWD(MyReal *state); - void applyFWD(MyReal* state); - - void applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient); + void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient); }; - - diff --git a/include/linalg.hpp b/include/linalg.hpp index f8f2a77..873d6bc 100644 --- a/include/linalg.hpp +++ b/include/linalg.hpp @@ -1,77 +1,55 @@ -#include #include -#include "defs.hpp" #include +#include +#include "defs.hpp" #pragma once - /** * Compute scalar product of two vectors xTy * In: dimension dimN * vectors x and y of dimemsion dimN * Out: returns xTy */ -MyReal vecdot(int dimN, - MyReal* x, - MyReal* y); - +MyReal vecdot(int dimN, MyReal *x, MyReal *y); /** - * Parallel dot-product xTy, invokes an MPI_Allreduce call + * Parallel dot-product xTy, invokes an MPI_Allreduce call * In: dimension dimN * vectors x and y of dimemsion dimN - * MPI communicator + * MPI communicator * Out: returns global xTy on all procs */ -MyReal vecdot_par(int dimN, - MyReal* x, - MyReal* y, - MPI_Comm comm); - +MyReal vecdot_par(int dimN, MyReal *x, MyReal *y, MPI_Comm comm); /** - * Return the maximum value of a vector + * Return the maximum value of a vector */ -MyReal vecmax(int dimN, - MyReal* x); - +MyReal vecmax(int dimN, MyReal *x); /** - * Return the index of the maximum entry of the vector + * Return the index of the maximum entry of the vector */ -int argvecmax(int dimN, - MyReal* x); - +int argvecmax(int dimN, MyReal *x); /** * Computes square of the l2-norm of x */ -MyReal vecnormsq(int dimN, - MyReal *x); +MyReal vecnormsq(int dimN, MyReal *x); /** * Parallel l2-norm computation, invokes an MPI_Allreduce x */ -MyReal vecnorm_par(int dimN, - MyReal *x, - MPI_Comm comm); - +MyReal vecnorm_par(int dimN, MyReal *x, MPI_Comm comm); /** - * Copy a vector u into u_copy + * Copy a vector u into u_copy */ -int vec_copy(int N, - MyReal* u, - MyReal* u_copy); - +int vec_copy(int N, MyReal *u, MyReal *u_copy); /** * Compute matrix x* y^T */ -void vecvecT(int N, - MyReal* x, - MyReal* y, - MyReal* XYT); +void vecvecT(int N, MyReal *x, MyReal *y, MyReal *XYT); /** * Compute Matrix-vector product Hx @@ -80,9 +58,4 @@ void vecvecT(int N, * vector x * Out: H*x will be stored in Hx */ -void matvec(int dimN, - MyReal* H, - MyReal* x, - MyReal* Hx); - - +void matvec(int dimN, MyReal *H, MyReal *x, MyReal *Hx); diff --git a/include/network.hpp b/include/network.hpp index a199586..2337263 100644 --- a/include/network.hpp +++ b/include/network.hpp @@ -1,142 +1,146 @@ -#include -#include "layer.hpp" -#include #include #include +#include #include +#include #include "config.hpp" -#include "util.hpp" #include "dataset.hpp" +#include "layer.hpp" +#include "util.hpp" #pragma once - -class Network -{ - protected: - int nlayers_global; /* Total number of Layers of the network */ - int nlayers_local; /* Number of Layers in this network block */ - - int nchannels; /* Width of the network */ - MyReal dt; /* Time step size */ - MyReal loss; /* Value of the loss function */ - MyReal accuracy; /* Accuracy of the network prediction (percentage of successfully predicted classes) */ - - int startlayerID; /* ID of the first layer on that processor */ - int endlayerID; /* ID of the last layer on that processor */ - - int ndesign_global; /* 
Global number of design vars */ - int ndesign_local; /* Number of design vars of this local network block */ - int ndesign_layermax; /* Max. number of design variables of all hidden layers */ - - MyReal* design; /* Local vector of design variables*/ - MyReal* gradient; /* Local Gradient */ - - Layer* openlayer; /* At first processor: openinglayer, else: NULL */ - Layer** layers; /* Array of hidden layers (includes classification layer at last processor */ - Layer* layer_left; /* Copy of last layer of left-neighbouring processor */ - Layer* layer_right; /* Copy of first layer of right-neighbouring processor */ - - MPI_Comm comm; /* MPI communicator */ - - public: - - Network(); - - ~Network(); - - void createNetworkBlock(int StartLayerID, - int EndLayerID, - Config* config, - MPI_Comm Comm); - - /* Get number of channels */ - int getnChannels(); - - /* Get global number of layers */ - int getnLayersGlobal(); - - /* Get initial time step size */ - MyReal getDT(); - - /* Get local storage index of the a layer */ - int getLocalID(int ilayer); - - /* Return value of the loss function */ - MyReal getLoss(); - - /* Return accuracy value */ - MyReal getAccuracy(); - - /* Return a pointer to the design vector */ - MyReal* getDesign(); - - /* Return a pointer to the gradient vector */ - MyReal* getGradient(); - - /* Get ID of first and last layer on this processor */ - int getStartLayerID(); - int getEndLayerID(); - - /** - * Return number of design variables (local on this processor or global) */ - int getnDesignLocal(); - int getnDesignGlobal(); - - /** - * Compute max. number of layer's ndesign on this processor - * excluding opening and classification layer - */ - int computeLayermax(); - - /* Return ndesign_layermax */ - int getnDesignLayermax(); - - /* Return MPI communicator */ - MPI_Comm getComm(); - - /** - * Get the layer at a certain layer index, i.e. a certain time step - * Returns NULL, if this layer is not stored on this processor - */ - Layer* getLayer(int layerindex); - - - /* - * Set an initial guess on the network design: - * Random initialization, scaled by given factors - * If set, reads in opening and classification weights from file - */ - void setInitialDesign(Config *config); - - - Layer* createLayer(int index, - Config *config); - - - /* Replace the layer with one that is received from the left neighbouring processor */ - void MPI_CommunicateNeighbours(MPI_Comm comm); - - /** - * Applies the classification and evaluates loss/accuracy - */ - void evalClassification(DataSet* data, - MyReal** state, - int output); - - /** - * On classification layer: derivative of evalClassification - */ - void evalClassification_diff(DataSet* data, - MyReal** primalstate, - MyReal** adjointstate, - int compute_gradient); - - - /** - * Update the network design parameters: new_design = old_design + stepsize * direction - */ - void updateDesign(MyReal stepsize, - MyReal *direction, - MPI_Comm comm); +/* + * The Network class logically connects the layers. + * Each processor instantiates one object of this class containing + * a sub-block of layers from [startlayerID, endlayerID], where those ID's are anything between -1 (being the opening layer) and nlayers_global-1 (being the classification layer). The distribution for the actual startlayerIDs and endlayerIDs at each processor come from Xbraid. + * All layers are stored in the vector **layer, except for the opening layer, which is in *openlayer. + * Each network block contains (and allocates!) 
the *design and *gradient vector, which are the vectorized weights and biases at each layer (see createNetworkBlock). + */ +class Network { + protected: + int nlayers_global; /* Total number of Layers of the network */ + int nlayers_local; /* Number of Layers in this network block */ + + int nchannels; /* Width of the network */ + MyReal dt; /* Time step size (distance between two layers).*/ + MyReal loss; /* Value of the loss function */ + MyReal accuracy; /* Accuracy of the network prediction (percentage of + successfully predicted classes) */ + + int startlayerID; /* ID of the first layer on that processor */ + int endlayerID; /* ID of the last layer on that processor */ + + int ndesign_global; /* Global number of design vars */ + int ndesign_local; /* Number of design vars of this local network block */ + int ndesign_layermax; /* Max. number of design variables of all hidden layers + */ + + MyReal *design; /* Local vector of design variables*/ + MyReal *gradient; /* Local Gradient */ + + Layer *openlayer; /* At first processor: openinglayer, else: NULL */ + Layer **layers; /* Array of hidden layers (includes classification layer at + last processor */ + Layer *layer_left; /* Copy of last layer of left-neighbouring processor */ + Layer *layer_right; /* Copy of first layer of right-neighbouring processor */ + + MPI_Comm comm; /* MPI communicator */ + + public: + Network(); + + ~Network(); + + /* + * This calls the layer's constructor for all layers in [StartlayerID, EndLayerID]. + * + * */ + void createNetworkBlock(int StartLayerID, int EndLayerID, Config *config, + MPI_Comm Comm); + + /* Get number of channels */ + int getnChannels(); + + /* Get global number of layers */ + int getnLayersGlobal(); + + /* Get initial time step size */ + MyReal getDT(); + + /* Get local storage index of the a layer */ + int getLocalID(int ilayer); + + /* Return value of the loss function */ + MyReal getLoss(); + + /* Return accuracy value */ + MyReal getAccuracy(); + + /* Return a pointer to the design vector */ + MyReal *getDesign(); + + /* Return a pointer to the gradient vector */ + MyReal *getGradient(); + + /* Get ID of first and last layer on this processor */ + int getStartLayerID(); + int getEndLayerID(); + + /** + * Return number of design variables (local on this processor or global) */ + int getnDesignLocal(); + int getnDesignGlobal(); + + /** + * Compute max. number of layer's ndesign on this processor + * excluding opening and classification layer + */ + int computeLayermax(); + + /* Return ndesign_layermax */ + int getnDesignLayermax(); + + /* Return MPI communicator */ + MPI_Comm getComm(); + + /** + * Get the layer at a certain layer index, i.e. a certain time step + * Returns NULL, if this layer is not stored on this processor + */ + Layer *getLayer(int layerindex); + + /* + * Set an initial guess on the network design: + * Random initialization, scaled by given factors + * If set, reads in opening and classification weights from file + */ + void setInitialDesign(Config *config); + + /* Helper function for createNetworkBlock. It basically checks what kind of layer is required at this index and calls the corresponding layer constructor. 
+ */ + Layer *createLayer(int index, Config *config); + + /* Replace the layer with one that is received from the left neighbouring + * processor */ + void MPI_CommunicateNeighbours(MPI_Comm comm); + + /** + * Applies the classification and evaluates loss/accuracy + * This routine should only be called at the last processor, which contains the classification layer. + * Maybe this one should not be inside the Network class? Don't know. + */ + void evalClassification(DataSet *data, MyReal **state, int output); + + /** + * On classification layer: derivative of evalClassification + */ + void evalClassification_diff(DataSet *data, MyReal **primalstate, + MyReal **adjointstate, int compute_gradient); + + /** + * Update the network design parameters: new_design = old_design + stepsize * + * direction + * I guess this might rather be a routine for the optimizer... + */ + void updateDesign(MyReal stepsize, MyReal *direction, MPI_Comm comm); }; - diff --git a/include/util.hpp b/include/util.hpp index d0cdf1b..2d1793c 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -1,45 +1,32 @@ +#include #include #include -#include #include "defs.hpp" #pragma once /** - * Read data from file + * Read data from file */ -void read_matrix(char* filename, - MyReal** var, - int dimx, - int dimy); +void read_matrix(char *filename, MyReal **var, int dimx, int dimy); /** - * Read data from file + * Read data from file */ -void read_vector(char* filename, - MyReal* var, - int dimy); +void read_vector(char *filename, MyReal *var, int dimy); /** * Write data to file */ -void write_vector(char *filename, - MyReal *var, - int dimN); - +void write_vector(char *filename, MyReal *var, int dimN); /** * Gather a local vector of size localsendcount into global recvbuffer at root */ -void MPI_GatherVector(MyReal* sendbuffer, - int localsendcount, - MyReal* recvbuffer, - int rootprocessID, - MPI_Comm comm); +void MPI_GatherVector(MyReal *sendbuffer, int localsendcount, + MyReal *recvbuffer, int rootprocessID, MPI_Comm comm); /** - * Scatter parts of a global vector on root to local vectors on each processor (size localrecvsize) + * Scatter parts of a global vector on root to local vectors on each processor + * (size localrecvsize) */ -void MPI_ScatterVector(MyReal* sendbuffer, - MyReal* recvbuffer, - int localrecvcount, - int rootprocessID, - MPI_Comm comm); +void MPI_ScatterVector(MyReal *sendbuffer, MyReal *recvbuffer, + int localrecvcount, int rootprocessID, MPI_Comm comm); diff --git a/src/braid_wrapper.cpp b/src/braid_wrapper.cpp index 225823d..6bc8fb4 100644 --- a/src/braid_wrapper.cpp +++ b/src/braid_wrapper.cpp @@ -1,819 +1,792 @@ +// Copyright +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Underlying paper: +// +// Layer-Parallel Training of Deep Residual Neural Networks +// S. Guenther, L. Ruthotto, J.B. Schroder, E.C. Czr, and N.R. 
Gauger +// +// Download: https://arxiv.org/pdf/1812.04352.pdf +// #include "braid_wrapper.hpp" /* ========================================================= */ -myBraidVector::myBraidVector(int nChannels, - int nBatch) -{ - nchannels = nChannels; - nbatch = nBatch; - - - state = NULL; - layer = NULL; - sendflag = -1.0; - - /* Allocate the state vector */ - state = new MyReal*[nbatch]; - for (int iex = 0; iex < nbatch; iex++) - { - state[iex] = new MyReal[nchannels]; - for (int ic = 0; ic < nchannels; ic++) - { - state[iex][ic] = 0.0; - } +myBraidVector::myBraidVector(int nChannels, int nBatch) { + nchannels = nChannels; + nbatch = nBatch; + + state = NULL; + layer = NULL; + sendflag = -1.0; + + /* Allocate the state vector */ + state = new MyReal *[nbatch]; + for (int iex = 0; iex < nbatch; iex++) { + state[iex] = new MyReal[nchannels]; + for (int ic = 0; ic < nchannels; ic++) { + state[iex][ic] = 0.0; } + } } -myBraidVector::~myBraidVector() -{ - /* Deallocate the state vector */ - for (int iex = 0; iex < nbatch; iex++) - { - delete [] state[iex]; - } - delete [] state; - state = NULL; +myBraidVector::~myBraidVector() { + /* Deallocate the state vector */ + for (int iex = 0; iex < nbatch; iex++) { + delete[] state[iex]; + } + delete[] state; + state = NULL; } +int myBraidVector::getnChannels() { return nchannels; } -int myBraidVector::getnChannels() { return nchannels; } +int myBraidVector::getnBatch() { return nbatch; } -int myBraidVector::getnBatch() { return nbatch; } +MyReal *myBraidVector::getState(int exampleID) { return state[exampleID]; } -MyReal* myBraidVector::getState(int exampleID) { return state[exampleID]; } +MyReal **myBraidVector::getState() { return state; } -MyReal** myBraidVector::getState() { return state; } - -Layer* myBraidVector::getLayer() { return layer; } -void myBraidVector::setLayer(Layer* layerptr) { layer = layerptr; } - -MyReal myBraidVector::getSendflag() { return sendflag; } -void myBraidVector::setSendflag(MyReal value) { sendflag = value; } +Layer *myBraidVector::getLayer() { return layer; } +void myBraidVector::setLayer(Layer *layerptr) { layer = layerptr; } +MyReal myBraidVector::getSendflag() { return sendflag; } +void myBraidVector::setSendflag(MyReal value) { sendflag = value; } /* ========================================================= */ /* ========================================================= */ /* ========================================================= */ -myBraidApp::myBraidApp(DataSet* Data, - Network* Network, - Config* config, - MPI_Comm comm) : BraidApp(comm, 0.0, config->T, config->nlayers-2) -{ - MPI_Comm_rank(comm, &myid); - network = Network; - data = Data; - objective = 0.0; - - /* Initialize XBraid core */ - core = new BraidCore(comm, this); - - /* Set braid options */ - core->SetMaxLevels(config->braid_maxlevels); - core->SetMinCoarse(config->braid_mincoarse); - core->SetPrintLevel(config->braid_printlevel); - core->SetCFactor(0, config->braid_cfactor0); - core->SetCFactor(-1, config->braid_cfactor); - core->SetAccessLevel(config->braid_accesslevel); - core->SetMaxIter(config->braid_maxiter); - core->SetSkip(config->braid_setskip); - if (config->braid_fmg){ - core->SetFMG(); - } - core->SetNRelax(-1, config->braid_nrelax); - core->SetNRelax( 0, config->braid_nrelax0); - core->SetAbsTol(config->braid_abstol); - +myBraidApp::myBraidApp(DataSet *Data, Network *Network, Config *config, + MPI_Comm comm) + : BraidApp(comm, 0.0, config->T, config->nlayers - 2) { + MPI_Comm_rank(comm, &myid); + network = Network; + data = Data; + 
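  /* Note on the time grid: the BraidApp base constructor above sets up the
     XBraid time domain [0, T] with (nlayers - 2) time steps, so one XBraid
     time step corresponds to one hidden-layer transition of the residual
     network; the opening layer is applied in Init()/SetInitialCondition()
     and the classification layer is evaluated in EvaluateObjective(). */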
objective = 0.0; + + /* Initialize XBraid core */ + core = new BraidCore(comm, this); + + /* Set braid options */ + core->SetMaxLevels(config->braid_maxlevels); + core->SetMinCoarse(config->braid_mincoarse); + core->SetPrintLevel(config->braid_printlevel); + core->SetCFactor(0, config->braid_cfactor0); + core->SetCFactor(-1, config->braid_cfactor); + core->SetAccessLevel(config->braid_accesslevel); + core->SetMaxIter(config->braid_maxiter); + core->SetSkip(config->braid_setskip); + if (config->braid_fmg) { + core->SetFMG(); + } + core->SetNRelax(-1, config->braid_nrelax); + core->SetNRelax(0, config->braid_nrelax0); + core->SetAbsTol(config->braid_abstol); } -myBraidApp::~myBraidApp() -{ - /* Delete the core, if drive() has been called */ - if ( core->GetWarmRestart() ) delete core; +myBraidApp::~myBraidApp() { + /* Delete the core, if drive() has been called */ + if (core->GetWarmRestart()) delete core; } MyReal myBraidApp::getObjective() { return objective; } -BraidCore* myBraidApp::getCore() { return core; } - -void myBraidApp::GetGridDistribution(int *ilower_ptr, - int *iupper_ptr) -{ - core->GetDistribution(ilower_ptr, iupper_ptr); -} +BraidCore *myBraidApp::getCore() { return core; } +void myBraidApp::GetGridDistribution(int *ilower_ptr, int *iupper_ptr) { + core->GetDistribution(ilower_ptr, iupper_ptr); +} -braid_Int myBraidApp::GetTimeStepIndex(MyReal t) -{ - /* Round to the closes integer */ - int ts = round(t / network->getDT()) ; - return ts; +braid_Int myBraidApp::GetTimeStepIndex(MyReal t) { + /* Round to the closes integer */ + int ts = round(t / network->getDT()); + return ts; } -braid_Int myBraidApp::Step(braid_Vector u_, - braid_Vector ustop_, - braid_Vector fstop_, - BraidStepStatus &pstatus) -{ - int ts_stop; - MyReal tstart, tstop; - MyReal deltaT; +braid_Int myBraidApp::Step(braid_Vector u_, braid_Vector ustop_, + braid_Vector fstop_, BraidStepStatus &pstatus) { + int ts_stop; + MyReal tstart, tstop; + MyReal deltaT; - myBraidVector *u = (myBraidVector*) u_; - int nbatch = data->getnBatch(); + myBraidVector *u = (myBraidVector *)u_; + int nbatch = data->getnBatch(); - /* Get the time-step size and current time index*/ - pstatus.GetTstartTstop(&tstart, &tstop); - ts_stop = GetTimeStepIndex(tstop); - deltaT = tstop - tstart; + /* Get the time-step size and current time index*/ + pstatus.GetTstartTstop(&tstart, &tstop); + ts_stop = GetTimeStepIndex(tstop); + deltaT = tstop - tstart; - /* Set time step size */ - u->getLayer()->setDt(deltaT); + /* Set time step size */ + u->getLayer()->setDt(deltaT); - // printf("%d: step %d,%f -> %d, %f layer %d using %1.14e state %1.14e, %d\n", app->myid, tstart, ts_stop, tstop, u->layer->getIndex(), u->layer->getWeights()[3], u->state[1][1], u->layer->getnDesign()); + // printf("%d: step %d,%f -> %d, %f layer %d using %1.14e state %1.14e, %d\n", + // app->myid, tstart, ts_stop, tstop, u->layer->getIndex(), + // u->layer->getWeights()[3], u->state[1][1], u->layer->getnDesign()); - /* apply the layer for all examples */ - for (int iex = 0; iex < nbatch; iex++) - { - /* Apply the layer */ - u->getLayer()->applyFWD(u->getState(iex)); - } + /* apply the layer for all examples */ + for (int iex = 0; iex < nbatch; iex++) { + /* Apply the layer */ + u->getLayer()->applyFWD(u->getState(iex)); + } + /* Free the layer, if it has just been send to this processor */ + if (u->getSendflag() > 0.0) { + delete[] u->getLayer()->getWeights(); + delete[] u->getLayer()->getWeightsBar(); + delete u->getLayer(); + } + u->setSendflag(-1.0); - /* Free the layer, 
if it has just been send to this processor */ - if (u->getSendflag() > 0.0) - { - delete [] u->getLayer()->getWeights(); - delete [] u->getLayer()->getWeightsBar(); - delete u->getLayer(); - } - u->setSendflag(-1.0); + /* Move the layer pointer of u forward to that of tstop */ + u->setLayer(network->getLayer(ts_stop)); - /* Move the layer pointer of u forward to that of tstop */ - u->setLayer(network->getLayer(ts_stop)); + /* no refinement */ + pstatus.SetRFactor(1); + return 0; +} - /* no refinement */ - pstatus.SetRFactor(1); +/* Compute residual: Does nothing. */ +braid_Int myBraidApp::Residual(braid_Vector u_, braid_Vector r_, + BraidStepStatus &pstatus) { + printf("\n\n I SHOUD NOT BE CALLED... I AM NOT IMPLEMENTED!\n\n"); - return 0; -} + return 0; +} -/* Compute residual: Does nothing. */ -braid_Int myBraidApp::Residual(braid_Vector u_, - braid_Vector r_, - BraidStepStatus &pstatus) -{ - printf("\n\n I SHOUD NOT BE CALLED... I AM NOT IMPLEMENTED!\n\n"); +braid_Int myBraidApp::Clone(braid_Vector u_, braid_Vector *v_ptr) { + myBraidVector *u = (myBraidVector *)u_; - return 0; -} + int nchannels = u->getnChannels(); + int nbatch = u->getnBatch(); -braid_Int myBraidApp::Clone(braid_Vector u_, - braid_Vector *v_ptr) -{ - myBraidVector *u = (myBraidVector*) u_; + /* Allocate a new vector */ + myBraidVector *v = new myBraidVector(nchannels, nbatch); - int nchannels = u->getnChannels(); - int nbatch = u->getnBatch(); + /* Copy the values */ + for (int iex = 0; iex < nbatch; iex++) { + for (int ic = 0; ic < nchannels; ic++) { + v->getState(iex)[ic] = u->getState(iex)[ic]; + } + } + v->setLayer(u->getLayer()); + v->setSendflag(u->getSendflag()); - /* Allocate a new vector */ - myBraidVector* v = new myBraidVector(nchannels, nbatch); + /* Set the return pointer */ + *v_ptr = (braid_Vector)v; - /* Copy the values */ - for (int iex = 0; iex < nbatch; iex++) - { - for (int ic = 0; ic < nchannels; ic++) - { - v->getState(iex)[ic] = u->getState(iex)[ic]; - } - } - v->setLayer(u->getLayer()); - v->setSendflag(u->getSendflag()); - - /* Set the return pointer */ - *v_ptr = (braid_Vector) v; - - return 0; + return 0; } -braid_Int myBraidApp::Init(braid_Real t, - braid_Vector *u_ptr) -{ - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); +braid_Int myBraidApp::Init(braid_Real t, braid_Vector *u_ptr) { + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); - myBraidVector* u = new myBraidVector(nchannels, nbatch); + myBraidVector *u = new myBraidVector(nchannels, nbatch); - /* Apply the opening layer */ - if (t == 0) - { - Layer* openlayer = network->getLayer(-1); - // printf("%d: Init %f: layer %d using %1.14e state %1.14e, %d\n", app->myid, t, openlayer->getIndex(), openlayer->getWeights()[3], u->state[1][1], openlayer->getnDesign()); - for (int iex = 0; iex < nbatch; iex++) - { - /* set example */ - openlayer->setExample(data->getExample(iex)); - - /* Apply the layer */ - openlayer->applyFWD(u->getState(iex)); - } - } - - /* Set the layer pointer */ - if (t >=0 ) // this should always be the case... 
- { - int ilayer = GetTimeStepIndex(t); - u->setLayer(network->getLayer(ilayer)); + /* Apply the opening layer */ + if (t == 0) { + Layer *openlayer = network->getLayer(-1); + // printf("%d: Init %f: layer %d using %1.14e state %1.14e, %d\n", + // app->myid, t, openlayer->getIndex(), openlayer->getWeights()[3], + // u->state[1][1], openlayer->getnDesign()); + for (int iex = 0; iex < nbatch; iex++) { + /* set example */ + openlayer->setExample(data->getExample(iex)); + + /* Apply the layer */ + openlayer->applyFWD(u->getState(iex)); } + } - /* Return the pointer */ - *u_ptr = (braid_Vector) u; + /* Set the layer pointer */ + if (t >= 0) // this should always be the case... + { + int ilayer = GetTimeStepIndex(t); + u->setLayer(network->getLayer(ilayer)); + } - return 0; -} + /* Return the pointer */ + *u_ptr = (braid_Vector)u; -braid_Int myBraidApp::Free(braid_Vector u_) -{ - myBraidVector *u = (myBraidVector*) u_; - delete u; - return 0; + return 0; } -braid_Int myBraidApp::Sum(braid_Real alpha, - braid_Vector x_, - braid_Real beta, - braid_Vector y_) -{ - myBraidVector *x = (myBraidVector*) x_; - myBraidVector *y = (myBraidVector*) y_; +braid_Int myBraidApp::Free(braid_Vector u_) { + myBraidVector *u = (myBraidVector *)u_; + delete u; + return 0; +} - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); +braid_Int myBraidApp::Sum(braid_Real alpha, braid_Vector x_, braid_Real beta, + braid_Vector y_) { + myBraidVector *x = (myBraidVector *)x_; + myBraidVector *y = (myBraidVector *)y_; - for (int iex = 0; iex < nbatch; iex++) - { - for (int ic = 0; ic < nchannels; ic++) - { - y->getState(iex)[ic] = alpha*(x->getState(iex)[ic]) + beta*(y->getState(iex)[ic]); - } + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); + + for (int iex = 0; iex < nbatch; iex++) { + for (int ic = 0; ic < nchannels; ic++) { + y->getState(iex)[ic] = + alpha * (x->getState(iex)[ic]) + beta * (y->getState(iex)[ic]); } + } - return 0; + return 0; } - - -braid_Int myBraidApp::SpatialNorm(braid_Vector u_, - braid_Real *norm_ptr) -{ - myBraidVector *u = (myBraidVector*) u_; - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); - - MyReal dot = 0.0; - for (int iex = 0; iex < nbatch; iex++) - { - dot += vecdot(nchannels, u->getState(iex), u->getState(iex)); - } - *norm_ptr = sqrt(dot) / nbatch; +braid_Int myBraidApp::SpatialNorm(braid_Vector u_, braid_Real *norm_ptr) { + myBraidVector *u = (myBraidVector *)u_; + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); + MyReal dot = 0.0; + for (int iex = 0; iex < nbatch; iex++) { + dot += vecdot(nchannels, u->getState(iex), u->getState(iex)); + } + *norm_ptr = sqrt(dot) / nbatch; - return 0; + return 0; } -braid_Int myBraidApp::Access(braid_Vector u_, - BraidAccessStatus &astatus) -{ - printf("my_Access: To be implemented...\n"); +braid_Int myBraidApp::Access(braid_Vector u_, BraidAccessStatus &astatus) { + printf("my_Access: To be implemented...\n"); - return 0; + return 0; } -braid_Int myBraidApp::BufSize(braid_Int *size_ptr, - BraidBufferStatus &bstatus) -{ - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); +braid_Int myBraidApp::BufSize(braid_Int *size_ptr, BraidBufferStatus &bstatus) { + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); - /* Gather number of variables */ - int nuvector = nchannels*nbatch; - int nlayerinfo = 12; - int nlayerdesign = network->getnDesignLayermax(); + /* Gather number of variables */ + int nuvector = 
nchannels * nbatch; + int nlayerinfo = 12; + int nlayerdesign = network->getnDesignLayermax(); - /* Set the size */ - *size_ptr = (nuvector + nlayerinfo + nlayerdesign) * sizeof(MyReal); + /* Set the size */ + *size_ptr = (nuvector + nlayerinfo + nlayerdesign) * sizeof(MyReal); - return 0; + return 0; } -braid_Int myBraidApp::BufPack(braid_Vector u_, - void *buffer, - BraidBufferStatus &bstatus) -{ - int size; - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); - MyReal *dbuffer = (MyReal*) buffer; - myBraidVector *u = (myBraidVector*) u_; - - /* Store network state */ - int idx = 0; - for (int iex = 0; iex < nbatch; iex++) - { - for (int ic = 0; ic < nchannels; ic++) - { - dbuffer[idx] = u->getState(iex)[ic]; - idx++; - } +braid_Int myBraidApp::BufPack(braid_Vector u_, void *buffer, + BraidBufferStatus &bstatus) { + int size; + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); + MyReal *dbuffer = (MyReal *)buffer; + myBraidVector *u = (myBraidVector *)u_; + + /* Store network state */ + int idx = 0; + for (int iex = 0; iex < nbatch; iex++) { + for (int ic = 0; ic < nchannels; ic++) { + dbuffer[idx] = u->getState(iex)[ic]; + idx++; } - size = nchannels*nbatch*sizeof(MyReal); - - int nweights = u->getLayer()->getnWeights(); - int nbias = u->getLayer()->getDimBias(); - - dbuffer[idx] = u->getLayer()->getType(); idx++; - dbuffer[idx] = u->getLayer()->getIndex(); idx++; - dbuffer[idx] = u->getLayer()->getDimIn(); idx++; - dbuffer[idx] = u->getLayer()->getDimOut(); idx++; - dbuffer[idx] = u->getLayer()->getDimBias(); idx++; - dbuffer[idx] = u->getLayer()->getnWeights(); idx++; - dbuffer[idx] = u->getLayer()->getActivation(); idx++; - dbuffer[idx] = u->getLayer()->getnDesign(); idx++; - dbuffer[idx] = u->getLayer()->getGammaTik(); idx++; - dbuffer[idx] = u->getLayer()->getGammaDDT(); idx++; - dbuffer[idx] = u->getLayer()->getnConv(); idx++; - dbuffer[idx] = u->getLayer()->getCSize(); idx++; - for (int i = 0; i < nweights; i++) - { - dbuffer[idx] = u->getLayer()->getWeights()[i]; idx++; - // dbuffer[idx] = u->layer->getWeightsBar()[i]; idx++; - } - for (int i = 0; i < nbias; i++) - { - dbuffer[idx] = u->getLayer()->getBias()[i]; idx++; - // dbuffer[idx] = u->layer->getBiasBar()[i]; idx++; - } - size += (12 + (nweights+nbias))*sizeof(MyReal); - - bstatus.SetSize(size); - - return 0; + } + size = nchannels * nbatch * sizeof(MyReal); + + int nweights = u->getLayer()->getnWeights(); + int nbias = u->getLayer()->getDimBias(); + + dbuffer[idx] = u->getLayer()->getType(); + idx++; + dbuffer[idx] = u->getLayer()->getIndex(); + idx++; + dbuffer[idx] = u->getLayer()->getDimIn(); + idx++; + dbuffer[idx] = u->getLayer()->getDimOut(); + idx++; + dbuffer[idx] = u->getLayer()->getDimBias(); + idx++; + dbuffer[idx] = u->getLayer()->getnWeights(); + idx++; + dbuffer[idx] = u->getLayer()->getActivation(); + idx++; + dbuffer[idx] = u->getLayer()->getnDesign(); + idx++; + dbuffer[idx] = u->getLayer()->getGammaTik(); + idx++; + dbuffer[idx] = u->getLayer()->getGammaDDT(); + idx++; + dbuffer[idx] = u->getLayer()->getnConv(); + idx++; + dbuffer[idx] = u->getLayer()->getCSize(); + idx++; + for (int i = 0; i < nweights; i++) { + dbuffer[idx] = u->getLayer()->getWeights()[i]; + idx++; + // dbuffer[idx] = u->layer->getWeightsBar()[i]; idx++; + } + for (int i = 0; i < nbias; i++) { + dbuffer[idx] = u->getLayer()->getBias()[i]; + idx++; + // dbuffer[idx] = u->layer->getBiasBar()[i]; idx++; + } + size += (12 + (nweights + nbias)) * sizeof(MyReal); + + bstatus.SetSize(size); + 
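  /* For reference, the buffer packed above (and unpacked again in BufUnpack
     below) has the layout
       [ state : nbatch * nchannels values ]
       [ layer meta data : 12 values (type, index, dimIn, dimOut, dimBias,
         nWeights, activation, nDesign, gammaTik, gammaDDT, nConv, cSize) ]
       [ layer weights : nWeights values ]
       [ layer bias : dimBias values ],
     all stored as MyReal. BufSize reserves space for the largest possible
     layer via getnDesignLayermax(), so the packed size set here can be
     smaller than the allocated buffer. */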
+ return 0; } +braid_Int myBraidApp::BufUnpack(void *buffer, braid_Vector *u_ptr, + BraidBufferStatus &bstatus) { + Layer *tmplayer = 0; + MyReal *dbuffer = (MyReal *)buffer; + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); -braid_Int myBraidApp::BufUnpack(void *buffer, - braid_Vector *u_ptr, - BraidBufferStatus &bstatus) -{ - Layer *tmplayer = 0; - MyReal *dbuffer = (MyReal*) buffer; - - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); - - /* Allocate a new vector */ - myBraidVector* u = new myBraidVector(nchannels, nbatch); - - /* Unpack the buffer */ - int idx = 0; - for (int iex = 0; iex < nbatch; iex++) - { - for (int ic = 0; ic < nchannels; ic++) - { - u->getState(iex)[ic] = dbuffer[idx]; - idx++; - } - } + /* Allocate a new vector */ + myBraidVector *u = new myBraidVector(nchannels, nbatch); - /* Receive and initialize a layer. Set the sendflag */ - int layertype = dbuffer[idx]; idx++; - int index = dbuffer[idx]; idx++; - int dimIn = dbuffer[idx]; idx++; - int dimOut = dbuffer[idx]; idx++; - int dimBias = dbuffer[idx]; idx++; - int nweights = dbuffer[idx]; idx++; - int activ = dbuffer[idx]; idx++; - int nDesign = dbuffer[idx]; idx++; - int gammatik = dbuffer[idx]; idx++; - int gammaddt = dbuffer[idx]; idx++; - int nconv = dbuffer[idx]; idx++; - int csize = dbuffer[idx]; idx++; - - /* layertype decides on which layer should be created */ - switch (layertype) - { - case Layer::OPENZERO: - tmplayer = new OpenExpandZero(dimIn, dimOut); - break; - case Layer::OPENDENSE: - tmplayer = new OpenDenseLayer(dimIn, dimOut, activ, gammatik); - break; - case Layer::DENSE: - tmplayer = new DenseLayer(index, dimIn, dimOut, 1.0, activ, gammatik, gammaddt); - break; - case Layer::CLASSIFICATION: - tmplayer = new ClassificationLayer(index, dimIn, dimOut, gammatik); - break; - case Layer::OPENCONV: - tmplayer = new OpenConvLayer(dimIn, dimOut); - break; - case Layer::OPENCONVMNIST: - tmplayer = new OpenConvLayerMNIST(dimIn, dimOut); - break; - case Layer::CONVOLUTION: - tmplayer = new ConvLayer(index, dimIn, dimOut, csize, nconv, 1.0, activ, gammatik, gammaddt); - break; - default: - printf("\n\n ERROR while unpacking a buffer: Layertype unknown!!\n\n"); - } - - /* Allocate design and gradient */ - MyReal *design = new MyReal[nDesign]; - MyReal *gradient = new MyReal[nDesign]; - tmplayer->setMemory(design, gradient); - /* Set the weights */ - for (int i = 0; i < nweights; i++) - { - tmplayer->getWeights()[i] = dbuffer[idx]; idx++; - } - for (int i = 0; i < dimBias; i++) - { - tmplayer->getBias()[i] = dbuffer[idx]; idx++; + /* Unpack the buffer */ + int idx = 0; + for (int iex = 0; iex < nbatch; iex++) { + for (int ic = 0; ic < nchannels; ic++) { + u->getState(iex)[ic] = dbuffer[idx]; + idx++; } - u->setLayer(tmplayer); - u->setSendflag(1.0); - - /* Return the pointer */ - *u_ptr = (braid_Vector) u; - return 0; + } + + /* Receive and initialize a layer. 
Set the sendflag */ + int layertype = dbuffer[idx]; + idx++; + int index = dbuffer[idx]; + idx++; + int dimIn = dbuffer[idx]; + idx++; + int dimOut = dbuffer[idx]; + idx++; + int dimBias = dbuffer[idx]; + idx++; + int nweights = dbuffer[idx]; + idx++; + int activ = dbuffer[idx]; + idx++; + int nDesign = dbuffer[idx]; + idx++; + int gammatik = dbuffer[idx]; + idx++; + int gammaddt = dbuffer[idx]; + idx++; + int nconv = dbuffer[idx]; + idx++; + int csize = dbuffer[idx]; + idx++; + + /* layertype decides on which layer should be created */ + switch (layertype) { + case Layer::OPENZERO: + tmplayer = new OpenExpandZero(dimIn, dimOut); + break; + case Layer::OPENDENSE: + tmplayer = new OpenDenseLayer(dimIn, dimOut, activ, gammatik); + break; + case Layer::DENSE: + tmplayer = + new DenseLayer(index, dimIn, dimOut, 1.0, activ, gammatik, gammaddt); + break; + case Layer::CLASSIFICATION: + tmplayer = new ClassificationLayer(index, dimIn, dimOut, gammatik); + break; + case Layer::OPENCONV: + tmplayer = new OpenConvLayer(dimIn, dimOut); + break; + case Layer::OPENCONVMNIST: + tmplayer = new OpenConvLayerMNIST(dimIn, dimOut); + break; + case Layer::CONVOLUTION: + tmplayer = new ConvLayer(index, dimIn, dimOut, csize, nconv, 1.0, activ, + gammatik, gammaddt); + break; + default: + printf("\n\n ERROR while unpacking a buffer: Layertype unknown!!\n\n"); + } + + /* Allocate design and gradient */ + MyReal *design = new MyReal[nDesign]; + MyReal *gradient = new MyReal[nDesign]; + tmplayer->setMemory(design, gradient); + /* Set the weights */ + for (int i = 0; i < nweights; i++) { + tmplayer->getWeights()[i] = dbuffer[idx]; + idx++; + } + for (int i = 0; i < dimBias; i++) { + tmplayer->getBias()[i] = dbuffer[idx]; + idx++; + } + u->setLayer(tmplayer); + u->setSendflag(1.0); + + /* Return the pointer */ + *u_ptr = (braid_Vector)u; + return 0; } +braid_Int myBraidApp::SetInitialCondition() { + Layer *openlayer = network->getLayer(-1); + int nbatch = data->getnBatch(); + braid_BaseVector ubase; + myBraidVector *u; + + /* Apply initial condition if warm_restart (otherwise it is set in my_Init() + */ + /* can not be set here if !(warm_restart) because braid_grid is created only + * in braid_drive(). */ + if (core->GetWarmRestart()) { + /* Get vector at t == 0 */ + _braid_UGetVectorRef(core->GetCore(), 0, 0, &ubase); + if (ubase != NULL) // only true on one first processor ! + { + u = (myBraidVector *)ubase->userVector; -braid_Int myBraidApp::SetInitialCondition() -{ - Layer* openlayer = network->getLayer(-1); - int nbatch = data->getnBatch(); - braid_BaseVector ubase; - myBraidVector* u; + /* Apply opening layer */ + for (int iex = 0; iex < nbatch; iex++) { + /* set example */ + openlayer->setExample(data->getExample(iex)); - /* Apply initial condition if warm_restart (otherwise it is set in my_Init() */ - /* can not be set here if !(warm_restart) because braid_grid is created only in braid_drive(). */ - if ( core->GetWarmRestart() ) - { - /* Get vector at t == 0 */ - _braid_UGetVectorRef(core->GetCore(), 0, 0, &ubase); - if (ubase != NULL) // only true on one first processor ! 
- { - u = (myBraidVector*) ubase->userVector; - - /* Apply opening layer */ - for (int iex = 0; iex < nbatch; iex++) - { - /* set example */ - openlayer->setExample(data->getExample(iex)); - - /* Apply the layer */ - openlayer->applyFWD(u->getState(iex)); - } - } + /* Apply the layer */ + openlayer->applyFWD(u->getState(iex)); + } } + } - return 0; + return 0; } - -braid_Int myBraidApp::EvaluateObjective() -{ - - - braid_BaseVector ubase; - myBraidVector* u; - Layer* layer; - MyReal myobjective; - MyReal regul; - - /* Get range of locally stored layers */ - int startlayerID = network->getStartLayerID(); - int endlayerID = network->getEndLayerID(); - if (startlayerID == 0) startlayerID -= 1; // this includes opening layer (id = -1) at first processor - - - /* Iterate over the local layers */ - regul = 0.0; - for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) - { - /* Get the layer */ - layer = network->getLayer(ilayer); - - /* Tikhonov - Regularization*/ - regul += layer->evalTikh(); - - /* DDT - Regularization on intermediate layers */ - regul += layer->evalRegulDDT(network->getLayer(ilayer-1), network->getDT()); - - /* At last layer: Classification and Loss evaluation */ - if (ilayer == network->getnLayersGlobal()-2) - { - _braid_UGetLast(core->GetCore(), &ubase); - u = (myBraidVector*) ubase->userVector; - network->evalClassification(data, u->getState(), 0); - } - // printf("%d: layerid %d using %1.14e, tik %1.14e, ddt %1.14e, loss %1.14e\n", app->myid, layer->getIndex(), layer->getWeights()[0], regultik, regulddt, loss_loc); +braid_Int myBraidApp::EvaluateObjective() { + braid_BaseVector ubase; + myBraidVector *u; + Layer *layer; + MyReal myobjective; + MyReal regul; + + /* Get range of locally stored layers */ + int startlayerID = network->getStartLayerID(); + int endlayerID = network->getEndLayerID(); + if (startlayerID == 0) + startlayerID -= + 1; // this includes opening layer (id = -1) at first processor + + /* Iterate over the local layers */ + regul = 0.0; + for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) { + /* Get the layer */ + layer = network->getLayer(ilayer); + + /* Tikhonov - Regularization*/ + regul += layer->evalTikh(); + + /* DDT - Regularization on intermediate layers */ + regul += + layer->evalRegulDDT(network->getLayer(ilayer - 1), network->getDT()); + + /* At last layer: Classification and Loss evaluation */ + if (ilayer == network->getnLayersGlobal() - 2) { + _braid_UGetLast(core->GetCore(), &ubase); + u = (myBraidVector *)ubase->userVector; + network->evalClassification(data, u->getState(), 0); } - - - /* Collect objective function from all processors */ - myobjective = network->getLoss() + regul; - objective = 0.0; - MPI_Allreduce(&myobjective, &objective, 1, MPI_MyReal, MPI_SUM, MPI_COMM_WORLD); - - return 0; + // printf("%d: layerid %d using %1.14e, tik %1.14e, ddt %1.14e, loss + // %1.14e\n", app->myid, layer->getIndex(), layer->getWeights()[0], + // regultik, regulddt, loss_loc); + } + + /* Collect objective function from all processors */ + myobjective = network->getLoss() + regul; + objective = 0.0; + MPI_Allreduce(&myobjective, &objective, 1, MPI_MyReal, MPI_SUM, + MPI_COMM_WORLD); + + return 0; } +MyReal myBraidApp::run() { + int nreq = -1; + MyReal norm; + SetInitialCondition(); + core->Drive(); + EvaluateObjective(); + core->GetRNorms(&nreq, &norm); -MyReal myBraidApp::run() -{ - int nreq = -1; - MyReal norm; - - SetInitialCondition(); - core->Drive(); - EvaluateObjective(); - core->GetRNorms(&nreq, &norm); - - return norm; 
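  /* Rough sketch of how one optimization iteration is intended to use this
     app (the real driver is in main.cpp and not part of this file; the object
     names below are illustrative only):

       primalapp->run();                        // forward propagation, sets the objective
       MyReal J = primalapp->getObjective();
       adjointapp->run();                       // backward propagation, accumulates the gradient
       network->updateDesign(stepsize, direction, MPI_COMM_WORLD);

     run() itself returns the final XBraid residual norm from GetRNorms(). */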
+ return norm; } - - - /* ========================================================= */ /* ========================================================= */ /* ========================================================= */ -myAdjointBraidApp::myAdjointBraidApp(DataSet* Data, - Network* Network, - Config* config, - BraidCore* Primalcoreptr, - MPI_Comm comm) : myBraidApp(Data, Network, config, comm) -{ - primalcore = Primalcoreptr; - - /* Store all primal points */ - primalcore->SetStorage(0); - - /* Revert processor ranks for solving adjoint with xbraid */ - core->SetRevertedRanks(1); -} +myAdjointBraidApp::myAdjointBraidApp(DataSet *Data, Network *Network, + Config *config, BraidCore *Primalcoreptr, + MPI_Comm comm) + : myBraidApp(Data, Network, config, comm) { + primalcore = Primalcoreptr; -myAdjointBraidApp::~myAdjointBraidApp(){} + /* Store all primal points */ + primalcore->SetStorage(0); - -int myAdjointBraidApp::GetPrimalIndex(int ts) -{ - int idx = network->getnLayersGlobal() - 2 - ts; - return idx; + /* Revert processor ranks for solving adjoint with xbraid */ + core->SetRevertedRanks(1); } +myAdjointBraidApp::~myAdjointBraidApp() {} -braid_Int myAdjointBraidApp::Step(braid_Vector u_, - braid_Vector ustop_, - braid_Vector fstop_, - BraidStepStatus &pstatus) -{ - int ts_stop; - int level, compute_gradient; - MyReal tstart, tstop; - MyReal deltaT; - int finegrid = 0; - int primaltimestep; - braid_BaseVector ubaseprimal; - myBraidVector* uprimal; - - int nbatch = data->getnBatch(); - myBraidVector *u = (myBraidVector*) u_; - - /* Update gradient only on the finest grid */ - pstatus.GetLevel(&level); - if (level == 0) compute_gradient = 1; - else compute_gradient = 0; - - /* Get the time-step size and current time index*/ - pstatus.GetTstartTstop(&tstart, &tstop); - ts_stop = GetTimeStepIndex(tstop); - deltaT = tstop - tstart; - primaltimestep = GetPrimalIndex(ts_stop); - - /* Get the primal vector from the primal core */ - _braid_UGetVectorRef(primalcore->GetCore(), finegrid, primaltimestep, &ubaseprimal); - uprimal = (myBraidVector*) ubaseprimal->userVector; - - /* Reset gradient before the update */ - if (compute_gradient) uprimal->getLayer()->resetBar(); - - /* Take one step backwards, updates adjoint state and gradient, if desired. 
*/ - uprimal->getLayer()->setDt(deltaT); - for (int iex = 0; iex < nbatch; iex++) - { - uprimal->getLayer()->applyBWD(uprimal->getState(iex), u->getState(iex), compute_gradient); - } - - // printf("%d: level %d step_adj %d->%d using layer %d,%1.14e, primal %1.14e, adj %1.14e, grad[0] %1.14e, %d\n", app->myid, level, ts_stop, uprimal->layer->getIndex(), uprimal->layer->getWeights()[3], uprimal->state[1][1], u->state[1][1], uprimal->layer->getWeightsBar()[0], uprimal->layer->getnDesign()); +int myAdjointBraidApp::GetPrimalIndex(int ts) { + int idx = network->getnLayersGlobal() - 2 - ts; + return idx; +} - /* Derivative of DDT-Regularization */ - if (compute_gradient) - { - Layer* prev = network->getLayer(primaltimestep - 1); - Layer* next = network->getLayer(primaltimestep + 1); - uprimal->getLayer()->evalRegulDDT_diff(prev, next, network->getDT()); - } +braid_Int myAdjointBraidApp::Step(braid_Vector u_, braid_Vector ustop_, + braid_Vector fstop_, + BraidStepStatus &pstatus) { + int ts_stop; + int level, compute_gradient; + MyReal tstart, tstop; + MyReal deltaT; + int finegrid = 0; + int primaltimestep; + braid_BaseVector ubaseprimal; + myBraidVector *uprimal; + + int nbatch = data->getnBatch(); + myBraidVector *u = (myBraidVector *)u_; + + /* Update gradient only on the finest grid */ + pstatus.GetLevel(&level); + if (level == 0) + compute_gradient = 1; + else + compute_gradient = 0; + + /* Get the time-step size and current time index*/ + pstatus.GetTstartTstop(&tstart, &tstop); + ts_stop = GetTimeStepIndex(tstop); + deltaT = tstop - tstart; + primaltimestep = GetPrimalIndex(ts_stop); + + /* Get the primal vector from the primal core */ + _braid_UGetVectorRef(primalcore->GetCore(), finegrid, primaltimestep, + &ubaseprimal); + uprimal = (myBraidVector *)ubaseprimal->userVector; + + /* Reset gradient before the update */ + if (compute_gradient) uprimal->getLayer()->resetBar(); + + /* Take one step backwards, updates adjoint state and gradient, if desired. 
*/ + uprimal->getLayer()->setDt(deltaT); + for (int iex = 0; iex < nbatch; iex++) { + uprimal->getLayer()->applyBWD(uprimal->getState(iex), u->getState(iex), + compute_gradient); + } + + // printf("%d: level %d step_adj %d->%d using layer %d,%1.14e, primal %1.14e, + // adj %1.14e, grad[0] %1.14e, %d\n", app->myid, level, ts_stop, + // uprimal->layer->getIndex(), uprimal->layer->getWeights()[3], + // uprimal->state[1][1], u->state[1][1], uprimal->layer->getWeightsBar()[0], + // uprimal->layer->getnDesign()); + + /* Derivative of DDT-Regularization */ + if (compute_gradient) { + Layer *prev = network->getLayer(primaltimestep - 1); + Layer *next = network->getLayer(primaltimestep + 1); + uprimal->getLayer()->evalRegulDDT_diff(prev, next, network->getDT()); + } + + /* Derivative of tikhonov */ + if (compute_gradient) uprimal->getLayer()->evalTikh_diff(1.0); + + /* no refinement */ + pstatus.SetRFactor(1); + + return 0; +} - /* Derivative of tikhonov */ - if (compute_gradient) uprimal->getLayer()->evalTikh_diff(1.0); +braid_Int myAdjointBraidApp::Init(braid_Real t, braid_Vector *u_ptr) { + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); - /* no refinement */ - pstatus.SetRFactor(1); + braid_BaseVector ubaseprimal; + myBraidVector *uprimal; - return 0; -} + // printf("%d: Init %d (primaltimestep %d)\n", app->myid, ilayer, + // primaltimestep); + /* Allocate the adjoint vector and set to zero */ + myBraidVector *u = new myBraidVector(nchannels, nbatch); -braid_Int myAdjointBraidApp::Init(braid_Real t, - braid_Vector *u_ptr) -{ + /* Adjoint initial (i.e. terminal) condition is derivative of classification + * layer */ + if (t == 0) { + /* Get the primal vector */ + _braid_UGetLast(primalcore->GetCore(), &ubaseprimal); + uprimal = (myBraidVector *)ubaseprimal->userVector; - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); + /* Reset the gradient before updating it */ + uprimal->getLayer()->resetBar(); - braid_BaseVector ubaseprimal; - myBraidVector* uprimal; + /* Derivative of classification */ + network->evalClassification_diff(data, uprimal->getState(), u->getState(), + 1); - // printf("%d: Init %d (primaltimestep %d)\n", app->myid, ilayer, primaltimestep); + /* Derivative of tikhonov regularization) */ + uprimal->getLayer()->evalTikh_diff(1.0); - /* Allocate the adjoint vector and set to zero */ - myBraidVector* u = new myBraidVector(nchannels, nbatch); + // printf("%d: Init_adj Loss at %d, using %1.14e, primal %1.14e, adj + // %1.14e, grad[0] %1.14e\n", app->myid, layer->getIndex(), + // layer->getWeights()[0], primalstate[1][1], u->state[1][1], + // layer->getWeightsBar()[0]); + } - /* Adjoint initial (i.e. 
terminal) condition is derivative of classification layer */ - if (t==0) - { - /* Get the primal vector */ - _braid_UGetLast(primalcore->GetCore(), &ubaseprimal); - uprimal = (myBraidVector*) ubaseprimal->userVector; + *u_ptr = (braid_Vector)u; - /* Reset the gradient before updating it */ - uprimal->getLayer()->resetBar(); + return 0; +} - /* Derivative of classification */ - network->evalClassification_diff(data, uprimal->getState(), u->getState(), 1); +braid_Int myAdjointBraidApp::BufSize(braid_Int *size_ptr, + BraidBufferStatus &bstatus) { + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); - /* Derivative of tikhonov regularization) */ - uprimal->getLayer()->evalTikh_diff(1.0); - - // printf("%d: Init_adj Loss at %d, using %1.14e, primal %1.14e, adj %1.14e, grad[0] %1.14e\n", app->myid, layer->getIndex(), layer->getWeights()[0], primalstate[1][1], u->state[1][1], layer->getWeightsBar()[0]); - } + *size_ptr = nchannels * nbatch * sizeof(MyReal); + return 0; +} - *u_ptr = (braid_Vector) u; - - return 0; -} - - -braid_Int myAdjointBraidApp::BufSize(braid_Int *size_ptr, - BraidBufferStatus &bstatus) -{ - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); - - *size_ptr = nchannels*nbatch*sizeof(MyReal); - return 0; -} - - -braid_Int myAdjointBraidApp::BufPack(braid_Vector u_, - void *buffer, - BraidBufferStatus &bstatus) -{ - - int size; - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); - MyReal *dbuffer = (MyReal*) buffer; - myBraidVector* u = (myBraidVector*) u_; - - /* Store network state */ - int idx = 0; - for (int iex = 0; iex < nbatch; iex++) - { - for (int ic = 0; ic < nchannels; ic++) - { - dbuffer[idx] = u->getState(iex)[ic]; - idx++; - } - } - size = nchannels*nbatch*sizeof(MyReal); - - bstatus.SetSize(size); - return 0; -} - -braid_Int myAdjointBraidApp::BufUnpack(void *buffer, - braid_Vector *u_ptr, - BraidBufferStatus &bstatus) -{ - - int nchannels = network->getnChannels(); - int nbatch = data->getnBatch(); - MyReal *dbuffer = (MyReal*) buffer; - - /* Allocate the vector */ - myBraidVector* u = new myBraidVector(nchannels, nbatch); - - /* Unpack the buffer */ - int idx = 0; - for (int iex = 0; iex < nbatch; iex++) - { - for (int ic = 0; ic < nchannels; ic++) - { - u->getState(iex)[ic] = dbuffer[idx]; - idx++; - } +braid_Int myAdjointBraidApp::BufPack(braid_Vector u_, void *buffer, + BraidBufferStatus &bstatus) { + int size; + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); + MyReal *dbuffer = (MyReal *)buffer; + myBraidVector *u = (myBraidVector *)u_; + + /* Store network state */ + int idx = 0; + for (int iex = 0; iex < nbatch; iex++) { + for (int ic = 0; ic < nchannels; ic++) { + dbuffer[idx] = u->getState(iex)[ic]; + idx++; } - u->setLayer(NULL); - u->setSendflag(-1.0); + } + size = nchannels * nbatch * sizeof(MyReal); - *u_ptr = (braid_Vector) u; - return 0; -} + bstatus.SetSize(size); + return 0; +} -braid_Int myAdjointBraidApp::SetInitialCondition() -{ - braid_BaseVector ubaseprimal, ubaseadjoint; - // braid_Vector uprimal, uadjoint; - myBraidVector *uprimal, *uadjoint; +braid_Int myAdjointBraidApp::BufUnpack(void *buffer, braid_Vector *u_ptr, + BraidBufferStatus &bstatus) { + int nchannels = network->getnChannels(); + int nbatch = data->getnBatch(); + MyReal *dbuffer = (MyReal *)buffer; + + /* Allocate the vector */ + myBraidVector *u = new myBraidVector(nchannels, nbatch); + + /* Unpack the buffer */ + int idx = 0; + for (int iex = 0; iex < nbatch; iex++) { + for 
(int ic = 0; ic < nchannels; ic++) { + u->getState(iex)[ic] = dbuffer[idx]; + idx++; + } + } + u->setLayer(NULL); + u->setSendflag(-1.0); - /* Only gradient for primal time step N here. Other time steps are in my_Step_adj. */ + *u_ptr = (braid_Vector)u; + return 0; +} - /* If warm_restart: set adjoint initial condition here. Otherwise it's set in my_Init_Adj */ - /* It can not be done here if drive() has not been called before, because the braid grid is allocated only at the beginning of drive() */ - if ( core->GetWarmRestart() ) +braid_Int myAdjointBraidApp::SetInitialCondition() { + braid_BaseVector ubaseprimal, ubaseadjoint; + // braid_Vector uprimal, uadjoint; + myBraidVector *uprimal, *uadjoint; + + /* Only gradient for primal time step N here. Other time steps are in + * my_Step_adj. */ + + /* If warm_restart: set adjoint initial condition here. Otherwise it's set in + * my_Init_Adj */ + /* It can not be done here if drive() has not been called before, because the + * braid grid is allocated only at the beginning of drive() */ + if (core->GetWarmRestart()) { + /* Get primal and adjoint state */ + _braid_UGetLast(primalcore->GetCore(), &ubaseprimal); + _braid_UGetVectorRef(core->GetCore(), 0, 0, &ubaseadjoint); + + if (ubaseprimal != NULL && + ubaseadjoint != NULL) // this is the case at first primal and last + // adjoint time step { - /* Get primal and adjoint state */ - _braid_UGetLast(primalcore->GetCore(), &ubaseprimal); - _braid_UGetVectorRef(core->GetCore(), 0, 0, &ubaseadjoint); - - if (ubaseprimal != NULL && ubaseadjoint !=NULL) // this is the case at first primal and last adjoint time step - { - uprimal = (myBraidVector*) ubaseprimal->userVector; - uadjoint = (myBraidVector*) ubaseadjoint->userVector; + uprimal = (myBraidVector *)ubaseprimal->userVector; + uadjoint = (myBraidVector *)ubaseadjoint->userVector; - /* Reset the gradient before updating it */ - uprimal->getLayer()->resetBar(); + /* Reset the gradient before updating it */ + uprimal->getLayer()->resetBar(); - // printf("%d: objective_diff at ilayer %d using %1.14e primal %1.14e\n", app->myid, uprimal->layer->getIndex(), uprimal->layer->getWeights()[0], uprimal->state[1][1]); + // printf("%d: objective_diff at ilayer %d using %1.14e primal %1.14e\n", + // app->myid, uprimal->layer->getIndex(), uprimal->layer->getWeights()[0], + // uprimal->state[1][1]); - /* Derivative of classification */ - network->evalClassification_diff(data, uprimal->getState(), uadjoint->getState(), 1); + /* Derivative of classification */ + network->evalClassification_diff(data, uprimal->getState(), + uadjoint->getState(), 1); - /* Derivative of tikhonov regularization) */ - uprimal->getLayer()->evalTikh_diff(1.0); - } - } + /* Derivative of tikhonov regularization) */ + uprimal->getLayer()->evalTikh_diff(1.0); + } + } - return 0; + return 0; } +braid_Int myAdjointBraidApp::EvaluateObjective() { + braid_BaseVector ubase; + myBraidVector *uadjoint; -braid_Int myAdjointBraidApp::EvaluateObjective() -{ - braid_BaseVector ubase; - myBraidVector* uadjoint; + Layer *openlayer = network->getLayer(-1); + int nbatch = data->getnBatch(); - Layer* openlayer = network->getLayer(-1); - int nbatch = data->getnBatch(); + /* Get \bar y^0 (which is the LAST xbraid vector, stored on proc 0) */ + _braid_UGetLast(core->GetCore(), &ubase); + if (ubase != NULL) // This is true only on first processor (reverted ranks!) 
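  /* Note: because the adjoint core runs with reverted processor ranks (see
     SetRevertedRanks(1) in the constructor), the LAST adjoint vector \bar y^0
     is stored on processor 0; only there is the opening-layer gradient
     accumulated below. */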
+ { + uadjoint = (myBraidVector *)ubase->userVector; - /* Get \bar y^0 (which is the LAST xbraid vector, stored on proc 0) */ - _braid_UGetLast(core->GetCore(), &ubase); - if (ubase != NULL) // This is true only on first processor (reverted ranks!) - { - uadjoint = (myBraidVector*) ubase->userVector; - - /* Reset the gradient */ - openlayer->resetBar(); - - /* Apply opening layer backwards for all examples */ - for (int iex = 0; iex < nbatch; iex++) - { - openlayer->setExample(data->getExample(iex)); - /* TODO: Don't feed applyBWD with NULL! */ - openlayer->applyBWD(NULL, uadjoint->getState(iex), 1); - } - - // printf("%d: Init_diff layerid %d using %1.14e, adj %1.14e grad[0] %1.14e\n", app->myid, openlayer->getIndex(), openlayer->getWeights()[3], ubase->userVector->state[1][1], openlayer->getWeightsBar()[0] ); - - /* Derivative of Tikhonov Regularization */ - openlayer->evalTikh_diff(1.0); + /* Reset the gradient */ + openlayer->resetBar(); + + /* Apply opening layer backwards for all examples */ + for (int iex = 0; iex < nbatch; iex++) { + openlayer->setExample(data->getExample(iex)); + /* TODO: Don't feed applyBWD with NULL! */ + openlayer->applyBWD(NULL, uadjoint->getState(iex), 1); } - return 0; -} \ No newline at end of file + // printf("%d: Init_diff layerid %d using %1.14e, adj %1.14e grad[0] + // %1.14e\n", app->myid, openlayer->getIndex(), openlayer->getWeights()[3], + // ubase->userVector->state[1][1], openlayer->getWeightsBar()[0] ); + + /* Derivative of Tikhonov Regularization */ + openlayer->evalTikh_diff(1.0); + } + + return 0; +} diff --git a/src/config.cpp b/src/config.cpp index b610240..76c6830 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -1,571 +1,469 @@ +// Copyright +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Underlying paper: +// +// Layer-Parallel Training of Deep Residual Neural Networks +// S. Guenther, L. Ruthotto, J.B. Schroder, E.C. Czr, and N.R. 
Gauger +// +// Download: https://arxiv.org/pdf/1812.04352.pdf +// #include "config.hpp" +#include #include #include -#include - - - -Config::Config() -{ - - /* --- Set DEFAULT parameters of the config file options --- */ - - /* Data st */ - datafolder = "NONE"; - ftrain_ex = "NONE"; - fval_ex = "NONE"; - ftrain_labels = "NONE"; - fval_labels = "NONE"; - weightsopenfile = "NONE"; - weightsclassificationfile = "NONE"; - - ntraining = 5000; - nvalidation = 200; - nfeatures = 2; - nclasses = 5; - - /* Neural Network */ - nchannels = 8; - nlayers = 32; - T = 10.0; - activation = RELU; - network_type = DENSE; - openlayer_type = 0; - weights_open_init = 0.001; - weights_init = 0.0; - weights_class_init = 0.001; - - /* XBraid */ - braid_cfactor0 = 4; - braid_cfactor = 4; - braid_maxlevels = 10; - braid_mincoarse = 10; - braid_maxiter = 3; - braid_abstol = 1e-10; - braid_abstoladj = 1e-06; - braid_printlevel = 1; - braid_accesslevel = 0; - braid_setskip = 0; - braid_fmg = 0; - braid_nrelax0 = 1; - braid_nrelax = 1; - - /* Optimization */ - batch_type = DETERMINISTIC; - nbatch = ntraining; // full batch - gamma_tik = 1e-07; - gamma_ddt = 1e-07; - gamma_class = 1e-07; - stepsize_type = BACKTRACKINGLS; - stepsize_init = 1.0; - maxoptimiter = 500; - gtol = 1e-08; - ls_maxiter = 20; - ls_factor = 0.5; - hessianapprox_type = LBFGS; - lbfgs_stages = 20; - validationlevel = 1; +Config::Config() { + /* --- Set DEFAULT parameters of the config file options --- */ + + /* Data st */ + datafolder = "NONE"; + ftrain_ex = "NONE"; + fval_ex = "NONE"; + ftrain_labels = "NONE"; + fval_labels = "NONE"; + weightsopenfile = "NONE"; + weightsclassificationfile = "NONE"; + + ntraining = 5000; + nvalidation = 200; + nfeatures = 2; + nclasses = 5; + + /* Neural Network */ + nchannels = 8; + nlayers = 32; + T = 10.0; + activation = RELU; + network_type = DENSE; + openlayer_type = 0; + weights_open_init = 0.001; + weights_init = 0.0; + weights_class_init = 0.001; + + /* XBraid */ + braid_cfactor0 = 4; + braid_cfactor = 4; + braid_maxlevels = 10; + braid_mincoarse = 10; + braid_maxiter = 3; + braid_abstol = 1e-10; + braid_abstoladj = 1e-06; + braid_printlevel = 1; + braid_accesslevel = 0; + braid_setskip = 0; + braid_fmg = 0; + braid_nrelax0 = 1; + braid_nrelax = 1; + + /* Optimization */ + batch_type = DETERMINISTIC; + nbatch = ntraining; // full batch + gamma_tik = 1e-07; + gamma_ddt = 1e-07; + gamma_class = 1e-07; + stepsize_type = BACKTRACKINGLS; + stepsize_init = 1.0; + maxoptimiter = 500; + gtol = 1e-08; + ls_maxiter = 20; + ls_factor = 0.5; + hessianapprox_type = LBFGS; + lbfgs_stages = 20; + validationlevel = 1; } -Config::~Config(){} - - -int Config::readFromFile(char* configfilename) -{ - /* Parse the config file */ - config_option* co; - if ((co = parsefile(configfilename)) == NULL) { - perror("parsefile()"); - return -1; +Config::~Config() {} + +int Config::readFromFile(char *configfilename) { + /* Parse the config file */ + config_option *co; + if ((co = parsefile(configfilename)) == NULL) { + perror("parsefile()"); + return -1; + } + + /* Set the config options */ + while (1) { + if (strcmp(co->key, "datafolder") == 0) { + datafolder = co->value; + } else if (strcmp(co->key, "ftrain_ex") == 0) { + ftrain_ex = co->value; + } else if (strcmp(co->key, "ftrain_labels") == 0) { + ftrain_labels = co->value; + } else if (strcmp(co->key, "fval_ex") == 0) { + fval_ex = co->value; + } else if (strcmp(co->key, "fval_labels") == 0) { + fval_labels = co->value; + } else if (strcmp(co->key, "ntraining") == 0) { + ntraining = 
atoi(co->value); + } else if (strcmp(co->key, "nvalidation") == 0) { + nvalidation = atoi(co->value); + } else if (strcmp(co->key, "nfeatures") == 0) { + nfeatures = atoi(co->value); + } else if (strcmp(co->key, "nchannels") == 0) { + nchannels = atoi(co->value); + } else if (strcmp(co->key, "nclasses") == 0) { + nclasses = atoi(co->value); } - - /* Set the config options */ - while(1) { - - if ( strcmp(co->key, "datafolder") == 0 ) - { - datafolder = co->value; - } - else if ( strcmp(co->key, "ftrain_ex") == 0 ) - { - ftrain_ex = co->value; - } - else if ( strcmp(co->key, "ftrain_labels") == 0 ) - { - ftrain_labels = co->value; - } - else if ( strcmp(co->key, "fval_ex") == 0 ) - { - fval_ex = co->value; - } - else if ( strcmp(co->key, "fval_labels") == 0 ) - { - fval_labels = co->value; - } - else if ( strcmp(co->key, "ntraining") == 0 ) - { - ntraining = atoi(co->value); - } - else if ( strcmp(co->key, "nvalidation") == 0 ) - { - nvalidation = atoi(co->value); - } - else if ( strcmp(co->key, "nfeatures") == 0 ) - { - nfeatures = atoi(co->value); - } - else if ( strcmp(co->key, "nchannels") == 0 ) - { - nchannels = atoi(co->value); - } - else if ( strcmp(co->key, "nclasses") == 0 ) - { - nclasses = atoi(co->value); - } - if ( strcmp(co->key, "weightsopenfile") == 0 ) - { - weightsopenfile = co->value; - } - if ( strcmp(co->key, "weightsclassificationfile") == 0 ) - { - weightsclassificationfile = co->value; - } - else if ( strcmp(co->key, "nlayers") == 0 ) - { - nlayers = atoi(co->value); - - if (nlayers < 3) - { - printf("\n\n ERROR: nlayers=%d too small! Choose minimum three layers (openlayer, one hidden layer, classification layer)!\n\n", nlayers); - return -1; - } - } - else if ( strcmp(co->key, "activation") == 0 ) - { - if (strcmp(co->value, "tanh") == 0 ) - { - activation = TANH; - } - else if ( strcmp(co->value, "ReLu") == 0 ) - { - activation = RELU; - } - else if (strcmp(co->value, "SmoothReLu") == 0 ) - { - activation = SMRELU; - } - else - { - printf("Invalid activation function!"); - return -1; - } - } - else if ( strcmp(co->key, "network_type") == 0 ) - { - if (strcmp(co->value, "dense") == 0 ) - { - network_type = DENSE; - } - else if (strcmp(co->value, "convolutional") == 0 ) - { - network_type = CONVOLUTIONAL; - } - else - { - printf("Invalid network type !"); - return -1; - } - } - else if ( strcmp(co->key, "T") == 0 ) - { - T = atof(co->value); - } - else if ( strcmp(co->key, "braid_cfactor") == 0 ) - { - braid_cfactor = atoi(co->value); - } - else if ( strcmp(co->key, "braid_cfactor0") == 0 ) - { - braid_cfactor0 = atoi(co->value); - } - else if ( strcmp(co->key, "braid_maxlevels") == 0 ) - { - braid_maxlevels = atoi(co->value); - } - else if ( strcmp(co->key, "braid_mincoarse") == 0 ) - { - braid_mincoarse = atoi(co->value); - } - else if ( strcmp(co->key, "braid_maxiter") == 0 ) - { - braid_maxiter = atoi(co->value); - } - else if ( strcmp(co->key, "braid_abstol") == 0 ) - { - braid_abstol = atof(co->value); - } - else if ( strcmp(co->key, "braid_adjtol") == 0 ) - { - braid_abstoladj = atof(co->value); - } - else if ( strcmp(co->key, "braid_printlevel") == 0 ) - { - braid_printlevel = atoi(co->value); - } - else if ( strcmp(co->key, "braid_accesslevel") == 0 ) - { - braid_accesslevel = atoi(co->value); - } - else if ( strcmp(co->key, "braid_setskip") == 0 ) - { - braid_setskip = atoi(co->value); - } - else if ( strcmp(co->key, "braid_fmg") == 0 ) - { - braid_fmg = atoi(co->value); - } - else if ( strcmp(co->key, "braid_nrelax") == 0 ) - { - braid_nrelax = 
atoi(co->value); - } - else if ( strcmp(co->key, "braid_nrelax0") == 0 ) - { - braid_nrelax0 = atoi(co->value); - } - else if (strcmp(co->key, "batch_type") == 0 ) - { - if ( strcmp(co->value, "deterministic") == 0 ) - { - batch_type = DETERMINISTIC; - } - else if (strcmp(co->value, "stochastic") == 0 ) - { - batch_type = STOCHASTIC; - } - else - { - printf("Invalid optimization type! Should be either 'deterministic' or 'stochastic'!"); - return -1; - } - } - else if ( strcmp(co->key, "nbatch") == 0 ) - { - nbatch = atoi(co->value); - } - else if ( strcmp(co->key, "gamma_tik") == 0 ) - { - gamma_tik = atof(co->value); - } - else if ( strcmp(co->key, "gamma_ddt") == 0 ) - { - gamma_ddt = atof(co->value); - } - else if ( strcmp(co->key, "gamma_class") == 0 ) - { - gamma_class= atof(co->value); - } - else if (strcmp(co->key, "stepsize_type") == 0 ) - { - if ( strcmp(co->value, "fixed") == 0 ) - { - stepsize_type = FIXED; - } - else if (strcmp(co->value, "backtrackingLS") == 0 ) - { - stepsize_type = BACKTRACKINGLS; - } - else if (strcmp(co->value, "oneoverk") == 0 ) - { - stepsize_type = ONEOVERK; - } - else - { - printf("Invalid stepsize type! Should be either 'fixed' or 'backtrackingLS' or 'oneoverk' !"); - return -1; - } - } - else if ( strcmp(co->key, "stepsize") == 0 ) - { - stepsize_init = atof(co->value); - } - else if ( strcmp(co->key, "optim_maxiter") == 0 ) - { - maxoptimiter = atoi(co->value); - } - else if ( strcmp(co->key, "gtol") == 0 ) - { - gtol = atof(co->value); - } - else if ( strcmp(co->key, "ls_maxiter") == 0 ) - { - ls_maxiter = atoi(co->value); - } - else if ( strcmp(co->key, "ls_factor") == 0 ) - { - ls_factor = atof(co->value); - } - else if ( strcmp(co->key, "weights_open_init") == 0 ) - { - weights_open_init = atof(co->value); - } - else if ( strcmp(co->key, "type_openlayer") == 0 ) - { - if (strcmp(co->value, "replicate") == 0 ) - { - openlayer_type = 0; - } - else if ( strcmp(co->value, "activate") == 0 ) - { - openlayer_type = 1; - } - else - { - printf("Invalid type_openlayer!\n"); - MPI_Finalize(); - return(0); - } - } - else if ( strcmp(co->key, "weights_init") == 0 ) - { - weights_init = atof(co->value); - } - else if ( strcmp(co->key, "weights_class_init") == 0 ) - { - weights_class_init = atof(co->value); - } - else if ( strcmp(co->key, "hessian_approx") == 0 ) - { - if ( strcmp(co->value, "BFGS") == 0 ) - { - hessianapprox_type = BFGS_SERIAL; - } - else if (strcmp(co->value, "L-BFGS") == 0 ) - { - hessianapprox_type = LBFGS; - } - else if (strcmp(co->value, "Identity") == 0 ) - { - hessianapprox_type = IDENTITY; - } - else - { - printf("Invalid Hessian approximation!"); - return -1; - } - } - else if ( strcmp(co->key, "lbfgs_stages") == 0 ) - { - lbfgs_stages = atoi(co->value); - } - else if ( strcmp(co->key, "validationlevel") == 0 ) - { - validationlevel = atoi(co->value); - } - if (co->prev != NULL) { - co = co->prev; - } else { - break; - } + if (strcmp(co->key, "weightsopenfile") == 0) { + weightsopenfile = co->value; } - - - /* Sanity check */ - if (nfeatures > nchannels || - nclasses > nchannels) - { - printf("ERROR! Choose a wider netword!\n"); - printf(" -- nFeatures = %d\n", nfeatures); - printf(" -- nChannels = %d\n", nchannels); - printf(" -- nClasses = %d\n", nclasses); - exit(1); + if (strcmp(co->key, "weightsclassificationfile") == 0) { + weightsclassificationfile = co->value; + } else if (strcmp(co->key, "nlayers") == 0) { + nlayers = atoi(co->value); + + if (nlayers < 3) { + printf( + "\n\n ERROR: nlayers=%d too small! 
Choose minimum three layers " + "(openlayer, one hidden layer, classification layer)!\n\n", + nlayers); + return -1; + } + } else if (strcmp(co->key, "activation") == 0) { + if (strcmp(co->value, "tanh") == 0) { + activation = TANH; + } else if (strcmp(co->value, "ReLu") == 0) { + activation = RELU; + } else if (strcmp(co->value, "SmoothReLu") == 0) { + activation = SMRELU; + } else { + printf("Invalid activation function!"); + return -1; + } + } else if (strcmp(co->key, "network_type") == 0) { + if (strcmp(co->value, "dense") == 0) { + network_type = DENSE; + } else if (strcmp(co->value, "convolutional") == 0) { + network_type = CONVOLUTIONAL; + } else { + printf("Invalid network type !"); + return -1; + } + } else if (strcmp(co->key, "T") == 0) { + T = atof(co->value); + } else if (strcmp(co->key, "braid_cfactor") == 0) { + braid_cfactor = atoi(co->value); + } else if (strcmp(co->key, "braid_cfactor0") == 0) { + braid_cfactor0 = atoi(co->value); + } else if (strcmp(co->key, "braid_maxlevels") == 0) { + braid_maxlevels = atoi(co->value); + } else if (strcmp(co->key, "braid_mincoarse") == 0) { + braid_mincoarse = atoi(co->value); + } else if (strcmp(co->key, "braid_maxiter") == 0) { + braid_maxiter = atoi(co->value); + } else if (strcmp(co->key, "braid_abstol") == 0) { + braid_abstol = atof(co->value); + } else if (strcmp(co->key, "braid_adjtol") == 0) { + braid_abstoladj = atof(co->value); + } else if (strcmp(co->key, "braid_printlevel") == 0) { + braid_printlevel = atoi(co->value); + } else if (strcmp(co->key, "braid_accesslevel") == 0) { + braid_accesslevel = atoi(co->value); + } else if (strcmp(co->key, "braid_setskip") == 0) { + braid_setskip = atoi(co->value); + } else if (strcmp(co->key, "braid_fmg") == 0) { + braid_fmg = atoi(co->value); + } else if (strcmp(co->key, "braid_nrelax") == 0) { + braid_nrelax = atoi(co->value); + } else if (strcmp(co->key, "braid_nrelax0") == 0) { + braid_nrelax0 = atoi(co->value); + } else if (strcmp(co->key, "batch_type") == 0) { + if (strcmp(co->value, "deterministic") == 0) { + batch_type = DETERMINISTIC; + } else if (strcmp(co->value, "stochastic") == 0) { + batch_type = STOCHASTIC; + } else { + printf( + "Invalid optimization type! Should be either 'deterministic' or " + "'stochastic'!"); + return -1; + } + } else if (strcmp(co->key, "nbatch") == 0) { + nbatch = atoi(co->value); + } else if (strcmp(co->key, "gamma_tik") == 0) { + gamma_tik = atof(co->value); + } else if (strcmp(co->key, "gamma_ddt") == 0) { + gamma_ddt = atof(co->value); + } else if (strcmp(co->key, "gamma_class") == 0) { + gamma_class = atof(co->value); + } else if (strcmp(co->key, "stepsize_type") == 0) { + if (strcmp(co->value, "fixed") == 0) { + stepsize_type = FIXED; + } else if (strcmp(co->value, "backtrackingLS") == 0) { + stepsize_type = BACKTRACKINGLS; + } else if (strcmp(co->value, "oneoverk") == 0) { + stepsize_type = ONEOVERK; + } else { + printf( + "Invalid stepsize type! 
Should be either 'fixed' or " + "'backtrackingLS' or 'oneoverk' !"); + return -1; + } + } else if (strcmp(co->key, "stepsize") == 0) { + stepsize_init = atof(co->value); + } else if (strcmp(co->key, "optim_maxiter") == 0) { + maxoptimiter = atoi(co->value); + } else if (strcmp(co->key, "gtol") == 0) { + gtol = atof(co->value); + } else if (strcmp(co->key, "ls_maxiter") == 0) { + ls_maxiter = atoi(co->value); + } else if (strcmp(co->key, "ls_factor") == 0) { + ls_factor = atof(co->value); + } else if (strcmp(co->key, "weights_open_init") == 0) { + weights_open_init = atof(co->value); + } else if (strcmp(co->key, "type_openlayer") == 0) { + if (strcmp(co->value, "replicate") == 0) { + openlayer_type = 0; + } else if (strcmp(co->value, "activate") == 0) { + openlayer_type = 1; + } else { + printf("Invalid type_openlayer!\n"); + MPI_Finalize(); + return (0); + } + } else if (strcmp(co->key, "weights_init") == 0) { + weights_init = atof(co->value); + } else if (strcmp(co->key, "weights_class_init") == 0) { + weights_class_init = atof(co->value); + } else if (strcmp(co->key, "hessian_approx") == 0) { + if (strcmp(co->value, "BFGS") == 0) { + hessianapprox_type = BFGS_SERIAL; + } else if (strcmp(co->value, "L-BFGS") == 0) { + hessianapprox_type = LBFGS; + } else if (strcmp(co->value, "Identity") == 0) { + hessianapprox_type = IDENTITY; + } else { + printf("Invalid Hessian approximation!"); + return -1; + } + } else if (strcmp(co->key, "lbfgs_stages") == 0) { + lbfgs_stages = atoi(co->value); + } else if (strcmp(co->key, "validationlevel") == 0) { + validationlevel = atoi(co->value); } - - return 0; + if (co->prev != NULL) { + co = co->prev; + } else { + break; + } + } + + /* Sanity check */ + if (nfeatures > nchannels || nclasses > nchannels) { + printf("ERROR! Choose a wider netword!\n"); + printf(" -- nFeatures = %d\n", nfeatures); + printf(" -- nChannels = %d\n", nchannels); + printf(" -- nClasses = %d\n", nclasses); + exit(1); + } + + return 0; } - - -Config::config_option* Config::parsefile(char* path) { - FILE* fp; - - if ((fp = fopen(path, "r+")) == NULL) { - perror("fopen()"); - return NULL; - } - - config_option* last_co_addr = NULL; - - while(1) { - config_option* co = NULL; - if ((co = (config_option*) calloc(1, sizeof(config_option))) == NULL) - continue; - memset(co, 0, sizeof(struct config_option)); - co->prev = last_co_addr; - - if (fscanf(fp, "%s = %s", &co->key[0], &co->value[0]) != 2) { - if (feof(fp)) { - break; - } - if (co->key[0] == '#') { - while (fgetc(fp) != '\n') { - // Do nothing (to move the cursor to the end of the line). - } - free(co); - continue; - } - perror("fscanf()"); - free(co); - continue; - } - //printf("Key: %s\nValue: %s\n", co->key, co->value); - last_co_addr = co; +Config::config_option *Config::parsefile(char *path) { + FILE *fp; + + if ((fp = fopen(path, "r+")) == NULL) { + perror("fopen()"); + return NULL; + } + + config_option *last_co_addr = NULL; + + while (1) { + config_option *co = NULL; + if ((co = (config_option *)calloc(1, sizeof(config_option))) == NULL) + continue; + memset(co, 0, sizeof(struct config_option)); + co->prev = last_co_addr; + + if (fscanf(fp, "%s = %s", &co->key[0], &co->value[0]) != 2) { + if (feof(fp)) { + break; + } + if (co->key[0] == '#') { + while (fgetc(fp) != '\n') { + // Do nothing (to move the cursor to the end of the line). 
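          // For reference: the "%s = %s" pattern above means each non-comment
          // line of a .cfg file is expected to look like
          //     key = value
          // with whitespace around the '=' (each "%s" stops at whitespace),
          // one option per line, e.g. "nchannels = 8" or "braid_maxiter = 2"
          // (illustrative values only); a line whose first token starts with
          // '#' is skipped as a comment, which is what this branch handles.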
+ } + free(co); + continue; + } + perror("fscanf()"); + free(co); + continue; } - return last_co_addr; + // printf("Key: %s\nValue: %s\n", co->key, co->value); + last_co_addr = co; + } + return last_co_addr; } - - -int Config::writeToFile(FILE* outfile) -{ - const char *activname, *networktypename, *hessetypename, *optimtypename, *stepsizetypename; - - /* Get names of some int options */ - switch (activation) - { - case TANH: - activname = "tanh"; - break; - case RELU: - activname = "ReLu"; - break; - case SMRELU: - activname = "SmoothReLU"; - break; - default: - activname = "invalid!"; - } - switch (network_type) - { - case DENSE: - networktypename = "dense"; - break; - case CONVOLUTIONAL: - networktypename = "convolutional"; - break; - default: - networktypename = "invalid!"; - } - switch (hessianapprox_type) - { - case BFGS_SERIAL: - hessetypename = "BFGS"; - break; - case LBFGS: - hessetypename = "L-BFGS"; - break; - case IDENTITY: - hessetypename = "Identity"; - break; - default: - hessetypename = "invalid!"; - } - switch (batch_type) - { - case DETERMINISTIC: - optimtypename = "deterministic"; - break; - case STOCHASTIC: - optimtypename = "stochastic"; - break; - default: - optimtypename = "invalid!"; - } - switch (stepsize_type) - { - case FIXED: - stepsizetypename = "fixed"; - break; - case BACKTRACKINGLS: - stepsizetypename = "backtracking line-search"; - break; - case ONEOVERK: - stepsizetypename = "1/k"; - break; - default: - stepsizetypename = "invalid!"; - } - - - /* print config option */ - fprintf(outfile, "# Problem setup: datafolder %s \n", datafolder); - fprintf(outfile, "# training examples %s \n", ftrain_ex); - fprintf(outfile, "# training labels %s \n", ftrain_labels); - fprintf(outfile, "# validation examples %s \n", fval_ex); - fprintf(outfile, "# validation labels %s \n", fval_labels); - fprintf(outfile, "# ntraining %d \n", ntraining); - fprintf(outfile, "# nvalidation %d \n", nvalidation); - fprintf(outfile, "# nfeatures %d \n", nfeatures); - fprintf(outfile, "# nclasses %d \n", nclasses); - fprintf(outfile, "# nchannels %d \n", nchannels); - fprintf(outfile, "# nlayers %d \n", nlayers); - fprintf(outfile, "# T %f \n", T); - fprintf(outfile, "# network type %s \n", networktypename); - fprintf(outfile, "# Activation %s \n", activname); - fprintf(outfile, "# openlayer type %d \n", openlayer_type); - fprintf(outfile, "# XBraid setup: max levels %d \n", braid_maxlevels); - fprintf(outfile, "# min coarse %d \n", braid_mincoarse); - fprintf(outfile, "# coasening %d \n", braid_cfactor); - fprintf(outfile, "# coasening (level 0) %d \n", braid_cfactor0); - fprintf(outfile, "# max. braid iter %d \n", braid_maxiter); - fprintf(outfile, "# abs. tol %1.e \n", braid_abstol); - fprintf(outfile, "# abs. toladj %1.e \n", braid_abstoladj); - fprintf(outfile, "# print level %d \n", braid_printlevel); - fprintf(outfile, "# access level %d \n", braid_accesslevel); - fprintf(outfile, "# skip? %d \n", braid_setskip); - fprintf(outfile, "# fmg? 
%d \n", braid_fmg); - fprintf(outfile, "# nrelax (level 0) %d \n", braid_nrelax0); - fprintf(outfile, "# nrelax %d \n", braid_nrelax); - fprintf(outfile, "# Optimization: optimization type %s \n", optimtypename); - fprintf(outfile, "# nbatch %d \n", nbatch); - fprintf(outfile, "# gamma_tik %1.e \n", gamma_tik); - fprintf(outfile, "# gamma_ddt %1.e \n", gamma_ddt); - fprintf(outfile, "# gamma_class %1.e \n", gamma_class); - fprintf(outfile, "# stepsize type %s \n", stepsizetypename); - fprintf(outfile, "# stepsize %f \n", stepsize_init); - fprintf(outfile, "# max. optim iter %d \n", maxoptimiter); - fprintf(outfile, "# gtol %1.e \n", gtol); - fprintf(outfile, "# max. ls iter %d \n", ls_maxiter); - fprintf(outfile, "# ls factor %f \n", ls_factor); - fprintf(outfile, "# weights_init %f \n", weights_init); - fprintf(outfile, "# weights_open_init %f \n", weights_open_init); - fprintf(outfile, "# weights_class_init %f \n", weights_class_init) ; - fprintf(outfile, "# hessianapprox_type %s \n", hessetypename); - fprintf(outfile, "# lbfgs_stages %d \n", lbfgs_stages); - fprintf(outfile, "# validationlevel %d \n", validationlevel); - fprintf(outfile, "\n"); - - return 0; +int Config::writeToFile(FILE *outfile) { + const char *activname, *networktypename, *hessetypename, *optimtypename, + *stepsizetypename; + + /* Get names of some int options */ + switch (activation) { + case TANH: + activname = "tanh"; + break; + case RELU: + activname = "ReLu"; + break; + case SMRELU: + activname = "SmoothReLU"; + break; + default: + activname = "invalid!"; + } + switch (network_type) { + case DENSE: + networktypename = "dense"; + break; + case CONVOLUTIONAL: + networktypename = "convolutional"; + break; + default: + networktypename = "invalid!"; + } + switch (hessianapprox_type) { + case BFGS_SERIAL: + hessetypename = "BFGS"; + break; + case LBFGS: + hessetypename = "L-BFGS"; + break; + case IDENTITY: + hessetypename = "Identity"; + break; + default: + hessetypename = "invalid!"; + } + switch (batch_type) { + case DETERMINISTIC: + optimtypename = "deterministic"; + break; + case STOCHASTIC: + optimtypename = "stochastic"; + break; + default: + optimtypename = "invalid!"; + } + switch (stepsize_type) { + case FIXED: + stepsizetypename = "fixed"; + break; + case BACKTRACKINGLS: + stepsizetypename = "backtracking line-search"; + break; + case ONEOVERK: + stepsizetypename = "1/k"; + break; + default: + stepsizetypename = "invalid!"; + } + + /* print config option */ + fprintf(outfile, "# Problem setup: datafolder %s \n", datafolder); + fprintf(outfile, "# training examples %s \n", ftrain_ex); + fprintf(outfile, "# training labels %s \n", + ftrain_labels); + fprintf(outfile, "# validation examples %s \n", fval_ex); + fprintf(outfile, "# validation labels %s \n", fval_labels); + fprintf(outfile, "# ntraining %d \n", ntraining); + fprintf(outfile, "# nvalidation %d \n", nvalidation); + fprintf(outfile, "# nfeatures %d \n", nfeatures); + fprintf(outfile, "# nclasses %d \n", nclasses); + fprintf(outfile, "# nchannels %d \n", nchannels); + fprintf(outfile, "# nlayers %d \n", nlayers); + fprintf(outfile, "# T %f \n", T); + fprintf(outfile, "# network type %s \n", + networktypename); + fprintf(outfile, "# Activation %s \n", activname); + fprintf(outfile, "# openlayer type %d \n", + openlayer_type); + fprintf(outfile, "# XBraid setup: max levels %d \n", + braid_maxlevels); + fprintf(outfile, "# min coarse %d \n", + braid_mincoarse); + fprintf(outfile, "# coasening %d \n", + braid_cfactor); + fprintf(outfile, "# coasening 
(level 0) %d \n", + braid_cfactor0); + fprintf(outfile, "# max. braid iter %d \n", + braid_maxiter); + fprintf(outfile, "# abs. tol %1.e \n", + braid_abstol); + fprintf(outfile, "# abs. toladj %1.e \n", + braid_abstoladj); + fprintf(outfile, "# print level %d \n", + braid_printlevel); + fprintf(outfile, "# access level %d \n", + braid_accesslevel); + fprintf(outfile, "# skip? %d \n", + braid_setskip); + fprintf(outfile, "# fmg? %d \n", braid_fmg); + fprintf(outfile, "# nrelax (level 0) %d \n", + braid_nrelax0); + fprintf(outfile, "# nrelax %d \n", braid_nrelax); + fprintf(outfile, "# Optimization: optimization type %s \n", + optimtypename); + fprintf(outfile, "# nbatch %d \n", nbatch); + fprintf(outfile, "# gamma_tik %1.e \n", gamma_tik); + fprintf(outfile, "# gamma_ddt %1.e \n", gamma_ddt); + fprintf(outfile, "# gamma_class %1.e \n", + gamma_class); + fprintf(outfile, "# stepsize type %s \n", + stepsizetypename); + fprintf(outfile, "# stepsize %f \n", + stepsize_init); + fprintf(outfile, "# max. optim iter %d \n", maxoptimiter); + fprintf(outfile, "# gtol %1.e \n", gtol); + fprintf(outfile, "# max. ls iter %d \n", ls_maxiter); + fprintf(outfile, "# ls factor %f \n", ls_factor); + fprintf(outfile, "# weights_init %f \n", weights_init); + fprintf(outfile, "# weights_open_init %f \n", + weights_open_init); + fprintf(outfile, "# weights_class_init %f \n", + weights_class_init); + fprintf(outfile, "# hessianapprox_type %s \n", + hessetypename); + fprintf(outfile, "# lbfgs_stages %d \n", lbfgs_stages); + fprintf(outfile, "# validationlevel %d \n", + validationlevel); + fprintf(outfile, "\n"); + + return 0; } - - -MyReal Config::getStepsize(int optimiter) -{ - MyReal stepsize = 0.0; - - switch (stepsize_type) - { - case FIXED: - stepsize = stepsize_init; - break; - case BACKTRACKINGLS: - stepsize = stepsize_init; - break; - case ONEOVERK: - stepsize = 1.0 / (MyReal) (optimiter+1); // add one because optimiter starts with 0 - } - - return stepsize; +MyReal Config::getStepsize(int optimiter) { + MyReal stepsize = 0.0; + + switch (stepsize_type) { + case FIXED: + stepsize = stepsize_init; + break; + case BACKTRACKINGLS: + stepsize = stepsize_init; + break; + case ONEOVERK: + stepsize = 1.0 / (MyReal)(optimiter + + 1); // add one because optimiter starts with 0 + } + + return stepsize; } diff --git a/src/dataset.cpp b/src/dataset.cpp index ac0a617..08adcf7 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -1,209 +1,197 @@ +// Copyright +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Underlying paper: +// +// Layer-Parallel Training of Deep Residual Neural Networks +// S. Guenther, L. Ruthotto, J.B. Schroder, E.C. Czr, and N.R. 
Gauger +// +// Download: https://arxiv.org/pdf/1812.04352.pdf +// #include "dataset.hpp" -DataSet::DataSet() -{ - nelements = 0; - nfeatures = 0; - nlabels = 0; - nbatch = 0; - MPIsize = 0; - MPIrank = 0; - navail = 0; - - examples = NULL; - labels = NULL; - batchIDs = NULL; - availIDs = NULL; +DataSet::DataSet() { + nelements = 0; + nfeatures = 0; + nlabels = 0; + nbatch = 0; + MPIsize = 0; + MPIrank = 0; + navail = 0; + + examples = NULL; + labels = NULL; + batchIDs = NULL; + availIDs = NULL; } -void DataSet::initialize(int nElements, - int nFeatures, - int nLabels, - int nBatch, - MPI_Comm comm) -{ - - - nelements = nElements; - nfeatures = nFeatures; - nlabels = nLabels; - nbatch = nBatch; - navail = nelements; - - MPI_Comm_rank(comm, &MPIrank); - MPI_Comm_size(comm, &MPIsize); - - /* Sanity check */ - if (nbatch > nelements) nbatch = nelements; - - /* Allocate feature vectors on first processor */ - if (MPIrank == 0) - { - examples = new MyReal*[nelements]; - for (int ielem = 0; ielem < nelements; ielem++) - { - examples[ielem] = new MyReal[nfeatures]; - } - } - /* Allocate label vectors on last processor */ - if (MPIrank == MPIsize - 1) - { - labels = new MyReal*[nelements]; - for (int ielem = 0; ielem < nelements; ielem++) - { - labels[ielem] = new MyReal[nlabels]; - } - } - - /* Allocate and initialize availIDs and batchIDs on first and last processor */ - if (MPIrank == 0 || MPIrank == MPIsize - 1) - { - availIDs = new int[nelements]; // all elements - batchIDs = new int[nbatch]; - - /* Initialize available ID with identity */ - for (int idx = 0; idx < nelements; idx++) - { - availIDs[idx] = idx; - } - - /* Initialize the batch with identity */ - for (int idx = 0; idx < nbatch; idx++) - { - batchIDs[idx] = idx; - } - } +void DataSet::initialize(int nElements, int nFeatures, int nLabels, int nBatch, + MPI_Comm comm) { + nelements = nElements; + nfeatures = nFeatures; + nlabels = nLabels; + nbatch = nBatch; + navail = nelements; + + MPI_Comm_rank(comm, &MPIrank); + MPI_Comm_size(comm, &MPIsize); + + /* Sanity check */ + if (nbatch > nelements) nbatch = nelements; + + /* Allocate feature vectors on first processor */ + if (MPIrank == 0) { + examples = new MyReal *[nelements]; + for (int ielem = 0; ielem < nelements; ielem++) { + examples[ielem] = new MyReal[nfeatures]; + } + } + /* Allocate label vectors on last processor */ + if (MPIrank == MPIsize - 1) { + labels = new MyReal *[nelements]; + for (int ielem = 0; ielem < nelements; ielem++) { + labels[ielem] = new MyReal[nlabels]; + } + } + + /* Allocate and initialize availIDs and batchIDs on first and last processor + */ + if (MPIrank == 0 || MPIrank == MPIsize - 1) { + availIDs = new int[nelements]; // all elements + batchIDs = new int[nbatch]; + + /* Initialize available ID with identity */ + for (int idx = 0; idx < nelements; idx++) { + availIDs[idx] = idx; + } + + /* Initialize the batch with identity */ + for (int idx = 0; idx < nbatch; idx++) { + batchIDs[idx] = idx; + } + } } - -DataSet::~DataSet() -{ - /* Deallocate feature vectors on first processor */ - if (examples != NULL) - { - for (int ielem = 0; ielem < nelements; ielem++) - { - delete [] examples[ielem]; - } - delete [] examples; - } - - /* Deallocate label vectors on last processor */ - if (labels != NULL) - { - for (int ielem = 0; ielem < nelements; ielem++) - { - delete [] labels[ielem]; - } - delete [] labels; - } - - if (availIDs != NULL) delete [] availIDs; - if (batchIDs != NULL) delete [] batchIDs; +DataSet::~DataSet() { + /* Deallocate feature vectors on 
first processor */ + if (examples != NULL) { + for (int ielem = 0; ielem < nelements; ielem++) { + delete[] examples[ielem]; + } + delete[] examples; + } + + /* Deallocate label vectors on last processor */ + if (labels != NULL) { + for (int ielem = 0; ielem < nelements; ielem++) { + delete[] labels[ielem]; + } + delete[] labels; + } + + if (availIDs != NULL) delete[] availIDs; + if (batchIDs != NULL) delete[] batchIDs; } - int DataSet::getnBatch() { return nbatch; } -MyReal* DataSet::getExample(int id) -{ - if (examples == NULL) return NULL; +MyReal *DataSet::getExample(int id) { + if (examples == NULL) return NULL; - return examples[batchIDs[id]]; + return examples[batchIDs[id]]; } -MyReal* DataSet::getLabel(int id) -{ - if (labels == NULL) return NULL; - - return labels[batchIDs[id]]; +MyReal *DataSet::getLabel(int id) { + if (labels == NULL) return NULL; + + return labels[batchIDs[id]]; } -void DataSet::readData(const char* datafolder, - const char* examplefile, - const char* labelfile) -{ - char examplefilename[255], labelfilename[255]; +void DataSet::readData(const char *datafolder, const char *examplefile, + const char *labelfile) { + char examplefilename[255], labelfilename[255]; - /* Set the file names */ - sprintf(examplefilename, "%s/%s", datafolder, examplefile); - sprintf(labelfilename, "%s/%s", datafolder, labelfile); + /* Set the file names */ + sprintf(examplefilename, "%s/%s", datafolder, examplefile); + sprintf(labelfilename, "%s/%s", datafolder, labelfile); - /* Read feature vectors on first processor */ - if (MPIrank == 0) read_matrix(examplefilename, examples, nelements, nfeatures); + /* Read feature vectors on first processor */ + if (MPIrank == 0) + read_matrix(examplefilename, examples, nelements, nfeatures); - /* Read label vectors on last processor) */ - if (MPIrank == MPIsize - 1) read_matrix(labelfilename, labels, nelements, nlabels); + /* Read label vectors on last processor) */ + if (MPIrank == MPIsize - 1) + read_matrix(labelfilename, labels, nelements, nlabels); } +void DataSet::selectBatch(int batch_type, MPI_Comm comm) { + int irand, rand_range; + int tmp; + MPI_Request sendreq, recvreq; + MPI_Status status; + + switch (batch_type) { + case DETERMINISTIC: + /* Do nothing, keep the batch fixed. 
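         In this deterministic mode the batchIDs set up in initialize() (the
         identity 0,...,nbatch-1) are simply reused in every call. In the
         stochastic branch below, the first rank instead draws nbatch indices
         from availIDs without replacement: each drawn entry is swapped to the
         end of the currently available range and the range is shrunk by one.
         The resulting batchIDs are then sent to the last rank, so that the
         feature vectors (held on the first rank) and the labels (held on the
         last rank) refer to the same examples.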
*/ + break; + + case STOCHASTIC: + + /* Randomly choose a batch on first processor, send to last processor */ + if (MPIrank == 0) { + /* Fill the batchID vector with randomly generated integer */ + rand_range = navail - 1; + for (int ibatch = 0; ibatch < nbatch; ibatch++) { + /* Generate a new random index in [0,range] */ + irand = (int)((((double)rand()) / (double)RAND_MAX) * rand_range); + + /* Set the batchID */ + batchIDs[ibatch] = availIDs[irand]; + + /* Remove the ID from available IDs (by swapping it with the last + * available id and reducing the range) */ + tmp = availIDs[irand]; + availIDs[irand] = availIDs[rand_range]; + availIDs[rand_range] = tmp; + rand_range--; + } + + /* Send to the last processor */ + int receiver = MPIsize - 1; + MPI_Isend(batchIDs, nbatch, MPI_INT, receiver, 0, comm, &sendreq); + } + + /* Receive the batch IDs on last processor */ + if (MPIrank == MPIsize - 1) { + int source = 0; + MPI_Irecv(batchIDs, nbatch, MPI_INT, source, 0, comm, &recvreq); + } + /* Wait to finish communication */ + if (MPIrank == 0) MPI_Wait(&sendreq, &status); + if (MPIrank == MPIsize - 1) MPI_Wait(&recvreq, &status); -void DataSet::selectBatch(int batch_type, - MPI_Comm comm) -{ - int irand, rand_range; - int tmp; - MPI_Request sendreq, recvreq; - MPI_Status status; - - switch (batch_type) - { - case DETERMINISTIC: - /* Do nothing, keep the batch fixed. */ - break; - - case STOCHASTIC: - - /* Randomly choose a batch on first processor, send to last processor */ - if (MPIrank == 0) - { - /* Fill the batchID vector with randomly generated integer */ - rand_range = navail - 1; - for (int ibatch = 0; ibatch < nbatch; ibatch++) - { - /* Generate a new random index in [0,range] */ - irand = (int) ( ( ((double) rand()) / (double) RAND_MAX ) * rand_range ); - - /* Set the batchID */ - batchIDs[ibatch] = availIDs[irand]; - - /* Remove the ID from available IDs (by swapping it with the last available id and reducing the range) */ - tmp = availIDs[irand]; - availIDs[irand] = availIDs[rand_range]; - availIDs[rand_range] = tmp; - rand_range--; - } - - /* Send to the last processor */ - int receiver = MPIsize - 1; - MPI_Isend(batchIDs, nbatch, MPI_INT, receiver, 0, comm, &sendreq); - } - - /* Receive the batch IDs on last processor */ - if (MPIrank == MPIsize - 1) - { - int source = 0; - MPI_Irecv(batchIDs, nbatch, MPI_INT, source, 0, comm, &recvreq); - } - - /* Wait to finish communication */ - if (MPIrank == 0) MPI_Wait(&sendreq, &status); - if (MPIrank == MPIsize-1) MPI_Wait(&recvreq, &status); - - - break; // break switch statement - } + break; // break switch statement + } } - -void DataSet::printBatch() -{ - if (batchIDs != NULL) // only first and last processor - { - printf("%d:\n", MPIrank); - for (int ibatch = 0; ibatch < nbatch; ibatch++) - { - printf("%d, %04d\n", ibatch, batchIDs[ibatch]); - } - } +void DataSet::printBatch() { + if (batchIDs != NULL) // only first and last processor + { + printf("%d:\n", MPIrank); + for (int ibatch = 0; ibatch < nbatch; ibatch++) { + printf("%d, %04d\n", ibatch, batchIDs[ibatch]); + } + } } diff --git a/src/hessianApprox.cpp b/src/hessianApprox.cpp index 97ab982..7c04961 100644 --- a/src/hessianApprox.cpp +++ b/src/hessianApprox.cpp @@ -1,326 +1,279 @@ +// Copyright +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Underlying paper: +// +// Layer-Parallel Training of Deep Residual Neural Networks +// S. Guenther, L. Ruthotto, J.B. Schroder, E.C. Czr, and N.R. Gauger +// +// Download: https://arxiv.org/pdf/1812.04352.pdf +// #include "hessianApprox.hpp" -HessianApprox::HessianApprox(MPI_Comm comm) -{ - dimN = 0; +HessianApprox::HessianApprox(MPI_Comm comm) { + dimN = 0; MPIcomm = comm; } -HessianApprox::~HessianApprox(){} - - -L_BFGS::L_BFGS(MPI_Comm comm, int N, int stages) : HessianApprox(comm) -{ - dimN = N; - M = stages; - H0 = 1.0; - - /* Allocate memory for sk and yk for all stages */ - s = new MyReal*[M]; - y = new MyReal*[M]; - for (int imem = 0; imem < M; imem++) - { - s[imem] = new MyReal[dimN]; - y[imem] = new MyReal[dimN]; - for (int i = 0; i < dimN; i++) - { - s[imem][i] = 0.0; - y[imem][i] = 0.0; - } - } - - /* Allocate memory for rho's values */ - rho = new MyReal[M]; - for (int i = 0; i < M; i++) - { - rho[i] = 0.0; - } - - /* Allocate memory for storing design at previous iteration */ - design_old = new MyReal[dimN]; - gradient_old = new MyReal[dimN]; +HessianApprox::~HessianApprox() {} + +L_BFGS::L_BFGS(MPI_Comm comm, int N, int stages) : HessianApprox(comm) { + dimN = N; + M = stages; + H0 = 1.0; + + /* Allocate memory for sk and yk for all stages */ + s = new MyReal *[M]; + y = new MyReal *[M]; + for (int imem = 0; imem < M; imem++) { + s[imem] = new MyReal[dimN]; + y[imem] = new MyReal[dimN]; + for (int i = 0; i < dimN; i++) { + s[imem][i] = 0.0; + y[imem][i] = 0.0; + } + } + + /* Allocate memory for rho's values */ + rho = new MyReal[M]; + for (int i = 0; i < M; i++) { + rho[i] = 0.0; + } + + /* Allocate memory for storing design at previous iteration */ + design_old = new MyReal[dimN]; + gradient_old = new MyReal[dimN]; } +L_BFGS::~L_BFGS() { + /* Deallocate memory */ + delete[] rho; + for (int imem = 0; imem < M; imem++) { + delete[] s[imem]; + delete[] y[imem]; + } + delete[] s; + delete[] y; -L_BFGS::~L_BFGS() -{ - /* Deallocate memory */ - delete [] rho; - for (int imem = 0; imem < M; imem++) - { - delete [] s[imem]; - delete [] y[imem]; - } - delete [] s; - delete [] y; - - delete [] design_old; - delete [] gradient_old; + delete[] design_old; + delete[] gradient_old; } +void L_BFGS::computeAscentDir(int iter, MyReal *gradient, MyReal *ascentdir) { + int imemory; + MyReal beta; + MyReal *alpha = new MyReal[M]; + int imax, imin; + /* Initialize the ascentdir with steepest descent */ + for (int idir = 0; idir < dimN; idir++) { + ascentdir[idir] = gradient[idir]; + } + /* Set range of the two-loop recursion */ + imax = iter - 1; + if (iter < M) { + imin = 0; + } else { + imin = iter - M; + } -void L_BFGS::computeAscentDir(int iter, - MyReal* gradient, - MyReal* ascentdir) -{ - int imemory; - MyReal beta; - MyReal* alpha = new MyReal[M]; - int imax, imin; - - - /* Initialize the ascentdir with steepest descent */ - for (int idir = 0; idir < dimN; idir++) - { - ascentdir[idir] = gradient[idir]; - } - - - /* Set range of the two-loop recursion */ - imax = iter-1; - if (iter < M) - { - imin = 0; - } - else - { - imin = iter - M; - } - - /* Loop backwards 
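   (newest to oldest memory entry; together with the H0 scaling and the
   forward loop further below this is the standard L-BFGS two-loop recursion:
   with s_k = design_{k+1} - design_k, y_k = gradient_{k+1} - gradient_k,
   rho_k = 1/(y_k^T s_k) and initial scaling H0 = (y^T s)/(y^T y), all
   maintained in updateMemory, it turns the steepest-descent direction that
   ascentdir was initialized with into an approximate inverse-Hessian times
   gradient direction)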
through lbfgs memory */ - for (int i = imax; i >= imin; i--) - { - imemory = i % M; - /* Compute alpha */ - alpha[imemory] = rho[imemory] * vecdot_par(dimN, s[imemory], ascentdir, MPIcomm); - /* Update the ascentdir */ - for (int idir = 0; idir < dimN; idir++) - { - ascentdir[idir] -= alpha[imemory] * y[imemory][idir]; - } - } + /* Loop backwards through lbfgs memory */ + for (int i = imax; i >= imin; i--) { + imemory = i % M; + /* Compute alpha */ + alpha[imemory] = + rho[imemory] * vecdot_par(dimN, s[imemory], ascentdir, MPIcomm); + /* Update the ascentdir */ + for (int idir = 0; idir < dimN; idir++) { + ascentdir[idir] -= alpha[imemory] * y[imemory][idir]; + } + } - /* scale the ascentdir by H0 */ - for (int idir = 0; idir < dimN; idir++) - { - ascentdir[idir] *= H0; - } + /* scale the ascentdir by H0 */ + for (int idir = 0; idir < dimN; idir++) { + ascentdir[idir] *= H0; + } /* loop forwards through the l-bfgs memory */ - for (int i = imin; i <= imax; i++) - { + for (int i = imin; i <= imax; i++) { imemory = i % M; /* Compute beta */ beta = rho[imemory] * vecdot_par(dimN, y[imemory], ascentdir, MPIcomm); /* Update the ascentdir */ - for (int idir = 0; idir < dimN; idir++) - { + for (int idir = 0; idir < dimN; idir++) { ascentdir[idir] += s[imemory][idir] * (alpha[imemory] - beta); } } - delete [] alpha; - + delete[] alpha; } +void L_BFGS::updateMemory(int iter, MyReal *design, MyReal *gradient) { + /* Update lbfgs memory only if iter > 0 */ + if (iter > 0) { + MyReal yTy, yTs; + /* Get storing state */ + int imemory = (iter - 1) % M; -void L_BFGS::updateMemory(int iter, - MyReal* design, - MyReal* gradient) -{ - - /* Update lbfgs memory only if iter > 0 */ - if (iter > 0) - { - MyReal yTy, yTs; - - /* Get storing state */ - int imemory = (iter-1) % M ; - - /* Update BFGS memory for s, y */ - for (int idir = 0; idir < dimN; idir++) - { - y[imemory][idir] = gradient[idir] - gradient_old[idir]; - s[imemory][idir] = design[idir] - design_old[idir]; - } - - /* Update rho and H0 */ - yTs = vecdot_par(dimN, y[imemory], s[imemory], MPIcomm); - yTy = vecdot_par(dimN, y[imemory], y[imemory], MPIcomm); - if (yTs == 0.0) - { - printf(" Warning: resetting yTs to 1.\n"); - yTs = 1.0; - } - if (yTy == 0.0) - { - printf(" Warning: resetting yTy to 1.\n"); - yTy = 1.; - } - rho[imemory] = 1. / yTs; - H0 = yTs / yTy; + /* Update BFGS memory for s, y */ + for (int idir = 0; idir < dimN; idir++) { + y[imemory][idir] = gradient[idir] - gradient_old[idir]; + s[imemory][idir] = design[idir] - design_old[idir]; + } + /* Update rho and H0 */ + yTs = vecdot_par(dimN, y[imemory], s[imemory], MPIcomm); + yTy = vecdot_par(dimN, y[imemory], y[imemory], MPIcomm); + if (yTs == 0.0) { + printf(" Warning: resetting yTs to 1.\n"); + yTs = 1.0; + } + if (yTy == 0.0) { + printf(" Warning: resetting yTy to 1.\n"); + yTy = 1.; + } + rho[imemory] = 1. 
/ yTs; + H0 = yTs / yTy; } - /* Update old design and gradient */ - vec_copy(dimN, design, design_old); - vec_copy(dimN, gradient, gradient_old); + /* Update old design and gradient */ + vec_copy(dimN, design, design_old); + vec_copy(dimN, gradient, gradient_old); } +BFGS::BFGS(MPI_Comm comm, int N) : HessianApprox(comm) { + dimN = N; + Hessian = new MyReal[N * N]; + setIdentity(); -BFGS::BFGS(MPI_Comm comm, int N) : HessianApprox(comm) -{ - dimN = N; - - Hessian = new MyReal[N*N]; - setIdentity(); - - y = new MyReal[N]; - s = new MyReal[N]; + y = new MyReal[N]; + s = new MyReal[N]; - Hy = new MyReal[N]; - A = new MyReal[N*N]; - B = new MyReal[N*N]; + Hy = new MyReal[N]; + A = new MyReal[N * N]; + B = new MyReal[N * N]; - /* Allocate memory for storing design at previous iteration */ - design_old = new MyReal[dimN]; - gradient_old = new MyReal[dimN]; + /* Allocate memory for storing design at previous iteration */ + design_old = new MyReal[dimN]; + gradient_old = new MyReal[dimN]; - /* Sanity check */ - int size; - MPI_Comm_size(MPIcomm, &size); - if (size > 1) printf("\n\n WARNING: Parallel BFGS not implemented.\n BFGS updates will be LOCAL to each processor -> block-BFGS. \n\n"); + /* Sanity check */ + int size; + MPI_Comm_size(MPIcomm, &size); + if (size > 1) + printf( + "\n\n WARNING: Parallel BFGS not implemented.\n BFGS updates will " + "be LOCAL to each processor -> block-BFGS. \n\n"); } -void BFGS::setIdentity() -{ - for (int i = 0; i -#include +// Copyright +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Underlying paper: +// +// Layer-Parallel Training of Deep Residual Neural Networks +// S. Guenther, L. Ruthotto, J.B. Schroder, E.C. Czr, and N.R. 
Gauger +// +// Download: https://arxiv.org/pdf/1812.04352.pdf +// #include "layer.hpp" +#include +#include #include -Layer::Layer() -{ - dim_In = 0; - dim_Out = 0; - dim_Bias = 0; - ndesign = 0; - nweights = 0; - nconv = 0; - csize = 0; - - index = 0; - dt = 0.0; - activ = -1; - weights = NULL; - weights_bar = NULL; - bias = NULL; - bias_bar = NULL; - gamma_tik = 0.0; - gamma_ddt = 0.0; - update = NULL; - update_bar = NULL; +Layer::Layer() { + dim_In = 0; + dim_Out = 0; + dim_Bias = 0; + ndesign = 0; + nweights = 0; + nconv = 0; + csize = 0; + + index = 0; + dt = 0.0; + activ = -1; + weights = NULL; + weights_bar = NULL; + bias = NULL; + bias_bar = NULL; + gamma_tik = 0.0; + gamma_ddt = 0.0; + update = NULL; + update_bar = NULL; } -Layer::Layer(int idx, - int Type, - int dimI, - int dimO, - int dimB, - int dimW, - MyReal deltaT, - int Activ, - MyReal gammatik, - MyReal gammaddt) : Layer() -{ - index = idx; - type = Type; - dim_In = dimI; - dim_Out = dimO; - dim_Bias = dimB; - ndesign = dimW + dimB; - nweights = dimW; - dt = deltaT; - activ = Activ; - gamma_tik = gammatik; - gamma_ddt = gammaddt; - - update = new MyReal[dimO]; - update_bar = new MyReal[dimO]; -} - -Layer::~Layer() -{ - delete [] update; - delete [] update_bar; +Layer::Layer(int idx, int Type, int dimI, int dimO, int dimB, int dimW, + MyReal deltaT, int Activ, MyReal gammatik, MyReal gammaddt) + : Layer() { + index = idx; + type = Type; + dim_In = dimI; + dim_Out = dimO; + dim_Bias = dimB; + ndesign = dimW + dimB; + nweights = dimW; + dt = deltaT; + activ = Activ; + gamma_tik = gammatik; + gamma_ddt = gammaddt; + + update = new MyReal[dimO]; + update_bar = new MyReal[dimO]; } +Layer::~Layer() { + delete[] update; + delete[] update_bar; +} void Layer::setDt(MyReal DT) { dt = DT; } MyReal Layer::getDt() { return dt; } -void Layer::setMemory(MyReal* design_memloc, - MyReal* gradient_memloc) -{ - /* Set design and gradient memory locations */ - weights = design_memloc; - weights_bar = gradient_memloc; - - /* Bias memory locations is a shift by number of weights */ - bias = design_memloc + nweights; - bias_bar = gradient_memloc + nweights; -} +void Layer::setMemory(MyReal *design_memloc, MyReal *gradient_memloc) { + /* Set design and gradient memory locations */ + weights = design_memloc; + weights_bar = gradient_memloc; + /* Bias memory locations is a shift by number of weights */ + bias = design_memloc + nweights; + bias_bar = gradient_memloc + nweights; +} MyReal Layer::getGammaTik() { return gamma_tik; } @@ -86,14 +93,14 @@ int Layer::getActivation() { return activ; } int Layer::getType() { return type; } -MyReal* Layer::getWeights() { return weights; } -MyReal* Layer::getBias() { return bias; } +MyReal *Layer::getWeights() { return weights; } +MyReal *Layer::getBias() { return bias; } -MyReal* Layer::getWeightsBar() { return weights_bar; } -MyReal* Layer::getBiasBar() { return bias_bar; } +MyReal *Layer::getWeightsBar() { return weights_bar; } +MyReal *Layer::getBiasBar() { return bias_bar; } -int Layer::getDimIn() { return dim_In; } -int Layer::getDimOut() { return dim_Out; } +int Layer::getDimIn() { return dim_In; } +int Layer::getDimOut() { return dim_Out; } int Layer::getDimBias() { return dim_Bias; } int Layer::getnWeights() { return nweights; } int Layer::getnDesign() { return ndesign; } @@ -103,839 +110,658 @@ int Layer::getCSize() { return csize; } int Layer::getIndex() { return index; } -void Layer::print_data(MyReal* data) -{ - printf("DATA: "); - for (int io = 0; io < dim_Out; io++) - { - printf("%1.14e ", 
data[io]); - } - printf("\n"); +void Layer::print_data(MyReal *data) { + printf("DATA: "); + for (int io = 0; io < dim_Out; io++) { + printf("%1.14e ", data[io]); + } + printf("\n"); } - -MyReal Layer::activation(MyReal x) -{ - MyReal y; - switch ( activ ) - { - case TANH: - y = Layer::tanh_act(x); - break; - case RELU: - y = Layer::ReLu_act(x); - break; - case SMRELU: - y = Layer::SmoothReLu_act(x); - break; - default: - y = -1000000.0; - printf("ERROR: You should specify an activation function!\n"); - printf("GO HOME AND GET SOME SLEEP!"); - break; - } - return y; +MyReal Layer::activation(MyReal x) { + MyReal y; + switch (activ) { + case TANH: + y = Layer::tanh_act(x); + break; + case RELU: + y = Layer::ReLu_act(x); + break; + case SMRELU: + y = Layer::SmoothReLu_act(x); + break; + default: + y = -1000000.0; + printf("ERROR: You should specify an activation function!\n"); + printf("GO HOME AND GET SOME SLEEP!"); + break; + } + return y; } -MyReal Layer::dactivation(MyReal x) -{ - MyReal y; - switch ( activ) - { - case TANH: - y = Layer::dtanh_act(x); - break; - case RELU: - y = Layer::dReLu_act(x); - break; - case SMRELU: - y = Layer::dSmoothReLu_act(x); - break; - default: - y = -1000000.0; - printf("ERROR: You should specify an activation function!\n"); - printf("GO HOME AND GET SOME SLEEP!"); - break; - } - return y; +MyReal Layer::dactivation(MyReal x) { + MyReal y; + switch (activ) { + case TANH: + y = Layer::dtanh_act(x); + break; + case RELU: + y = Layer::dReLu_act(x); + break; + case SMRELU: + y = Layer::dSmoothReLu_act(x); + break; + default: + y = -1000000.0; + printf("ERROR: You should specify an activation function!\n"); + printf("GO HOME AND GET SOME SLEEP!"); + break; + } + return y; +} +void Layer::packDesign(MyReal *buffer, int size) { + int nweights = getnWeights(); + int nbias = getDimBias(); + int idx = 0; + for (int i = 0; i < nweights; i++) { + buffer[idx] = getWeights()[i]; + idx++; + } + for (int i = 0; i < nbias; i++) { + buffer[idx] = getBias()[i]; + idx++; + } + /* Set the rest to zero */ + for (int i = idx; i < size; i++) { + buffer[idx] = 0.0; + idx++; + } } +void Layer::unpackDesign(MyReal *buffer) { + int nweights = getnWeights(); + int nbias = getDimBias(); -void Layer::packDesign(MyReal* buffer, - int size) -{ - int nweights = getnWeights(); - int nbias = getDimBias(); - int idx = 0; - for (int i = 0; i < nweights; i++) - { - buffer[idx] = getWeights()[i]; idx++; - } - for (int i = 0; i < nbias; i++) - { - buffer[idx] = getBias()[i]; idx++; - } - /* Set the rest to zero */ - for (int i = idx; i < size; i++) - { - buffer[idx] = 0.0; idx++; - } + int idx = 0; + for (int i = 0; i < nweights; i++) { + getWeights()[i] = buffer[idx]; + idx++; + } + for (int i = 0; i < nbias; i++) { + getBias()[i] = buffer[idx]; + idx++; + } } -void Layer::unpackDesign(MyReal* buffer) -{ - int nweights = getnWeights(); - int nbias = getDimBias(); +void Layer::scaleDesign(MyReal factor) { + /* Scale design by a factor */ + for (int i = 0; i < nweights; i++) { + weights[i] = factor * weights[i]; + } + for (int i = 0; i < dim_Bias; i++) { + bias[i] = factor * bias[i]; + } - int idx = 0; - for (int i = 0; i < nweights; i++) - { - getWeights()[i] = buffer[idx]; idx++; - } - for (int i = 0; i < nbias; i++) - { - getBias()[i] = buffer[idx]; idx++; - } + /* Reset the gradient */ + resetBar(); } -void Layer::scaleDesign(MyReal factor) -{ - /* Scale design by a factor */ - for (int i = 0; i < nweights; i++) - { - weights[i] = factor * weights[i]; - } - for (int i = 0; i < dim_Bias; i++) - 
{ - bias[i] = factor * bias[i]; - } +void Layer::resetBar() { + for (int i = 0; i < nweights; i++) { + weights_bar[i] = 0.0; + } + for (int i = 0; i < dim_Bias; i++) { + bias_bar[i] = 0.0; + } +} - /* Reset the gradient */ - resetBar(); -} +MyReal Layer::evalTikh() { + MyReal tik = 0.0; + for (int i = 0; i < nweights; i++) { + tik += pow(weights[i], 2); + } + for (int i = 0; i < dim_Bias; i++) { + tik += pow(bias[i], 2); + } -void Layer::resetBar() -{ - for (int i = 0; i < nweights; i++) - { - weights_bar[i] = 0.0; - } - for (int i = 0; i < dim_Bias; i++) - { - bias_bar[i] = 0.0; - } + return gamma_tik / 2.0 * tik; } +void Layer::evalTikh_diff(MyReal regul_bar) { + regul_bar = gamma_tik * regul_bar; -MyReal Layer::evalTikh() -{ - MyReal tik = 0.0; - for (int i = 0; i < nweights; i++) - { - tik += pow(weights[i],2); + /* Derivative bias term */ + for (int i = 0; i < dim_Bias; i++) { + bias_bar[i] += bias[i] * regul_bar; + } + for (int i = 0; i < nweights; i++) { + weights_bar[i] += weights[i] * regul_bar; + } +} + +MyReal Layer::evalRegulDDT(Layer *layer_prev, MyReal deltat) { + if (layer_prev == NULL) return 0.0; // this holds for opening layer + + MyReal diff; + MyReal regul_ddt = 0.0; + + /* Compute ddt-regularization only if dimensions match */ + /* this excludes first intermediate layer and classification layer. */ + if (layer_prev->getnDesign() == ndesign && layer_prev->getDimIn() == dim_In && + layer_prev->getDimOut() == dim_Out && + layer_prev->getDimBias() == dim_Bias && + layer_prev->getnWeights() == nweights) { + for (int iw = 0; iw < nweights; iw++) { + diff = (getWeights()[iw] - layer_prev->getWeights()[iw]) / deltat; + regul_ddt += pow(diff, 2); } - for (int i = 0; i < dim_Bias; i++) - { - tik += pow(bias[i],2); + for (int ib = 0; ib < dim_Bias; ib++) { + diff = (getBias()[ib] - layer_prev->getBias()[ib]) / deltat; + regul_ddt += pow(diff, 2); } + regul_ddt = gamma_ddt / 2.0 * regul_ddt; + } - return gamma_tik / 2.0 * tik; + return regul_ddt; } -void Layer::evalTikh_diff(MyReal regul_bar) -{ - regul_bar = gamma_tik * regul_bar; +void Layer::evalRegulDDT_diff(Layer *layer_prev, Layer *layer_next, + MyReal deltat) { + if (layer_prev == NULL) return; + if (layer_next == NULL) return; + + MyReal diff; + int regul_bar = gamma_ddt / (deltat * deltat); + + /* Left sided derivative term */ + if (layer_prev->getnDesign() == ndesign && layer_prev->getDimIn() == dim_In && + layer_prev->getDimOut() == dim_Out && + layer_prev->getDimBias() == dim_Bias && + layer_prev->getnWeights() == nweights) { + for (int ib = 0; ib < dim_Bias; ib++) { + diff = getBias()[ib] - layer_prev->getBias()[ib]; + getBiasBar()[ib] += diff * regul_bar; + } - /* Derivative bias term */ - for (int i = 0; i < dim_Bias; i++) - { - bias_bar[i] += bias[i] * regul_bar; + for (int iw = 0; iw < nweights; iw++) { + diff = getWeights()[iw] - layer_prev->getWeights()[iw]; + getWeightsBar()[iw] += diff * regul_bar; } - for (int i = 0; i < nweights; i++) - { - weights_bar[i] += weights[i] * regul_bar; + } + + /* Right sided derivative term */ + if (layer_next->getnDesign() == ndesign && layer_next->getDimIn() == dim_In && + layer_next->getDimOut() == dim_Out && + layer_next->getDimBias() == dim_Bias && + layer_next->getnWeights() == nweights) { + for (int ib = 0; ib < dim_Bias; ib++) { + diff = getBias()[ib] - layer_next->getBias()[ib]; + getBiasBar()[ib] += diff * regul_bar; + } + + for (int iw = 0; iw < nweights; iw++) { + diff = getWeights()[iw] - layer_next->getWeights()[iw]; + getWeightsBar()[iw] += diff * regul_bar; } + } 
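  // For reference: evalRegulDDT above implements the time-smoothness penalty
  //     gamma_ddt / 2 * sum_i ( (theta_i - theta_i^{previous layer}) / deltat )^2
  // over weights and bias (only when the neighbouring layer has matching
  // dimensions), and evalRegulDDT_diff accumulates its derivative coming from
  // both the left and the right neighbour term into weights_bar and bias_bar.
  // Note that regul_bar is declared as int here, so the real-valued factor
  // gamma_ddt / (deltat * deltat) is truncated to an integer (and hence to
  // zero whenever it is smaller than one).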
} +void Layer::setExample(MyReal *example_ptr) {} -MyReal Layer::evalRegulDDT(Layer* layer_prev, - MyReal deltat) -{ - if (layer_prev == NULL) return 0.0; // this holds for opening layer - - MyReal diff; - MyReal regul_ddt = 0.0; - - /* Compute ddt-regularization only if dimensions match */ - /* this excludes first intermediate layer and classification layer. */ - if (layer_prev->getnDesign() == ndesign && - layer_prev->getDimIn() == dim_In && - layer_prev->getDimOut() == dim_Out && - layer_prev->getDimBias() == dim_Bias && - layer_prev->getnWeights() == nweights ) - { - for (int iw = 0; iw < nweights; iw++) - { - diff = (getWeights()[iw] - layer_prev->getWeights()[iw]) / deltat; - regul_ddt += pow(diff,2); - } - for (int ib = 0; ib < dim_Bias; ib++) - { - diff = (getBias()[ib] - layer_prev->getBias()[ib]) / deltat; - regul_ddt += pow(diff,2); - } - regul_ddt = gamma_ddt / 2.0 * regul_ddt; - } +void Layer::setLabel(MyReal *example_ptr) {} - return regul_ddt; -} +DenseLayer::DenseLayer(int idx, int dimI, int dimO, MyReal deltaT, int Activ, + MyReal gammatik, MyReal gammaddt) + : Layer(idx, DENSE, dimI, dimO, 1, dimI * dimO, deltaT, Activ, gammatik, + gammaddt) {} -void Layer::evalRegulDDT_diff(Layer* layer_prev, - Layer* layer_next, - MyReal deltat) -{ +DenseLayer::~DenseLayer() {} - if (layer_prev == NULL) return; - if (layer_next == NULL) return; - - MyReal diff; - int regul_bar = gamma_ddt / (deltat*deltat); - - /* Left sided derivative term */ - if (layer_prev->getnDesign() == ndesign && - layer_prev->getDimIn() == dim_In && - layer_prev->getDimOut() == dim_Out && - layer_prev->getDimBias() == dim_Bias && - layer_prev->getnWeights() == nweights ) - { - for (int ib = 0; ib < dim_Bias ; ib++) - { - diff = getBias()[ib] - layer_prev->getBias()[ib]; - getBiasBar()[ib] += diff * regul_bar; - } +void DenseLayer::applyFWD(MyReal *state) { + /* Affine transformation */ + for (int io = 0; io < dim_Out; io++) { + /* Apply weights */ + update[io] = vecdot(dim_In, &(weights[io * dim_In]), state); - for (int iw = 0; iw < nweights; iw++) - { - diff = getWeights()[iw] - layer_prev->getWeights()[iw]; - getWeightsBar()[iw] += diff * regul_bar; - } - } + /* Add bias */ + update[io] += bias[0]; + } - /* Right sided derivative term */ - if (layer_next->getnDesign() == ndesign && - layer_next->getDimIn() == dim_In && - layer_next->getDimOut() == dim_Out && - layer_next->getDimBias() == dim_Bias && - layer_next->getnWeights() == nweights ) - { - for (int ib = 0; ib < dim_Bias; ib++) - { - diff = getBias()[ib] - layer_next->getBias()[ib]; - getBiasBar()[ib] += diff * regul_bar; - } + /* Apply step */ + for (int io = 0; io < dim_Out; io++) { + state[io] = state[io] + dt * activation(update[io]); + } +} - for (int iw = 0; iw < nweights; iw++) - { - diff = getWeights()[iw] - layer_next->getWeights()[iw]; - getWeightsBar()[iw] += diff * regul_bar; - } - } -} +void DenseLayer::applyBWD(MyReal *state, MyReal *state_bar, + int compute_gradient) { + /* state_bar is the adjoint of the state variable, it contains the + old time adjoint informationk, and is modified on the way out to + contain the update. 
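     For orientation: applyFWD above realizes one forward-Euler step of the
     residual network, state <- state + dt * sigma(W * state + b), where sigma
     is the chosen activation. The adjoint below therefore recomputes the
     affine part, sets update_bar = dt * sigma'(W * state + b) * state_bar,
     and then accumulates bias_bar and weights_bar while adding
     W^T * update_bar to state_bar; the incoming state_bar itself is kept,
     which accounts for the identity part of the residual step.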
*/ + /* Derivative of the step */ + for (int io = 0; io < dim_Out; io++) { + /* Recompute affine transformation */ + update[io] = vecdot(dim_In, &(weights[io * dim_In]), state); + update[io] += bias[0]; + /* Derivative: This is the update from old time */ + update_bar[io] = dt * dactivation(update[io]) * state_bar[io]; + } + /* Derivative of linear transformation */ + for (int io = 0; io < dim_Out; io++) { + /* Derivative of bias addition */ + if (compute_gradient) bias_bar[0] += update_bar[io]; -void Layer::setExample(MyReal* example_ptr) {} + /* Derivative of weight application */ + for (int ii = 0; ii < dim_In; ii++) { + if (compute_gradient) + weights_bar[io * dim_In + ii] += state[ii] * update_bar[io]; + state_bar[ii] += weights[io * dim_In + ii] * update_bar[io]; + } + } +} -void Layer::setLabel(MyReal* example_ptr) {} +OpenDenseLayer::OpenDenseLayer(int dimI, int dimO, int Activ, MyReal gammatik) + : DenseLayer(-1, dimI, dimO, 1.0, Activ, gammatik, 0.0) { + type = OPENDENSE; + example = NULL; +} +OpenDenseLayer::~OpenDenseLayer() {} -DenseLayer::DenseLayer(int idx, - int dimI, - int dimO, - MyReal deltaT, - int Activ, - MyReal gammatik, - MyReal gammaddt) : Layer(idx, DENSE, dimI, dimO, 1, dimI*dimO, deltaT, Activ, gammatik, gammaddt) -{} - -DenseLayer::~DenseLayer() {} +void OpenDenseLayer::setExample(MyReal *example_ptr) { example = example_ptr; } +void OpenDenseLayer::applyFWD(MyReal *state) { + /* affine transformation */ + for (int io = 0; io < dim_Out; io++) { + /* Apply weights */ + update[io] = vecdot(dim_In, &(weights[io * dim_In]), example); -void DenseLayer::applyFWD(MyReal* state) -{ - /* Affine transformation */ - for (int io = 0; io < dim_Out; io++) - { - /* Apply weights */ - update[io] = vecdot(dim_In, &(weights[io*dim_In]), state); - - /* Add bias */ - update[io] += bias[0]; - } - - /* Apply step */ - for (int io = 0; io < dim_Out; io++) - { - state[io] = state[io] + dt * activation(update[io]); - } + /* Add bias */ + update[io] += bias[0]; + } + + /* Step */ + for (int io = 0; io < dim_Out; io++) { + state[io] = activation(update[io]); + } } +void OpenDenseLayer::applyBWD(MyReal *state, MyReal *state_bar, + int compute_gradient) { + /* Derivative of step */ + for (int io = 0; io < dim_Out; io++) { + /* Recompute affine transformation */ + update[io] = vecdot(dim_In, &(weights[io * dim_In]), example); + update[io] += bias[0]; -void DenseLayer::applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient) -{ + /* Derivative */ + update_bar[io] = dactivation(update[io]) * state_bar[io]; + state_bar[io] = 0.0; + } - /* state_bar is the adjoint of the state variable, it contains the - old time adjoint informationk, and is modified on the way out to - contain the update. 
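     Its forward application simply copies the raw example into the first
     dim_In entries of the network state and fills the remaining
     dim_Out - dim_In channels with zeros; accordingly, the backward pass only
     clears state_bar.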
*/ - - /* Derivative of the step */ - for (int io = 0; io < dim_Out; io++) - { - /* Recompute affine transformation */ - update[io] = vecdot(dim_In, &(weights[io*dim_In]), state); - update[io] += bias[0]; - - /* Derivative: This is the update from old time */ - update_bar[io] = dt * dactivation(update[io]) * state_bar[io]; - } - - /* Derivative of linear transformation */ - for (int io = 0; io < dim_Out; io++) - { + /* Derivative of affine transformation */ + if (compute_gradient) { + for (int io = 0; io < dim_Out; io++) { /* Derivative of bias addition */ - if (compute_gradient) bias_bar[0] += update_bar[io]; + bias_bar[0] += update_bar[io]; /* Derivative of weight application */ - for (int ii = 0; ii < dim_In; ii++) - { - if (compute_gradient) weights_bar[io*dim_In + ii] += state[ii] * update_bar[io]; - state_bar[ii] += weights[io*dim_In + ii] * update_bar[io]; + for (int ii = 0; ii < dim_In; ii++) { + weights_bar[io * dim_In + ii] += example[ii] * update_bar[io]; } - } + } + } } - -OpenDenseLayer::OpenDenseLayer(int dimI, - int dimO, - int Activ, - MyReal gammatik) : DenseLayer(-1, dimI, dimO, 1.0, Activ, gammatik, 0.0) -{ - type = OPENDENSE; - example = NULL; +OpenExpandZero::OpenExpandZero(int dimI, int dimO) + : Layer(-1, OPENZERO, dimI, dimO, 0, 0, 1.0, -1, 0.0, 0.0) { + /* this layer doesn't have any design variables. */ + ndesign = 0; + nweights = 0; } -OpenDenseLayer::~OpenDenseLayer(){} +OpenExpandZero::~OpenExpandZero() {} -void OpenDenseLayer::setExample(MyReal* example_ptr) -{ - example = example_ptr; -} +void OpenExpandZero::setExample(MyReal *example_ptr) { example = example_ptr; } -void OpenDenseLayer::applyFWD(MyReal* state) -{ - /* affine transformation */ - for (int io = 0; io < dim_Out; io++) - { - /* Apply weights */ - update[io] = vecdot(dim_In, &(weights[io*dim_In]), example); - - /* Add bias */ - update[io] += bias[0]; - } - - /* Step */ - for (int io = 0; io < dim_Out; io++) - { - state[io] = activation(update[io]); - } +void OpenExpandZero::applyFWD(MyReal *state) { + for (int ii = 0; ii < dim_In; ii++) { + state[ii] = example[ii]; + } + for (int io = dim_In; io < dim_Out; io++) { + state[io] = 0.0; + } } -void OpenDenseLayer::applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient) -{ - /* Derivative of step */ - for (int io = 0; io < dim_Out; io++) - { - /* Recompute affine transformation */ - update[io] = vecdot(dim_In, &(weights[io*dim_In]), example); - update[io] += bias[0]; - - /* Derivative */ - update_bar[io] = dactivation(update[io]) * state_bar[io]; - state_bar[io] = 0.0; - } - - /* Derivative of affine transformation */ - if (compute_gradient) - { - for (int io = 0; io < dim_Out; io++) - { - /* Derivative of bias addition */ - bias_bar[0] += update_bar[io]; - - /* Derivative of weight application */ - for (int ii = 0; ii < dim_In; ii++) - { - weights_bar[io*dim_In + ii] += example[ii] * update_bar[io]; - } - } - } -} - - -OpenExpandZero::OpenExpandZero(int dimI, - int dimO) : Layer(-1, OPENZERO, dimI, dimO, 0, 0, 1.0, -1, 0.0, 0.0) -{ - /* this layer doesn't have any design variables. */ - ndesign = 0; - nweights = 0; +void OpenExpandZero::applyBWD(MyReal *state, MyReal *state_bar, + int compute_gradient) { + for (int ii = 0; ii < dim_Out; ii++) { + state_bar[ii] = 0.0; + } } +OpenConvLayer::OpenConvLayer(int dimI, int dimO) + : Layer(-1, OPENCONV, dimI, dimO, 0, 0, 1.0, -1, 0.0, 0.0) { + /* this layer doesn't have any design variables. 
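     Instead of zero-padding as OpenExpandZero does, this opening layer
     replicates the example nconv = dim_Out / dim_In times across the state
     vector (the assert just below checks that dim_In divides dim_Out); the
     MNIST variant additionally rescales the integer pixel values from
     [0, 255] and passes them through a shifted tanh.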
*/ + ndesign = 0; + nweights = 0; + dim_Bias = 0; -OpenExpandZero::~OpenExpandZero(){} - + nconv = dim_Out / dim_In; -void OpenExpandZero::setExample(MyReal* example_ptr) -{ - example = example_ptr; + assert(nconv * dim_In == dim_Out); } +OpenConvLayer::~OpenConvLayer() {} +void OpenConvLayer::setExample(MyReal *example_ptr) { example = example_ptr; } -void OpenExpandZero::applyFWD(MyReal* state) -{ - for (int ii = 0; ii < dim_In; ii++) - { - state[ii] = example[ii]; - } - for (int io = dim_In; io < dim_Out; io++) - { - state[io] = 0.0; - } -} - -void OpenExpandZero::applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient) -{ - for (int ii = 0; ii < dim_Out; ii++) - { - state_bar[ii] = 0.0; - } -} - - -OpenConvLayer::OpenConvLayer(int dimI, - int dimO) : Layer(-1, OPENCONV, dimI, dimO, 0, 0, 1.0, -1, 0.0, 0.0) -{ - /* this layer doesn't have any design variables. */ - ndesign = 0; - nweights = 0; - dim_Bias = 0; - - nconv = dim_Out/dim_In; - - assert(nconv*dim_In == dim_Out); +void OpenConvLayer::applyFWD(MyReal *state) { + // replicate the image data + for (int img = 0; img < nconv; img++) { + for (int ii = 0; ii < dim_In; ii++) { + state[ii + dim_In * img] = example[ii]; + } + } } - -OpenConvLayer::~OpenConvLayer(){} - - -void OpenConvLayer::setExample(MyReal* example_ptr) -{ - example = example_ptr; +void OpenConvLayer::applyBWD(MyReal *state, MyReal *state_bar, + int compute_gradient) { + for (int ii = 0; ii < dim_Out; ii++) { + state_bar[ii] = 0.0; + } } - -void OpenConvLayer::applyFWD(MyReal* state) -{ - // replicate the image data - for(int img = 0; img < nconv; img++) - { - for (int ii = 0; ii < dim_In; ii++) - { - state[ii+dim_In*img] = example[ii]; - } - } -} - -void OpenConvLayer::applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient) -{ - for (int ii = 0; ii < dim_Out; ii++) - { - state_bar[ii] = 0.0; - } -} - - -OpenConvLayerMNIST::OpenConvLayerMNIST(int dimI, int dimO) : OpenConvLayer(dimI, dimO) -{ - type = OPENCONVMNIST; +OpenConvLayerMNIST::OpenConvLayerMNIST(int dimI, int dimO) + : OpenConvLayer(dimI, dimO) { + type = OPENCONVMNIST; } +OpenConvLayerMNIST::~OpenConvLayerMNIST() {} + +void OpenConvLayerMNIST::applyFWD(MyReal *state) { + // replicate the image data + for (int img = 0; img < nconv; img++) { + for (int ii = 0; ii < dim_In; ii++) { + // The MNIST data is integer from [0, 255], so we rescale it to floats + // over the range[0,6] + // + // Also, rescale tanh so that it appropriately activates over the x-range + // of [0,6] + state[ii + dim_In * img] = tanh((6.0 * example[ii] / 255.0) - 3.0) + 1; + } + } +} -OpenConvLayerMNIST::~OpenConvLayerMNIST(){} - +void OpenConvLayerMNIST::applyBWD(MyReal *state, MyReal *state_bar, + int compute_gradient) { + // Derivative of step + for (int img = 0; img < nconv; img++) { + for (int ii = 0; ii < dim_In; ii++) { + state_bar[ii + dim_In * img] = + (1.0 - pow(tanh(example[ii]), 2)) * state_bar[ii + dim_In * img]; + // state_bar[ii + dim_In*img] = 0.0; + } + } -void OpenConvLayerMNIST::applyFWD(MyReal* state) -{ - // replicate the image data - for(int img = 0; img < nconv; img++) - { - for (int ii = 0; ii < dim_In; ii++) - { - // The MNIST data is integer from [0, 255], so we rescale it to floats - // over the range[0,6] - // - // Also, rescale tanh so that it appropriately activates over the x-range of [0,6] - state[ii+dim_In*img] = tanh( (6.0*example[ii]/255.0) - 3.0) + 1; - } - } + // Derivative of affine transformation + // This is "0" because we have no bias or weights } -void 
OpenConvLayerMNIST::applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient) -{ - // Derivative of step - for(int img = 0; img < nconv; img++) - { - for (int ii = 0; ii < dim_In; ii++) - { - state_bar[ii + dim_In*img] = (1.0 - pow(tanh(example[ii]),2))*state_bar[ii + dim_In*img]; - // state_bar[ii + dim_In*img] = 0.0; - } - } - - // Derivative of affine transformation - // This is "0" because we have no bias or weights +ClassificationLayer::ClassificationLayer(int idx, int dimI, int dimO, + MyReal gammatik) + : Layer(idx, CLASSIFICATION, dimI, dimO, dimO, dimI * dimO, 1.0, -1, 0.0, + 0.0) { + gamma_tik = gammatik; + /* Allocate the probability vector */ + probability = new MyReal[dimO]; } +ClassificationLayer::~ClassificationLayer() { delete[] probability; } +void ClassificationLayer::setLabel(MyReal *label_ptr) { label = label_ptr; } -ClassificationLayer::ClassificationLayer(int idx, - int dimI, - int dimO, - MyReal gammatik) : Layer(idx, CLASSIFICATION, dimI, dimO, dimO, dimI*dimO, 1.0, -1, 0.0, 0.0) -{ - gamma_tik = gammatik; - /* Allocate the probability vector */ - probability = new MyReal[dimO]; -} +void ClassificationLayer::applyFWD(MyReal *state) { + /* Compute affine transformation */ + for (int io = 0; io < dim_Out; io++) { + /* Apply weights */ + update[io] = vecdot(dim_In, &(weights[io * dim_In]), state); + /* Add bias */ + update[io] += bias[io]; + } -ClassificationLayer::~ClassificationLayer() -{ - delete [] probability; -} + /* Data normalization y - max(y) (needed for stable softmax evaluation */ + normalize(update); + if (dim_In < dim_Out) { + printf( + "Error: nchannels < nclasses. Implementation of classification " + "layer doesn't support this setting. Change! \n"); + exit(1); + } -void ClassificationLayer::setLabel(MyReal* label_ptr) -{ - label = label_ptr; + /* Apply step */ + for (int io = 0; io < dim_Out; io++) { + state[io] = update[io]; + } + /* Set remaining to zero */ + for (int ii = dim_Out; ii < dim_In; ii++) { + state[ii] = 0.0; + } } +void ClassificationLayer::applyBWD(MyReal *state, MyReal *state_bar, + int compute_gradient) { + /* Recompute affine transformation */ + for (int io = 0; io < dim_Out; io++) { + update[io] = vecdot(dim_In, &(weights[io * dim_In]), state); + update[io] += bias[io]; + } -void ClassificationLayer::applyFWD(MyReal* state) -{ - /* Compute affine transformation */ - for (int io = 0; io < dim_Out; io++) - { - /* Apply weights */ - update[io] = vecdot(dim_In, &(weights[io*dim_In]), state); - /* Add bias */ - update[io] += bias[io]; - } + /* Derivative of step */ + for (int ii = dim_Out; ii < dim_In; ii++) { + state_bar[ii] = 0.0; + } + for (int io = 0; io < dim_Out; io++) { + update_bar[io] = state_bar[io]; + state_bar[io] = 0.0; + } - /* Data normalization y - max(y) (needed for stable softmax evaluation */ - normalize(update); + /* Derivative of the normalization */ + normalize_diff(update, update_bar); - if (dim_In < dim_Out) - { - printf("Error: nchannels < nclasses. Implementation of classification layer doesn't support this setting. Change! 
\n"); - exit(1); - } + /* Derivatie of affine transformation */ + for (int io = 0; io < dim_Out; io++) { + /* Derivative of bias addition */ + if (compute_gradient) bias_bar[io] += update_bar[io]; - /* Apply step */ - for (int io = 0; io < dim_Out; io++) - { - state[io] = update[io]; - } - /* Set remaining to zero */ - for (int ii = dim_Out; ii < dim_In; ii++) - { - state[ii] = 0.0; + /* Derivative of weight application */ + for (int ii = 0; ii < dim_In; ii++) { + if (compute_gradient) + weights_bar[io * dim_In + ii] += state[ii] * update_bar[io]; + state_bar[ii] += weights[io * dim_In + ii] * update_bar[io]; } -} - -void ClassificationLayer::applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient) -{ - /* Recompute affine transformation */ - for (int io = 0; io < dim_Out; io++) - { - update[io] = vecdot(dim_In, &(weights[io*dim_In]), state); - update[io] += bias[io]; - } + } +} +void ClassificationLayer::normalize(MyReal *data) { + /* Find maximum value */ + MyReal max = vecmax(dim_Out, data); + /* Shift the data vector */ + for (int io = 0; io < dim_Out; io++) { + data[io] = data[io] - max; + } +} - /* Derivative of step */ - for (int ii = dim_Out; ii < dim_In; ii++) - { - state_bar[ii] = 0.0; - } - for (int io = 0; io < dim_Out; io++) - { - update_bar[io] = state_bar[io]; - state_bar[io] = 0.0; - } - - /* Derivative of the normalization */ - normalize_diff(update, update_bar); - - /* Derivatie of affine transformation */ - for (int io = 0; io < dim_Out; io++) - { - /* Derivative of bias addition */ - if (compute_gradient) bias_bar[io] += update_bar[io]; - - /* Derivative of weight application */ - for (int ii = 0; ii < dim_In; ii++) - { - if (compute_gradient) weights_bar[io*dim_In + ii] += state[ii] * update_bar[io]; - state_bar[ii] += weights[io*dim_In + ii] * update_bar[io]; - } - } +void ClassificationLayer::normalize_diff(MyReal *data, MyReal *data_bar) { + MyReal max_b = 0.0; + /* Derivative of the shift */ + for (int io = 0; io < dim_Out; io++) { + max_b -= data_bar[io]; + } + /* Derivative of the vecmax */ + int i_max = argvecmax(dim_Out, data); + data_bar[i_max] += max_b; } +MyReal ClassificationLayer::crossEntropy(MyReal *data_Out) { + MyReal label_pr, exp_sum; + MyReal CELoss; -void ClassificationLayer::normalize(MyReal* data) -{ + /* Label projection */ + label_pr = vecdot(dim_Out, label, data_Out); - /* Find maximum value */ - MyReal max = vecmax(dim_Out, data); - /* Shift the data vector */ - for (int io = 0; io < dim_Out; io++) - { - data[io] = data[io] - max; - } -} - -void ClassificationLayer::normalize_diff(MyReal* data, - MyReal* data_bar) -{ - MyReal max_b = 0.0; - /* Derivative of the shift */ - for (int io = 0; io < dim_Out; io++) - { - max_b -= data_bar[io]; - } - /* Derivative of the vecmax */ - int i_max = argvecmax(dim_Out, data); - data_bar[i_max] += max_b; -} + /* Compute sum_i (exp(x_i)) */ + exp_sum = 0.0; + for (int io = 0; io < dim_Out; io++) { + exp_sum += exp(data_Out[io]); + } -MyReal ClassificationLayer::crossEntropy(MyReal *data_Out) -{ - MyReal label_pr, exp_sum; - MyReal CELoss; + /* Cross entropy loss function */ + CELoss = -label_pr + log(exp_sum); - /* Label projection */ - label_pr = vecdot(dim_Out, label, data_Out); + return CELoss; +} - /* Compute sum_i (exp(x_i)) */ - exp_sum = 0.0; - for (int io = 0; io < dim_Out; io++) - { - exp_sum += exp(data_Out[io]); - } +void ClassificationLayer::crossEntropy_diff(MyReal *data_Out, + MyReal *data_Out_bar, + MyReal loss_bar) { + MyReal exp_sum, exp_sum_bar; + MyReal label_pr_bar = 
-loss_bar; + + /* Recompute exp_sum */ + exp_sum = 0.0; + for (int io = 0; io < dim_Out; io++) { + exp_sum += exp(data_Out[io]); + } - /* Cross entropy loss function */ - CELoss = - label_pr + log(exp_sum); + /* derivative of log(exp_sum) */ + exp_sum_bar = 1. / exp_sum * loss_bar; + for (int io = 0; io < dim_Out; io++) { + data_Out_bar[io] = exp(data_Out[io]) * exp_sum_bar; + } - return CELoss; + /* Derivative of vecdot */ + for (int io = 0; io < dim_Out; io++) { + data_Out_bar[io] += label[io] * label_pr_bar; + } } - - -void ClassificationLayer::crossEntropy_diff(MyReal *data_Out, - MyReal *data_Out_bar, - MyReal loss_bar) -{ - MyReal exp_sum, exp_sum_bar; - MyReal label_pr_bar = - loss_bar; - - /* Recompute exp_sum */ - exp_sum = 0.0; - for (int io = 0; io < dim_Out; io++) - { - exp_sum += exp(data_Out[io]); - } - /* derivative of log(exp_sum) */ - exp_sum_bar = 1./exp_sum * loss_bar; - for (int io = 0; io < dim_Out; io++) - { - data_Out_bar[io] = exp(data_Out[io]) * exp_sum_bar; - } +int ClassificationLayer::prediction(MyReal *data_Out, int *class_id_ptr) { + MyReal exp_sum, max; + int class_id = -1; + int success = 0; - /* Derivative of vecdot */ - for (int io = 0; io < dim_Out; io++) - { - data_Out_bar[io] += label[io] * label_pr_bar; - } -} + /* Compute sum_i (exp(x_i)) */ + max = -1.0; + exp_sum = 0.0; + for (int io = 0; io < dim_Out; io++) { + exp_sum += exp(data_Out[io]); + } + for (int io = 0; io < dim_Out; io++) { + /* Compute class probabilities (Softmax) */ + probability[io] = exp(data_Out[io]) / exp_sum; -int ClassificationLayer::prediction(MyReal* data_Out, - int* class_id_ptr) -{ - MyReal exp_sum, max; - int class_id = -1; - int success = 0; - - /* Compute sum_i (exp(x_i)) */ - max = -1.0; - exp_sum = 0.0; - for (int io = 0; io < dim_Out; io++) - { - exp_sum += exp(data_Out[io]); - } - - for (int io = 0; io < dim_Out; io++) - { - /* Compute class probabilities (Softmax) */ - probability[io] = exp(data_Out[io]) / exp_sum; - - /* Predicted class is the one with maximum probability */ - if (probability[io] > max) - { - max = probability[io]; - class_id = io; - } - } + /* Predicted class is the one with maximum probability */ + if (probability[io] > max) { + max = probability[io]; + class_id = io; + } + } /* Test for successful prediction */ - if ( label[class_id] > 0.99 ) - { - success = 1; + if (label[class_id] > 0.99) { + success = 1; } - - /* return */ - *class_id_ptr = class_id; - return success; -} - - - -MyReal Layer::ReLu_act(MyReal x) -{ - MyReal max = 0.0; - - if ( x > 0.0 ) max = x; - return max; + /* return */ + *class_id_ptr = class_id; + return success; } +MyReal Layer::ReLu_act(MyReal x) { + MyReal max = 0.0; -MyReal Layer::dReLu_act(MyReal x) -{ - MyReal diff; - if (x >= 0.0) diff = 1.0; - else diff = 0.0; + if (x > 0.0) max = x; - return diff; + return max; } +MyReal Layer::dReLu_act(MyReal x) { + MyReal diff; + if (x >= 0.0) + diff = 1.0; + else + diff = 0.0; -MyReal Layer::SmoothReLu_act(MyReal x) -{ - /* range of quadratic interpolation */ - MyReal eta = 0.1; - /* Coefficients of quadratic interpolation */ - MyReal a = 1./(4.*eta); - MyReal b = 1./2.; - MyReal c = eta / 4.; - - if (-eta < x && x < eta) - { - /* Quadratic Activation */ - return a*pow(x,2) + b*x + c; - } - else - { - /* ReLu Activation */ - return Layer::ReLu_act(x); - } + return diff; } -MyReal Layer::dSmoothReLu_act(MyReal x) -{ - /* range of quadratic interpolation */ - MyReal eta = 0.1; - /* Coefficients of quadratic interpolation */ - MyReal a = 1./(4.*eta); - MyReal b = 1./2.; - - if 
(-eta < x && x < eta) - { - return 2.*a*x + b; - } - else - { - return Layer::dReLu_act(x); - } - +MyReal Layer::SmoothReLu_act(MyReal x) { + /* range of quadratic interpolation */ + MyReal eta = 0.1; + /* Coefficients of quadratic interpolation */ + MyReal a = 1. / (4. * eta); + MyReal b = 1. / 2.; + MyReal c = eta / 4.; + + if (-eta < x && x < eta) { + /* Quadratic Activation */ + return a * pow(x, 2) + b * x + c; + } else { + /* ReLu Activation */ + return Layer::ReLu_act(x); + } } - -MyReal Layer::tanh_act(MyReal x) -{ - return tanh(x); +MyReal Layer::dSmoothReLu_act(MyReal x) { + /* range of quadratic interpolation */ + MyReal eta = 0.1; + /* Coefficients of quadratic interpolation */ + MyReal a = 1. / (4. * eta); + MyReal b = 1. / 2.; + + if (-eta < x && x < eta) { + return 2. * a * x + b; + } else { + return Layer::dReLu_act(x); + } } -MyReal Layer::dtanh_act(MyReal x) -{ - MyReal diff = 1.0 - pow(tanh(x),2); +MyReal Layer::tanh_act(MyReal x) { return tanh(x); } + +MyReal Layer::dtanh_act(MyReal x) { + MyReal diff = 1.0 - pow(tanh(x), 2); - return diff; + return diff; } -ConvLayer::ConvLayer(int idx, - int dimI, - int dimO, - int csize_in, - int nconv_in, - MyReal deltaT, - int Activ, - MyReal Gammatik, - MyReal Gammaddt) : Layer(idx, CONVOLUTION, - dimI, dimO, dimI/nconv_in, csize_in*csize_in*nconv_in*nconv_in, - deltaT, Activ, Gammatik, Gammaddt) -{ - csize = csize_in; - nconv = nconv_in; +ConvLayer::ConvLayer(int idx, int dimI, int dimO, int csize_in, int nconv_in, + MyReal deltaT, int Activ, MyReal Gammatik, MyReal Gammaddt) + : Layer(idx, CONVOLUTION, dimI, dimO, dimI / nconv_in, + csize_in * csize_in * nconv_in * nconv_in, deltaT, Activ, Gammatik, + Gammaddt) { + csize = csize_in; + nconv = nconv_in; - fcsize = floor(csize/2.0); - csize2 = csize*csize; + fcsize = floor(csize / 2.0); + csize2 = csize * csize; - img_size = dim_In / nconv; - img_size_sqrt = round(sqrt(img_size)); + img_size = dim_In / nconv; + img_size_sqrt = round(sqrt(img_size)); - // nweights = csize*csize*nconv*nconv; - // ndesign = nweights + dimI/nconv; // must add to account for the bias + // nweights = csize*csize*nconv*nconv; + // ndesign = nweights + dimI/nconv; // must add to account for the bias } - + ConvLayer::~ConvLayer() {} -/** +/** * This method is designed to be used only in the applyBWD. It computes the * derivative of the objective with respect to the weights. In particular * if you objective is $g$ and your kernel operator has value tau at index @@ -943,316 +769,283 @@ ConvLayer::~ConvLayer() {} * * d_tau [ g] = \sum_{image j,k} tau state_{j+a,k+b} * update_bar_{j,k} * - * Note that we assume that update_bar is + * Note that we assume that update_bar is * * update_bar = dt * dactivation * state_bar * * Where state_bar _must_ be at the old time. Note that the adjoint variable * state_bar carries withit all the information of the objective derivative. 
*/ -MyReal ConvLayer:: -updateWeightDerivative(MyReal* state, - MyReal* update_bar, - int output_conv, /* output convolution */ - int j, /* pixel index */ - int k) /* pixel index */ +MyReal ConvLayer::updateWeightDerivative( + MyReal *state, MyReal *update_bar, int output_conv, /* output convolution */ + int j, /* pixel index */ + int k) /* pixel index */ { - MyReal val = 0; - - int fcsize_s_l = -fcsize; - int fcsize_s_u = fcsize; - int fcsize_t_l = -fcsize; - int fcsize_t_u = fcsize; - int fcsize_s_l_adj = -fcsize; - int fcsize_t_l_adj = -fcsize; - - if((j+fcsize_s_l) < 0) fcsize_s_l = -j; - if((k+fcsize_t_l) < 0) fcsize_t_l = -k; - if((j+fcsize_s_u) >= img_size_sqrt) fcsize_s_u = img_size_sqrt-j-1; - if((k+fcsize_t_u) >= img_size_sqrt) fcsize_t_u = img_size_sqrt-k-1; - - if((j-fcsize_s_l_adj) >= img_size_sqrt) fcsize_s_l_adj = -(img_size_sqrt-j-1); - if((k-fcsize_t_l_adj) >= img_size_sqrt) fcsize_t_l_adj = -(img_size_sqrt-k-1); - - const int fcsize_s = fcsize_s_u-fcsize_s_l; - const int fcsize_t = fcsize_t_u-fcsize_t_l; - - int center_index = j*img_size_sqrt+k; - int input_wght_idx = output_conv*csize2*nconv + fcsize*(csize+1); - - int offset = fcsize_t_l + img_size_sqrt*fcsize_s_l; - int wght_idx = fcsize_t_l+ csize*fcsize_s_l; - - int offset_adj = - fcsize_t_l_adj - img_size_sqrt*fcsize_s_l_adj; - int wght_idx_adj = fcsize_t_l_adj + csize*fcsize_s_l_adj; - - for(int input_image = 0; input_image < nconv; input_image++, - center_index+= img_size, - input_wght_idx+=csize2) - { - MyReal update_val = update_bar[center_index]; - - MyReal * state_base = state + center_index+ offset; - MyReal * weights_bar_base = weights_bar + input_wght_idx + wght_idx; - - MyReal * update_base = update_bar + center_index + offset_adj; - MyReal * weights_base = weights + input_wght_idx + wght_idx_adj; - - // weight derivative - for(int s = 0; s <= fcsize_s; s++, - state_base+=img_size_sqrt, - weights_bar_base+=csize, - update_base-=img_size_sqrt, - weights_base+=csize) - { - MyReal * state_local = state_base; - MyReal * weights_bar_local = weights_bar_base; - - MyReal * update_local = update_base; - MyReal * weights_local = weights_base; - - for(int t = 0; t <= fcsize_t; t++, - state_local++, - weights_bar_local++, - update_local--, - weights_local++) - { - (*weights_bar_local) += update_val*(*state_local); - val += (*update_local)*(*weights_local); - } + MyReal val = 0; + + int fcsize_s_l = -fcsize; + int fcsize_s_u = fcsize; + int fcsize_t_l = -fcsize; + int fcsize_t_u = fcsize; + int fcsize_s_l_adj = -fcsize; + int fcsize_t_l_adj = -fcsize; + + if ((j + fcsize_s_l) < 0) fcsize_s_l = -j; + if ((k + fcsize_t_l) < 0) fcsize_t_l = -k; + if ((j + fcsize_s_u) >= img_size_sqrt) fcsize_s_u = img_size_sqrt - j - 1; + if ((k + fcsize_t_u) >= img_size_sqrt) fcsize_t_u = img_size_sqrt - k - 1; + + if ((j - fcsize_s_l_adj) >= img_size_sqrt) + fcsize_s_l_adj = -(img_size_sqrt - j - 1); + if ((k - fcsize_t_l_adj) >= img_size_sqrt) + fcsize_t_l_adj = -(img_size_sqrt - k - 1); + + const int fcsize_s = fcsize_s_u - fcsize_s_l; + const int fcsize_t = fcsize_t_u - fcsize_t_l; + + int center_index = j * img_size_sqrt + k; + int input_wght_idx = output_conv * csize2 * nconv + fcsize * (csize + 1); + + int offset = fcsize_t_l + img_size_sqrt * fcsize_s_l; + int wght_idx = fcsize_t_l + csize * fcsize_s_l; + + int offset_adj = -fcsize_t_l_adj - img_size_sqrt * fcsize_s_l_adj; + int wght_idx_adj = fcsize_t_l_adj + csize * fcsize_s_l_adj; + + for (int input_image = 0; input_image < nconv; + input_image++, center_index += img_size, 
input_wght_idx += csize2) { + MyReal update_val = update_bar[center_index]; + + MyReal *state_base = state + center_index + offset; + MyReal *weights_bar_base = weights_bar + input_wght_idx + wght_idx; + + MyReal *update_base = update_bar + center_index + offset_adj; + MyReal *weights_base = weights + input_wght_idx + wght_idx_adj; + + // weight derivative + for (int s = 0; s <= fcsize_s; s++, state_base += img_size_sqrt, + weights_bar_base += csize, update_base -= img_size_sqrt, + weights_base += csize) { + MyReal *state_local = state_base; + MyReal *weights_bar_local = weights_bar_base; + + MyReal *update_local = update_base; + MyReal *weights_local = weights_base; + + for (int t = 0; t <= fcsize_t; t++, state_local++, weights_bar_local++, + update_local--, weights_local++) { + (*weights_bar_local) += update_val * (*state_local); + val += (*update_local) * (*weights_local); } - } + } + } - return val; + return val; } -MyReal ConvLayer::apply_conv(MyReal* state, - int output_conv, /* output convolution */ - int j, /* pixel index */ - int k) /* pixel index */ +MyReal ConvLayer::apply_conv(MyReal *state, + int output_conv, /* output convolution */ + int j, /* pixel index */ + int k) /* pixel index */ { - MyReal val = 0.0; - - int fcsize_s_l = -fcsize; - int fcsize_s_u = fcsize; - int fcsize_t_l = -fcsize; - int fcsize_t_u = fcsize; - - // protect indexing at image boundaries - if((j+fcsize_s_l) < 0) fcsize_s_l = -j; - if((k+fcsize_t_l) < 0) fcsize_t_l = -k; - if((j+fcsize_s_u) >= img_size_sqrt) fcsize_s_u = img_size_sqrt-j-1; - if((k+fcsize_t_u) >= img_size_sqrt) fcsize_t_u = img_size_sqrt-k-1; - - const int fcsize_s = fcsize_s_u-fcsize_s_l; - const int fcsize_t = fcsize_t_u-fcsize_t_l; - - int center_index = j*img_size_sqrt+k+fcsize_t_l + img_size_sqrt*fcsize_s_l; - int input_wght_idx = output_conv*csize2*nconv +fcsize*(csize+1) + fcsize_t_l+csize*fcsize_s_l; - - /* loop over all the images */ - for(int input_image = 0; input_image < nconv; input_image++, - center_index+= img_size, - input_wght_idx+=csize2) - { - MyReal * state_base = state + center_index; - MyReal * weights_base = weights + input_wght_idx; - - for(int s = 0; s <= fcsize_s; s++, state_base+=img_size_sqrt, - weights_base+=csize) - { - MyReal * state_local = state_base; - MyReal * weights_local = weights_base; - - for(int t = 0; t <= fcsize_t; t++,state_local++,weights_local++) - { - val += (*state_local)*(*weights_local); - } + MyReal val = 0.0; + + int fcsize_s_l = -fcsize; + int fcsize_s_u = fcsize; + int fcsize_t_l = -fcsize; + int fcsize_t_u = fcsize; + + // protect indexing at image boundaries + if ((j + fcsize_s_l) < 0) fcsize_s_l = -j; + if ((k + fcsize_t_l) < 0) fcsize_t_l = -k; + if ((j + fcsize_s_u) >= img_size_sqrt) fcsize_s_u = img_size_sqrt - j - 1; + if ((k + fcsize_t_u) >= img_size_sqrt) fcsize_t_u = img_size_sqrt - k - 1; + + const int fcsize_s = fcsize_s_u - fcsize_s_l; + const int fcsize_t = fcsize_t_u - fcsize_t_l; + + int center_index = + j * img_size_sqrt + k + fcsize_t_l + img_size_sqrt * fcsize_s_l; + int input_wght_idx = output_conv * csize2 * nconv + fcsize * (csize + 1) + + fcsize_t_l + csize * fcsize_s_l; + + /* loop over all the images */ + for (int input_image = 0; input_image < nconv; + input_image++, center_index += img_size, input_wght_idx += csize2) { + MyReal *state_base = state + center_index; + MyReal *weights_base = weights + input_wght_idx; + + for (int s = 0; s <= fcsize_s; + s++, state_base += img_size_sqrt, weights_base += csize) { + MyReal *state_local = state_base; + MyReal 
*weights_local = weights_base; + + for (int t = 0; t <= fcsize_t; t++, state_local++, weights_local++) { + val += (*state_local) * (*weights_local); } - } + } + } - return val; + return val; } -MyReal ConvLayer::apply_conv_trans(MyReal* state, - int output_conv, /* output convolution */ - int j, /* pixel index */ - int k) /* pixel index */ +MyReal ConvLayer::apply_conv_trans(MyReal *state, + int output_conv, /* output convolution */ + int j, /* pixel index */ + int k) /* pixel index */ { - MyReal val = 0.0; - - int fcsize_s_l = -fcsize; - int fcsize_s_u = fcsize; - int fcsize_t_l = -fcsize; - int fcsize_t_u = fcsize; - - if((j-fcsize_s_u) < 0) fcsize_s_u = j; - if((k-fcsize_t_u) < 0) fcsize_t_u = k; - if((j-fcsize_s_l) >= img_size_sqrt) fcsize_s_l = -(img_size_sqrt-j-1); - if((k-fcsize_t_l) >= img_size_sqrt) fcsize_t_l = -(img_size_sqrt-k-1); - - const int fcsize_s = fcsize_s_u-fcsize_s_l; - const int fcsize_t = fcsize_t_u-fcsize_t_l; - - /* loop over all the images */ - int center_index = j*img_size_sqrt+k; - int input_wght_idx = output_conv*csize2*nconv; - for(int input_image = 0; input_image < nconv; input_image++, - center_index+=img_size, - input_wght_idx+=csize2) - { - int offset = center_index - fcsize_t_l; - int wght_idx = input_wght_idx + fcsize*(csize+1) + fcsize_t_l; - - MyReal * state_base = state + offset - img_size_sqrt*fcsize_s_l; - MyReal * weights_base = weights + wght_idx + csize*fcsize_s_l; - - for(int s = 0; s <= fcsize_s; s++, state_base-=img_size_sqrt, - weights_base+=csize) - { - MyReal * state_local = state_base; - MyReal * weights_local = weights_base; - - for(int t = 0; t <= fcsize_t; t++,state_local--,weights_local++) - { - val += (*state_local)*(*weights_local); - } + MyReal val = 0.0; + + int fcsize_s_l = -fcsize; + int fcsize_s_u = fcsize; + int fcsize_t_l = -fcsize; + int fcsize_t_u = fcsize; + + if ((j - fcsize_s_u) < 0) fcsize_s_u = j; + if ((k - fcsize_t_u) < 0) fcsize_t_u = k; + if ((j - fcsize_s_l) >= img_size_sqrt) fcsize_s_l = -(img_size_sqrt - j - 1); + if ((k - fcsize_t_l) >= img_size_sqrt) fcsize_t_l = -(img_size_sqrt - k - 1); + + const int fcsize_s = fcsize_s_u - fcsize_s_l; + const int fcsize_t = fcsize_t_u - fcsize_t_l; + + /* loop over all the images */ + int center_index = j * img_size_sqrt + k; + int input_wght_idx = output_conv * csize2 * nconv; + for (int input_image = 0; input_image < nconv; + input_image++, center_index += img_size, input_wght_idx += csize2) { + int offset = center_index - fcsize_t_l; + int wght_idx = input_wght_idx + fcsize * (csize + 1) + fcsize_t_l; + + MyReal *state_base = state + offset - img_size_sqrt * fcsize_s_l; + MyReal *weights_base = weights + wght_idx + csize * fcsize_s_l; + + for (int s = 0; s <= fcsize_s; + s++, state_base -= img_size_sqrt, weights_base += csize) { + MyReal *state_local = state_base; + MyReal *weights_local = weights_base; + + for (int t = 0; t <= fcsize_t; t++, state_local--, weights_local++) { + val += (*state_local) * (*weights_local); } - } + } + } - return val; + return val; } -void ConvLayer::applyFWD(MyReal* state) -{ - /* Apply step */ - for (int io = 0; io < dim_Out; io++) - update[io] = state[io]; - - /* Affine transformation */ - for(int i = 0; i < nconv; i++) - { - for(int j = 0; j < img_size_sqrt; j++) - { - int state_index = i*img_size + j*img_size_sqrt; - MyReal * update_local = state + state_index; - MyReal * bias_local = bias+j*img_size_sqrt; - - for(int k = 0; k < img_size_sqrt; k++, - update_local++, - bias_local++) - { - // (*update_local) += dt*tanh(apply_conv(update, 
i, j, k) + (*bias_local)); - (*update_local) += dt*ReLu_act(apply_conv(update, i, j, k) + (*bias_local)); - } +void ConvLayer::applyFWD(MyReal *state) { + /* Apply step */ + for (int io = 0; io < dim_Out; io++) update[io] = state[io]; + + /* Affine transformation */ + for (int i = 0; i < nconv; i++) { + for (int j = 0; j < img_size_sqrt; j++) { + int state_index = i * img_size + j * img_size_sqrt; + MyReal *update_local = state + state_index; + MyReal *bias_local = bias + j * img_size_sqrt; + + for (int k = 0; k < img_size_sqrt; k++, update_local++, bias_local++) { + // (*update_local) += dt*tanh(apply_conv(update, i, j, k) + + // (*bias_local)); + (*update_local) += + dt * ReLu_act(apply_conv(update, i, j, k) + (*bias_local)); } - } + } + } } +void ConvLayer::applyBWD(MyReal *state, MyReal *state_bar, + int compute_gradient) { + /* state_bar is the adjoint of the state variable, it contains the + old time adjoint information, and is modified on the way out to + contain the update. */ -void ConvLayer::applyBWD(MyReal* state, - MyReal* state_bar, - int compute_gradient) -{ - /* state_bar is the adjoint of the state variable, it contains the - old time adjoint information, and is modified on the way out to - contain the update. */ - - /* Okay, for my own clarity: - state = forward state solution - state_bar = backward adjoint solution (in - new time, out - current time) - update_bar = update to the bacward solution, this is "MyReal dipped" in that - it is used to compute the weight and bias derivative. - Note that because this is written as a forward update (the - residual is F = u_{n+1} - u_n - dt * sigma(W_n * u_n + b_n) - the adjoint variable is also the derivative of the objective - with respect to the solution. - weights_bar = Derivative of the objective with respect to the weights - bias_bar = Derivative of the objective with respect to the bias - - - More details: Assume that the objective is 'g', and the constraint in - residual form is F(u,W). Then - - d_{W_n} g = \partial_{u} g * \partial_{W_n} u - - Note that $\partial_{u} g$ only depends on the final layer. Expanding - around the constraint then gives - - d_{W_n} g = \partial_{u} g * (\partial_{u} F)^{-1} * \partial_{W_n} F - - and now doing the standard adjoint thing we get - - d_{W_n} g = (\partial_{u} F)^{-T} * \partial_{u} g ) * \partial_{W_n} F - - yielding - - d_{W_n} g = state_bar * \partial_{W_n} F - - This is directly - - weights_bar = state_bar * \partial_{W_n} F - - computed below. Similar for the bias. 
- */ - - /* Affine transformation, and derivative of time step */ - - /* loop over number convolutions */ - for(int i = 0; i < nconv; i++) - { - /* loop over full image */ - for(int j = 0; j < img_size_sqrt; j++) - { - int state_index = i*img_size + j*img_size_sqrt; - MyReal * state_bar_local = state_bar + state_index; - MyReal * update_bar_local = update_bar + state_index; - MyReal * bias_local = bias+j*img_size_sqrt; - - for(int k = 0; k < img_size_sqrt; k++, - state_bar_local++, - update_bar_local++, - bias_local++) - { - /* compute the affine transformation */ - MyReal local_update = apply_conv(state, i, j, k) + (*bias_local); - - /* derivative of the update, this is the contribution from old time */ - // (*update_bar_local) = dt * dactivation(local_update) * (*state_bar_local); - // (*update_bar_local) = dt * (1.0-pow(tanh(local_update),2)) * (*state_bar_local); - (*update_bar_local) = dt * dReLu_act(local_update) * (*state_bar_local); - } + /* Okay, for my own clarity: + state = forward state solution + state_bar = backward adjoint solution (in - new time, out - current time) + update_bar = update to the bacward solution, this is "MyReal dipped" in + that it is used to compute the weight and bias derivative. Note that + because this is written as a forward update (the residual is F = u_{n+1} - + u_n - dt * sigma(W_n * u_n + b_n) the adjoint variable is also the + derivative of the objective with respect to the solution. weights_bar = + Derivative of the objective with respect to the weights bias_bar = + Derivative of the objective with respect to the bias + + + More details: Assume that the objective is 'g', and the constraint in + residual form is F(u,W). Then + + d_{W_n} g = \partial_{u} g * \partial_{W_n} u + + Note that $\partial_{u} g$ only depends on the final layer. Expanding + around the constraint then gives + + d_{W_n} g = \partial_{u} g * (\partial_{u} F)^{-1} * \partial_{W_n} F + + and now doing the standard adjoint thing we get + + d_{W_n} g = (\partial_{u} F)^{-T} * \partial_{u} g ) * \partial_{W_n} F + + yielding + + d_{W_n} g = state_bar * \partial_{W_n} F + + This is directly + + weights_bar = state_bar * \partial_{W_n} F + + computed below. Similar for the bias. 
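+
+     Worked out for the bias (a sketch, using the quantities defined above):
+     with F = u_{n+1} - u_n - dt * sigma(W_n * u_n + b_n), the bias derivative
+     at pixel (j,k) picks up, for each convolution i, the chain-rule factor
+
+       update_bar_{j,k} = dt * sigma'( conv(u)_{j,k} + b_{j,k} ) * state_bar_{j,k},
+
+     which the second loop accumulates directly via
+     (*bias_bar_local) += (*update_bar_local).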
+ */ + + /* Affine transformation, and derivative of time step */ + + /* loop over number convolutions */ + for (int i = 0; i < nconv; i++) { + /* loop over full image */ + for (int j = 0; j < img_size_sqrt; j++) { + int state_index = i * img_size + j * img_size_sqrt; + MyReal *state_bar_local = state_bar + state_index; + MyReal *update_bar_local = update_bar + state_index; + MyReal *bias_local = bias + j * img_size_sqrt; + + for (int k = 0; k < img_size_sqrt; + k++, state_bar_local++, update_bar_local++, bias_local++) { + /* compute the affine transformation */ + MyReal local_update = apply_conv(state, i, j, k) + (*bias_local); + + /* derivative of the update, this is the contribution from old time */ + // (*update_bar_local) = dt * dactivation(local_update) * + // (*state_bar_local); + // (*update_bar_local) = dt * (1.0-pow(tanh(local_update),2)) * + // (*state_bar_local); + (*update_bar_local) = dt * dReLu_act(local_update) * (*state_bar_local); } - } - - /* Loop over the output dimensions */ - for(int i = 0; i < nconv; i++) - { - /* loop over full image */ - for(int j = 0; j < img_size_sqrt; j++) - { - int state_index = i*img_size + j*img_size_sqrt; - - MyReal * state_bar_local = state_bar + state_index; - MyReal * update_bar_local = update_bar + state_index; - MyReal * bias_bar_local = bias_bar+j*img_size_sqrt; - - for(int k = 0; k < img_size_sqrt; k++, - state_bar_local++, - update_bar_local++, - bias_bar_local++) - { - if (compute_gradient) - { - (*bias_bar_local) += (*update_bar_local); - - (*state_bar_local) += updateWeightDerivative(state,update_bar,i,j,k); - } - else { - (*state_bar_local) += apply_conv_trans(update_bar, i, j, k); - } - } + } + } + + /* Loop over the output dimensions */ + for (int i = 0; i < nconv; i++) { + /* loop over full image */ + for (int j = 0; j < img_size_sqrt; j++) { + int state_index = i * img_size + j * img_size_sqrt; + + MyReal *state_bar_local = state_bar + state_index; + MyReal *update_bar_local = update_bar + state_index; + MyReal *bias_bar_local = bias_bar + j * img_size_sqrt; + + for (int k = 0; k < img_size_sqrt; + k++, state_bar_local++, update_bar_local++, bias_bar_local++) { + if (compute_gradient) { + (*bias_bar_local) += (*update_bar_local); + + (*state_bar_local) += + updateWeightDerivative(state, update_bar, i, j, k); + } else { + (*state_bar_local) += apply_conv_trans(update_bar, i, j, k); + } } + } - } // end for i + } // end for i } diff --git a/src/linalg.cpp b/src/linalg.cpp index 5fa3b90..32924cc 100644 --- a/src/linalg.cpp +++ b/src/linalg.cpp @@ -1,131 +1,108 @@ +// Copyright +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Underlying paper: +// +// Layer-Parallel Training of Deep Residual Neural Networks +// S. Guenther, L. Ruthotto, J.B. Schroder, E.C. Czr, and N.R. 
Gauger +// +// Download: https://arxiv.org/pdf/1812.04352.pdf +// #include "linalg.hpp" +MyReal vecdot_par(int dimN, MyReal *x, MyReal *y, MPI_Comm comm) { + MyReal localdot, globaldot; -MyReal vecdot_par(int dimN, - MyReal* x, - MyReal* y, - MPI_Comm comm) -{ + localdot = vecdot(dimN, x, y); + MPI_Allreduce(&localdot, &globaldot, 1, MPI_MyReal, MPI_SUM, comm); - MyReal localdot, globaldot; - - localdot = vecdot(dimN, x,y); - MPI_Allreduce(&localdot, &globaldot, 1, MPI_MyReal, MPI_SUM, comm); - - return globaldot; -} - - -MyReal vecdot(int dimN, - MyReal* x, - MyReal* y) -{ - MyReal dotprod = 0.0; - for (int i = 0; i < dimN; i++) - { - dotprod += x[i] * y[i]; - } - return dotprod; + return globaldot; } - -MyReal vecmax(int dimN, - MyReal* x) -{ - MyReal max = - 1e+12; - - for (int i = 0; i < dimN; i++) - { - if (x[i] > max) - { - max = x[i]; - } - } - return max; +MyReal vecdot(int dimN, MyReal *x, MyReal *y) { + MyReal dotprod = 0.0; + for (int i = 0; i < dimN; i++) { + dotprod += x[i] * y[i]; + } + return dotprod; } +MyReal vecmax(int dimN, MyReal *x) { + MyReal max = -1e+12; -int argvecmax(int dimN, - MyReal* x) -{ - MyReal max = - 1e+12; - int i_max; - for (int i = 0; i < dimN; i++) - { - if (x[i] > max) - { - max = x[i]; - i_max = i; - } + for (int i = 0; i < dimN; i++) { + if (x[i] > max) { + max = x[i]; } - return i_max; + } + return max; } -MyReal vecnormsq(int dimN, - MyReal *x) -{ - MyReal normsq = 0.0; - for (int i = 0; i max) { + max = x[i]; + i_max = i; } - return normsq; -} + } + return i_max; +} -MyReal vecnorm_par(int dimN, - MyReal *x, - MPI_Comm comm) -{ - MyReal localnorm, globalnorm; +MyReal vecnormsq(int dimN, MyReal *x) { + MyReal normsq = 0.0; + for (int i = 0; i < dimN; i++) { + normsq += pow(x[i], 2); + } + return normsq; +} + +MyReal vecnorm_par(int dimN, MyReal *x, MPI_Comm comm) { + MyReal localnorm, globalnorm; - localnorm = vecnormsq(dimN, x); - MPI_Allreduce(&localnorm, &globalnorm, 1, MPI_MyReal, MPI_SUM, comm); - globalnorm = sqrt(globalnorm); + localnorm = vecnormsq(dimN, x); + MPI_Allreduce(&localnorm, &globalnorm, 1, MPI_MyReal, MPI_SUM, comm); + globalnorm = sqrt(globalnorm); - return globalnorm; + return globalnorm; } -int vec_copy(int N, - MyReal* u, - MyReal* u_copy) -{ - for (int i=0; i -#include +// Copyright +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Underlying paper: +// +// Layer-Parallel Training of Deep Residual Neural Networks +// S. Guenther, L. Ruthotto, J.B. Schroder, E.C. Czr, and N.R. 
Gauger +// +// Download: https://arxiv.org/pdf/1812.04352.pdf +// +#include #include +#include #include -#include +#include +#include "braid_wrapper.hpp" +#include "config.hpp" +#include "dataset.hpp" #include "defs.hpp" #include "hessianApprox.hpp" -#include "util.hpp" #include "layer.hpp" -#include "braid_wrapper.hpp" -#include "config.hpp" #include "network.hpp" -#include "dataset.hpp" +#include "util.hpp" #define MASTER_NODE 0 -int main (int argc, char *argv[]) -{ - /* --- Data --- */ - Config* config; /**< Storing configurations */ - DataSet* trainingdata; /**< Training dataset */ - DataSet* validationdata; /**< Validation dataset */ - - /* --- Network --- */ - Network* network; /**< DNN Network architecture */ - int ilower, iupper; /**< Index of first and last layer stored on this processor */ - MyReal accur_train = 0.0; /**< Accuracy on training data */ - MyReal accur_val = 0.0; /**< Accuracy on validation data */ - MyReal loss_train = 0.0; /**< Loss function on training data */ - MyReal loss_val = 0.0; /**< Loss function on validation data */ - MyReal losstrain_out = 0.0; - MyReal lossval_out = 0.0; - MyReal accurtrain_out = 0.0; - MyReal accurval_out = 0.0; - - /* --- XBraid --- */ - myBraidApp *primaltrainapp; /**< Braid App for training data */ - myAdjointBraidApp *adjointtrainapp; /**< Adjoint Braid for training data */ - myBraidApp *primalvalapp; /**< Braid App for validation data */ - - /* --- Optimization --- */ - int ndesign_local; /**< Number of local design variables on this processor */ - int ndesign_global; /**< Number of global design variables (sum of local)*/ - MyReal* ascentdir=0; /**< Direction for design updates */ - MyReal objective; /**< Optimization objective */ - MyReal wolfe; /**< Holding the wolfe condition value */ - MyReal rnorm; /**< Space-time Norm of the state variables */ - MyReal rnorm_adj; /**< Space-time norm of the adjoint variables */ - MyReal gnorm; /**< Norm of the gradient */ - MyReal ls_param; /**< Parameter in wolfe condition test */ - MyReal stepsize; /**< Stepsize used for design update */ - char optimfilename[255]; - FILE *optimfile = 0; - MyReal ls_stepsize; - MyReal ls_objective, test_obj; - int ls_iter; - - - /* --- other --- */ - int myid; - int size; - struct rusage r_usage; - MyReal StartTime, StopTime, myMB, globalMB; - MyReal UsedTime = 0.0; - - /* Initialize MPI */ - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - MPI_Comm_size(MPI_COMM_WORLD, &size); - - - /*--- INITIALIZATION ---*/ - - /* Instantiate objects */ - config = new Config(); - trainingdata = new DataSet(); - validationdata = new DataSet(); - network = new Network(); - - - /* Read config file */ - if (argc != 2) - { - if ( myid == MASTER_NODE ) - { - printf("\n"); - printf("USAGE: ./main \n"); - } - MPI_Finalize(); - return (0); +int main(int argc, char *argv[]) { + /* --- Data --- */ + Config *config; /**< Storing configurations */ + DataSet *trainingdata; /**< Training dataset */ + DataSet *validationdata; /**< Validation dataset */ + + /* --- Network --- */ + Network *network; /**< DNN Network architecture */ + int ilower, + iupper; /**< Index of first and last layer stored on this processor */ + MyReal accur_train = 0.0; /**< Accuracy on training data */ + MyReal accur_val = 0.0; /**< Accuracy on validation data */ + MyReal loss_train = 0.0; /**< Loss function on training data */ + MyReal loss_val = 0.0; /**< Loss function on validation data */ + MyReal losstrain_out = 0.0; + MyReal lossval_out = 0.0; + MyReal accurtrain_out = 0.0; + MyReal 
accurval_out = 0.0; + + /* --- XBraid --- */ + myBraidApp *primaltrainapp; /**< Braid App for training data */ + myAdjointBraidApp *adjointtrainapp; /**< Adjoint Braid for training data */ + myBraidApp *primalvalapp; /**< Braid App for validation data */ + + /* --- Optimization --- */ + int ndesign_local; /**< Number of local design variables on this processor */ + int ndesign_global; /**< Number of global design variables (sum of local)*/ + MyReal *ascentdir = 0; /**< Direction for design updates */ + MyReal objective; /**< Optimization objective */ + MyReal wolfe; /**< Holding the wolfe condition value */ + MyReal rnorm; /**< Space-time Norm of the state variables */ + MyReal rnorm_adj; /**< Space-time norm of the adjoint variables */ + MyReal gnorm; /**< Norm of the gradient */ + MyReal ls_param; /**< Parameter in wolfe condition test */ + MyReal stepsize; /**< Stepsize used for design update */ + char optimfilename[255]; + FILE *optimfile = 0; + MyReal ls_stepsize; + MyReal ls_objective, test_obj; + int ls_iter; + + /* --- Time measurements --- */ + struct rusage r_usage; + MyReal StartTime, StopTime, myMB, globalMB; + MyReal UsedTime = 0.0; + + /* Initialize MPI */ + int myid; + int size; + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + /* Instantiate objects */ + config = new Config(); + trainingdata = new DataSet(); + validationdata = new DataSet(); + network = new Network(); + + /* Read config file */ + if (argc != 2) { + if (myid == MASTER_NODE) { + printf("\n"); + printf("USAGE: ./main \n"); } - int err = config->readFromFile(argv[1]); - if (err) - { - printf("\nError while reading config file!\n"); - MPI_Finalize(); - return (0); + MPI_Finalize(); + return 0; + } + int err = config->readFromFile(argv[1]); + if (err) { + printf("Error while reading config file!\n"); + MPI_Finalize(); + return 0; + } + + /* Initialize training and validation data */ + trainingdata->initialize(config->ntraining, config->nfeatures, + config->nclasses, config->nbatch, MPI_COMM_WORLD); + trainingdata->readData(config->datafolder, config->ftrain_ex, + config->ftrain_labels); + + validationdata->initialize(config->nvalidation, config->nfeatures, + config->nclasses, config->nvalidation, + MPI_COMM_WORLD); // full validation set! 
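+  /* Note: the fourth argument of initialize() is the batch size (the training
+     set above passes config->nbatch), so handing in nvalidation here means the
+     validation set is always evaluated as one full batch. */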
+ validationdata->readData(config->datafolder, config->fval_ex, + config->fval_labels); + + /* Initialize XBraid */ + primaltrainapp = + new myBraidApp(trainingdata, network, config, MPI_COMM_WORLD); + adjointtrainapp = new myAdjointBraidApp( + trainingdata, network, config, primaltrainapp->getCore(), MPI_COMM_WORLD); + primalvalapp = + new myBraidApp(validationdata, network, config, MPI_COMM_WORLD); + + /* Initialize the network */ + primaltrainapp->GetGridDistribution(&ilower, &iupper); + network->createNetworkBlock(ilower, iupper, config, MPI_COMM_WORLD); + network->setInitialDesign(config); + ndesign_local = network->getnDesignLocal(); + ndesign_global = network->getnDesignGlobal(); + + /* Print some neural network information */ + printf("%d: Layer range: [%d, %d] / %d\n", myid, ilower, iupper, + config->nlayers); + printf("%d: Design variables (local/global): %d/%d\n", myid, ndesign_local, + ndesign_global); + + /* Initialize Hessian approximation */ + HessianApprox *hessian = NULL; + switch (config->hessianapprox_type) { + case BFGS_SERIAL: + hessian = new BFGS(MPI_COMM_WORLD, ndesign_local); + break; + case LBFGS: + hessian = new L_BFGS(MPI_COMM_WORLD, ndesign_local, config->lbfgs_stages); + break; + case IDENTITY: + hessian = new Identity(MPI_COMM_WORLD, ndesign_local); + break; + default: + printf("Error: unexpected hessianapprox_type returned"); + return 0; + } + + /* Initialize optimization parameters */ + ascentdir = new MyReal[ndesign_local]; + stepsize = config->getStepsize(0); + gnorm = 0.0; + objective = 0.0; + rnorm = 0.0; + rnorm_adj = 0.0; + ls_param = 1e-4; + ls_iter = 0; + ls_stepsize = stepsize; + + /* Open and prepare optimization output file*/ + if (myid == MASTER_NODE) { + sprintf(optimfilename, "%s.dat", "optim"); + optimfile = fopen(optimfilename, "w"); + config->writeToFile(optimfile); + fprintf(optimfile, + "# || r || || r_adj || Objective Loss " + " || grad || Stepsize ls_iter " + "Accur_train Accur_val Time(sec)\n"); + } + + /* Measure wall time */ + StartTime = MPI_Wtime(); + StopTime = 0.0; + UsedTime = 0.0; + + /** Main optimization iteration + * + * The following loop represents the paper's Algorithm (2) + * + */ + for (int iter = 0; iter < config->maxoptimiter; iter++) { + /* Set up the current batch */ + trainingdata->selectBatch(config->batch_type, MPI_COMM_WORLD); + + /** Solve state and adjoint equations (2.15) and (2.17) + * + * Algorithm (2): Step 1 and 2 + */ + rnorm = primaltrainapp->run(); + rnorm_adj = adjointtrainapp->run(); + + /* Get output */ + objective = primaltrainapp->getObjective(); + loss_train = network->getLoss(); + accur_train = network->getAccuracy(); + + /* --- Validation data: Get accuracy --- */ + if (config->validationlevel > 0) { + primalvalapp->run(); + loss_val = network->getLoss(); + accur_val = network->getAccuracy(); } - - /* Initialize training and validation data */ - trainingdata->initialize(config->ntraining, config->nfeatures, config->nclasses, config->nbatch, MPI_COMM_WORLD); - trainingdata->readData(config->datafolder, config->ftrain_ex, config->ftrain_labels); - - validationdata->initialize(config->nvalidation, config->nfeatures, config->nclasses, config->nvalidation, MPI_COMM_WORLD); // full validation set! 
- validationdata->readData(config->datafolder, config->fval_ex, config->fval_labels); - - - /* Initialize XBraid */ - primaltrainapp = new myBraidApp(trainingdata, network, config, MPI_COMM_WORLD); - adjointtrainapp = new myAdjointBraidApp(trainingdata, network, config, primaltrainapp->getCore(), MPI_COMM_WORLD); - primalvalapp = new myBraidApp(validationdata, network, config, MPI_COMM_WORLD); - - - /* Initialize the network */ - primaltrainapp->GetGridDistribution(&ilower, &iupper); - network->createNetworkBlock(ilower, iupper, config, MPI_COMM_WORLD); - network->setInitialDesign(config); - ndesign_local = network->getnDesignLocal(); - ndesign_global = network->getnDesignGlobal(); - - /* Print some network information */ - int startid = ilower; - if (ilower == 0) startid = -1; - printf("%d: Layer range: [%d, %d] / %d\n", myid, startid, iupper, config->nlayers); - printf("%d: Design variables (local/global): %d/%d\n", myid, ndesign_local, ndesign_global); - - - /* Initialize hessian approximation */ - HessianApprox *hessian = 0; - switch (config->hessianapprox_type) - { - case BFGS_SERIAL: - hessian = new BFGS(MPI_COMM_WORLD, ndesign_local); - break; - case LBFGS: - hessian = new L_BFGS(MPI_COMM_WORLD, ndesign_local, config->lbfgs_stages); - break; - case IDENTITY: - hessian = new Identity(MPI_COMM_WORLD, ndesign_local); + /* --- Optimization control and output ---*/ + + /** Compute global gradient norm + * + * Algorithm (2): Step 3 + */ + gnorm = vecnorm_par(ndesign_local, network->getGradient(), MPI_COMM_WORLD); + + /* Communicate loss and accuracy. This is actually only needed for output. + * TODO: Remove it. */ + MPI_Allreduce(&loss_train, &losstrain_out, 1, MPI_MyReal, MPI_SUM, + MPI_COMM_WORLD); + MPI_Allreduce(&loss_val, &lossval_out, 1, MPI_MyReal, MPI_SUM, + MPI_COMM_WORLD); + MPI_Allreduce(&accur_train, &accurtrain_out, 1, MPI_MyReal, MPI_SUM, + MPI_COMM_WORLD); + MPI_Allreduce(&accur_val, &accurval_out, 1, MPI_MyReal, MPI_SUM, + MPI_COMM_WORLD); + + /* Output */ + StopTime = MPI_Wtime(); + UsedTime = StopTime - StartTime; + if (myid == MASTER_NODE) { + printf( + "\n|| r ||\t|| r_adj ||\tObjective\tLoss\t\t\t|| grad " + "||\t\tStepsize\t\tls_iter\tAccur_train\tAccur_val\tTime(sec)\n"); + printf( + "%03d\t%1.8e\t%1.8e\t%1.14e\t%1.14e\t%1.14e\t%5f\t%2d\t%2.2f%%\t%2." 
+ "2f%%\t%.1f\n\n", + iter, rnorm, rnorm_adj, objective, losstrain_out, gnorm, stepsize, + ls_iter, accurtrain_out, accurval_out, UsedTime); + fprintf(optimfile, + "%03d %1.8e %1.8e %1.14e %1.14e %1.14e %5f %2d " + "%2.2f%% %2.2f%% %.1f\n", + iter, rnorm, rnorm_adj, objective, losstrain_out, gnorm, stepsize, + ls_iter, accurtrain_out, accurval_out, UsedTime); + fflush(optimfile); } - /* Allocate ascent direction for design updates */ - - /* Initialize optimization parameters */ - ascentdir = new MyReal[ndesign_local]; - stepsize = config->getStepsize(0); - gnorm = 0.0; - objective = 0.0; - rnorm = 0.0; - rnorm_adj = 0.0; - ls_param = 1e-4; - ls_iter = 0; - ls_stepsize = stepsize; - - /* Open and prepare optimization output file*/ - if (myid == MASTER_NODE) - { - sprintf(optimfilename, "%s/%s.dat", config->datafolder, "optim"); - optimfile = fopen(optimfilename, "w"); - config->writeToFile(optimfile); - - fprintf(optimfile, "# || r || || r_adj || Objective Loss || grad || Stepsize ls_iter Accur_train Accur_val Time(sec)\n"); - - /* Screen output */ - printf("\n# || r || || r_adj || Objective Loss || grad || Stepsize ls_iter Accur_train Accur_val Time(sec)\n"); + /** Check optimization convergence + * + * Algorithm (2): Step 6 + */ + if (gnorm < config->gtol) { + if (myid == MASTER_NODE) { + printf("Optimization has converged. \n"); + printf("Be happy and go home! \n"); + } + break; + } + if (iter == config->maxoptimiter - 1) { + if (myid == MASTER_NODE) { + printf("\nMax. optimization iterations reached.\n"); + } + break; } - -#if 1 - /* --- OPTIMIZATION --- */ - StartTime = MPI_Wtime(); - StopTime = 0.0; - UsedTime = 0.0; - for (int iter = 0; iter < config->maxoptimiter; iter++) - { - - /* --- Training data: Get objective and gradient ---*/ - - /* Set up the current batch */ - trainingdata->selectBatch(config->batch_type, MPI_COMM_WORLD); - - /* Solve state and adjoint equation */ - rnorm = primaltrainapp->run(); - rnorm_adj = adjointtrainapp->run(); - - /* Get output */ - objective = primaltrainapp->getObjective(); - loss_train = network->getLoss(); - accur_train = network->getAccuracy(); - - - /* --- Validation data: Get accuracy --- */ - - if ( config->validationlevel > 0 ) - { - primalvalapp->run(); - loss_val = network->getLoss(); - accur_val = network->getAccuracy(); - } - - - /* --- Optimization control and output ---*/ - - /* Compute global gradient norm */ - gnorm = vecnorm_par(ndesign_local, network->getGradient(), MPI_COMM_WORLD); - - /* Communicate loss and accuracy. This is actually only needed for output. Remove it. */ - MPI_Allreduce(&loss_train, &losstrain_out, 1, MPI_MyReal, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&loss_val, &lossval_out, 1, MPI_MyReal, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&accur_train, &accurtrain_out, 1, MPI_MyReal, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&accur_val, &accurval_out, 1, MPI_MyReal, MPI_SUM, MPI_COMM_WORLD); - - /* Output */ - StopTime = MPI_Wtime(); - UsedTime = StopTime-StartTime; + /* If optimization didn't converge, continue */ + + /* --- Design update --- */ + + /** Compute search direction + * + * Algorithm (2): Step 4 + */ + hessian->updateMemory(iter, network->getDesign(), network->getGradient()); + hessian->computeAscentDir(iter, network->getGradient(), ascentdir); + stepsize = config->getStepsize(iter); + + /** Update the design/network control parameter in negative ascent direction + * and perform backtracking linesearch. 
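+   *
+   * In short, the loop below performs a sufficient-decrease (Armijo-type)
+   * backtracking: a trial stepsize alpha is accepted as soon as
+   *
+   *   J(design - alpha * ascentdir) <= J(design) - ls_param * alpha * (gradient . ascentdir),
+   *
+   * otherwise part of the step is undone and alpha is reduced by the factor
+   * ls_factor (at most ls_maxiter times).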
+ * + * Algorithm (2): Step 5 + */ + network->updateDesign(-1.0 * stepsize, ascentdir, MPI_COMM_WORLD); + + if (config->stepsize_type == BACKTRACKINGLS) { + /* Compute wolfe condition */ + wolfe = vecdot_par(ndesign_local, network->getGradient(), ascentdir, + MPI_COMM_WORLD); + + /* Start linesearch iterations */ + ls_stepsize = config->getStepsize(iter); + stepsize = ls_stepsize; + for (ls_iter = 0; ls_iter < config->ls_maxiter; ls_iter++) { + primaltrainapp->getCore()->SetPrintLevel(0); + primaltrainapp->run(); + ls_objective = primaltrainapp->getObjective(); + primaltrainapp->getCore()->SetPrintLevel(config->braid_printlevel); + + test_obj = objective - ls_param * ls_stepsize * wolfe; if (myid == MASTER_NODE) - { - printf("%03d %1.8e %1.8e %1.14e %1.14e %1.14e %5f %2d %2.2f%% %2.2f%% %.1f\n", iter, rnorm, rnorm_adj, objective, losstrain_out, gnorm, stepsize, ls_iter, accurtrain_out, accurval_out, UsedTime); - fprintf(optimfile,"%03d %1.8e %1.8e %1.14e %1.14e %1.14e %5f %2d %2.2f%% %2.2f%% %.1f\n", iter, rnorm, rnorm_adj, objective, losstrain_out, gnorm, stepsize, ls_iter, accurtrain_out, accurval_out, UsedTime); - fflush(optimfile); - } - - /* Check optimization convergence */ - if ( gnorm < config->gtol ) - { - if (myid == MASTER_NODE) - { - printf("Optimization has converged. \n"); - printf("Be happy and go home! \n"); - } - break; - } - if ( iter == config->maxoptimiter - 1 ) - { + printf("ls_iter = %d:\tls_objective = %1.14e\ttest_obj = %1.14e\n", + ls_iter, ls_objective, test_obj); + /* Test the wolfe condition */ + if (ls_objective <= test_obj) { + /* Success, use this new design */ + break; + } else { + /* Test for line-search failure */ + if (ls_iter == config->ls_maxiter - 1) { if (myid == MASTER_NODE) - { - printf("\nMax. optimization iterations reached.\n"); - } + printf("\n\n WARNING: LINESEARCH FAILED! \n\n"); break; - } + } + /* Go back part of the step */ + network->updateDesign((1.0 - config->ls_factor) * stepsize, ascentdir, + MPI_COMM_WORLD); - /* --- Design update --- */ - - /* Compute search direction */ - hessian->updateMemory(iter, network->getDesign(), network->getGradient()); - hessian->computeAscentDir(iter, network->getGradient(), ascentdir); - - /* Update the design in negative ascent direction */ - stepsize = config->getStepsize(iter); - network->updateDesign( -1.0 * stepsize, ascentdir, MPI_COMM_WORLD); - - - /* --- Backtracking linesearch --- */ - - if (config->stepsize_type == BACKTRACKINGLS) - { - /* Compute wolfe condition */ - wolfe = vecdot_par(ndesign_local, network->getGradient(), ascentdir, MPI_COMM_WORLD); - - /* Start linesearch iterations */ - ls_stepsize = config->getStepsize(iter); - stepsize = ls_stepsize; - for (ls_iter = 0; ls_iter < config->ls_maxiter; ls_iter++) - { - - primaltrainapp->getCore()->SetPrintLevel(0); - primaltrainapp->run(); - ls_objective = primaltrainapp->getObjective(); - primaltrainapp->getCore()->SetPrintLevel(config->braid_printlevel); - - test_obj = objective - ls_param * ls_stepsize * wolfe; - if (myid == MASTER_NODE) printf("ls_iter %d: %1.14e %1.14e\n", ls_iter, ls_objective, test_obj); - /* Test the wolfe condition */ - if (ls_objective <= test_obj) - { - /* Success, use this new design */ - break; - } - else - { - /* Test for line-search failure */ - if (ls_iter == config->ls_maxiter - 1) - { - if (myid == MASTER_NODE) printf("\n\n WARNING: LINESEARCH FAILED! 
\n\n"); - break; - } - - /* Go back part of the step */ - network->updateDesign((1.0 - config->ls_factor) * stepsize, ascentdir, MPI_COMM_WORLD); - - /* Decrease the stepsize */ - ls_stepsize = ls_stepsize * config->ls_factor; - stepsize = ls_stepsize; - } - } + /* Decrease the stepsize */ + ls_stepsize = ls_stepsize * config->ls_factor; + stepsize = ls_stepsize; } + } } - - /* --- Run final validation and write prediction file --- */ - if (config->validationlevel > -1) - { - if (myid == MASTER_NODE) printf("\n --- Run final validation ---\n"); - - primalvalapp->getCore()->SetPrintLevel(0); - primalvalapp->run(); - loss_val = network->getLoss(); - - printf("Final validation accuracy: %2.2f%%\n", accur_val); - } - - // write_vector("design.dat", design, ndesign); -#endif - - - - -/** ================================================================================== - * Adjoint dot test xbarTxdot = ybarTydot - * where xbar = (dfdx)T ybar - * ydot = (dfdx) xdot - * choosing xdot to be a vector of all ones, ybar = 1.0; - * ==================================================================================*/ -#if 0 - - if (size == 1) - { - MyReal obj1, obj0; - // int nconv_size = 3; - - printf("\n\n ============================ \n"); - printf(" Adjoint dot test: \n\n"); - // printf(" ndesign = %d (calc = %d)\n",ndesign, - // nchannels*config->nclasses+config->nclasses // class layer - // +(nlayers-2)+(nlayers-2)*(nconv_size*nconv_size*(nchannels/config->nfeatures)*(nchannels/config->nfeatures))); // con layers - // printf(" nchannels = %d\n",nchannels); - // printf(" nlayers = %d\n",nlayers); - // printf(" conv_size = %d\n",nconv_size); - // printf(" config->nclasses = %d\n\n",config->nclasses); - - - /* TODO: read some design */ - - /* Propagate through braid */ - braid_evalInit(core_train, app_train); - braid_Drive(core_train); - braid_evalObjective(core_train, app_train, &obj0, &loss_train, &accur_train); - - /* Eval gradient */ - braid_evalObjectiveDiff(core_adj, app_train); - braid_Drive(core_adj); - braid_evalInitDiff(core_adj, app_train); - - - MyReal xtx = 0.0; - MyReal EPS = 1e-7; - for (int i = 0; i < ndesign_global; i++) - { - /* Sum up xtx */ - xtx += network->getGradient()[i]; - /* perturb into direction "only ones" */ - network->getDesign()[i] += EPS; - } - - - /* New objective function evaluation */ - braid_evalInit(core_train, app_train); - braid_Drive(core_train); - braid_evalObjective(core_train, app_train, &obj1, &loss_train, &accur_train); - - /* Finite differences */ - MyReal yty = (obj1 - obj0)/EPS; - - - /* Print adjoint dot test result */ - printf(" Dot-test: %1.16e %1.16e\n\n Rel. 
error %3.6f %%\n\n", xtx, yty, (yty-xtx)/xtx * 100.); - printf(" obj0 %1.14e, obj1 %1.14e\n", obj0, obj1); - - } - -#endif - -/** ======================================= - * Full finite differences - * ======================================= */ - - // MyReal* findiff = new MyReal[ndesign]; - // MyReal* relerr = new MyReal[ndesign]; - // MyReal errnorm = 0.0; - // MyReal obj0, obj1, design_store; - // MyReal EPS; - - // printf("\n--------------------------------\n"); - // printf(" FINITE DIFFERENCE TESTING\n\n"); - - // /* Compute baseline objective */ - // // read_vector("design.dat", design, ndesign); - // braid_SetObjectiveOnly(core_train, 0); - // braid_Drive(core_train); - // braid_GetObjective(core_train, &objective); - // obj0 = objective; - - // EPS = 1e-4; - // for (int i = 0; i < ndesign; i++) - // // for (int i = 0; i < 22; i++) - // // int i=21; - // { - // /* Restore design */ - // // read_vector("design.dat", design, ndesign); - - // /* Perturb design */ - // design_store = design[i]; - // design[i] += EPS; - - // /* Recompute objective */ - // _braid_CoreElt(core_train, warm_restart) = 0; - // braid_SetObjectiveOnly(core_train, 1); - // braid_SetPrintLevel(core_train, 0); - // braid_Drive(core_train); - // braid_GetObjective(core_train, &objective); - // obj1 = objective; - - // /* Findiff */ - // findiff[i] = (obj1 - obj0) / EPS; - // relerr[i] = (gradient[i] - findiff[i]) / findiff[i]; - // errnorm += pow(relerr[i],2); - - // printf("\n %4d: % 1.14e % 1.14e, error: % 2.4f",i, findiff[i], gradient[i], relerr[i] * 100.0); - - // /* Restore design */ - // design[i] = design_store; - // } - // errnorm = sqrt(errnorm); - // printf("\n FinDiff ErrNorm %1.14e\n", errnorm); - - // write_vector("findiff.dat", findiff, ndesign); - // write_vector("relerr.dat", relerr, ndesign); - - - /* ======================================= - * check network implementation - * ======================================= */ - // network->applyFWD(config->ntraining, train_examples, train_labels); - // MyReal accur = network->getAccuracy(); - // MyReal regul = network->evalRegularization(); - // objective = network->getLoss() + regul; - // printf("\n --- \n"); - // printf(" Network: obj %1.14e \n", objective); - // printf(" ---\n"); - - /* Print some statistics */ - StopTime = MPI_Wtime(); - UsedTime = StopTime-StartTime; - getrusage(RUSAGE_SELF,&r_usage); - myMB = (MyReal) r_usage.ru_maxrss / 1024.0; - MPI_Allreduce(&myMB, &globalMB, 1, MPI_MyReal, MPI_SUM, MPI_COMM_WORLD); - - // printf("%d; Memory Usage: %.2f MB\n",myid, myMB); - if (myid == MASTER_NODE) - { - printf("\n"); - printf(" Used Time: %.2f seconds\n",UsedTime); - printf(" Global Memory: %.2f MB\n", globalMB); - printf(" Processors used: %d\n", size); - printf("\n"); - } - - - /* Clean up XBraid */ - delete network; - - delete primaltrainapp; - delete adjointtrainapp; - delete primalvalapp; - - /* Delete optimization vars */ - delete hessian; - delete [] ascentdir; - - /* Delete training and validation examples */ - delete trainingdata; - delete validationdata; - - /* Close optim file */ - if (myid == MASTER_NODE) - { - fclose(optimfile); - printf("Optimfile: %s\n", optimfilename); - } - - delete config; - - MPI_Finalize(); - return 0; + } + + /* --- Run final validation and write prediction file --- */ + if (config->validationlevel > -1) { + if (myid == MASTER_NODE) printf("\n --- Run final validation ---\n"); + + primalvalapp->getCore()->SetPrintLevel(0); + primalvalapp->run(); + loss_val = network->getLoss(); + + printf("Final validation 
accuracy: %2.2f%%\n", accur_val); + } + + // write_vector("design.dat", design, ndesign); + + /* Print some statistics */ + StopTime = MPI_Wtime(); + UsedTime = StopTime - StartTime; + getrusage(RUSAGE_SELF, &r_usage); + myMB = (MyReal)r_usage.ru_maxrss / 1024.0; + MPI_Allreduce(&myMB, &globalMB, 1, MPI_MyReal, MPI_SUM, MPI_COMM_WORLD); + + // printf("%d; Memory Usage: %.2f MB\n",myid, myMB); + if (myid == MASTER_NODE) { + printf("\n"); + printf(" Used Time: %.2f seconds\n", UsedTime); + printf(" Global Memory: %.2f MB\n", globalMB); + printf(" Processors used: %d\n", size); + printf("\n"); + } + + /* Clean up XBraid */ + delete network; + + delete primaltrainapp; + delete adjointtrainapp; + delete primalvalapp; + + /* Delete optimization vars */ + delete hessian; + delete[] ascentdir; + + /* Delete training and validation examples */ + delete trainingdata; + delete validationdata; + + /* Close optim file */ + if (myid == MASTER_NODE) { + fclose(optimfile); + printf("Optimfile: %s\n", optimfilename); + } + + delete config; + + MPI_Finalize(); + return 0; } diff --git a/src/network.cpp b/src/network.cpp index 4c9ccb1..a177195 100644 --- a/src/network.cpp +++ b/src/network.cpp @@ -1,152 +1,161 @@ +// Copyright +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Underlying paper: +// +// Layer-Parallel Training of Deep Residual Neural Networks +// S. Guenther, L. Ruthotto, J.B. Schroder, E.C. Czr, and N.R. 
Gauger +// +// Download: https://arxiv.org/pdf/1812.04352.pdf +// #include "network.hpp" -#include +#include -Network::Network() -{ - nlayers_global = 0; - nlayers_local = 0; - nchannels = 0; - dt = 0.0; - loss = 0.0; - accuracy = 0.0; +Network::Network() { + nlayers_global = 0; + nlayers_local = 0; + nchannels = 0; + dt = 0.0; + loss = 0.0; + accuracy = 0.0; - startlayerID = 0; - endlayerID = 0; + startlayerID = 0; + endlayerID = 0; - ndesign_local = 0; - ndesign_global = 0; - ndesign_layermax = 0; + ndesign_local = 0; + ndesign_global = 0; + ndesign_layermax = 0; - design = NULL; - gradient = NULL; + design = NULL; + gradient = NULL; - layers = NULL; - openlayer = NULL; - layer_left = NULL; - layer_right = NULL; + layers = NULL; + openlayer = NULL; + layer_left = NULL; + layer_right = NULL; - comm = MPI_COMM_WORLD; + comm = MPI_COMM_WORLD; } -void Network::createNetworkBlock(int StartLayerID, - int EndLayerID, - Config* config, - MPI_Comm Comm) -{ - /* Initilizize */ - startlayerID = StartLayerID; - endlayerID = EndLayerID; - nlayers_local = endlayerID - startlayerID + 1; - nlayers_global = config->nlayers; - nchannels = config->nchannels; - dt = (config->T) / (MyReal)(config->nlayers-2); // nlayers-2 = nhiddenlayers - comm = Comm; - - /* --- Create the layers --- */ - ndesign_local = 0; - - if (startlayerID == 0) // Opening layer - { - /* Create the opening layer */ - int index = -1; - openlayer = createLayer(index, config); - ndesign_local += openlayer->getnDesign(); - // printf("Create opening layer %d, ndesign_local %d \n", index, openlayer->getnDesign()); - } - - layers = new Layer*[nlayers_local]; // Intermediate and classification layer - for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) - { - /* Create a layer at time step ilayer. 
Local storage at ilayer - startlayerID */ - int storeID = getLocalID(ilayer); - layers[storeID] = createLayer(ilayer, config); - ndesign_local += layers[storeID]->getnDesign(); - // printf("creating hidden/class layer %d/%d, ndesign_local%d\n", ilayer, nlayers_local, layers[storeID]->getnDesign()); - } - - /* Create left neighbouring layer */ - int leftID = startlayerID - 1; - layer_left = createLayer(leftID, config); - - /* Create right neighbrouing layer */ - int rightID = endlayerID + 1; - layer_right = createLayer(rightID, config); - - /* Sum up global number of design vars */ - MPI_Allreduce(&ndesign_local, &ndesign_global, 1, MPI_INT, MPI_SUM, comm); - - /* Store maximum number of designs over all layers layermax */ - ndesign_layermax = computeLayermax(); - - /* Allocate memory for network design and gradient variables */ - design = new MyReal[ndesign_local]; - gradient = new MyReal[ndesign_local]; - - /* Set the memory locations for all layers */ - int istart = 0; - if (openlayer != NULL) // Openlayer on first processor - { - openlayer->setMemory(&(design[istart]), &(gradient[istart])); - istart += openlayer->getnDesign(); - } - for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) // intermediate and hidden layers - { - layers[getLocalID(ilayer)]->setMemory(&(design[istart]), &(gradient[istart])); - istart += layers[getLocalID(ilayer)]->getnDesign(); - } - - /* left anr right neighbouring layer design, if exists */ - if (layer_left != NULL) - { - MyReal *left_design = new MyReal[layer_left->getnDesign()]; - MyReal *left_gradient = new MyReal[layer_left->getnDesign()]; - layer_left->setMemory(left_design, left_gradient); - } - /* Create and initialize right neighbouring layer design, if exists */ - if (layer_right != NULL) - { - MyReal *right_design = new MyReal[layer_right->getnDesign()]; - MyReal *right_gradient = new MyReal[layer_right->getnDesign()]; - layer_right->setMemory(right_design, right_gradient); - } - -} - - - - - - -Network::~Network() -{ - /* Delete openlayer */ - if (openlayer != NULL) delete openlayer; - - /* Delete intermediate and classification layers */ - for (int ilayer = 0; ilayer < nlayers_local; ilayer++) - { - delete layers[ilayer]; - } - delete [] layers; - - /* Delete design and gradient */ - delete [] design; - delete [] gradient; - - /* Delete neighbouring layer information */ - if (layer_left != NULL) - { - delete [] layer_left->getWeights(); - delete [] layer_left->getWeightsBar(); - delete layer_left; - } +void Network::createNetworkBlock(int StartLayerID, int EndLayerID, + Config *config, MPI_Comm Comm) { + /* Initilizize */ + startlayerID = StartLayerID; + endlayerID = EndLayerID; + nlayers_local = endlayerID - startlayerID + 1; + nlayers_global = config->nlayers; + nchannels = config->nchannels; + dt = (config->T) / (MyReal)(config->nlayers - 2); // nlayers-2 = nhiddenlayers + comm = Comm; + + /* --- Create the layers --- */ + ndesign_local = 0; + + if (startlayerID == 0) // Opening layer + { + /* Create the opening layer */ + int index = -1; + openlayer = createLayer(index, config); + ndesign_local += openlayer->getnDesign(); + // printf("Create opening layer %d, ndesign_local %d \n", index, + // openlayer->getnDesign()); + } + + layers = new Layer *[nlayers_local]; // Intermediate and classification layer + for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) { + /* Create a layer at time step ilayer. 
Local storage at ilayer - + * startlayerID */ + int storeID = getLocalID(ilayer); + layers[storeID] = createLayer(ilayer, config); + ndesign_local += layers[storeID]->getnDesign(); + // printf("creating hidden/class layer %d/%d, ndesign_local%d\n", ilayer, + // nlayers_local, layers[storeID]->getnDesign()); + } + + /* Create left neighbouring layer */ + int leftID = startlayerID - 1; + layer_left = createLayer(leftID, config); + + /* Create right neighbrouing layer */ + int rightID = endlayerID + 1; + layer_right = createLayer(rightID, config); + + /* Sum up global number of design vars */ + MPI_Allreduce(&ndesign_local, &ndesign_global, 1, MPI_INT, MPI_SUM, comm); + + /* Store maximum number of designs over all layers layermax */ + ndesign_layermax = computeLayermax(); + + /* Allocate memory for network design and gradient variables */ + design = new MyReal[ndesign_local]; + gradient = new MyReal[ndesign_local]; + + /* Set the memory locations for all layers */ + int istart = 0; + if (openlayer != NULL) // Openlayer on first processor + { + openlayer->setMemory(&(design[istart]), &(gradient[istart])); + istart += openlayer->getnDesign(); + } + for (int ilayer = startlayerID; ilayer <= endlayerID; + ilayer++) // intermediate and hidden layers + { + layers[getLocalID(ilayer)]->setMemory(&(design[istart]), + &(gradient[istart])); + istart += layers[getLocalID(ilayer)]->getnDesign(); + } + + /* left anr right neighbouring layer design, if exists */ + if (layer_left != NULL) { + MyReal *left_design = new MyReal[layer_left->getnDesign()]; + MyReal *left_gradient = new MyReal[layer_left->getnDesign()]; + layer_left->setMemory(left_design, left_gradient); + } + /* Create and initialize right neighbouring layer design, if exists */ + if (layer_right != NULL) { + MyReal *right_design = new MyReal[layer_right->getnDesign()]; + MyReal *right_gradient = new MyReal[layer_right->getnDesign()]; + layer_right->setMemory(right_design, right_gradient); + } +} - if (layer_right != NULL) - { - delete [] layer_right->getWeights(); - delete [] layer_right->getWeightsBar(); - delete layer_right; - } +Network::~Network() { + /* Delete openlayer */ + if (openlayer != NULL) delete openlayer; + + /* Delete intermediate and classification layers */ + for (int ilayer = 0; ilayer < nlayers_local; ilayer++) { + delete layers[ilayer]; + } + delete[] layers; + + /* Delete design and gradient */ + delete[] design; + delete[] gradient; + + /* Delete neighbouring layer information */ + if (layer_left != NULL) { + delete[] layer_left->getWeights(); + delete[] layer_left->getWeightsBar(); + delete layer_left; + } + + if (layer_right != NULL) { + delete[] layer_right->getWeights(); + delete[] layer_right->getWeightsBar(); + delete layer_right; + } } int Network::getnChannels() { return nchannels; } @@ -155,10 +164,9 @@ int Network::getnLayersGlobal() { return nlayers_global; } MyReal Network::getDT() { return dt; } -int Network::getLocalID(int ilayer) -{ - int idx = ilayer - startlayerID; - return idx; +int Network::getLocalID(int ilayer) { + int idx = ilayer - startlayerID; + return idx; } MyReal Network::getLoss() { return loss; } @@ -169,399 +177,359 @@ int Network::getnDesignLocal() { return ndesign_local; } int Network::getnDesignGlobal() { return ndesign_global; } -MyReal* Network::getDesign() { return design; } - -MyReal* Network::getGradient() { return gradient; } +MyReal *Network::getDesign() { return design; } + +MyReal *Network::getGradient() { return gradient; } int Network::getStartLayerID() { return startlayerID; 
} -int Network::getEndLayerID() { return endlayerID; } +int Network::getEndLayerID() { return endlayerID; } MPI_Comm Network::getComm() { return comm; } - -Layer* Network::createLayer(int index, - Config *config) -{ - Layer* layer = 0; - if (index == -1) // Opening layer - { - switch ( config->network_type ) -{ - case DENSE: - if (config->weights_open_init == 0.0) - { - layer = new OpenExpandZero(config->nfeatures, nchannels); - } - else - { - layer = new OpenDenseLayer(config->nfeatures, nchannels, config->activation, config->gamma_tik); - } - break; - case CONVOLUTIONAL: - /**< (Weight_open_init == 0.0) not needed for convolutional layers*/ - if (config->openlayer_type == 0) - { - layer = new OpenConvLayer(config->nfeatures, nchannels); - } - else if (config->openlayer_type == 1) - { - layer = new OpenConvLayerMNIST(config->nfeatures, nchannels); - } - break; +Layer *Network::createLayer(int index, Config *config) { + Layer *layer = 0; + if (index == -1) // Opening layer + { + switch (config->network_type) { + case DENSE: + if (config->weights_open_init == 0.0) { + layer = new OpenExpandZero(config->nfeatures, nchannels); + } else { + layer = new OpenDenseLayer(config->nfeatures, nchannels, + config->activation, config->gamma_tik); } - } - else if (0 <= index && index < nlayers_global-2) // Intermediate layer - { - switch ( config->network_type ) - { - case DENSE: - layer = new DenseLayer(index, nchannels, nchannels, dt, config->activation, config->gamma_tik, config->gamma_ddt); - break; - case CONVOLUTIONAL: - // TODO: Fix - int convolution_size = 3; - layer = new ConvLayer(index, nchannels, nchannels, convolution_size, nchannels/config->nfeatures, dt, config->activation, config->gamma_tik, config->gamma_ddt); - break; + break; + case CONVOLUTIONAL: + /**< (Weight_open_init == 0.0) not needed for convolutional layers*/ + if (config->openlayer_type == 0) { + layer = new OpenConvLayer(config->nfeatures, nchannels); + } else if (config->openlayer_type == 1) { + layer = new OpenConvLayerMNIST(config->nfeatures, nchannels); } - } - else if (index == nlayers_global-2) // Classification layer - { - layer = new ClassificationLayer(index, nchannels, config->nclasses, config->gamma_class); - } - else - { - layer = NULL; - } - - return layer; -} - -Layer* Network::getLayer(int layerindex) -{ - Layer* layer; - - if (layerindex == -1) // opening layer - { - layer = openlayer; - } - else if (layerindex == startlayerID - 1) - { - layer = layer_left; - } - else if (startlayerID <= layerindex && layerindex <= endlayerID) - { - layer = layers[getLocalID(layerindex)]; - } - else if (layerindex == endlayerID + 1) - { - layer = layer_right; - } - else - { - layer = NULL; - } + break; + } + } else if (0 <= index && index < nlayers_global - 2) // Intermediate layer + { + switch (config->network_type) { + case DENSE: + layer = + new DenseLayer(index, nchannels, nchannels, dt, config->activation, + config->gamma_tik, config->gamma_ddt); + break; + case CONVOLUTIONAL: + // TODO: Fix + int convolution_size = 3; + layer = + new ConvLayer(index, nchannels, nchannels, convolution_size, + nchannels / config->nfeatures, dt, config->activation, + config->gamma_tik, config->gamma_ddt); + break; + } + } else if (index == nlayers_global - 2) // Classification layer + { + layer = new ClassificationLayer(index, nchannels, config->nclasses, + config->gamma_class); + } else { + layer = NULL; + } + + return layer; +} - return layer; +Layer *Network::getLayer(int layerindex) { + Layer *layer; + + if (layerindex == -1) // 
opening layer + { + layer = openlayer; + } else if (layerindex == startlayerID - 1) { + layer = layer_left; + } else if (startlayerID <= layerindex && layerindex <= endlayerID) { + layer = layers[getLocalID(layerindex)]; + } else if (layerindex == endlayerID + 1) { + layer = layer_right; + } else { + layer = NULL; + } + + return layer; } int Network::getnDesignLayermax() { return ndesign_layermax; } -int Network::computeLayermax() -{ - int ndesignlayer; - int max = 0; +int Network::computeLayermax() { + int ndesignlayer; + int max = 0; - /* Loop over all local layers */ - for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) + /* Loop over all local layers */ + for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) { + if (ilayer < nlayers_global - 2) // excludes classification layer { - if (ilayer < nlayers_global-2) // excludes classification layer - { - /* Update maximum */ - ndesignlayer = layers[getLocalID(ilayer)]->getnDesign(); - if ( ndesignlayer > max) max = ndesignlayer; - } + /* Update maximum */ + ndesignlayer = layers[getLocalID(ilayer)]->getnDesign(); + if (ndesignlayer > max) max = ndesignlayer; } + } - /* Get maximum over all local layer blocks */ - int mymax = max; - MPI_Allreduce(&mymax, &max, 1, MPI_INT, MPI_MAX, comm); + /* Get maximum over all local layer blocks */ + int mymax = max; + MPI_Allreduce(&mymax, &max, 1, MPI_INT, MPI_MAX, comm); - return max; + return max; } -void Network::setInitialDesign(Config *config) -{ - MyReal factor; - MyReal* design_init; - char filename[255]; - int myid; - MPI_Comm_rank(comm, &myid); +void Network::setInitialDesign(Config *config) { + MyReal factor; + MyReal *design_init; + char filename[255]; + int myid; + MPI_Comm_rank(comm, &myid); - /* Initialize design with random numbers (do on one processor and scatter for scaling test) */ - if (myid == 0) - { - srand(1.0); - design_init = new MyReal[ndesign_global]; - for (int i = 0; i < ndesign_global; i++) - { - design_init[i] = (MyReal) rand() / ((MyReal) RAND_MAX); - } + /* Initialize design with random numbers (do on one processor and scatter for + * scaling test) */ + if (myid == 0) { + srand(1.0); + design_init = new MyReal[ndesign_global]; + for (int i = 0; i < ndesign_global; i++) { + design_init[i] = (MyReal)rand() / ((MyReal)RAND_MAX); } - /* Scatter initial design to all processors */ - MPI_ScatterVector(design_init, design, ndesign_local, 0, comm); + } + /* Scatter initial design to all processors */ + MPI_ScatterVector(design_init, design, ndesign_local, 0, comm); - /* Scale the initial design by a factor and read from file, if set */ + /* Scale the initial design by a factor and read from file, if set */ - /* Opening layer on first processor */ - if (startlayerID == 0) - { - /* Scale design by the factor */ - factor = config->weights_open_init; - openlayer->scaleDesign(factor); - - /* if set, overwrite opening design from file */ - if (strcmp(config->weightsopenfile, "NONE") != 0) - { - sprintf(filename, "%s/%s", config->datafolder, config->weightsopenfile); - read_vector(filename, openlayer->getWeights(), openlayer->getnDesign()); - } - } + /* Opening layer on first processor */ + if (startlayerID == 0) { + /* Scale design by the factor */ + factor = config->weights_open_init; + openlayer->scaleDesign(factor); - /* Intermediate (hidden) and classification layers */ - for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) - { - if (ilayer < nlayers_global-1) // Intermediate layer - { - factor = config->weights_init; - } - else // Classification 
layer - { - factor = config->weights_class_init; - } - - /* Set memory location and scale the current design by the factor */ - int storeID = getLocalID(ilayer); - layers[storeID]->scaleDesign(factor); - - /* if set, overwrite classification design from file */ - if (ilayer == nlayers_global-1) - { - if (strcmp(config->weightsclassificationfile, "NONE") != 0) - { - sprintf(filename, "%s/%s", config->datafolder, config->weightsclassificationfile); - read_vector(filename, layers[storeID]->getWeights(), layers[storeID]->getnDesign()); - } - } + /* if set, overwrite opening design from file */ + if (strcmp(config->weightsopenfile, "NONE") != 0) { + sprintf(filename, "%s/%s", config->datafolder, config->weightsopenfile); + read_vector(filename, openlayer->getWeights(), openlayer->getnDesign()); } + } - /* Communicate the neighbours across processors */ - MPI_CommunicateNeighbours(comm); - - if (myid == 0) delete [] design_init; - -} - - -void Network::MPI_CommunicateNeighbours(MPI_Comm comm) -{ - int myid, comm_size; - MPI_Comm_rank(comm, &myid); - MPI_Comm_size(comm, &comm_size); - MPI_Request sendlastreq, recvlastreq; - MPI_Request sendfirstreq, recvfirstreq; - MPI_Status status; - - /* Allocate buffers */ - int size_left = -1; - int size_right = -1; - - MyReal* sendlast = 0; - MyReal* recvlast = 0; - MyReal* sendfirst = 0; - MyReal* recvfirst = 0; - - /* --- All but the first process receive the last layer from left neighbour --- */ - if (myid > 0) - { - /* Receive from left neighbour */ - int source = myid - 1; - - size_left = layer_left->getnDesign(); - recvlast = new MyReal[size_left]; - - MPI_Irecv(recvlast, size_left, MPI_MyReal, source, 0, comm, &recvlastreq); - } - - /* --- All but the last process sent their last layer to right neighbour --- */ - if (myid < comm_size-1) + /* Intermediate (hidden) and classification layers */ + for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) { + if (ilayer < nlayers_global - 1) // Intermediate layer { - size_left = layers[getLocalID(endlayerID)]->getnDesign(); - sendlast = new MyReal[size_left]; - - /* Pack the last layer into a buffer */ - layers[getLocalID(endlayerID)]->packDesign(sendlast, size_left); - - /* Send to right neighbour */ - int receiver = myid + 1; - MPI_Isend(sendlast, size_left, MPI_MyReal, receiver, 0, comm, &sendlastreq); - } - - /* --- All but the last processor recv the first layer from the right neighbour --- */ - if (myid < comm_size - 1) - { - /* Receive from right neighbour */ - int source = myid + 1; - - size_right = layer_right->getnDesign(); - recvfirst = new MyReal[size_right]; - - MPI_Irecv(recvfirst, size_right, MPI_MyReal, source, 1, comm, &recvfirstreq); - } - - - /* --- All but the first processor send their first layer to the left neighbour --- */ - if (myid > 0) + factor = config->weights_init; + } else // Classification layer { - size_right = layers[getLocalID(startlayerID)]->getnDesign(); - sendfirst = new MyReal[size_right]; - - /* Pack the first layer into a buffer */ - layers[getLocalID(startlayerID)]->packDesign(sendfirst, size_right); - - /* Send to left neighbour */ - int receiver = myid - 1; - MPI_Isend(sendfirst, size_right, MPI_MyReal, receiver, 1, comm, &sendfirstreq); + factor = config->weights_class_init; } + /* Set memory location and scale the current design by the factor */ + int storeID = getLocalID(ilayer); + layers[storeID]->scaleDesign(factor); - /* Wait to finish up communication */ - if (myid > 0) MPI_Wait(&recvlastreq, &status); - if (myid < comm_size - 1) 
MPI_Wait(&sendlastreq, &status); - if (myid < comm_size - 1) MPI_Wait(&recvfirstreq, &status); - if (myid > 0) MPI_Wait(&sendfirstreq, &status); - - /* Unpack and store the left received layer */ - if (myid > 0) - { - layer_left->unpackDesign(recvlast); + /* if set, overwrite classification design from file */ + if (ilayer == nlayers_global - 1) { + if (strcmp(config->weightsclassificationfile, "NONE") != 0) { + sprintf(filename, "%s/%s", config->datafolder, + config->weightsclassificationfile); + read_vector(filename, layers[storeID]->getWeights(), + layers[storeID]->getnDesign()); + } } + } - /* Unpack and store the right received layer */ - if (myid < comm_size - 1) - { - layer_right->unpackDesign(recvfirst); - } + /* Communicate the neighbours across processors */ + MPI_CommunicateNeighbours(comm); - /* Free the buffer */ - if(sendlast!=0) delete [] sendlast; - if(recvlast!=0) delete [] recvlast; - if(sendfirst!=0) delete [] sendfirst; - if(recvfirst!=0) delete [] recvfirst; + if (myid == 0) delete[] design_init; } +void Network::MPI_CommunicateNeighbours(MPI_Comm comm) { + int myid, comm_size; + MPI_Comm_rank(comm, &myid); + MPI_Comm_size(comm, &comm_size); + MPI_Request sendlastreq, recvlastreq; + MPI_Request sendfirstreq, recvfirstreq; + MPI_Status status; + + /* Allocate buffers */ + int size_left = -1; + int size_right = -1; + + MyReal *sendlast = 0; + MyReal *recvlast = 0; + MyReal *sendfirst = 0; + MyReal *recvfirst = 0; + + /* --- All but the first process receive the last layer from left neighbour + * --- */ + if (myid > 0) { + /* Receive from left neighbour */ + int source = myid - 1; + + size_left = layer_left->getnDesign(); + recvlast = new MyReal[size_left]; + + MPI_Irecv(recvlast, size_left, MPI_MyReal, source, 0, comm, &recvlastreq); + } + + /* --- All but the last process sent their last layer to right neighbour --- + */ + if (myid < comm_size - 1) { + size_left = layers[getLocalID(endlayerID)]->getnDesign(); + sendlast = new MyReal[size_left]; + + /* Pack the last layer into a buffer */ + layers[getLocalID(endlayerID)]->packDesign(sendlast, size_left); + + /* Send to right neighbour */ + int receiver = myid + 1; + MPI_Isend(sendlast, size_left, MPI_MyReal, receiver, 0, comm, &sendlastreq); + } + + /* --- All but the last processor recv the first layer from the right + * neighbour --- */ + if (myid < comm_size - 1) { + /* Receive from right neighbour */ + int source = myid + 1; + + size_right = layer_right->getnDesign(); + recvfirst = new MyReal[size_right]; + + MPI_Irecv(recvfirst, size_right, MPI_MyReal, source, 1, comm, + &recvfirstreq); + } + + /* --- All but the first processor send their first layer to the left + * neighbour --- */ + if (myid > 0) { + size_right = layers[getLocalID(startlayerID)]->getnDesign(); + sendfirst = new MyReal[size_right]; + + /* Pack the first layer into a buffer */ + layers[getLocalID(startlayerID)]->packDesign(sendfirst, size_right); + + /* Send to left neighbour */ + int receiver = myid - 1; + MPI_Isend(sendfirst, size_right, MPI_MyReal, receiver, 1, comm, + &sendfirstreq); + } + + /* Wait to finish up communication */ + if (myid > 0) MPI_Wait(&recvlastreq, &status); + if (myid < comm_size - 1) MPI_Wait(&sendlastreq, &status); + if (myid < comm_size - 1) MPI_Wait(&recvfirstreq, &status); + if (myid > 0) MPI_Wait(&sendfirstreq, &status); + + /* Unpack and store the left received layer */ + if (myid > 0) { + layer_left->unpackDesign(recvlast); + } + + /* Unpack and store the right received layer */ + if (myid < comm_size - 1) { + 
layer_right->unpackDesign(recvfirst); + } + + /* Free the buffer */ + if (sendlast != 0) delete[] sendlast; + if (recvlast != 0) delete[] recvlast; + if (sendfirst != 0) delete[] sendfirst; + if (recvfirst != 0) delete[] recvfirst; +} -void Network::evalClassification(DataSet* data, - MyReal** state, - int output) -{ - MyReal *tmpstate = new MyReal[nchannels]; - - int class_id; - int success, success_local; - FILE* classfile; - ClassificationLayer* classificationlayer; - - - /* Get classification layer */ - classificationlayer = dynamic_cast(getLayer(nlayers_global - 2)); - if (classificationlayer == NULL) - { - printf("\n ERROR: Network can't access classification layer!\n\n"); - exit(1); - } - - /* open file for printing predicted file */ - if (output) classfile = fopen("classprediction.dat", "w"); - - loss = 0.0; - accuracy = 0.0; - success = 0; - for (int iex = 0; iex < data->getnBatch(); iex++) - { - /* Copy values so that they are not overwrittn (they are needed for adjoint)*/ - for (int ic = 0; ic < nchannels; ic++) - { - tmpstate[ic] = state[iex][ic]; - } - /* Apply classification on tmpstate */ - classificationlayer->setLabel(data->getLabel(iex)); - classificationlayer->applyFWD(tmpstate); - /* Evaluate Loss */ - loss += classificationlayer->crossEntropy(tmpstate); - success_local = classificationlayer->prediction(tmpstate, &class_id); - success += success_local; - if (output) fprintf(classfile, "%d %d\n", class_id, success_local ); - } - loss = 1. / data->getnBatch() * loss; - accuracy = 100.0 * ( (MyReal) success ) / data->getnBatch(); - // printf("Classification %d: %1.14e using layer %1.14e state %1.14e tmpstate[0] %1.14e\n", getIndex(), loss, weights[0], state[1][1], tmpstate[0]); - - if (output) fclose(classfile); - if (output) printf("Prediction file written: classprediction.dat\n"); - - delete [] tmpstate; -} - - -void Network::evalClassification_diff(DataSet* data, - MyReal** primalstate, - MyReal** adjointstate, - int compute_gradient) -{ - MyReal *tmpstate = new MyReal[nchannels]; - ClassificationLayer* classificationlayer; - - /* Get classification layer */ - classificationlayer = dynamic_cast(getLayer(nlayers_global - 2)); - if (classificationlayer == NULL) - { - printf("\n ERROR: Network can't access classification layer!\n\n"); - exit(1); - } - - int nbatch = data->getnBatch(); - MyReal loss_bar = 1./nbatch; - - for (int iex = 0; iex < nbatch; iex++) - { - /* Recompute the Classification */ - for (int ic = 0; ic < nchannels; ic++) - { - tmpstate[ic] = primalstate[iex][ic]; - } - classificationlayer->setLabel(data->getLabel(iex)); - classificationlayer->applyFWD(tmpstate); - - /* Derivative of Loss and classification. 
*/ - classificationlayer->crossEntropy_diff(tmpstate, adjointstate[iex], loss_bar); - classificationlayer->applyBWD(primalstate[iex], adjointstate[iex], compute_gradient); - } - // printf("Classification_diff %d using layer %1.14e state %1.14e tmpstate %1.14e biasbar[dimOut-1] %1.14e\n", getIndex(), weights[0], primalstate[1][1], tmpstate[0], bias_bar[dim_Out-1]); - - delete [] tmpstate; +void Network::evalClassification(DataSet *data, MyReal **state, int output) { + MyReal *tmpstate = new MyReal[nchannels]; + + int class_id; + int success, success_local; + FILE *classfile; + ClassificationLayer *classificationlayer; + + /* Get classification layer */ + classificationlayer = + dynamic_cast(getLayer(nlayers_global - 2)); + if (classificationlayer == NULL) { + printf("\n ERROR: Network can't access classification layer!\n\n"); + exit(1); + } + + /* open file for printing predicted file */ + if (output) classfile = fopen("classprediction.dat", "w"); + + loss = 0.0; + accuracy = 0.0; + success = 0; + for (int iex = 0; iex < data->getnBatch(); iex++) { + /* Copy values so that they are not overwrittn (they are needed for + * adjoint)*/ + for (int ic = 0; ic < nchannels; ic++) { + tmpstate[ic] = state[iex][ic]; + } + /* Apply classification on tmpstate */ + classificationlayer->setLabel(data->getLabel(iex)); + classificationlayer->applyFWD(tmpstate); + /* Evaluate Loss */ + loss += classificationlayer->crossEntropy(tmpstate); + success_local = classificationlayer->prediction(tmpstate, &class_id); + success += success_local; + if (output) fprintf(classfile, "%d %d\n", class_id, success_local); + } + loss = 1. / data->getnBatch() * loss; + accuracy = 100.0 * ((MyReal)success) / data->getnBatch(); + // printf("Classification %d: %1.14e using layer %1.14e state %1.14e + // tmpstate[0] %1.14e\n", getIndex(), loss, weights[0], state[1][1], + // tmpstate[0]); + + if (output) fclose(classfile); + if (output) printf("Prediction file written: classprediction.dat\n"); + + delete[] tmpstate; +} +void Network::evalClassification_diff(DataSet *data, MyReal **primalstate, + MyReal **adjointstate, + int compute_gradient) { + MyReal *tmpstate = new MyReal[nchannels]; + ClassificationLayer *classificationlayer; + + /* Get classification layer */ + classificationlayer = + dynamic_cast(getLayer(nlayers_global - 2)); + if (classificationlayer == NULL) { + printf("\n ERROR: Network can't access classification layer!\n\n"); + exit(1); + } + + int nbatch = data->getnBatch(); + MyReal loss_bar = 1. / nbatch; + + for (int iex = 0; iex < nbatch; iex++) { + /* Recompute the Classification */ + for (int ic = 0; ic < nchannels; ic++) { + tmpstate[ic] = primalstate[iex][ic]; + } + classificationlayer->setLabel(data->getLabel(iex)); + classificationlayer->applyFWD(tmpstate); + + /* Derivative of Loss and classification. 
*/ + classificationlayer->crossEntropy_diff(tmpstate, adjointstate[iex], + loss_bar); + classificationlayer->applyBWD(primalstate[iex], adjointstate[iex], + compute_gradient); + } + // printf("Classification_diff %d using layer %1.14e state %1.14e tmpstate + // %1.14e biasbar[dimOut-1] %1.14e\n", getIndex(), weights[0], + // primalstate[1][1], tmpstate[0], bias_bar[dim_Out-1]); + + delete[] tmpstate; } -void Network::updateDesign(MyReal stepsize, - MyReal *direction, - MPI_Comm comm) -{ - /* Update design locally on this network-block */ - for (int id = 0; id < ndesign_local; id++) - { - design[id] += stepsize * direction[id]; - } +void Network::updateDesign(MyReal stepsize, MyReal *direction, MPI_Comm comm) { + /* Update design locally on this network-block */ + for (int id = 0; id < ndesign_local; id++) { + design[id] += stepsize * direction[id]; + } - /* Communicate design across neighbouring processors (ghostlayers) */ - MPI_CommunicateNeighbours(comm); -} \ No newline at end of file + /* Communicate design across neighbouring processors (ghostlayers) */ + MPI_CommunicateNeighbours(comm); +} diff --git a/src/util.cpp b/src/util.cpp index 5a58260..f0f2f0d 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -1,148 +1,120 @@ #include "util.hpp" -void read_matrix(char *filename, - MyReal **var, - int dimx, - int dimy) -{ - FILE *file; - MyReal tmp; - - /* Open file */ - file = fopen(filename, "r"); - if (file == NULL) - { - printf("Can't open %s \n", filename); - exit(1); - } - - /* Read data */ - printf("Reading file %s\n", filename); - for (int ix = 0; ix < dimx; ix++) - { - for (int iy = 0; iy < dimy; iy++) - { - fscanf(file, "%lf", &tmp); - var[ix][iy] = tmp; - } - } - - fclose(file); +void read_matrix(char *filename, MyReal **var, int dimx, int dimy) { + FILE *file; + MyReal tmp; + + /* Open file */ + file = fopen(filename, "r"); + if (file == NULL) { + printf("Can't open %s \n", filename); + exit(1); + } + + /* Read data */ + printf("Reading file %s\n", filename); + for (int ix = 0; ix < dimx; ix++) { + for (int iy = 0; iy < dimy; iy++) { + fscanf(file, "%lf", &tmp); + var[ix][iy] = tmp; + } + } + + fclose(file); } -void read_vector(char *filename, - MyReal *var, - int dimx) -{ - FILE *file; - MyReal tmp; - - /* Open file */ - file = fopen(filename, "r"); - if (file == NULL) - { - printf("Can't open %s \n", filename); - exit(1); - } - - /* Read data */ - printf("Reading file %s\n", filename); - for (int ix = 0; ix < dimx; ix++) - { - fscanf(file, "%lf", &tmp); - var[ix] = tmp; - } - - fclose(file); +void read_vector(char *filename, MyReal *var, int dimx) { + FILE *file; + MyReal tmp; + + /* Open file */ + file = fopen(filename, "r"); + if (file == NULL) { + printf("Can't open %s \n", filename); + exit(1); + } + + /* Read data */ + printf("Reading file %s\n", filename); + for (int ix = 0; ix < dimx; ix++) { + fscanf(file, "%lf", &tmp); + var[ix] = tmp; + } + + fclose(file); } +void write_vector(char *filename, MyReal *var, int dimN) { + FILE *file; + int i; + + /* open file */ + file = fopen(filename, "w"); + if (file == NULL) { + printf("Can't open %s \n", filename); + exit(1); + } + + /* Write data */ + printf("Writing file %s\n", filename); + for (i = 0; i < dimN; i++) { + fprintf(file, "%1.14e\n", var[i]); + } + + /* close file */ + fclose(file); +} -void write_vector(char *filename, - MyReal * var, - int dimN) -{ - FILE *file; - int i; - - /* open file */ - file = fopen(filename, "w"); - if (file == NULL) - { - printf("Can't open %s \n", filename); - exit(1); - } - - /* Write data */ 
- printf("Writing file %s\n", filename); - for ( i = 0; i < dimN; i++) - { - fprintf(file, "%1.14e\n", var[i]); - } - - /* close file */ - fclose(file); - -} - -void MPI_GatherVector(MyReal* sendbuffer, - int localsendcount, - MyReal* recvbuffer, - int rootprocessID, - MPI_Comm comm) -{ - int comm_size; - MPI_Comm_size(comm, &comm_size); - - int* recvcount = new int[comm_size]; - int* displs = new int[comm_size]; - - /* Gather the local send counts and store in recvcount vector on root */ - MPI_Gather(&localsendcount, 1, MPI_INT, recvcount , 1, MPI_INT, rootprocessID, comm); - - /* Compute displacement vector */ - displs[0] = 0; - for (int i=1; i= 3 !) +nlayers = 32 # final time -T = 5.0 +T = 1.0 # Activation function ("tanh" or "ReLu" or "SmoothReLu") activation = SmoothReLu # Type of network ("dense" the default, or "convolutional") @@ -45,14 +45,14 @@ network_type = dense # "activate": same as replicate, only apply tuned, shifted tanh activation function for MNIST. type_openlayer = activate # factor for scaling initial opening layer weights and bias -weights_open_init = 0e-3 +weights_open_init = 1e-3 # factor for scaling initial weights and bias of intermediate layers -weights_init = 1e-3 +weights_init = 0e-3 # factor for scaling initial classification weights and bias weights_class_init = 1e-3 ################################ -#BRAID +# XBraid ################################ # coarsening factor on level 0 @@ -62,15 +62,15 @@ braid_cfactor0 = 2 # coarsening factor on all other levels braid_cfactor = 2 # maximum number of levels -braid_maxlevels = 10 +braid_maxlevels = 1 # minimum allowed coarse time time grid size (values in 10-30 are usually best) braid_mincoarse = 10 # maximum number of iterations -braid_maxiter = 2 +braid_maxiter = 15 # absolute tolerance -braid_abstol = 1e-15 +braid_abstol = 1e-10 # absolute adjoint tolerance -braid_adjtol = 1e-15 +braid_adjtol = 1e-10 # printlevel braid_printlevel = 1 # access level @@ -85,28 +85,28 @@ braid_nrelax = 1 braid_nrelax0 = 0 #################################### -#Optimization +# Optimization #################################### # Type of batch selection ("deterministic" or "stochastic") batch_type = deterministic # Batch size -nbatch = 5000 +nbatch = 200 # relaxation param for tikhonov term gamma_tik = 1e-7 # relaxation param for time-derivative term -gamma_ddt = 1e-7 +gamma_ddt = 1e-5 # relaxation param for tikhonov term of classification weights gamma_class = 1e-7 # stepsize selection type ("fixed" or "backtrackingLS" or "oneoverk") # determines how to choose alpha in design update x_new = x_old - alpha * direction # fixed : constant alpha being the initial stepsize -# backtrackingLS : find alpha from backtracking linesearch, starting with initial stepsize +# backtrackingLS : find alpha from backtracking linesearch, starting at initial stepsize # oneoverk : alpha = 1/k where k is the current optimization iteration index stepsize_type = backtrackingLS -# initial stepsize for fixed alpha and backtracking linesearch selection +# initial stepsize stepsize = 1.0 # maximum number of optimization iterations -optim_maxiter = 20 +optim_maxiter = 100 # absolute stopping criterion for the gradient norm gtol = 1e-4 # maximum number of linesearch iterations @@ -117,5 +117,8 @@ ls_factor = 0.5 hessian_approx = L-BFGS # number of stages for l-bfgs method lbfgs_stages = 20 -# level for validation computation: 0 - validate only after optimization finishes. 
1 - validate in each optimization iteration +# level for validation computation: +# -1 = never validate +# 0 = validate only after optimization finishes. +# 1 = validate in each optimization iteration validationlevel = 1 diff --git a/testing/peaks.npt1.ml1.optim.dat b/testing/peaks.npt1.ml1.optim.dat index 8dfb502..9ab3943 100644 --- a/testing/peaks.npt1.ml1.optim.dat +++ b/testing/peaks.npt1.ml1.optim.dat @@ -1,15 +1,15 @@ -# Problem setup: datafolder data -# training examples Ytrain_orig.dat -# training labels Ctrain_orig.dat -# validation examples Yval_orig.dat -# validation labels Cval_orig.dat +# Problem setup: datafolder ../examples/peaks/ +# training examples features_training.dat +# training labels labels_training.dat +# validation examples features_validation.dat +# validation labels labels_validation.dat # ntraining 5000 # nvalidation 200 # nfeatures 2 # nclasses 5 # nchannels 8 -# nlayers 30 -# T 5.000000 +# nlayers 32 +# T 1.000000 # network type dense # Activation SmoothReLU # openlayer type 1 @@ -17,9 +17,9 @@ # min coarse 10 # coasening 2 # coasening (level 0) 2 -# max. braid iter 2 -# abs. tol 1e-15 -# abs. toladj 1e-15 +# max. braid iter 15 +# abs. tol 1e-10 +# abs. toladj 1e-10 # print level 1 # access level 0 # skip? 0 @@ -27,41 +27,31 @@ # nrelax (level 0) 0 # nrelax 1 # Optimization: optimization type deterministic -# nbatch 5000 +# nbatch 200 # gamma_tik 1e-07 -# gamma_ddt 1e-07 +# gamma_ddt 1e-05 # gamma_class 1e-07 # stepsize type backtracking line-search # stepsize 1.000000 -# max. optim iter 20 +# max. optim iter 100 # gtol 1e-04 # max. ls iter 20 # ls factor 0.500000 -# weights_init 0.001000 -# weights_open_init 0.000000 +# weights_init 0.000000 +# weights_open_init 0.001000 # weights_class_init 0.001000 # hessianapprox_type L-BFGS # lbfgs_stages 20 # validationlevel 1 # || r || || r_adj || Objective Loss || grad || Stepsize ls_iter Accur_train Accur_val Time(sec) -000 -1.00000000e+00 -1.00000000e+00 1.60941989767865e+00 1.60941989720218e+00 5.47547521438103e-01 1.000000 0 20.18% 21.00% 0.2 -001 -1.00000000e+00 -1.00000000e+00 1.38071340186225e+00 1.38071338639355e+00 3.06051904945471e-01 1.000000 0 48.00% 46.50% 0.6 -002 -1.00000000e+00 -1.00000000e+00 1.23596247912580e+00 1.23596238748763e+00 1.29397210079374e-01 1.000000 0 50.78% 48.50% 0.9 -003 -1.00000000e+00 -1.00000000e+00 1.20341971297680e+00 1.20341956950837e+00 1.18843456996184e-01 1.000000 0 56.24% 54.50% 1.2 -004 -1.00000000e+00 -1.00000000e+00 1.18418727997486e+00 1.18418706821137e+00 7.57493851234179e-02 1.000000 0 57.00% 54.00% 1.6 -005 -1.00000000e+00 -1.00000000e+00 1.17681154387789e+00 1.17681130238162e+00 4.92897668623518e-02 1.000000 0 58.78% 56.00% 1.9 -006 -1.00000000e+00 -1.00000000e+00 1.16938781109130e+00 1.16938754241913e+00 5.63354947836319e-02 1.000000 0 58.22% 54.00% 2.3 -007 -1.00000000e+00 -1.00000000e+00 1.15847109277098e+00 1.15847078889672e+00 8.74543241159731e-02 1.000000 0 54.00% 50.00% 2.7 -008 -1.00000000e+00 -1.00000000e+00 1.15763751519476e+00 1.15763709061927e+00 2.71593130325443e-01 1.000000 0 56.40% 53.50% 3.0 -009 -1.00000000e+00 -1.00000000e+00 1.14274573639825e+00 1.14274537295253e+00 1.29627926674329e-01 1.000000 0 52.68% 46.00% 3.3 -010 -1.00000000e+00 -1.00000000e+00 1.14122719808983e+00 1.14122681274333e+00 1.55775797203811e-01 1.000000 0 53.10% 48.00% 3.6 -011 -1.00000000e+00 -1.00000000e+00 1.13494201952478e+00 1.13494161706763e+00 6.57957962169811e-02 1.000000 0 55.94% 51.00% 4.0 -012 -1.00000000e+00 -1.00000000e+00 1.13145597537932e+00 1.13145550124762e+00 
1.26247044561169e-01 1.000000 0 51.98% 49.50% 4.3 -013 -1.00000000e+00 -1.00000000e+00 1.12368754549753e+00 1.12368705376703e+00 1.10635760888726e-01 1.000000 0 55.82% 54.00% 4.6 -014 -1.00000000e+00 -1.00000000e+00 1.11155460188595e+00 1.11155398029660e+00 1.39385721056845e-01 1.000000 0 57.88% 56.00% 4.9 -015 -1.00000000e+00 -1.00000000e+00 1.10515556053970e+00 1.10515489947227e+00 1.13781734395289e-01 1.000000 0 60.94% 57.00% 5.3 -016 -1.00000000e+00 -1.00000000e+00 1.09562590165864e+00 1.09562515860787e+00 2.52376513334094e-01 1.000000 0 60.68% 57.50% 5.6 -017 -1.00000000e+00 -1.00000000e+00 1.08197155968558e+00 1.08197063660131e+00 5.55013786931782e-01 1.000000 0 59.42% 56.00% 5.9 -018 -1.00000000e+00 -1.00000000e+00 1.07924452229731e+00 1.07924359893069e+00 6.18277564731762e-01 1.000000 0 58.32% 57.00% 6.2 -019 -1.00000000e+00 -1.00000000e+00 1.07415528298030e+00 1.07415447170882e+00 2.75740177515614e-01 0.500000 1 59.28% 55.50% 6.6 +000 -1.00000000e+00 -1.00000000e+00 1.60943791243446e+00 1.60943791243409e+00 9.03286343783022e-01 1.000000 0 100.00% 20.00% 0.0 +001 -1.00000000e+00 -1.00000000e+00 8.93027383470424e-01 8.93027342673751e-01 6.81237374674309e-01 1.000000 0 100.00% 20.00% 0.0 +002 -1.00000000e+00 -1.00000000e+00 1.05036886903368e-03 1.04931297594737e-03 6.51136165937218e-03 1.000000 0 100.00% 20.00% 0.1 +003 -1.00000000e+00 -1.00000000e+00 8.38924054937610e-04 8.37860415154911e-04 5.27459181685681e-03 1.000000 0 100.00% 20.00% 0.1 +004 -1.00000000e+00 -1.00000000e+00 3.09469131150244e-04 3.08370156887922e-04 2.07090184605933e-03 1.000000 0 100.00% 20.00% 0.1 +005 -1.00000000e+00 -1.00000000e+00 1.56742722262840e-04 1.55619105190084e-04 1.09169627784517e-03 1.000000 0 100.00% 20.00% 0.1 +006 -1.00000000e+00 -1.00000000e+00 7.10282645639592e-05 6.98756630906118e-05 5.15099197829775e-04 1.000000 0 100.00% 20.00% 0.1 +007 -1.00000000e+00 -1.00000000e+00 3.41186662095647e-05 3.29388953025643e-05 2.53915778855823e-04 1.000000 0 100.00% 20.00% 0.2 +008 -1.00000000e+00 -1.00000000e+00 1.65032674860585e-05 1.52959730466568e-05 1.23156345893023e-04 1.000000 0 100.00% 20.00% 0.2 +009 -1.00000000e+00 -1.00000000e+00 8.40503071448269e-06 7.17084153786459e-06 6.01241929460480e-05 1.000000 0 100.00% 20.00% 0.2 diff --git a/testing/peaks.npt1.ml10.optim.dat b/testing/peaks.npt1.ml10.optim.dat index 407c132..837f75d 100644 --- a/testing/peaks.npt1.ml10.optim.dat +++ b/testing/peaks.npt1.ml10.optim.dat @@ -1,15 +1,15 @@ -# Problem setup: datafolder data -# training examples Ytrain_orig.dat -# training labels Ctrain_orig.dat -# validation examples Yval_orig.dat -# validation labels Cval_orig.dat +# Problem setup: datafolder ../examples/peaks/ +# training examples features_training.dat +# training labels labels_training.dat +# validation examples features_validation.dat +# validation labels labels_validation.dat # ntraining 5000 # nvalidation 200 # nfeatures 2 # nclasses 5 # nchannels 8 -# nlayers 30 -# T 5.000000 +# nlayers 32 +# T 1.000000 # network type dense # Activation SmoothReLU # openlayer type 1 @@ -17,9 +17,9 @@ # min coarse 10 # coasening 2 # coasening (level 0) 2 -# max. braid iter 2 -# abs. tol 1e-15 -# abs. toladj 1e-15 +# max. braid iter 15 +# abs. tol 1e-10 +# abs. toladj 1e-10 # print level 1 # access level 0 # skip? 
0 @@ -27,41 +27,31 @@ # nrelax (level 0) 0 # nrelax 1 # Optimization: optimization type deterministic -# nbatch 5000 +# nbatch 200 # gamma_tik 1e-07 -# gamma_ddt 1e-07 +# gamma_ddt 1e-05 # gamma_class 1e-07 # stepsize type backtracking line-search # stepsize 1.000000 -# max. optim iter 20 +# max. optim iter 100 # gtol 1e-04 # max. ls iter 20 # ls factor 0.500000 -# weights_init 0.001000 -# weights_open_init 0.000000 +# weights_init 0.000000 +# weights_open_init 0.001000 # weights_class_init 0.001000 # hessianapprox_type L-BFGS # lbfgs_stages 20 # validationlevel 1 # || r || || r_adj || Objective Loss || grad || Stepsize ls_iter Accur_train Accur_val Time(sec) -000 1.05426534e-05 7.01718976e-13 1.60941989766493e+00 1.60941989718846e+00 5.47547519325756e-01 1.000000 0 20.18% 21.00% 1.2 -001 1.57996193e-17 2.03041298e-10 1.38071340216910e+00 1.38071338670040e+00 3.06050567716981e-01 1.000000 0 48.00% 46.50% 3.0 -002 7.29308604e-13 4.22213509e-10 1.23596253137683e+00 1.23596243974067e+00 1.29391394257781e-01 1.000000 0 50.78% 48.50% 4.8 -003 9.81124600e-13 2.07669782e-10 1.20341417342464e+00 1.20341402994788e+00 1.18831935496275e-01 1.000000 0 56.24% 54.50% 6.5 -004 6.26066790e-12 4.49854737e-10 1.18418442453580e+00 1.18418421274440e+00 7.58416506674437e-02 1.000000 0 56.98% 54.00% 8.3 -005 6.78105919e-12 2.17426183e-10 1.17681138506102e+00 1.17681114353766e+00 4.93204360796171e-02 1.000000 0 58.80% 56.00% 10.0 -006 2.30351731e-11 3.40981732e-10 1.16939416065256e+00 1.16939389207214e+00 5.63229160588530e-02 1.000000 0 58.22% 54.00% 11.9 -007 8.88623265e-11 7.08703623e-10 1.15848320762452e+00 1.15848290381382e+00 8.77505478477174e-02 1.000000 0 54.00% 50.00% 13.6 -008 1.53502187e-09 3.43627498e-09 1.15752458099280e+00 1.15752415760565e+00 2.70258261860996e-01 1.000000 0 56.42% 53.50% 15.3 -009 3.35426627e-10 2.10759273e-09 1.14272215805333e+00 1.14272179436899e+00 1.28969655129404e-01 1.000000 0 52.74% 46.00% 16.9 -010 8.65490676e-11 1.05329993e-09 1.14123385478706e+00 1.14123346978230e+00 1.55750623407490e-01 1.000000 0 53.14% 48.00% 18.6 -011 2.41000293e-10 5.31010337e-10 1.13494612729255e+00 1.13494572513913e+00 6.54530806420039e-02 1.000000 0 55.90% 51.00% 20.3 -012 1.84660098e-09 1.94338217e-09 1.13142768073210e+00 1.13142720629247e+00 1.25443138882351e-01 1.000000 0 51.96% 49.00% 21.9 -013 8.54540924e-10 1.12055818e-09 1.12358278356824e+00 1.12358229180074e+00 1.11310852815511e-01 1.000000 0 55.88% 54.50% 23.6 -014 1.64024146e-08 7.14719084e-09 1.11134576834466e+00 1.11134514381860e+00 1.40185628559086e-01 1.000000 0 57.94% 56.50% 25.2 -015 9.73572966e-09 2.81897201e-09 1.10494151034690e+00 1.10494084600873e+00 1.15680085929700e-01 1.000000 0 60.98% 57.00% 26.9 -016 3.27161110e-08 5.09705006e-09 1.09542616968119e+00 1.09542542799467e+00 2.51017228904287e-01 1.000000 0 60.68% 57.50% 28.5 -017 3.55387932e-07 1.33789499e-08 1.08242275846187e+00 1.08242181907883e+00 5.74669282315359e-01 1.000000 0 59.18% 57.50% 30.2 -018 1.35900404e-08 1.27840638e-08 1.07817898750936e+00 1.07817806809747e+00 3.68508030727317e-01 1.000000 0 58.34% 56.00% 31.8 -019 9.54593730e-08 1.10671022e-08 1.07641618944747e+00 1.07641542511829e+00 5.40433679343467e-01 1.000000 0 58.12% 53.50% 33.4 +000 1.48762021e-18 0.00000000e+00 1.60943791243446e+00 1.60943791243409e+00 9.03286343783022e-01 1.000000 0 100.00% 20.00% 0.1 +001 4.87268972e-19 0.00000000e+00 8.93027383470424e-01 8.93027342673751e-01 6.81237374674309e-01 1.000000 0 100.00% 20.00% 0.2 +002 1.20657289e-16 9.58293526e-11 1.05036886903368e-03 
1.04931297594737e-03 6.51137546917077e-03 1.000000 0 100.00% 20.00% 0.3 +003 8.86998023e-17 1.34391587e-13 8.38923640767822e-04 8.37860001146083e-04 5.27458994776971e-03 1.000000 0 100.00% 20.00% 0.4 +004 1.21309331e-16 5.43079170e-13 3.09474514735135e-04 3.08375531427215e-04 2.07093191405256e-03 1.000000 0 100.00% 20.00% 0.6 +005 1.43429471e-16 2.41778398e-13 1.56745527599925e-04 1.55621886721240e-04 1.09171100040307e-03 1.000000 0 100.00% 20.00% 0.7 +006 1.78697630e-16 2.03233182e-13 7.10300937789059e-05 6.98774426357128e-05 5.15108834175264e-04 1.000000 0 100.00% 20.00% 0.8 +007 2.10635379e-16 1.27882822e-13 3.41197750696654e-05 3.29399208824851e-05 2.53921341908186e-04 1.000000 0 100.00% 20.00% 0.9 +008 2.22144503e-16 8.82507913e-14 1.65039906218550e-05 1.52965683800520e-05 1.23159632420279e-04 1.000000 0 100.00% 20.00% 1.1 +009 2.60100400e-16 5.80874925e-14 8.40555413834112e-06 7.17118187402434e-06 6.01261027573695e-05 1.000000 0 100.00% 20.00% 1.2 diff --git a/testing/peaks.npt2.ml1.optim.dat b/testing/peaks.npt2.ml1.optim.dat index f1a8438..11fec9f 100644 --- a/testing/peaks.npt2.ml1.optim.dat +++ b/testing/peaks.npt2.ml1.optim.dat @@ -1,15 +1,15 @@ -# Problem setup: datafolder data -# training examples Ytrain_orig.dat -# training labels Ctrain_orig.dat -# validation examples Yval_orig.dat -# validation labels Cval_orig.dat +# Problem setup: datafolder ../examples/peaks/ +# training examples features_training.dat +# training labels labels_training.dat +# validation examples features_validation.dat +# validation labels labels_validation.dat # ntraining 5000 # nvalidation 200 # nfeatures 2 # nclasses 5 # nchannels 8 -# nlayers 30 -# T 5.000000 +# nlayers 32 +# T 1.000000 # network type dense # Activation SmoothReLU # openlayer type 1 @@ -17,9 +17,9 @@ # min coarse 10 # coasening 2 # coasening (level 0) 2 -# max. braid iter 2 -# abs. tol 1e-15 -# abs. toladj 1e-15 +# max. braid iter 15 +# abs. tol 1e-10 +# abs. toladj 1e-10 # print level 1 # access level 0 # skip? 0 @@ -27,41 +27,31 @@ # nrelax (level 0) 0 # nrelax 1 # Optimization: optimization type deterministic -# nbatch 5000 +# nbatch 200 # gamma_tik 1e-07 -# gamma_ddt 1e-07 +# gamma_ddt 1e-05 # gamma_class 1e-07 # stepsize type backtracking line-search # stepsize 1.000000 -# max. optim iter 20 +# max. optim iter 100 # gtol 1e-04 # max. 
ls iter 20 # ls factor 0.500000 -# weights_init 0.001000 -# weights_open_init 0.000000 +# weights_init 0.000000 +# weights_open_init 0.001000 # weights_class_init 0.001000 # hessianapprox_type L-BFGS # lbfgs_stages 20 # validationlevel 1 # || r || || r_adj || Objective Loss || grad || Stepsize ls_iter Accur_train Accur_val Time(sec) -000 -1.00000000e+00 -1.00000000e+00 1.60941989767865e+00 1.60941989720218e+00 5.47547521438103e-01 1.000000 0 20.18% 21.00% 0.2 -001 -1.00000000e+00 -1.00000000e+00 1.38071340186225e+00 1.38071338639355e+00 3.06051904945471e-01 1.000000 0 48.00% 46.50% 0.6 -002 -1.00000000e+00 -1.00000000e+00 1.23596247912580e+00 1.23596238748763e+00 1.29397210079374e-01 1.000000 0 50.78% 48.50% 0.9 -003 -1.00000000e+00 -1.00000000e+00 1.20341971297680e+00 1.20341956950837e+00 1.18843456996184e-01 1.000000 0 56.24% 54.50% 1.3 -004 -1.00000000e+00 -1.00000000e+00 1.18418727997486e+00 1.18418706821137e+00 7.57493851234181e-02 1.000000 0 57.00% 54.00% 1.6 -005 -1.00000000e+00 -1.00000000e+00 1.17681154387789e+00 1.17681130238162e+00 4.92897668623518e-02 1.000000 0 58.78% 56.00% 2.0 -006 -1.00000000e+00 -1.00000000e+00 1.16938781109130e+00 1.16938754241913e+00 5.63354947836322e-02 1.000000 0 58.22% 54.00% 2.4 -007 -1.00000000e+00 -1.00000000e+00 1.15847109277098e+00 1.15847078889672e+00 8.74543241159725e-02 1.000000 0 54.00% 50.00% 2.8 -008 -1.00000000e+00 -1.00000000e+00 1.15763751519476e+00 1.15763709061927e+00 2.71593130325434e-01 1.000000 0 56.40% 53.50% 3.1 -009 -1.00000000e+00 -1.00000000e+00 1.14274573639825e+00 1.14274537295253e+00 1.29627926674328e-01 1.000000 0 52.68% 46.00% 3.5 -010 -1.00000000e+00 -1.00000000e+00 1.14122719808982e+00 1.14122681274333e+00 1.55775797203806e-01 1.000000 0 53.10% 48.00% 3.8 -011 -1.00000000e+00 -1.00000000e+00 1.13494201952478e+00 1.13494161706763e+00 6.57957962169820e-02 1.000000 0 55.94% 51.00% 4.2 -012 -1.00000000e+00 -1.00000000e+00 1.13145597537932e+00 1.13145550124762e+00 1.26247044561172e-01 1.000000 0 51.98% 49.50% 4.5 -013 -1.00000000e+00 -1.00000000e+00 1.12368754549752e+00 1.12368705376703e+00 1.10635760888722e-01 1.000000 0 55.82% 54.00% 4.8 -014 -1.00000000e+00 -1.00000000e+00 1.11155460188595e+00 1.11155398029660e+00 1.39385721056834e-01 1.000000 0 57.88% 56.00% 5.2 -015 -1.00000000e+00 -1.00000000e+00 1.10515556053969e+00 1.10515489947227e+00 1.13781734395290e-01 1.000000 0 60.94% 57.00% 5.6 -016 -1.00000000e+00 -1.00000000e+00 1.09562590165864e+00 1.09562515860787e+00 2.52376513334136e-01 1.000000 0 60.68% 57.50% 6.0 -017 -1.00000000e+00 -1.00000000e+00 1.08197155968557e+00 1.08197063660131e+00 5.55013786931869e-01 1.000000 0 59.42% 56.00% 6.3 -018 -1.00000000e+00 -1.00000000e+00 1.07924452229731e+00 1.07924359893069e+00 6.18277564731541e-01 1.000000 0 58.32% 57.00% 6.6 -019 -1.00000000e+00 -1.00000000e+00 1.07415528298030e+00 1.07415447170882e+00 2.75740177515749e-01 0.500000 1 59.28% 55.50% 7.0 +000 -1.00000000e+00 -1.00000000e+00 1.60943791243446e+00 1.60943791243409e+00 9.03286343783022e-01 1.000000 0 100.00% 20.00% 0.0 +001 -1.00000000e+00 -1.00000000e+00 8.93027383470424e-01 8.93027342673751e-01 6.81237374674309e-01 1.000000 0 100.00% 20.00% 0.0 +002 -1.00000000e+00 -1.00000000e+00 1.05036886903367e-03 1.04931297594736e-03 6.51136165937214e-03 1.000000 0 100.00% 20.00% 0.1 +003 -1.00000000e+00 -1.00000000e+00 8.38924054937610e-04 8.37860415154911e-04 5.27459181685679e-03 1.000000 0 100.00% 20.00% 0.1 +004 -1.00000000e+00 -1.00000000e+00 3.09469131150244e-04 3.08370156887922e-04 2.07090184605932e-03 1.000000 0 100.00% 
20.00% 0.1
+005 -1.00000000e+00 -1.00000000e+00 1.56742722262840e-04 1.55619105190084e-04 1.09169627784516e-03 1.000000 0 100.00% 20.00% 0.1
+006 -1.00000000e+00 -1.00000000e+00 7.10282645639592e-05 6.98756630906118e-05 5.15099197829774e-04 1.000000 0 100.00% 20.00% 0.1
+007 -1.00000000e+00 -1.00000000e+00 3.41186662095647e-05 3.29388953025643e-05 2.53915778855823e-04 1.000000 0 100.00% 20.00% 0.2
+008 -1.00000000e+00 -1.00000000e+00 1.65032674860585e-05 1.52959730466568e-05 1.23156345893023e-04 1.000000 0 100.00% 20.00% 0.2
+009 -1.00000000e+00 -1.00000000e+00 8.40503071448269e-06 7.17084153786459e-06 6.01241929460480e-05 1.000000 0 100.00% 20.00% 0.2
diff --git a/testing/peaks.npt2.ml10.optim.dat b/testing/peaks.npt2.ml10.optim.dat
index 96b0352..5c95af9 100644
--- a/testing/peaks.npt2.ml10.optim.dat
+++ b/testing/peaks.npt2.ml10.optim.dat
@@ -1,15 +1,15 @@
-# Problem setup: datafolder data
-# training examples Ytrain_orig.dat
-# training labels Ctrain_orig.dat
-# validation examples Yval_orig.dat
-# validation labels Cval_orig.dat
+# Problem setup: datafolder ../examples/peaks/
+# training examples features_training.dat
+# training labels labels_training.dat
+# validation examples features_validation.dat
+# validation labels labels_validation.dat
 # ntraining 5000
 # nvalidation 200
 # nfeatures 2
 # nclasses 5
 # nchannels 8
-# nlayers 30
-# T 5.000000
+# nlayers 32
+# T 1.000000
 # network type dense
 # Activation SmoothReLU
 # openlayer type 1
@@ -17,9 +17,9 @@
 # min coarse 10
 # coasening 2
 # coasening (level 0) 2
-# max. braid iter 2
-# abs. tol 1e-15
-# abs. toladj 1e-15
+# max. braid iter 15
+# abs. tol 1e-10
+# abs. toladj 1e-10
 # print level 1
 # access level 0
 # skip? 0
@@ -27,41 +27,31 @@
 # nrelax (level 0) 0
 # nrelax 1
 # Optimization: optimization type deterministic
-# nbatch 5000
+# nbatch 200
 # gamma_tik 1e-07
-# gamma_ddt 1e-07
+# gamma_ddt 1e-05
 # gamma_class 1e-07
 # stepsize type backtracking line-search
 # stepsize 1.000000
-# max. optim iter 20
+# max. optim iter 100
 # gtol 1e-04
 # max. ls iter 20
 # ls factor 0.500000
-# weights_init 0.001000
-# weights_open_init 0.000000
+# weights_init 0.000000
+# weights_open_init 0.001000
 # weights_class_init 0.001000
 # hessianapprox_type L-BFGS
 # lbfgs_stages 20
 # validationlevel 1
 # || r || || r_adj || Objective Loss || grad || Stepsize ls_iter Accur_train Accur_val Time(sec)
-000 1.05426534e-05 7.01718976e-13 1.60941989766493e+00 1.60941989718846e+00 5.47547519325756e-01 1.000000 0 20.18% 21.00% 0.8
-001 1.57996193e-17 2.03041298e-10 1.38071340216910e+00 1.38071338670040e+00 3.06050567716981e-01 1.000000 0 48.00% 46.50% 1.9
-002 7.29308604e-13 4.22213509e-10 1.23596253137683e+00 1.23596243974067e+00 1.29391394257781e-01 1.000000 0 50.78% 48.50% 3.2
-003 9.81124621e-13 2.07669782e-10 1.20341417342464e+00 1.20341402994788e+00 1.18831935496275e-01 1.000000 0 56.24% 54.50% 4.3
-004 6.26066780e-12 4.49854737e-10 1.18418442453580e+00 1.18418421274440e+00 7.58416506674439e-02 1.000000 0 56.98% 54.00% 5.6
-005 6.78105922e-12 2.17426183e-10 1.17681138506102e+00 1.17681114353766e+00 4.93204360796170e-02 1.000000 0 58.80% 56.00% 6.7
-006 2.30351729e-11 3.40981732e-10 1.16939416065256e+00 1.16939389207214e+00 5.63229160588530e-02 1.000000 0 58.22% 54.00% 8.0
-007 8.88623266e-11 7.08703623e-10 1.15848320762452e+00 1.15848290381382e+00 8.77505478477175e-02 1.000000 0 54.00% 50.00% 9.1
-008 1.53502187e-09 3.43627498e-09 1.15752458099280e+00 1.15752415760565e+00 2.70258261860993e-01 1.000000 0 56.42% 53.50% 10.3
-009 3.35426626e-10 2.10759273e-09 1.14272215805333e+00 1.14272179436899e+00 1.28969655129404e-01 1.000000 0 52.74% 46.00% 11.5
-010 8.65490677e-11 1.05329993e-09 1.14123385478706e+00 1.14123346978230e+00 1.55750623407489e-01 1.000000 0 53.14% 48.00% 12.7
-011 2.41000293e-10 5.31010337e-10 1.13494612729255e+00 1.13494572513913e+00 6.54530806420046e-02 1.000000 0 55.90% 51.00% 13.8
-012 1.84660098e-09 1.94338217e-09 1.13142768073210e+00 1.13142720629247e+00 1.25443138882354e-01 1.000000 0 51.96% 49.00% 15.0
-013 8.54540924e-10 1.12055818e-09 1.12358278356824e+00 1.12358229180074e+00 1.11310852815509e-01 1.000000 0 55.88% 54.50% 16.1
-014 1.64024146e-08 7.14719084e-09 1.11134576834466e+00 1.11134514381860e+00 1.40185628559072e-01 1.000000 0 57.94% 56.50% 17.3
-015 9.73572966e-09 2.81897201e-09 1.10494151034690e+00 1.10494084600873e+00 1.15680085929693e-01 1.000000 0 60.98% 57.00% 18.4
-016 3.27161110e-08 5.09705006e-09 1.09542616968119e+00 1.09542542799467e+00 2.51017228904277e-01 1.000000 0 60.68% 57.50% 19.6
-017 3.55387932e-07 1.33789499e-08 1.08242275846187e+00 1.08242181907883e+00 5.74669282315588e-01 1.000000 0 59.18% 57.50% 20.6
-018 1.35900404e-08 1.27840638e-08 1.07817898750937e+00 1.07817806809748e+00 3.68508030728956e-01 1.000000 0 58.34% 56.00% 21.8
-019 9.54593730e-08 1.10671022e-08 1.07641618944750e+00 1.07641542511832e+00 5.40433679341661e-01 1.000000 0 58.12% 53.50% 22.9
+000 1.48762021e-18 0.00000000e+00 1.60943791243446e+00 1.60943791243409e+00 9.03286343783022e-01 1.000000 0 100.00% 20.00% 0.1
+001 4.87268972e-19 0.00000000e+00 8.93027383470424e-01 8.93027342673751e-01 6.81237374674309e-01 1.000000 0 100.00% 20.00% 0.1
+002 1.20416982e-16 9.58293526e-11 1.05036886903367e-03 1.04931297594736e-03 6.51137546917073e-03 1.000000 0 100.00% 20.00% 0.2
+003 8.86650549e-17 1.34391587e-13 8.38923640767822e-04 8.37860001146083e-04 5.27458994776969e-03 1.000000 0 100.00% 20.00% 0.3
+004 1.21304250e-16 5.43079170e-13 3.09474514735135e-04 3.08375531427215e-04 2.07093191405255e-03 1.000000 0 100.00% 20.00% 0.4
+005 1.43171427e-16 2.41778398e-13 1.56745527599925e-04 1.55621886721240e-04 1.09171100040307e-03 1.000000 0 100.00% 20.00% 0.5
+006 1.78721770e-16 2.03233182e-13 7.10300937789059e-05 6.98774426357128e-05 5.15108834175262e-04 1.000000 0 100.00% 20.00% 0.6
+007 2.10562219e-16 1.27882822e-13 3.41197750696654e-05 3.29399208824851e-05 2.53921341908184e-04 1.000000 0 100.00% 20.00% 0.6
+008 2.21803000e-16 8.82507913e-14 1.65039906218550e-05 1.52965683800520e-05 1.23159632420279e-04 1.000000 0 100.00% 20.00% 0.7
+009 2.59882318e-16 5.80874925e-14 8.40555413834112e-06 7.17118187402434e-06 6.01261027573693e-05 1.000000 0 100.00% 20.00% 0.8
diff --git a/testing/peaks.npt5.ml1.optim.dat b/testing/peaks.npt5.ml1.optim.dat
index 12a784f..b064fb6 100644
--- a/testing/peaks.npt5.ml1.optim.dat
+++ b/testing/peaks.npt5.ml1.optim.dat
@@ -1,15 +1,15 @@
-# Problem setup: datafolder data
-# training examples Ytrain_orig.dat
-# training labels Ctrain_orig.dat
-# validation examples Yval_orig.dat
-# validation labels Cval_orig.dat
+# Problem setup: datafolder ../examples/peaks/
+# training examples features_training.dat
+# training labels labels_training.dat
+# validation examples features_validation.dat
+# validation labels labels_validation.dat
 # ntraining 5000
 # nvalidation 200
 # nfeatures 2
 # nclasses 5
 # nchannels 8
-# nlayers 30
-# T 5.000000
+# nlayers 32
+# T 1.000000
 # network type dense
 # Activation SmoothReLU
 # openlayer type 1
@@ -17,9 +17,9 @@
 # min coarse 10
 # coasening 2
 # coasening (level 0) 2
-# max. braid iter 2
-# abs. tol 1e-15
-# abs. toladj 1e-15
+# max. braid iter 15
+# abs. tol 1e-10
+# abs. toladj 1e-10
 # print level 1
 # access level 0
 # skip? 0
@@ -27,41 +27,31 @@
 # nrelax (level 0) 0
 # nrelax 1
 # Optimization: optimization type deterministic
-# nbatch 5000
+# nbatch 200
 # gamma_tik 1e-07
-# gamma_ddt 1e-07
+# gamma_ddt 1e-05
 # gamma_class 1e-07
 # stepsize type backtracking line-search
 # stepsize 1.000000
-# max. optim iter 20
+# max. optim iter 100
 # gtol 1e-04
 # max. ls iter 20
 # ls factor 0.500000
-# weights_init 0.001000
-# weights_open_init 0.000000
+# weights_init 0.000000
+# weights_open_init 0.001000
 # weights_class_init 0.001000
 # hessianapprox_type L-BFGS
 # lbfgs_stages 20
 # validationlevel 1
 # || r || || r_adj || Objective Loss || grad || Stepsize ls_iter Accur_train Accur_val Time(sec)
-000 -1.00000000e+00 -1.00000000e+00 1.60941989767865e+00 1.60941989720218e+00 5.47547521438103e-01 1.000000 0 20.18% 21.00% 0.5
-001 -1.00000000e+00 -1.00000000e+00 1.38071340186225e+00 1.38071338639355e+00 3.06051904945471e-01 1.000000 0 48.00% 46.50% 1.2
-002 -1.00000000e+00 -1.00000000e+00 1.23596247912580e+00 1.23596238748763e+00 1.29397210079374e-01 1.000000 0 50.78% 48.50% 1.8
-003 -1.00000000e+00 -1.00000000e+00 1.20341971297680e+00 1.20341956950837e+00 1.18843456996184e-01 1.000000 0 56.24% 54.50% 2.6
-004 -1.00000000e+00 -1.00000000e+00 1.18418727997486e+00 1.18418706821137e+00 7.57493851234178e-02 1.000000 0 57.00% 54.00% 3.2
-005 -1.00000000e+00 -1.00000000e+00 1.17681154387789e+00 1.17681130238162e+00 4.92897668623518e-02 1.000000 0 58.78% 56.00% 3.8
-006 -1.00000000e+00 -1.00000000e+00 1.16938781109130e+00 1.16938754241913e+00 5.63354947836319e-02 1.000000 0 58.22% 54.00% 4.5
-007 -1.00000000e+00 -1.00000000e+00 1.15847109277098e+00 1.15847078889672e+00 8.74543241159734e-02 1.000000 0 54.00% 50.00% 5.1
-008 -1.00000000e+00 -1.00000000e+00 1.15763751519476e+00 1.15763709061927e+00 2.71593130325441e-01 1.000000 0 56.40% 53.50% 5.9
-009 -1.00000000e+00 -1.00000000e+00 1.14274573639825e+00 1.14274537295253e+00 1.29627926674328e-01 1.000000 0 52.68% 46.00% 6.5
-010 -1.00000000e+00 -1.00000000e+00 1.14122719808983e+00 1.14122681274333e+00 1.55775797203811e-01 1.000000 0 53.10% 48.00% 7.3
-011 -1.00000000e+00 -1.00000000e+00 1.13494201952478e+00 1.13494161706763e+00 6.57957962169811e-02 1.000000 0 55.94% 51.00% 7.9
-012 -1.00000000e+00 -1.00000000e+00 1.13145597537932e+00 1.13145550124762e+00 1.26247044561170e-01 1.000000 0 51.98% 49.50% 8.6
-013 -1.00000000e+00 -1.00000000e+00 1.12368754549753e+00 1.12368705376703e+00 1.10635760888728e-01 1.000000 0 55.82% 54.00% 9.2
-014 -1.00000000e+00 -1.00000000e+00 1.11155460188595e+00 1.11155398029660e+00 1.39385721056834e-01 1.000000 0 57.88% 56.00% 9.8
-015 -1.00000000e+00 -1.00000000e+00 1.10515556053969e+00 1.10515489947227e+00 1.13781734395281e-01 1.000000 0 60.94% 57.00% 10.5
-016 -1.00000000e+00 -1.00000000e+00 1.09562590165864e+00 1.09562515860787e+00 2.52376513334124e-01 1.000000 0 60.68% 57.50% 11.3
-017 -1.00000000e+00 -1.00000000e+00 1.08197155968558e+00 1.08197063660131e+00 5.55013786931906e-01 1.000000 0 59.42% 56.00% 11.9
-018 -1.00000000e+00 -1.00000000e+00 1.07924452229732e+00 1.07924359893070e+00 6.18277564731091e-01 1.000000 0 58.32% 57.00% 12.6
-019 -1.00000000e+00 -1.00000000e+00 1.07415528298029e+00 1.07415447170881e+00 2.75740177515766e-01 0.500000 1 59.28% 55.50% 13.6
+000 -1.00000000e+00 -1.00000000e+00 1.60943791243446e+00 1.60943791243409e+00 9.03286343783022e-01 1.000000 0 100.00% 20.00% 0.0
+001 -1.00000000e+00 -1.00000000e+00 8.93027383470424e-01 8.93027342673751e-01 6.81237374674309e-01 1.000000 0 100.00% 20.00% 0.1
+002 -1.00000000e+00 -1.00000000e+00 1.05036886903367e-03 1.04931297594736e-03 6.51136165937214e-03 1.000000 0 100.00% 20.00% 0.1
+003 -1.00000000e+00 -1.00000000e+00 8.38924054937610e-04 8.37860415154911e-04 5.27459181685679e-03 1.000000 0 100.00% 20.00% 0.1
+004 -1.00000000e+00 -1.00000000e+00 3.09469131150244e-04 3.08370156887922e-04 2.07090184605932e-03 1.000000 0 100.00% 20.00% 0.2
+005 -1.00000000e+00 -1.00000000e+00 1.56742722262840e-04 1.55619105190084e-04 1.09169627784516e-03 1.000000 0 100.00% 20.00% 0.2
+006 -1.00000000e+00 -1.00000000e+00 7.10282645639592e-05 6.98756630906118e-05 5.15099197829773e-04 1.000000 0 100.00% 20.00% 0.2
+007 -1.00000000e+00 -1.00000000e+00 3.41186662095647e-05 3.29388953025643e-05 2.53915778855823e-04 1.000000 0 100.00% 20.00% 0.3
+008 -1.00000000e+00 -1.00000000e+00 1.65032674860585e-05 1.52959730466568e-05 1.23156345893023e-04 1.000000 0 100.00% 20.00% 0.3
+009 -1.00000000e+00 -1.00000000e+00 8.40503071448269e-06 7.17084153786459e-06 6.01241929460479e-05 1.000000 0 100.00% 20.00% 0.3
diff --git a/testing/peaks.npt5.ml10.optim.dat b/testing/peaks.npt5.ml10.optim.dat
index cf199f5..1d75665 100644
--- a/testing/peaks.npt5.ml10.optim.dat
+++ b/testing/peaks.npt5.ml10.optim.dat
@@ -1,15 +1,15 @@
-# Problem setup: datafolder data
-# training examples Ytrain_orig.dat
-# training labels Ctrain_orig.dat
-# validation examples Yval_orig.dat
-# validation labels Cval_orig.dat
+# Problem setup: datafolder ../examples/peaks/
+# training examples features_training.dat
+# training labels labels_training.dat
+# validation examples features_validation.dat
+# validation labels labels_validation.dat
 # ntraining 5000
 # nvalidation 200
 # nfeatures 2
 # nclasses 5
 # nchannels 8
-# nlayers 30
-# T 5.000000
+# nlayers 32
+# T 1.000000
 # network type dense
 # Activation SmoothReLU
 # openlayer type 1
@@ -17,9 +17,9 @@
 # min coarse 10
 # coasening 2
 # coasening (level 0) 2
-# max. braid iter 2
-# abs. tol 1e-15
-# abs. toladj 1e-15
+# max. braid iter 15
+# abs. tol 1e-10
+# abs. toladj 1e-10
 # print level 1
 # access level 0
 # skip? 0
@@ -27,41 +27,31 @@
 # nrelax (level 0) 0
 # nrelax 1
 # Optimization: optimization type deterministic
-# nbatch 5000
+# nbatch 200
 # gamma_tik 1e-07
-# gamma_ddt 1e-07
+# gamma_ddt 1e-05
 # gamma_class 1e-07
 # stepsize type backtracking line-search
 # stepsize 1.000000
-# max. optim iter 20
+# max. optim iter 100
 # gtol 1e-04
 # max. ls iter 20
 # ls factor 0.500000
-# weights_init 0.001000
-# weights_open_init 0.000000
+# weights_init 0.000000
+# weights_open_init 0.001000
 # weights_class_init 0.001000
 # hessianapprox_type L-BFGS
 # lbfgs_stages 20
 # validationlevel 1
 # || r || || r_adj || Objective Loss || grad || Stepsize ls_iter Accur_train Accur_val Time(sec)
-000 1.05426534e-05 7.01718976e-13 1.60941989766493e+00 1.60941989718846e+00 5.47547519325756e-01 1.000000 0 20.18% 21.00% 1.1
-001 1.57996193e-17 2.03041298e-10 1.38071340216910e+00 1.38071338670040e+00 3.06050567716981e-01 1.000000 0 48.00% 46.50% 2.5
-002 7.29308609e-13 4.22213509e-10 1.23596253137683e+00 1.23596243974067e+00 1.29391394257781e-01 1.000000 0 50.78% 48.50% 4.1
-003 9.81124667e-13 2.07669782e-10 1.20341417342464e+00 1.20341402994788e+00 1.18831935496275e-01 1.000000 0 56.24% 54.50% 5.7
-004 6.26066791e-12 4.49854737e-10 1.18418442453580e+00 1.18418421274440e+00 7.58416506674442e-02 1.000000 0 56.98% 54.00% 7.1
-005 6.78105921e-12 2.17426183e-10 1.17681138506102e+00 1.17681114353766e+00 4.93204360796169e-02 1.000000 0 58.80% 56.00% 8.8
-006 2.30351730e-11 3.40981732e-10 1.16939416065256e+00 1.16939389207214e+00 5.63229160588529e-02 1.000000 0 58.22% 54.00% 10.3
-007 8.88623266e-11 7.08703623e-10 1.15848320762452e+00 1.15848290381382e+00 8.77505478477171e-02 1.000000 0 54.00% 50.00% 12.0
-008 1.53502187e-09 3.43627498e-09 1.15752458099280e+00 1.15752415760565e+00 2.70258261860993e-01 1.000000 0 56.42% 53.50% 13.6
-009 3.35426626e-10 2.10759273e-09 1.14272215805333e+00 1.14272179436899e+00 1.28969655129406e-01 1.000000 0 52.74% 46.00% 15.1
-010 8.65490678e-11 1.05329993e-09 1.14123385478706e+00 1.14123346978230e+00 1.55750623407485e-01 1.000000 0 53.14% 48.00% 16.6
-011 2.41000293e-10 5.31010337e-10 1.13494612729255e+00 1.13494572513913e+00 6.54530806420054e-02 1.000000 0 55.90% 51.00% 18.2
-012 1.84660098e-09 1.94338217e-09 1.13142768073210e+00 1.13142720629247e+00 1.25443138882352e-01 1.000000 0 51.96% 49.00% 19.5
-013 8.54540924e-10 1.12055818e-09 1.12358278356824e+00 1.12358229180074e+00 1.11310852815505e-01 1.000000 0 55.88% 54.50% 21.0
-014 1.64024146e-08 7.14719084e-09 1.11134576834466e+00 1.11134514381860e+00 1.40185628559094e-01 1.000000 0 57.94% 56.50% 22.4
-015 9.73572966e-09 2.81897201e-09 1.10494151034690e+00 1.10494084600873e+00 1.15680085929712e-01 1.000000 0 60.98% 57.00% 23.9
-016 3.27161110e-08 5.09705006e-09 1.09542616968119e+00 1.09542542799467e+00 2.51017228904264e-01 1.000000 0 60.68% 57.50% 25.2
-017 3.55387932e-07 1.33789499e-08 1.08242275846186e+00 1.08242181907883e+00 5.74669282315268e-01 1.000000 0 59.18% 57.50% 26.9
-018 1.35900404e-08 1.27840638e-08 1.07817898750936e+00 1.07817806809747e+00 3.68508030725625e-01 1.000000 0 58.34% 56.00% 28.2
-019 9.54593730e-08 1.10671022e-08 1.07641618944743e+00 1.07641542511825e+00 5.40433679344485e-01 1.000000 0 58.12% 53.50% 29.7
+000 1.48762021e-18 0.00000000e+00 1.60943791243446e+00 1.60943791243409e+00 9.03286343783022e-01 1.000000 0 100.00% 20.00% 0.1
+001 4.87268972e-19 0.00000000e+00 8.93027383470424e-01 8.93027342673751e-01 6.81237374674309e-01 1.000000 0 100.00% 20.00% 0.2
+002 1.20416982e-16 9.58293526e-11 1.05036886903367e-03 1.04931297594736e-03 6.51137546917073e-03 1.000000 0 100.00% 20.00% 0.3
+003 8.86302938e-17 1.34391587e-13 8.38923640767822e-04 8.37860001146083e-04 5.27458994776969e-03 1.000000 0 100.00% 20.00% 0.4
+004 1.21324571e-16 5.43079170e-13 3.09474514735135e-04 3.08375531427215e-04 2.07093191405255e-03 1.000000 0 100.00% 20.00% 0.5
+005 1.43184341e-16 2.41778398e-13 1.56745527599925e-04 1.55621886721240e-04 1.09171100040307e-03 1.000000 0 100.00% 20.00% 0.6
+006 1.78707976e-16 2.03233182e-13 7.10300937789059e-05 6.98774426357128e-05 5.15108834175262e-04 1.000000 0 100.00% 20.00% 0.8
+007 2.10521238e-16 1.27882822e-13 3.41197750696654e-05 3.29399208824851e-05 2.53921341908185e-04 1.000000 0 100.00% 20.00% 1.0
+008 2.22294265e-16 8.82507913e-14 1.65039906218550e-05 1.52965683800520e-05 1.23159632420279e-04 1.000000 0 100.00% 20.00% 1.1
+009 2.59851488e-16 5.80874925e-14 8.40555413834112e-06 7.17118187402434e-06 6.01261027573692e-05 1.000000 0 100.00% 20.00% 1.3
diff --git a/testing/testing.py b/testing/testing.py
index c944173..a314d1a 100644
--- a/testing/testing.py
+++ b/testing/testing.py
@@ -6,6 +6,7 @@ import copy
 import subprocess
 import string
+sys.path.insert(0, '../pythonutil')
 from config import *
 from util import *
@@ -45,12 +46,13 @@ os.mkdir(testfoldername)
     # create a link to training and validation data
-    datafolder = "../" + config.datafolder
-    make_link(datafolder, testfoldername + "/" + config.datafolder)
+    datafolder = config.datafolder
+    make_link(datafolder, testfoldername + "/data" )
     # Set the new configuration
     konfig = copy.deepcopy(config)
     konfig.braid_maxlevels = ml
+    konfig.datafolder = "data"
     # create the config file
     testconfig = testname + ".cfg"