diff --git a/include/braid_wrapper.hpp b/include/braid_wrapper.hpp
index 91bedb5..eead8ec 100644
--- a/include/braid_wrapper.hpp
+++ b/include/braid_wrapper.hpp
@@ -2,7 +2,6 @@
 #include
 #include "braid.hpp"
-#include "defs.hpp"
 // #include "_braid.h"
 #include "dataset.hpp"
 #include "layer.hpp"
@@ -17,13 +16,13 @@ class myBraidVector {
   int nbatch;    /* Number of examples */
   int nchannels; /* Number of channels */

-  MyReal *
+  double *
       *state; /* Network state at one layer, dimensions: nbatch * nchannels */

   Layer *layer; /* Pointer to layer information */

   /* Flag that determines if the layer and state have just been received and
    * thus should be free'd after usage (flag > 0) */
-  MyReal sendflag;
+  double sendflag;

 public:
   /* Get dimensions */
@@ -31,18 +30,18 @@ class myBraidVector {
   int getnChannels();

   /* Get Pointer to the state at example exampleID */
-  MyReal *getState(int exampleID);
+  double *getState(int exampleID);

   /* Get pointer to the full state matrix */
-  MyReal **getState();
+  double **getState();

   /* Get and set pointer to the layer */
   Layer *getLayer();
   void setLayer(Layer *layer);

   /* Get and set the sendflag */
-  MyReal getSendflag();
-  void setSendflag(MyReal value);
+  double getSendflag();
+  void setSendflag(double value);

   /* Constructor */
   myBraidVector(int nChannels, int nBatch);
@@ -64,7 +63,7 @@ class myBraidApp : public BraidApp {
   BraidCore *core; /* Braid core for running PinT simulation */

   /* Output */
-  MyReal objective; /* Objective function */
+  double objective; /* Objective function */

 public:
   /* Constructor */
@@ -74,7 +73,7 @@ class myBraidApp : public BraidApp {
   ~myBraidApp();

   /* Return objective function */
-  MyReal getObjective();
+  double getObjective();

   /* Return the core */
   BraidCore *getCore();

@@ -83,7 +82,7 @@ class myBraidApp : public BraidApp {
   void GetGridDistribution(int *ilower_ptr, int *iupper_ptr);

   /* Return the time step index of current time t */
-  braid_Int GetTimeStepIndex(MyReal t);
+  braid_Int GetTimeStepIndex(double t);

   /* Apply one time step */
   virtual braid_Int Step(braid_Vector u_, braid_Vector ustop_,
@@ -131,7 +130,7 @@ class myBraidApp : public BraidApp {
   virtual braid_Int EvaluateObjective();

   /* Run Braid drive, return norm */
-  MyReal run();
+  double run();
 };

 /**
diff --git a/include/config.hpp b/include/config.hpp
index 9eec554..e5186a7 100644
--- a/include/config.hpp
+++ b/include/config.hpp
@@ -1,5 +1,4 @@
-#include "defs.hpp"
-
+#include "mpi.h"
 #include
 #pragma once

@@ -52,13 +51,13 @@ class Config {
   /* Neural Network */
   int nchannels;
   int nlayers;
-  MyReal T;
+  double T;
   int activation;
   int network_type;
   int openlayer_type;
-  MyReal weights_open_init;
-  MyReal weights_init;
-  MyReal weights_class_init;
+  double weights_open_init;
+  double weights_init;
+  double weights_class_init;

   /* XBraid */
   int braid_cfactor0;
@@ -66,8 +65,8 @@ class Config {
   int braid_maxlevels;
   int braid_mincoarse;
   int braid_maxiter;
-  MyReal braid_abstol;
-  MyReal braid_abstoladj;
+  double braid_abstol;
+  double braid_abstoladj;
   int braid_printlevel;
   int braid_accesslevel;
   int braid_setskip;
@@ -78,15 +77,15 @@ class Config {
   /* Optimization */
   int batch_type;
   int nbatch;
-  MyReal gamma_tik;
-  MyReal gamma_ddt;
-  MyReal gamma_class;
+  double gamma_tik;
+  double gamma_ddt;
+  double gamma_class;
   int stepsize_type;
-  MyReal stepsize_init;
+  double stepsize_init;
   int maxoptimiter;
-  MyReal gtol;
+  double gtol;
   int ls_maxiter;
-  MyReal ls_factor;
+  double ls_factor;
   int hessianapprox_type;
   int lbfgs_stages;
   int validationlevel;
@@ -105,5 +104,5 @@ class Config {
   /* Returns a stepsize, depending on the selected stepsize type and current
    * optimization iteration */
-  MyReal getStepsize(int optimiter);
+  double getStepsize(int optimiter);
 };
diff --git a/include/dataset.hpp b/include/dataset.hpp
index e2008c2..5e63276 100644
--- a/include/dataset.hpp
+++ b/include/dataset.hpp
@@ -1,7 +1,6 @@
 #include
 #include
 #include "config.hpp"
-#include "defs.hpp"
 #include "util.hpp"
 #pragma once

@@ -11,8 +10,8 @@ class DataSet {
   int nfeatures; /* Number of features per element */
   int nlabels;   /* Number of different labels (i.e. classes) per element */

-  MyReal **examples; /* Array of Feature vectors (dim: nelements x nfeatures) */
-  MyReal **labels;   /* Array of Label vectors (dim: nelements x nlabels) */
+  double **examples; /* Array of Feature vectors (dim: nelements x nfeatures) */
+  double **labels;   /* Array of Label vectors (dim: nelements x nlabels) */

   int nbatch;    /* Size of the batch */
   int *batchIDs; /* Array of batch indices */

@@ -39,11 +38,11 @@ class DataSet {
   /* Return the feature vector of a certain batchID. If not stored on this
    * processor, return NULL */
-  MyReal *getExample(int id);
+  double *getExample(int id);

   /* Return the label vector of a certain batchID. If not stored on this
    * processor, return NULL */
-  MyReal *getLabel(int id);
+  double *getLabel(int id);

   /* Read data from file */
   void readData(const char *datafolder, const char *examplefile,
diff --git a/include/defs.hpp b/include/defs.hpp
deleted file mode 100644
index bdd6900..0000000
--- a/include/defs.hpp
+++ /dev/null
@@ -1,12 +0,0 @@
-#include
-#pragma once
-
-/*
- * Switch between single (float) and double precision by un-/commenting the
- * corresponding lines.
- */
-
-// typedef float MyReal;
-// #define MPI_MyReal MPI_FLOAT
-typedef double MyReal;
-#define MPI_MyReal MPI_DOUBLE
\ No newline at end of file
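Note on the deleted file: defs.hpp was the single place holding the float/double toggle together with its matching MPI datatype macro. If switchable precision is ever wanted back without reintroducing a global typedef and macro, the same idea can live in one alias plus a small traits helper. A minimal standalone sketch (the names real_t and mpi_type are hypothetical, not identifiers from this repository):

    #include <mpi.h>

    // Hypothetical replacement for the deleted defs.hpp toggle: flip the
    // alias once and every MPI call picks up the matching datatype.
    using real_t = double;  // change to float if single precision returns

    template <typename T> struct mpi_type;  // maps a C++ type to an MPI datatype
    template <> struct mpi_type<float>  { static MPI_Datatype get() { return MPI_FLOAT; } };
    template <> struct mpi_type<double> { static MPI_Datatype get() { return MPI_DOUBLE; } };

    // Usage: MPI_Allreduce(&loc, &glob, 1, mpi_type<real_t>::get(), MPI_SUM, comm);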
diff --git a/include/hessianApprox.hpp b/include/hessianApprox.hpp
index 356bb61..9605bcc 100644
--- a/include/hessianApprox.hpp
+++ b/include/hessianApprox.hpp
@@ -1,5 +1,4 @@
 #include
-#include "defs.hpp"
 #include "linalg.hpp"
 #pragma once

@@ -16,12 +15,12 @@ class HessianApprox {
   /**
    * Compute the BFGS ascent direction
    */
-  virtual void computeAscentDir(int k, MyReal *gradient, MyReal *ascentdir) = 0;
+  virtual void computeAscentDir(int k, double *gradient, double *ascentdir) = 0;

   /**
    * Update the BFGS memory (like s, y, rho, H0...)
    */
-  virtual void updateMemory(int k, MyReal *design, MyReal *gradient) = 0;
+  virtual void updateMemory(int k, double *design, double *gradient) = 0;
 };

 class L_BFGS : public HessianApprox {
@@ -29,36 +28,36 @@ class L_BFGS : public HessianApprox {
   int M; /* Length of the l-bfgs memory (stages) */

   /* L-BFGS memory */
-  MyReal **s;           /* storing M (x_{k+1} - x_k) vectors */
-  MyReal **y;           /* storing M (\nabla f_{k+1} - \nabla f_k) vectors */
-  MyReal *rho;          /* storing M 1/y^Ts values */
-  MyReal H0;            /* Initial Hessian scaling factor */
-  MyReal *design_old;   /* Design at previous iteration */
-  MyReal *gradient_old; /* Gradient at previous iteration */
+  double **s;           /* storing M (x_{k+1} - x_k) vectors */
+  double **y;           /* storing M (\nabla f_{k+1} - \nabla f_k) vectors */
+  double *rho;          /* storing M 1/y^Ts values */
+  double H0;            /* Initial Hessian scaling factor */
+  double *design_old;   /* Design at previous iteration */
+  double *gradient_old; /* Gradient at previous iteration */

 public:
   L_BFGS(MPI_Comm comm, int dimN, /* Local design dimension */
          int stage);
   ~L_BFGS();

-  void computeAscentDir(int k, MyReal *gradient, MyReal *ascentdir);
+  void computeAscentDir(int k, double *gradient, double *ascentdir);

-  void updateMemory(int k, MyReal *design, MyReal *gradient);
+  void updateMemory(int k, double *design, double *gradient);
 };

 class BFGS : public HessianApprox {
 private:
-  MyReal *A;
-  MyReal *B;
-  MyReal *Hy;
+  double *A;
+  double *B;
+  double *Hy;

 protected:
-  MyReal *s;
-  MyReal *y;
-  MyReal
+  double *s;
+  double *y;
+  double
       *Hessian; /* Storing the Hessian approximation (flattened: dimN*dimN) */
-  MyReal *design_old;   /* Design at previous iteration */
-  MyReal *gradient_old; /* Gradient at previous iteration */
+  double *design_old;   /* Design at previous iteration */
+  double *gradient_old; /* Gradient at previous iteration */

 public:
   BFGS(MPI_Comm comm, int N);
@@ -66,9 +65,9 @@ class BFGS : public HessianApprox {

   void setIdentity();

-  void computeAscentDir(int k, MyReal *gradient, MyReal *ascentdir);
+  void computeAscentDir(int k, double *gradient, double *ascentdir);

-  void updateMemory(int k, MyReal *design, MyReal *gradient);
+  void updateMemory(int k, double *design, double *gradient);
 };

 /**
@@ -79,7 +78,7 @@ class Identity : public HessianApprox {
   Identity(MPI_Comm comm, int N);
   ~Identity();

-  void computeAscentDir(int k, MyReal *currgrad, MyReal *ascentdir);
+  void computeAscentDir(int k, double *currgrad, double *ascentdir);

-  void updateMemory(int k, MyReal *design, MyReal *gradient);
+  void updateMemory(int k, double *design, double *gradient);
 };
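The three approximations above share one interface, so the optimizer only ever holds a base-class pointer. A minimal selection sketch built from the constructors declared in this header (the integer codes are hypothetical; the repository reads hessianapprox_type from the config):

    // Hypothetical factory around the classes above; only the switch values
    // are assumptions, the constructor signatures come from this header.
    HessianApprox *makeHessianApprox(int type, MPI_Comm comm, int dimN, int stages) {
      switch (type) {
        case 0:  return new L_BFGS(comm, dimN, stages);  // limited-memory pairs
        case 1:  return new BFGS(comm, dimN);            // dense dimN*dimN Hessian
        default: return new Identity(comm, dimN);        // plain steepest descent
      }
    }
    // Per iteration: h->updateMemory(k, design, gradient);
    //                h->computeAscentDir(k, gradient, ascentdir);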
diff --git a/include/layer.hpp b/include/layer.hpp
index 036ead8..cb393f9 100644
--- a/include/layer.hpp
+++ b/include/layer.hpp
@@ -3,7 +3,6 @@
 #include
 #include
 #include "config.hpp"
-#include "defs.hpp"
 #include "linalg.hpp"
 #pragma once

@@ -26,19 +25,19 @@ class Layer {
   int csize;
   int index; /* Number of the layer */

-  MyReal dt;           /* Step size for Layer update */
-  MyReal *weights;     /* Weight matrix, flattened as a vector */
-  MyReal *weights_bar; /* Derivative of the Weight matrix*/
-  MyReal *bias;        /* Bias */
-  MyReal *bias_bar;    /* Derivative of bias */
-  MyReal
+  double dt;           /* Step size for Layer update */
+  double *weights;     /* Weight matrix, flattened as a vector */
+  double *weights_bar; /* Derivative of the Weight matrix*/
+  double *bias;        /* Bias */
+  double *bias_bar;    /* Derivative of bias */
+  double
       gamma_tik; /* Parameter for Tikhonov regularization of weights and bias */
-  MyReal gamma_ddt; /* Parameter for DDT regularization of weights and bias */
+  double gamma_ddt; /* Parameter for DDT regularization of weights and bias */

   int activ; /* Activation function (enum element) */
   int type;  /* Type of the layer (enum element) */

-  MyReal *update;     /* Auxilliary for computing fwd update */
-  MyReal *update_bar; /* Auxilliary for computing bwd update */
+  double *update;     /* Auxiliary for computing fwd update */
+  double *update_bar; /* Auxiliary for computing bwd update */

 public:
   /* Available layer types */
@@ -55,30 +54,30 @@ class Layer {
   Layer();
   Layer(int idx, int Type, int dimI, int dimO, int dimB,
         int dimW, // number of weights
-        MyReal deltaT, int Activ, MyReal gammatik, MyReal gammaddt);
+        double deltaT, int Activ, double gammatik, double gammaddt);

   virtual ~Layer();

   /* Set time step size */
-  void setDt(MyReal DT);
+  void setDt(double DT);

   /* Set design and gradient memory location */
-  void setMemory(MyReal *design_memloc, MyReal *gradient_memloc);
+  void setMemory(double *design_memloc, double *gradient_memloc);

   /* Some Get..() functions */
-  MyReal getDt();
-  MyReal getGammaTik();
-  MyReal getGammaDDT();
+  double getDt();
+  double getGammaTik();
+  double getGammaDDT();
   int getActivation();
   int getType();

   /* Get pointer to the weights bias*/
-  MyReal *getWeights();
-  MyReal *getBias();
+  double *getWeights();
+  double *getBias();

   /* Get pointer to the weights bias bar */
-  MyReal *getWeightsBar();
-  MyReal *getBiasBar();
+  double *getWeightsBar();
+  double *getBiasBar();

   /* Get the dimensions */
   int getDimIn();
@@ -94,24 +93,24 @@ class Layer {
   int getIndex();

   /* Prints to screen */
-  void print_data(MyReal *data_Out);
+  void print_data(double *data_Out);

   /* Activation function and derivative */
-  MyReal activation(MyReal x);
-  MyReal dactivation(MyReal x);
+  double activation(double x);
+  double dactivation(double x);

   /**
    * Pack weights and bias into a buffer
    */
-  void packDesign(MyReal *buffer, int size);
+  void packDesign(double *buffer, int size);

   /**
    * Unpack weights and bias from a buffer
    */
-  void unpackDesign(MyReal *buffer);
+  void unpackDesign(double *buffer);

   /* Scales the weights by a factor and resets the gradient to zero. */
-  void scaleDesign(MyReal factor);
+  void scaleDesign(double factor);

   /**
    * Sets the bar variables to zero
    */
@@ -122,38 +121,38 @@ class Layer {
    * Evaluate Tikhonov Regularization
    * Returns 1/2 * \|weights\|^2 + 1/2 * \|bias\|^2
    */
-  MyReal evalTikh();
+  double evalTikh();

   /**
    * Derivative of Tikhonov Regularization
    */
-  void evalTikh_diff(MyReal regul_bar);
+  void evalTikh_diff(double regul_bar);

   /**
    * Regularization for the time-derivative of the layer weights
    */
-  MyReal evalRegulDDT(Layer *layer_prev, MyReal deltat);
+  double evalRegulDDT(Layer *layer_prev, double deltat);

   /**
    * Derivative of ddt-regularization term
    */
-  void evalRegulDDT_diff(Layer *layer_prev, Layer *layer_next, MyReal deltat);
+  void evalRegulDDT_diff(Layer *layer_prev, Layer *layer_next, double deltat);

   /**
    * In opening layers: set pointer to the current example
    */
-  virtual void setExample(MyReal *example_ptr);
+  virtual void setExample(double *example_ptr);

   /**
    * In classification layers: set pointer to the current label
    */
-  virtual void setLabel(MyReal *label_ptr);
+  virtual void setLabel(double *label_ptr);

   /**
    * Forward propagation of an example
    * In/Out: vector holding the current propagated example
    */
-  virtual void applyFWD(MyReal *state) = 0;
+  virtual void applyFWD(double *state) = 0;

   /**
    * Backward propagation of an example
    * (i.e. if weights_bar,bias_bar should be updated or not. In general, update
    * is only done on the finest layer-grid.)
    */
-  virtual void applyBWD(MyReal *state, MyReal *state_bar,
+  virtual void applyBWD(double *state, double *state_bar,
                         int compute_gradient) = 0;

   /* ReLu Activation and derivative */
-  MyReal ReLu_act(MyReal x);
-  MyReal dReLu_act(MyReal x);
+  double ReLu_act(double x);
+  double dReLu_act(double x);

   /* Smooth ReLu activation: Uses a quadratic approximation around zero (range:
    * default 0.1) */
-  MyReal SmoothReLu_act(MyReal x);
-  MyReal dSmoothReLu_act(MyReal x);
+  double SmoothReLu_act(double x);
+  double dSmoothReLu_act(double x);

   /* tanh Activation and derivative */
-  MyReal tanh_act(MyReal x);
-  MyReal dtanh_act(MyReal x);
+  double tanh_act(double x);
+  double dtanh_act(double x);
 };

 /**
@@ -187,13 +186,13 @@ class Layer {
  */
 class DenseLayer : public Layer {
 public:
-  DenseLayer(int idx, int dimI, int dimO, MyReal deltaT, int activation,
-             MyReal gammatik, MyReal gammaddt);
+  DenseLayer(int idx, int dimI, int dimO, double deltaT, int activation,
+             double gammatik, double gammaddt);
   ~DenseLayer();

-  void applyFWD(MyReal *state);
+  void applyFWD(double *state);

-  void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient);
+  void applyBWD(double *state, double *state_bar, int compute_gradient);
 };

 /**
@@ -202,17 +201,17 @@ class DenseLayer : public Layer {
  */
 class OpenDenseLayer : public DenseLayer {
 protected:
-  MyReal *example; /* Pointer to the current example data */
+  double *example; /* Pointer to the current example data */

 public:
-  OpenDenseLayer(int dimI, int dimO, int activation, MyReal gammatik);
+  OpenDenseLayer(int dimI, int dimO, int activation, double gammatik);
   ~OpenDenseLayer();

-  void setExample(MyReal *example_ptr);
+  void setExample(double *example_ptr);

-  void applyFWD(MyReal *state);
+  void applyFWD(double *state);

-  void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient);
+  void applyBWD(double *state, double *state_bar, int compute_gradient);
 };

 /*
  * Opening layer that expands the data by zeros
  */
 class OpenExpandZero : public Layer {
 protected:
-  MyReal *example; /* Pointer to the current example data */
+  double *example; /* Pointer to the current example data */

 public:
   OpenExpandZero(int dimI, int dimO);
   ~OpenExpandZero();

-  void setExample(MyReal *example_ptr);
+  void setExample(double *example_ptr);

-  void applyFWD(MyReal *state);
+  void applyFWD(double *state);

-  void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient);
+  void applyBWD(double *state, double *state_bar, int compute_gradient);
 };

 /**
@@ -237,48 +236,48 @@ class OpenExpandZero : public Layer {
  */
 class ClassificationLayer : public Layer {
 protected:
-  MyReal *label; /* Pointer to the current label vector */
+  double *label; /* Pointer to the current label vector */

-  MyReal *probability; /* vector of pedicted class probabilities */
+  double *probability; /* vector of predicted class probabilities */

 public:
-  ClassificationLayer(int idx, int dimI, int dimO, MyReal gammatik);
+  ClassificationLayer(int idx, int dimI, int dimO, double gammatik);
   ~ClassificationLayer();

-  void setLabel(MyReal *label_ptr);
+  void setLabel(double *label_ptr);

-  void applyFWD(MyReal *state);
+  void applyFWD(double *state);

-  void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient);
+  void applyBWD(double *state, double *state_bar, int compute_gradient);

   /**
    * Evaluate the cross entropy function
    */
-  MyReal crossEntropy(MyReal *finalstate);
+  double crossEntropy(double *finalstate);

   /**
    * Algorithmic derivative of evaluating cross entropy loss
    */
-  void crossEntropy_diff(MyReal *data_Out, MyReal *data_Out_bar,
-                         MyReal loss_bar);
+  void crossEntropy_diff(double *data_Out, double *data_Out_bar,
+                         double loss_bar);

   /**
    * Compute the class probabilities
    * return 1 if predicted class was correct, 0 else.
    * out: *class_id_ptr holding the predicted class
    */
-  int prediction(MyReal *data_out, int *class_id_ptr);
+  int prediction(double *data_out, int *class_id_ptr);

   /**
    * Translate the data:
    * Subtracts the maximum value from all entries
    */
-  void normalize(MyReal *data);
+  void normalize(double *data);

   /**
    * Algorithmic derivative of the normalize function
    */
-  void normalize_diff(MyReal *data, MyReal *data_bar);
+  void normalize_diff(double *data, double *data_bar);
 };

 /**
@@ -296,21 +295,21 @@ class ConvLayer : public Layer {
 public:
   ConvLayer(int idx, int dimI, int dimO, int csize_in, int nconv_in,
-            MyReal deltaT, int Activ, MyReal Gammatik, MyReal Gammaddt);
+            double deltaT, int Activ, double Gammatik, double Gammaddt);
   ~ConvLayer();

-  void applyFWD(MyReal *state);
+  void applyFWD(double *state);

-  void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient);
+  void applyBWD(double *state, double *state_bar, int compute_gradient);

-  inline MyReal apply_conv(
-      MyReal *state,    // state vector to apply convolution to
+  inline double apply_conv(
+      double *state,    // state vector to apply convolution to
       int output_conv,  // output convolution
       int j,            // row index
       int k);           // column index

-  inline MyReal apply_conv_trans(
-      MyReal *state,    // state vector to apply convolution to
+  inline double apply_conv_trans(
+      double *state,    // state vector to apply convolution to
       int output_conv,  // output convolution
       int j,            // row index
       int k);           // column index
@@ -333,9 +332,9 @@ class ConvLayer : public Layer {
    *
    * On exit this method modifies weights_bar
    */
-  inline MyReal updateWeightDerivative(
-      MyReal *state,  // state vector
-      MyReal
+  inline double updateWeightDerivative(
+      double *state,  // state vector
+      double
           *update_bar,  // combines derivative and adjoint info (see comments)
       int output_conv,  // output convolution
       int j,            // row index
@@ -348,17 +347,17 @@
  */
 class OpenConvLayer : public Layer {
 protected:
-  MyReal *example; /* Pointer to the current example data */
+  double *example; /* Pointer to the current example data */

 public:
   OpenConvLayer(int dimI, int dimO);
   ~OpenConvLayer();

-  void setExample(MyReal *example_ptr);
+  void setExample(double *example_ptr);

-  void applyFWD(MyReal *state);
+  void applyFWD(double *state);

-  void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient);
+  void applyBWD(double *state, double *state_bar, int compute_gradient);
 };

 /**
@@ -375,7 +374,7 @@ class OpenConvLayerMNIST : public OpenConvLayer {
   OpenConvLayerMNIST(int dimI, int dimO);
   ~OpenConvLayerMNIST();

-  void applyFWD(MyReal *state);
+  void applyFWD(double *state);

-  void applyBWD(MyReal *state, MyReal *state_bar, int compute_gradient);
+  void applyBWD(double *state, double *state_bar, int compute_gradient);
 };
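A quick aside on SmoothReLu_act declared above: with interpolation range eta, the quadratic x^2/(4*eta) + x/2 + eta/4 meets plain ReLU with matching value and slope at x = +-eta, so the activation is C^1. A standalone spot-check, independent of the Layer class:

    #include <cstdio>

    // Standalone spot-check of the SmoothReLu formulas used in layer.cpp:
    // a quadratic a*x^2 + b*x + c on (-eta, eta), plain ReLU outside.
    double smooth_relu(double x) {
      const double eta = 0.1;  // interpolation range, as in the source
      const double a = 1.0 / (4.0 * eta), b = 0.5, c = eta / 4.0;
      if (-eta < x && x < eta) return a * x * x + b * x + c;
      return x > 0.0 ? x : 0.0;
    }

    int main() {
      // At x = +-eta the quadratic gives eta/4 + eta/2 + eta/4 = eta and 0,
      // matching ReLU, and its slope 2*a*x + b gives 1 and 0 there.
      printf("%g %g\n", smooth_relu(0.1), smooth_relu(-0.1));  // 0.1 0
      return 0;
    }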
diff --git a/include/linalg.hpp b/include/linalg.hpp
index 873d6bc..4fdb44a 100644
--- a/include/linalg.hpp
+++ b/include/linalg.hpp
@@ -1,7 +1,6 @@
 #include
 #include
 #include
-#include "defs.hpp"
 #pragma once

 /**
@@ -10,7 +9,7 @@
  * vectors x and y of dimension dimN
  * Out: returns xTy
  */
-MyReal vecdot(int dimN, MyReal *x, MyReal *y);
+double vecdot(int dimN, double *x, double *y);

 /**
  * Parallel dot-product xTy, invokes an MPI_Allreduce call
@@ -19,37 +18,37 @@ MyReal vecdot(int dimN, MyReal *x, MyReal *y);
  *     MPI communicator
  * Out: returns global xTy on all procs
  */
-MyReal vecdot_par(int dimN, MyReal *x, MyReal *y, MPI_Comm comm);
+double vecdot_par(int dimN, double *x, double *y, MPI_Comm comm);

 /**
  * Return the maximum value of a vector
  */
-MyReal vecmax(int dimN, MyReal *x);
+double vecmax(int dimN, double *x);

 /**
  * Return the index of the maximum entry of the vector
  */
-int argvecmax(int dimN, MyReal *x);
+int argvecmax(int dimN, double *x);

 /**
  * Computes square of the l2-norm of x
  */
-MyReal vecnormsq(int dimN, MyReal *x);
+double vecnormsq(int dimN, double *x);

 /**
  * Parallel l2-norm computation, invokes an MPI_Allreduce
  */
-MyReal vecnorm_par(int dimN, MyReal *x, MPI_Comm comm);
+double vecnorm_par(int dimN, double *x, MPI_Comm comm);

 /**
  * Copy a vector u into u_copy
  */
-int vec_copy(int N, MyReal *u, MyReal *u_copy);
+int vec_copy(int N, double *u, double *u_copy);

 /**
  * Compute matrix x * y^T
  */
-void vecvecT(int N, MyReal *x, MyReal *y, MyReal *XYT);
+void vecvecT(int N, double *x, double *y, double *XYT);

 /**
  * Compute Matrix-vector product Hx
@@ -58,4 +57,4 @@ void vecvecT(int N, MyReal *x, MyReal *y, MyReal *XYT);
  *     vector x
  * Out: H*x will be stored in Hx
  */
-void matvec(int dimN, MyReal *H, MyReal *x, MyReal *Hx);
+void matvec(int dimN, double *H, double *x, double *Hx);
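Since the reductions now name MPI_DOUBLE directly, vecdot_par boils down to the standard local-dot-plus-MPI_Allreduce idiom. A self-contained usage sketch of that pattern (illustration only, not code from this repository):

    #include <mpi.h>
    #include <cstdio>

    // Minimal sketch of the vecdot_par pattern: compute the local partial dot
    // product, then MPI_Allreduce with MPI_DOUBLE so every rank sees the sum.
    int main(int argc, char **argv) {
      MPI_Init(&argc, &argv);
      int rank;
      MPI_Comm_rank(MPI_COMM_WORLD, &rank);

      double x[2] = {1.0, 2.0}, y[2] = {3.0, 4.0};
      double localdot = x[0] * y[0] + x[1] * y[1];  // what vecdot() returns locally

      double globaldot = 0.0;
      MPI_Allreduce(&localdot, &globaldot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

      if (rank == 0) printf("global xTy = %g\n", globaldot);  // 11 per rank, summed
      MPI_Finalize();
      return 0;
    }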
diff --git a/include/network.hpp b/include/network.hpp
index 3b6996d..0ec5c11 100644
--- a/include/network.hpp
+++ b/include/network.hpp
@@ -15,9 +15,9 @@ class Network {
   int nlayers_local;  /* Number of Layers in this network block */
   int nchannels;      /* Width of the network */
-  MyReal dt;          /* Time step size */
-  MyReal loss;        /* Value of the loss function */
-  MyReal accuracy;    /* Accuracy of the network prediction (percentage of
+  double dt;          /* Time step size */
+  double loss;        /* Value of the loss function */
+  double accuracy;    /* Accuracy of the network prediction (percentage of
                          successfully predicted classes) */

   int startlayerID; /* ID of the first layer on that processor */
@@ -28,8 +28,8 @@ class Network {
   int ndesign_layermax; /* Max. number of design variables of all hidden layers */

-  MyReal *design;   /* Local vector of design variables*/
-  MyReal *gradient; /* Local Gradient */
+  double *design;   /* Local vector of design variables*/
+  double *gradient; /* Local Gradient */

   Layer *openlayer; /* At first processor: openinglayer, else: NULL */
   Layer **layers;   /* Array of hidden layers (includes classification layer at
@@ -54,22 +54,22 @@ class Network {
   int getnLayersGlobal();

   /* Get initial time step size */
-  MyReal getDT();
+  double getDT();

   /* Get local storage index of a layer */
   int getLocalID(int ilayer);

   /* Return value of the loss function */
-  MyReal getLoss();
+  double getLoss();

   /* Return accuracy value */
-  MyReal getAccuracy();
+  double getAccuracy();

   /* Return a pointer to the design vector */
-  MyReal *getDesign();
+  double *getDesign();

   /* Return a pointer to the gradient vector */
-  MyReal *getGradient();
+  double *getGradient();

   /* Get ID of first and last layer on this processor */
   int getStartLayerID();
@@ -114,17 +114,17 @@ class Network {
   /**
    * Applies the classification and evaluates loss/accuracy
    */
-  void evalClassification(DataSet *data, MyReal **state, int output);
+  void evalClassification(DataSet *data, double **state, int output);

   /**
    * On classification layer: derivative of evalClassification
    */
-  void evalClassification_diff(DataSet *data, MyReal **primalstate,
-                               MyReal **adjointstate, int compute_gradient);
+  void evalClassification_diff(DataSet *data, double **primalstate,
+                               double **adjointstate, int compute_gradient);

   /**
    * Update the network design parameters: new_design = old_design + stepsize *
    * direction
    */
-  void updateDesign(MyReal stepsize, MyReal *direction, MPI_Comm comm);
+  void updateDesign(double stepsize, double *direction, MPI_Comm comm);
 };
diff --git a/include/util.hpp b/include/util.hpp
index 2d1793c..50e2793 100644
--- a/include/util.hpp
+++ b/include/util.hpp
@@ -1,32 +1,31 @@
 #include
 #include
 #include
-#include "defs.hpp"
 #pragma once

 /**
  * Read data from file
  */
-void read_matrix(char *filename, MyReal **var, int dimx, int dimy);
+void read_matrix(char *filename, double **var, int dimx, int dimy);

 /**
  * Read data from file
  */
-void read_vector(char *filename, MyReal *var, int dimy);
+void read_vector(char *filename, double *var, int dimy);

 /**
  * Write data to file
  */
-void write_vector(char *filename, MyReal *var, int dimN);
+void write_vector(char *filename, double *var, int dimN);

 /**
  * Gather a local vector of size localsendcount into global recvbuffer at root
  */
-void MPI_GatherVector(MyReal *sendbuffer, int localsendcount,
-                      MyReal *recvbuffer, int rootprocessID, MPI_Comm comm);
+void MPI_GatherVector(double *sendbuffer, int localsendcount,
+                      double *recvbuffer, int rootprocessID, MPI_Comm comm);

 /**
  * Scatter parts of a global vector on root to local vectors on each processor
  * (size localrecvcount)
  */
-void MPI_ScatterVector(MyReal *sendbuffer, MyReal *recvbuffer,
+void MPI_ScatterVector(double *sendbuffer, double *recvbuffer,
                        int localrecvcount, int rootprocessID, MPI_Comm comm);
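MPI_GatherVector and MPI_ScatterVector wrap collectives whose per-rank counts may differ. One plausible shape for the gather side is sketched below under the assumption that local counts vary per processor; the real body lives in src/util.cpp, which this patch does not show:

    #include <mpi.h>
    #include <vector>

    // Plausible sketch of MPI_GatherVector from util.hpp: collect each rank's
    // localsendcount doubles into recvbuffer on the root, back to back.
    void MPI_GatherVector(double *sendbuffer, int localsendcount,
                          double *recvbuffer, int rootprocessID, MPI_Comm comm) {
      int size;
      MPI_Comm_size(comm, &size);

      // Root learns every rank's count, then builds displacements from them.
      std::vector<int> counts(size), displs(size);
      MPI_Gather(&localsendcount, 1, MPI_INT, counts.data(), 1, MPI_INT,
                 rootprocessID, comm);
      int offset = 0;
      for (int i = 0; i < size; i++) { displs[i] = offset; offset += counts[i]; }

      MPI_Gatherv(sendbuffer, localsendcount, MPI_DOUBLE, recvbuffer,
                  counts.data(), displs.data(), MPI_DOUBLE, rootprocessID, comm);
    }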
diff --git a/src/braid_wrapper.cpp b/src/braid_wrapper.cpp
index 3843242..3158f60 100644
--- a/src/braid_wrapper.cpp
+++ b/src/braid_wrapper.cpp
@@ -10,9 +10,9 @@ myBraidVector::myBraidVector(int nChannels, int nBatch) {
   sendflag = -1.0;

   /* Allocate the state vector */
-  state = new MyReal *[nbatch];
+  state = new double *[nbatch];
   for (int iex = 0; iex < nbatch; iex++) {
-    state[iex] = new MyReal[nchannels];
+    state[iex] = new double[nchannels];
     for (int ic = 0; ic < nchannels; ic++) {
       state[iex][ic] = 0.0;
     }
@@ -32,15 +32,15 @@
 int myBraidVector::getnChannels() { return nchannels; }

 int myBraidVector::getnBatch() { return nbatch; }

-MyReal *myBraidVector::getState(int exampleID) { return state[exampleID]; }
+double *myBraidVector::getState(int exampleID) { return state[exampleID]; }

-MyReal **myBraidVector::getState() { return state; }
+double **myBraidVector::getState() { return state; }

 Layer *myBraidVector::getLayer() { return layer; }
 void myBraidVector::setLayer(Layer *layerptr) { layer = layerptr; }

-MyReal myBraidVector::getSendflag() { return sendflag; }
-void myBraidVector::setSendflag(MyReal value) { sendflag = value; }
+double myBraidVector::getSendflag() { return sendflag; }
+void myBraidVector::setSendflag(double value) { sendflag = value; }

 /* ========================================================= */
 /* ========================================================= */

@@ -78,7 +78,7 @@ myBraidApp::~myBraidApp() {
   if (core->GetWarmRestart()) delete core;
 }

-MyReal myBraidApp::getObjective() { return objective; }
+double myBraidApp::getObjective() { return objective; }

 BraidCore *myBraidApp::getCore() { return core; }

@@ -86,7 +86,7 @@ void myBraidApp::GetGridDistribution(int *ilower_ptr, int *iupper_ptr) {
   core->GetDistribution(ilower_ptr, iupper_ptr);
 }

-braid_Int myBraidApp::GetTimeStepIndex(MyReal t) {
+braid_Int myBraidApp::GetTimeStepIndex(double t) {
   /* Round to the closest integer */
   int ts = round(t / network->getDT());
   return ts;
@@ -95,8 +95,8 @@ braid_Int myBraidApp::Step(braid_Vector u_, braid_Vector ustop_,
                            braid_Vector fstop_, BraidStepStatus &pstatus) {
   int ts_stop;
-  MyReal tstart, tstop;
-  MyReal deltaT;
+  double tstart, tstop;
+  double deltaT;

   myBraidVector *u = (myBraidVector *)u_;
   int nbatch = data->getnBatch();
@@ -231,7 +231,7 @@ braid_Int myBraidApp::SpatialNorm(braid_Vector u_, braid_Real *norm_ptr) {
   int nchannels = network->getnChannels();
   int nbatch = data->getnBatch();

-  MyReal dot = 0.0;
+  double dot = 0.0;
   for (int iex = 0; iex < nbatch; iex++) {
     dot += vecdot(nchannels, u->getState(iex), u->getState(iex));
   }
@@ -256,7 +256,7 @@ braid_Int myBraidApp::BufSize(braid_Int *size_ptr, BraidBufferStatus &bstatus) {
   int nlayerdesign = network->getnDesignLayermax();

   /* Set the size */
-  *size_ptr = (nuvector + nlayerinfo + nlayerdesign) * sizeof(MyReal);
+  *size_ptr = (nuvector + nlayerinfo + nlayerdesign) * sizeof(double);

   return 0;
 }
@@ -266,7 +266,7 @@ braid_Int myBraidApp::BufPack(braid_Vector u_, void *buffer,
   int size;
   int nchannels = network->getnChannels();
   int nbatch = data->getnBatch();
-  MyReal *dbuffer = (MyReal *)buffer;
+  double *dbuffer = (double *)buffer;
   myBraidVector *u = (myBraidVector *)u_;

   /* Store network state */
@@ -277,7 +277,7 @@ braid_Int myBraidApp::BufPack(braid_Vector u_, void *buffer,
       idx++;
     }
   }
-  size = nchannels * nbatch * sizeof(MyReal);
+  size = nchannels * nbatch * sizeof(double);

   int nweights = u->getLayer()->getnWeights();
   int nbias = u->getLayer()->getDimBias();
@@ -316,7 +316,7 @@ braid_Int myBraidApp::BufPack(braid_Vector u_, void *buffer,
     idx++;
     // dbuffer[idx] = u->layer->getBiasBar()[i]; idx++;
   }
-  size += (12 + (nweights + nbias)) * sizeof(MyReal);
+  size += (12 + (nweights + nbias)) * sizeof(double);

   bstatus.SetSize(size);
@@ -326,7 +326,7 @@ braid_Int myBraidApp::BufUnpack(void *buffer, braid_Vector *u_ptr,
                                 BraidBufferStatus &bstatus) {
   Layer *tmplayer = 0;
-  MyReal *dbuffer = (MyReal *)buffer;
+  double *dbuffer = (double *)buffer;

   int nchannels = network->getnChannels();
   int nbatch = data->getnBatch();
@@ -399,8 +399,8 @@ braid_Int myBraidApp::BufUnpack(void *buffer, braid_Vector *u_ptr,
     }

     /* Allocate design and gradient */
-    MyReal *design = new MyReal[nDesign];
-    MyReal *gradient = new MyReal[nDesign];
+    double *design = new double[nDesign];
+    double *gradient = new double[nDesign];
     tmplayer->setMemory(design, gradient);
     /* Set the weights */
     for (int i = 0; i < nweights; i++) {
@@ -454,8 +454,8 @@ braid_Int myBraidApp::EvaluateObjective() {
   braid_BaseVector ubase;
   myBraidVector *u;
   Layer *layer;
-  MyReal myobjective;
-  MyReal regul;
+  double myobjective;
+  double regul;

   /* Get range of locally stored layers */
   int startlayerID = network->getStartLayerID();
@@ -491,15 +491,15 @@ braid_Int myBraidApp::EvaluateObjective() {
   /* Collect objective function from all processors */
   myobjective = network->getLoss() + regul;
   objective = 0.0;
-  MPI_Allreduce(&myobjective, &objective, 1, MPI_MyReal, MPI_SUM,
+  MPI_Allreduce(&myobjective, &objective, 1, MPI_DOUBLE, MPI_SUM,
                 MPI_COMM_WORLD);

   return 0;
 }

-MyReal myBraidApp::run() {
+double myBraidApp::run() {
   int nreq = -1;
-  MyReal norm;
+  double norm;

   SetInitialCondition();
   core->Drive();
@@ -537,8 +537,8 @@ braid_Int myAdjointBraidApp::Step(braid_Vector u_, braid_Vector ustop_,
                                   BraidStepStatus &pstatus) {
   int ts_stop;
   int level, compute_gradient;
-  MyReal tstart, tstop;
-  MyReal deltaT;
+  double tstart, tstop;
+  double deltaT;
   int finegrid = 0;
   int primaltimestep;
   braid_BaseVector ubaseprimal;
@@ -643,7 +643,7 @@ braid_Int myAdjointBraidApp::BufSize(braid_Int *size_ptr,
   int nchannels = network->getnChannels();
   int nbatch = data->getnBatch();

-  *size_ptr = nchannels * nbatch * sizeof(MyReal);
+  *size_ptr = nchannels * nbatch * sizeof(double);
   return 0;
 }
@@ -652,7 +652,7 @@ braid_Int myAdjointBraidApp::BufPack(braid_Vector u_, void *buffer,
   int size;
   int nchannels = network->getnChannels();
   int nbatch = data->getnBatch();
-  MyReal *dbuffer = (MyReal *)buffer;
+  double *dbuffer = (double *)buffer;
   myBraidVector *u = (myBraidVector *)u_;

   /* Store network state */
@@ -663,7 +663,7 @@ braid_Int myAdjointBraidApp::BufPack(braid_Vector u_, void *buffer,
       idx++;
     }
   }
-  size = nchannels * nbatch * sizeof(MyReal);
+  size = nchannels * nbatch * sizeof(double);

   bstatus.SetSize(size);
   return 0;
@@ -673,7 +673,7 @@ braid_Int myAdjointBraidApp::BufUnpack(void *buffer, braid_Vector *u_ptr,
                                        BraidBufferStatus &bstatus) {
   int nchannels = network->getnChannels();
   int nbatch = data->getnBatch();
-  MyReal *dbuffer = (MyReal *)buffer;
+  double *dbuffer = (double *)buffer;

   /* Allocate the vector */
   myBraidVector *u = new myBraidVector(nchannels, nbatch);
diff --git a/src/config.cpp b/src/config.cpp
index 8d7bd4d..3d8b710 100644
--- a/src/config.cpp
+++ b/src/config.cpp
@@ -1,5 +1,4 @@
 #include "config.hpp"
-
 #include
 #include
 #include
@@ -429,8 +428,8 @@ int Config::writeToFile(FILE *outfile) {
   return 0;
 }

-MyReal Config::getStepsize(int optimiter) {
-  MyReal stepsize = 0.0;
+double Config::getStepsize(int optimiter) {
+  double stepsize = 0.0;

   switch (stepsize_type) {
     case FIXED:
@@ -440,7 +439,7 @@ MyReal Config::getStepsize(int optimiter) {
       stepsize = stepsize_init;
       break;
     case ONEOVERK:
-      stepsize = 1.0 / (MyReal)(optimiter +
+      stepsize = 1.0 / (double)(optimiter +
                                 1);  // add one because optimiter starts with 0
   }
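For the ONEOVERK branch above, optimiter starts at 0, so the schedule is the classic diminishing step size 1, 1/2, 1/3, ... A trivial standalone check of that formula:

    #include <cstdio>

    // Standalone check of the ONEOVERK branch of Config::getStepsize.
    int main() {
      for (int optimiter = 0; optimiter < 3; optimiter++)
        printf("%g ", 1.0 / (double)(optimiter + 1));  // 1 0.5 0.333333
      return 0;
    }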
diff --git a/src/dataset.cpp b/src/dataset.cpp
index e9afd77..d2752bd 100644
--- a/src/dataset.cpp
+++ b/src/dataset.cpp
@@ -31,16 +31,16 @@ void DataSet::initialize(int nElements, int nFeatures, int nLabels, int nBatch,
   /* Allocate feature vectors on first processor */
   if (MPIrank == 0) {
-    examples = new MyReal *[nelements];
+    examples = new double *[nelements];
     for (int ielem = 0; ielem < nelements; ielem++) {
-      examples[ielem] = new MyReal[nfeatures];
+      examples[ielem] = new double[nfeatures];
     }
   }

   /* Allocate label vectors on last processor */
   if (MPIrank == MPIsize - 1) {
-    labels = new MyReal *[nelements];
+    labels = new double *[nelements];
     for (int ielem = 0; ielem < nelements; ielem++) {
-      labels[ielem] = new MyReal[nlabels];
+      labels[ielem] = new double[nlabels];
     }
   }
@@ -85,13 +85,13 @@ DataSet::~DataSet() {

 int DataSet::getnBatch() { return nbatch; }

-MyReal *DataSet::getExample(int id) {
+double *DataSet::getExample(int id) {
   if (examples == NULL) return NULL;

   return examples[batchIDs[id]];
 }

-MyReal *DataSet::getLabel(int id) {
+double *DataSet::getLabel(int id) {
   if (labels == NULL) return NULL;

   return labels[batchIDs[id]];
diff --git a/src/hessianApprox.cpp b/src/hessianApprox.cpp
index 5f2a83d..f80abcf 100644
--- a/src/hessianApprox.cpp
+++ b/src/hessianApprox.cpp
@@ -12,11 +12,11 @@ L_BFGS::L_BFGS(MPI_Comm comm, int N, int stages) : HessianApprox(comm) {
   H0 = 1.0;

   /* Allocate memory for sk and yk for all stages */
-  s = new MyReal *[M];
-  y = new MyReal *[M];
+  s = new double *[M];
+  y = new double *[M];
   for (int imem = 0; imem < M; imem++) {
-    s[imem] = new MyReal[dimN];
-    y[imem] = new MyReal[dimN];
+    s[imem] = new double[dimN];
+    y[imem] = new double[dimN];
     for (int i = 0; i < dimN; i++) {
       s[imem][i] = 0.0;
       y[imem][i] = 0.0;
@@ -24,14 +24,14 @@ L_BFGS::L_BFGS(MPI_Comm comm, int N, int stages) : HessianApprox(comm) {
   }

   /* Allocate memory for rho's values */
-  rho = new MyReal[M];
+  rho = new double[M];
   for (int i = 0; i < M; i++) {
     rho[i] = 0.0;
   }

   /* Allocate memory for storing design at previous iteration */
-  design_old = new MyReal[dimN];
-  gradient_old = new MyReal[dimN];
+  design_old = new double[dimN];
+  gradient_old = new double[dimN];
 }

 L_BFGS::~L_BFGS() {
@@ -48,10 +48,10 @@ L_BFGS::~L_BFGS() {
   delete[] gradient_old;
 }

-void L_BFGS::computeAscentDir(int iter, MyReal *gradient, MyReal *ascentdir) {
+void L_BFGS::computeAscentDir(int iter, double *gradient, double *ascentdir) {
   int imemory;
-  MyReal beta;
-  MyReal *alpha = new MyReal[M];
+  double beta;
+  double *alpha = new double[M];
   int imax, imin;

   /* Initialize the ascentdir with steepest descent */
@@ -98,10 +98,10 @@ void L_BFGS::computeAscentDir(int iter, MyReal *gradient, MyReal *ascentdir) {
   delete[] alpha;
 }

-void L_BFGS::updateMemory(int iter, MyReal *design, MyReal *gradient) {
+void L_BFGS::updateMemory(int iter, double *design, double *gradient) {
   /* Update lbfgs memory only if iter > 0 */
   if (iter > 0) {
-    MyReal yTy, yTs;
+    double yTy, yTs;

     /* Get storing state */
     int imemory = (iter - 1) % M;
@@ -135,19 +135,19 @@ void L_BFGS::updateMemory(int iter, MyReal *design, MyReal *gradient) {

 BFGS::BFGS(MPI_Comm comm, int N) : HessianApprox(comm) {
   dimN = N;

-  Hessian = new MyReal[N * N];
+  Hessian = new double[N * N];
   setIdentity();

-  y = new MyReal[N];
-  s = new MyReal[N];
+  y = new double[N];
+  s = new double[N];

-  Hy = new MyReal[N];
-  A = new MyReal[N * N];
-  B = new MyReal[N * N];
+  Hy = new double[N];
+  A = new double[N * N];
+  B = new double[N * N];

   /* Allocate memory for storing design at previous iteration */
-  design_old = new MyReal[dimN];
-  gradient_old = new MyReal[dimN];
+  design_old = new double[dimN];
+  gradient_old = new double[dimN];

   /* Sanity check */
   int size;
@@ -180,7 +180,7 @@ BFGS::~BFGS() {
   delete[] gradient_old;
 }

-void BFGS::updateMemory(int iter, MyReal *design, MyReal *gradient) {
+void BFGS::updateMemory(int iter, double *design, double *gradient) {
   /* Update BFGS memory for s, y */
   for (int idir = 0; idir < dimN; idir++) {
     y[idir] = gradient[idir] - gradient_old[idir];
   }
 }
@@ -188,9 +188,9 @@
-void BFGS::computeAscentDir(int iter, MyReal *gradient, MyReal *ascentdir) {
-  MyReal yTy, yTs, H0;
-  MyReal b, rho;
+void BFGS::computeAscentDir(int iter, double *gradient, double *ascentdir) {
+  double yTy, yTs, H0;
+  double b, rho;

   /* Steepest descent in first iteration */
   if (iter == 0) {
@@ -248,9 +248,9 @@ Identity::Identity(MPI_Comm comm, int N) : HessianApprox(comm) { dimN = N; }

 Identity::~Identity() {}

-void Identity::updateMemory(int iter, MyReal *design, MyReal *gradient) {}
+void Identity::updateMemory(int iter, double *design, double *gradient) {}

-void Identity::computeAscentDir(int iter, MyReal *gradient, MyReal *ascentdir) {
+void Identity::computeAscentDir(int iter, double *gradient, double *ascentdir) {
   /* Steepest descent */
   for (int i = 0; i < dimN; i++) {
     ascentdir[i] = gradient[i];
   }
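For reference, computeAscentDir in L_BFGS above is the standard two-loop recursion over the M stored (s, y) pairs with rho = 1/(y^T s) and initial scaling H0. A compact serial sketch of that recursion (simplified: plain vectors, no MPI reduction and no ring-buffer indexing as in the class):

    #include <vector>

    // Serial sketch of the L-BFGS two-loop recursion: applies the implicit
    // inverse-Hessian approximation to the gradient g.
    // m pairs (s[i], y[i]) with rho[i] = 1 / (y[i]^T s[i]), H0 = initial scaling.
    std::vector<double> two_loop(const std::vector<std::vector<double>> &s,
                                 const std::vector<std::vector<double>> &y,
                                 const std::vector<double> &rho, double H0,
                                 std::vector<double> g /* gradient, copied */) {
      int m = (int)s.size(), n = (int)g.size();
      std::vector<double> alpha(m);
      for (int i = m - 1; i >= 0; i--) {  // first loop: newest to oldest pair
        double dot = 0.0;
        for (int k = 0; k < n; k++) dot += s[i][k] * g[k];
        alpha[i] = rho[i] * dot;
        for (int k = 0; k < n; k++) g[k] -= alpha[i] * y[i][k];
      }
      for (int k = 0; k < n; k++) g[k] *= H0;  // apply initial Hessian scaling
      for (int i = 0; i < m; i++) {            // second loop: oldest to newest
        double dot = 0.0;
        for (int k = 0; k < n; k++) dot += y[i][k] * g[k];
        double beta = rho[i] * dot;
        for (int k = 0; k < n; k++) g[k] += (alpha[i] - beta) * s[i][k];
      }
      return g;  // ascent direction; step with design -= stepsize * g
    }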
diff --git a/src/layer.cpp b/src/layer.cpp
index 4d24770..a58df4a 100644
--- a/src/layer.cpp
+++ b/src/layer.cpp
@@ -27,7 +27,7 @@ Layer::Layer() {
 }

 Layer::Layer(int idx, int Type, int dimI, int dimO, int dimB, int dimW,
-             MyReal deltaT, int Activ, MyReal gammatik, MyReal gammaddt)
+             double deltaT, int Activ, double gammatik, double gammaddt)
     : Layer() {
   index = idx;
   type = Type;
@@ -41,8 +41,8 @@ Layer::Layer(int idx, int Type, int dimI, int dimO, int dimB, int dimW,
   gamma_tik = gammatik;
   gamma_ddt = gammaddt;

-  update = new MyReal[dimO];
-  update_bar = new MyReal[dimO];
+  update = new double[dimO];
+  update_bar = new double[dimO];
 }

 Layer::~Layer() {
@@ -50,11 +50,11 @@ Layer::~Layer() {
   delete[] update_bar;
 }

-void Layer::setDt(MyReal DT) { dt = DT; }
+void Layer::setDt(double DT) { dt = DT; }

-MyReal Layer::getDt() { return dt; }
+double Layer::getDt() { return dt; }

-void Layer::setMemory(MyReal *design_memloc, MyReal *gradient_memloc) {
+void Layer::setMemory(double *design_memloc, double *gradient_memloc) {
   /* Set design and gradient memory locations */
   weights = design_memloc;
   weights_bar = gradient_memloc;
@@ -64,19 +64,19 @@ void Layer::setMemory(MyReal *design_memloc, MyReal *gradient_memloc) {
   bias_bar = gradient_memloc + nweights;
 }

-MyReal Layer::getGammaTik() { return gamma_tik; }
+double Layer::getGammaTik() { return gamma_tik; }

-MyReal Layer::getGammaDDT() { return gamma_ddt; }
+double Layer::getGammaDDT() { return gamma_ddt; }

 int Layer::getActivation() { return activ; }
 int Layer::getType() { return type; }

-MyReal *Layer::getWeights() { return weights; }
-MyReal *Layer::getBias() { return bias; }
+double *Layer::getWeights() { return weights; }
+double *Layer::getBias() { return bias; }

-MyReal *Layer::getWeightsBar() { return weights_bar; }
-MyReal *Layer::getBiasBar() { return bias_bar; }
+double *Layer::getWeightsBar() { return weights_bar; }
+double *Layer::getBiasBar() { return bias_bar; }

 int Layer::getDimIn() { return dim_In; }
 int Layer::getDimOut() { return dim_Out; }
@@ -89,7 +89,7 @@ int Layer::getCSize() { return csize; }

 int Layer::getIndex() { return index; }

-void Layer::print_data(MyReal *data) {
+void Layer::print_data(double *data) {
   printf("DATA: ");
   for (int io = 0; io < dim_Out; io++) {
     printf("%1.14e ", data[io]);
@@ -97,8 +97,8 @@ void Layer::print_data(MyReal *data) {
   printf("\n");
 }

-MyReal Layer::activation(MyReal x) {
-  MyReal y;
+double Layer::activation(double x) {
+  double y;
   switch (activ) {
     case TANH:
       y = Layer::tanh_act(x);
@@ -118,8 +118,8 @@ MyReal Layer::activation(MyReal x) {
   return y;
 }

-MyReal Layer::dactivation(MyReal x) {
-  MyReal y;
+double Layer::dactivation(double x) {
+  double y;
   switch (activ) {
     case TANH:
       y = Layer::dtanh_act(x);
@@ -139,7 +139,7 @@ MyReal Layer::dactivation(MyReal x) {
   return y;
 }

-void Layer::packDesign(MyReal *buffer, int size) {
+void Layer::packDesign(double *buffer, int size) {
   int nweights = getnWeights();
   int nbias = getDimBias();
   int idx = 0;
@@ -158,7 +158,7 @@ void Layer::packDesign(MyReal *buffer, int size) {
   }
 }

-void Layer::unpackDesign(MyReal *buffer) {
+void Layer::unpackDesign(double *buffer) {
   int nweights = getnWeights();
   int nbias = getDimBias();
@@ -173,7 +173,7 @@ void Layer::unpackDesign(MyReal *buffer) {
   }
 }

-void Layer::scaleDesign(MyReal factor) {
+void Layer::scaleDesign(double factor) {
   /* Scale design by a factor */
   for (int i = 0; i < nweights; i++) {
     weights[i] = factor * weights[i];
@@ -195,8 +195,8 @@ void Layer::resetBar() {
   }
 }

-MyReal Layer::evalTikh() {
-  MyReal tik = 0.0;
+double Layer::evalTikh() {
+  double tik = 0.0;
   for (int i = 0; i < nweights; i++) {
     tik += pow(weights[i], 2);
   }
@@ -207,7 +207,7 @@ MyReal Layer::evalTikh() {
   return gamma_tik / 2.0 * tik;
 }

-void Layer::evalTikh_diff(MyReal regul_bar) {
+void Layer::evalTikh_diff(double regul_bar) {
   regul_bar = gamma_tik * regul_bar;

   /* Derivative bias term */
@@ -219,11 +219,11 @@ void Layer::evalTikh_diff(MyReal regul_bar) {
   }
 }

-MyReal Layer::evalRegulDDT(Layer *layer_prev, MyReal deltat) {
+double Layer::evalRegulDDT(Layer *layer_prev, double deltat) {
   if (layer_prev == NULL) return 0.0;  // this holds for opening layer

-  MyReal diff;
-  MyReal regul_ddt = 0.0;
+  double diff;
+  double regul_ddt = 0.0;

   /* Compute ddt-regularization only if dimensions match */
   /* this excludes first intermediate layer and classification layer. */
@@ -246,11 +246,11 @@ MyReal Layer::evalRegulDDT(Layer *layer_prev, MyReal deltat) {
 }

 void Layer::evalRegulDDT_diff(Layer *layer_prev, Layer *layer_next,
-                              MyReal deltat) {
+                              double deltat) {
   if (layer_prev == NULL) return;
   if (layer_next == NULL) return;

-  MyReal diff;
+  double diff;
   int regul_bar = gamma_ddt / (deltat * deltat);

   /* Left sided derivative term */
@@ -286,18 +286,18 @@ void Layer::evalRegulDDT_diff(Layer *layer_prev, Layer *layer_next,
   }
 }

-void Layer::setExample(MyReal *example_ptr) {}
+void Layer::setExample(double *example_ptr) {}

-void Layer::setLabel(MyReal *example_ptr) {}
+void Layer::setLabel(double *example_ptr) {}

-DenseLayer::DenseLayer(int idx, int dimI, int dimO, MyReal deltaT, int Activ,
-                       MyReal gammatik, MyReal gammaddt)
+DenseLayer::DenseLayer(int idx, int dimI, int dimO, double deltaT, int Activ,
+                       double gammatik, double gammaddt)
     : Layer(idx, DENSE, dimI, dimO, 1, dimI * dimO, deltaT, Activ, gammatik,
             gammaddt) {}

 DenseLayer::~DenseLayer() {}

-void DenseLayer::applyFWD(MyReal *state) {
+void DenseLayer::applyFWD(double *state) {
   /* Affine transformation */
   for (int io = 0; io < dim_Out; io++) {
     /* Apply weights */
@@ -313,7 +313,7 @@ void DenseLayer::applyFWD(MyReal *state) {
   }
 }

-void DenseLayer::applyBWD(MyReal *state, MyReal *state_bar,
+void DenseLayer::applyBWD(double *state, double *state_bar,
                           int compute_gradient) {
   /* state_bar is the adjoint of the state variable, it contains the
      old time adjoint information, and is modified on the way out to
@@ -343,7 +343,7 @@ void DenseLayer::applyBWD(MyReal *state, MyReal *state_bar,
   }
 }

-OpenDenseLayer::OpenDenseLayer(int dimI, int dimO, int Activ, MyReal gammatik)
+OpenDenseLayer::OpenDenseLayer(int dimI, int dimO, int Activ, double gammatik)
     : DenseLayer(-1, dimI, dimO, 1.0, Activ, gammatik, 0.0) {
   type = OPENDENSE;
   example = NULL;
@@ -351,9 +351,9 @@ OpenDenseLayer::OpenDenseLayer(int dimI, int dimO, int Activ, MyReal gammatik)

 OpenDenseLayer::~OpenDenseLayer() {}

-void OpenDenseLayer::setExample(MyReal *example_ptr) { example = example_ptr; }
+void OpenDenseLayer::setExample(double *example_ptr) { example = example_ptr; }

-void OpenDenseLayer::applyFWD(MyReal *state) {
+void OpenDenseLayer::applyFWD(double *state) {
   /* affine transformation */
   for (int io = 0; io < dim_Out; io++) {
     /* Apply weights */
@@ -369,7 +369,7 @@ void OpenDenseLayer::applyFWD(MyReal *state) {
   }
 }

-void OpenDenseLayer::applyBWD(MyReal *state, MyReal *state_bar,
+void OpenDenseLayer::applyBWD(double *state, double *state_bar,
                               int compute_gradient) {
   /* Derivative of step */
   for (int io = 0; io < dim_Out; io++) {
@@ -405,9 +405,9 @@ OpenExpandZero::OpenExpandZero(int dimI, int dimO)

 OpenExpandZero::~OpenExpandZero() {}

-void OpenExpandZero::setExample(MyReal *example_ptr) { example = example_ptr; }
+void OpenExpandZero::setExample(double *example_ptr) { example = example_ptr; }

-void OpenExpandZero::applyFWD(MyReal *state) {
+void OpenExpandZero::applyFWD(double *state) {
   for (int ii = 0; ii < dim_In; ii++) {
     state[ii] = example[ii];
   }
@@ -416,7 +416,7 @@ void OpenExpandZero::applyFWD(MyReal *state) {
   }
 }

-void OpenExpandZero::applyBWD(MyReal *state, MyReal *state_bar,
+void OpenExpandZero::applyBWD(double *state, double *state_bar,
                               int compute_gradient) {
   for (int ii = 0; ii < dim_Out; ii++) {
     state_bar[ii] = 0.0;
@@ -437,9 +437,9 @@ OpenConvLayer::OpenConvLayer(int dimI, int dimO)

 OpenConvLayer::~OpenConvLayer() {}

-void OpenConvLayer::setExample(MyReal *example_ptr) { example = example_ptr; }
+void OpenConvLayer::setExample(double *example_ptr) { example = example_ptr; }

-void OpenConvLayer::applyFWD(MyReal *state) {
+void OpenConvLayer::applyFWD(double *state) {
   // replicate the image data
   for (int img = 0; img < nconv; img++) {
     for (int ii = 0; ii < dim_In; ii++) {
@@ -448,7 +448,7 @@ void OpenConvLayer::applyFWD(MyReal *state) {
   }
 }

-void OpenConvLayer::applyBWD(MyReal *state, MyReal *state_bar,
+void OpenConvLayer::applyBWD(double *state, double *state_bar,
                              int compute_gradient) {
   for (int ii = 0; ii < dim_Out; ii++) {
     state_bar[ii] = 0.0;
@@ -462,7 +462,7 @@ OpenConvLayerMNIST::OpenConvLayerMNIST(int dimI, int dimO)

 OpenConvLayerMNIST::~OpenConvLayerMNIST() {}

-void OpenConvLayerMNIST::applyFWD(MyReal *state) {
+void OpenConvLayerMNIST::applyFWD(double *state) {
   // replicate the image data
   for (int img = 0; img < nconv; img++) {
     for (int ii = 0; ii < dim_In; ii++) {
@@ -476,7 +476,7 @@ void OpenConvLayerMNIST::applyFWD(MyReal *state) {
   }
 }

-void OpenConvLayerMNIST::applyBWD(MyReal *state, MyReal *state_bar,
+void OpenConvLayerMNIST::applyBWD(double *state, double *state_bar,
                                   int compute_gradient) {
   // Derivative of step
   for (int img = 0; img < nconv; img++) {
@@ -492,19 +492,19 @@ void OpenConvLayerMNIST::applyBWD(MyReal *state, MyReal *state_bar,
 }

 ClassificationLayer::ClassificationLayer(int idx, int dimI, int dimO,
-                                         MyReal gammatik)
+                                         double gammatik)
     : Layer(idx, CLASSIFICATION, dimI, dimO, dimO, dimI * dimO, 1.0, -1, 0.0,
             0.0) {
   gamma_tik = gammatik;

   /* Allocate the probability vector */
-  probability = new MyReal[dimO];
+  probability = new double[dimO];
 }

 ClassificationLayer::~ClassificationLayer() { delete[] probability; }

-void ClassificationLayer::setLabel(MyReal *label_ptr) { label = label_ptr; }
+void ClassificationLayer::setLabel(double *label_ptr) { label = label_ptr; }

-void ClassificationLayer::applyFWD(MyReal *state) {
+void ClassificationLayer::applyFWD(double *state) {
   /* Compute affine transformation */
   for (int io = 0; io < dim_Out; io++) {
     /* Apply weights */
@@ -533,7 +533,7 @@ void ClassificationLayer::applyFWD(MyReal *state) {
   }
 }

-void ClassificationLayer::applyBWD(MyReal *state, MyReal *state_bar,
+void ClassificationLayer::applyBWD(double *state, double *state_bar,
                                    int compute_gradient) {
   /* Recompute affine transformation */
   for (int io = 0; io < dim_Out; io++) {
@@ -567,17 +567,17 @@ void ClassificationLayer::applyBWD(MyReal *state, MyReal *state_bar,
   }
 }

-void ClassificationLayer::normalize(MyReal *data) {
+void ClassificationLayer::normalize(double *data) {
   /* Find maximum value */
-  MyReal max = vecmax(dim_Out, data);
+  double max = vecmax(dim_Out, data);

   /* Shift the data vector */
   for (int io = 0; io < dim_Out; io++) {
     data[io] = data[io] - max;
   }
 }

-void ClassificationLayer::normalize_diff(MyReal *data, MyReal *data_bar) {
-  MyReal max_b = 0.0;
+void ClassificationLayer::normalize_diff(double *data, double *data_bar) {
+  double max_b = 0.0;
   /* Derivative of the shift */
   for (int io = 0; io < dim_Out; io++) {
     max_b -= data_bar[io];
@@ -587,9 +587,9 @@ void ClassificationLayer::normalize_diff(MyReal *data, MyReal *data_bar) {
   data_bar[i_max] += max_b;
 }

-MyReal ClassificationLayer::crossEntropy(MyReal *data_Out) {
-  MyReal label_pr, exp_sum;
-  MyReal CELoss;
+double ClassificationLayer::crossEntropy(double *data_Out) {
+  double label_pr, exp_sum;
+  double CELoss;

   /* Label projection */
   label_pr = vecdot(dim_Out, label, data_Out);
@@ -606,11 +606,11 @@ MyReal ClassificationLayer::crossEntropy(MyReal *data_Out) {
   return CELoss;
 }

-void ClassificationLayer::crossEntropy_diff(MyReal *data_Out,
-                                            MyReal *data_Out_bar,
-                                            MyReal loss_bar) {
-  MyReal exp_sum, exp_sum_bar;
-  MyReal label_pr_bar = -loss_bar;
+void ClassificationLayer::crossEntropy_diff(double *data_Out,
+                                            double *data_Out_bar,
+                                            double loss_bar) {
+  double exp_sum, exp_sum_bar;
+  double label_pr_bar = -loss_bar;

   /* Recompute exp_sum */
   exp_sum = 0.0;
@@ -630,8 +630,8 @@ void ClassificationLayer::crossEntropy_diff(MyReal *data_Out,
   }
 }

-int ClassificationLayer::prediction(MyReal *data_Out, int *class_id_ptr) {
-  MyReal exp_sum, max;
+int ClassificationLayer::prediction(double *data_Out, int *class_id_ptr) {
+  double exp_sum, max;
   int class_id = -1;
   int success = 0;
@@ -663,16 +663,16 @@ int ClassificationLayer::prediction(MyReal *data_Out, int *class_id_ptr) {
   return success;
 }

-MyReal Layer::ReLu_act(MyReal x) {
-  MyReal max = 0.0;
+double Layer::ReLu_act(double x) {
+  double max = 0.0;

   if (x > 0.0) max = x;

   return max;
 }

-MyReal Layer::dReLu_act(MyReal x) {
-  MyReal diff;
+double Layer::dReLu_act(double x) {
+  double diff;
   if (x >= 0.0)
     diff = 1.0;
   else
@@ -681,13 +681,13 @@ MyReal Layer::dReLu_act(MyReal x) {
   return diff;
 }

-MyReal Layer::SmoothReLu_act(MyReal x) {
+double Layer::SmoothReLu_act(double x) {
   /* range of quadratic interpolation */
-  MyReal eta = 0.1;
+  double eta = 0.1;
   /* Coefficients of quadratic interpolation */
-  MyReal a = 1. / (4. * eta);
-  MyReal b = 1. / 2.;
-  MyReal c = eta / 4.;
+  double a = 1. / (4. * eta);
+  double b = 1. / 2.;
+  double c = eta / 4.;

   if (-eta < x && x < eta) {
     /* Quadratic Activation */
@@ -698,12 +698,12 @@ MyReal Layer::SmoothReLu_act(MyReal x) {
   }
 }

-MyReal Layer::dSmoothReLu_act(MyReal x) {
+double Layer::dSmoothReLu_act(double x) {
   /* range of quadratic interpolation */
-  MyReal eta = 0.1;
+  double eta = 0.1;
   /* Coefficients of quadratic interpolation */
-  MyReal a = 1. / (4. * eta);
-  MyReal b = 1. / 2.;
+  double a = 1. / (4. * eta);
+  double b = 1. / 2.;

   if (-eta < x && x < eta) {
     return 2. * a * x + b;
@@ -712,16 +712,16 @@ MyReal Layer::dSmoothReLu_act(MyReal x) {
   }
 }

-MyReal Layer::tanh_act(MyReal x) { return tanh(x); }
+double Layer::tanh_act(double x) { return tanh(x); }

-MyReal Layer::dtanh_act(MyReal x) {
-  MyReal diff = 1.0 - pow(tanh(x), 2);
+double Layer::dtanh_act(double x) {
+  double diff = 1.0 - pow(tanh(x), 2);

   return diff;
 }

 ConvLayer::ConvLayer(int idx, int dimI, int dimO, int csize_in, int nconv_in,
-                     MyReal deltaT, int Activ, MyReal Gammatik, MyReal Gammaddt)
+                     double deltaT, int Activ, double Gammatik, double Gammaddt)
     : Layer(idx, CONVOLUTION, dimI, dimO, dimI / nconv_in,
             csize_in * csize_in * nconv_in * nconv_in, deltaT, Activ, Gammatik,
             Gammaddt) {
@@ -755,12 +755,12 @@ ConvLayer::~ConvLayer() {}
 /*
  * Where state_bar _must_ be at the old time. Note that the adjoint variable
  * state_bar carries with it all the information of the objective derivative.
  */
-MyReal ConvLayer::updateWeightDerivative(
-    MyReal *state, MyReal *update_bar, int output_conv, /* output convolution */
+double ConvLayer::updateWeightDerivative(
+    double *state, double *update_bar, int output_conv, /* output convolution */
     int j, /* pixel index */
     int k) /* pixel index */
 {
-  MyReal val = 0;
+  double val = 0;
   int fcsize_s_l = -fcsize;
   int fcsize_s_u = fcsize;
@@ -793,23 +793,23 @@ MyReal ConvLayer::updateWeightDerivative(
   for (int input_image = 0; input_image < nconv;
        input_image++, center_index += img_size, input_wght_idx += csize2) {
-    MyReal update_val = update_bar[center_index];
+    double update_val = update_bar[center_index];

-    MyReal *state_base = state + center_index + offset;
-    MyReal *weights_bar_base = weights_bar + input_wght_idx + wght_idx;
+    double *state_base = state + center_index + offset;
+    double *weights_bar_base = weights_bar + input_wght_idx + wght_idx;

-    MyReal *update_base = update_bar + center_index + offset_adj;
-    MyReal *weights_base = weights + input_wght_idx + wght_idx_adj;
+    double *update_base = update_bar + center_index + offset_adj;
+    double *weights_base = weights + input_wght_idx + wght_idx_adj;

     // weight derivative
     for (int s = 0; s <= fcsize_s; s++, state_base += img_size_sqrt,
              weights_bar_base += csize, update_base -= img_size_sqrt,
              weights_base += csize) {
-      MyReal *state_local = state_base;
-      MyReal *weights_bar_local = weights_bar_base;
+      double *state_local = state_base;
+      double *weights_bar_local = weights_bar_base;

-      MyReal *update_local = update_base;
-      MyReal *weights_local = weights_base;
+      double *update_local = update_base;
+      double *weights_local = weights_base;

       for (int t = 0; t <= fcsize_t;
            t++, state_local++, weights_bar_local++, update_local--,
            weights_local++) {
@@ -822,12 +822,12 @@ MyReal ConvLayer::updateWeightDerivative(
   return val;
 }

-MyReal ConvLayer::apply_conv(MyReal *state,
+double ConvLayer::apply_conv(double *state,
                              int output_conv, /* output convolution */
                              int j, /* pixel index */
                              int k) /* pixel index */
 {
-  MyReal val = 0.0;
+  double val = 0.0;
   int fcsize_s_l = -fcsize;
   int fcsize_s_u = fcsize;
@@ -851,13 +851,13 @@ MyReal ConvLayer::apply_conv(MyReal *state,
   /* loop over all the images */
   for (int input_image = 0; input_image < nconv;
        input_image++, center_index += img_size, input_wght_idx += csize2) {
-    MyReal *state_base = state + center_index;
-    MyReal *weights_base = weights + input_wght_idx;
+    double *state_base = state + center_index;
+    double *weights_base = weights + input_wght_idx;

     for (int s = 0; s <= fcsize_s;
          s++, state_base += img_size_sqrt, weights_base += csize) {
-      MyReal *state_local = state_base;
-      MyReal *weights_local = weights_base;
+      double *state_local = state_base;
+      double *weights_local = weights_base;

       for (int t = 0; t <= fcsize_t; t++, state_local++, weights_local++) {
         val += (*state_local) * (*weights_local);
@@ -868,12 +868,12 @@ MyReal ConvLayer::apply_conv(MyReal *state,
   return val;
 }

-MyReal ConvLayer::apply_conv_trans(MyReal *state,
+double ConvLayer::apply_conv_trans(double *state,
                                    int output_conv, /* output convolution */
                                    int j, /* pixel index */
                                    int k) /* pixel index */
 {
-  MyReal val = 0.0;
+  double val = 0.0;
   int fcsize_s_l = -fcsize;
   int fcsize_s_u = fcsize;
@@ -896,13 +896,13 @@ MyReal ConvLayer::apply_conv_trans(MyReal *state,
     int offset = center_index - fcsize_t_l;
     int wght_idx = input_wght_idx + fcsize * (csize + 1) + fcsize_t_l;

-    MyReal *state_base = state + offset - img_size_sqrt * fcsize_s_l;
-    MyReal *weights_base = weights + wght_idx + csize * fcsize_s_l;
+    double *state_base = state + offset - img_size_sqrt * fcsize_s_l;
+    double *weights_base = weights + wght_idx + csize * fcsize_s_l;

     for (int s = 0; s <= fcsize_s;
          s++, state_base -= img_size_sqrt, weights_base += csize) {
-      MyReal *state_local = state_base;
-      MyReal *weights_local = weights_base;
+      double *state_local = state_base;
+      double *weights_local = weights_base;

       for (int t = 0; t <= fcsize_t; t++, state_local--, weights_local++) {
         val += (*state_local) * (*weights_local);
@@ -913,7 +913,7 @@ MyReal ConvLayer::apply_conv_trans(MyReal *state,
   return val;
 }

-void ConvLayer::applyFWD(MyReal *state) {
+void ConvLayer::applyFWD(double *state) {
   /* Apply step */
   for (int io = 0; io < dim_Out; io++) update[io] = state[io];
@@ -921,8 +921,8 @@ void ConvLayer::applyFWD(MyReal *state) {
   for (int i = 0; i < nconv; i++) {
     for (int j = 0; j < img_size_sqrt; j++) {
       int state_index = i * img_size + j * img_size_sqrt;
-      MyReal *update_local = state + state_index;
-      MyReal *bias_local = bias + j * img_size_sqrt;
+      double *update_local = state + state_index;
+      double *bias_local = bias + j * img_size_sqrt;

       for (int k = 0; k < img_size_sqrt; k++, update_local++, bias_local++) {
         // (*update_local) += dt*tanh(apply_conv(update, i, j, k) +
@@ -934,7 +934,7 @@ void ConvLayer::applyFWD(MyReal *state) {
   }
 }

-void ConvLayer::applyBWD(MyReal *state, MyReal *state_bar,
+void ConvLayer::applyBWD(double *state, double *state_bar,
                          int compute_gradient) {
   /* state_bar is the adjoint of the state variable, it contains the
      old time adjoint information, and is modified on the way out to
@@ -943,7 +943,7 @@ void ConvLayer::applyBWD(MyReal *state, MyReal *state_bar,
   /* Okay, for my own clarity:
      state = forward state solution
      state_bar = backward adjoint solution (in - new time, out - current time)
-     update_bar = update to the bacward solution, this is "MyReal dipped" in
+     update_bar = update to the backward solution, this is "double dipped" in
                   that it is used to compute the weight and bias derivative.
      Note that because this is written as a forward update (the residual is
     F = u_{n+1} - u_n - dt * sigma(W_n * u_n + b_n) the adjoint variable is
     also the
@@ -984,14 +984,14 @@ void ConvLayer::applyBWD(MyReal *state, MyReal *state_bar,
     /* loop over full image */
     for (int j = 0; j < img_size_sqrt; j++) {
       int state_index = i * img_size + j * img_size_sqrt;
-      MyReal *state_bar_local = state_bar + state_index;
-      MyReal *update_bar_local = update_bar + state_index;
-      MyReal *bias_local = bias + j * img_size_sqrt;
+      double *state_bar_local = state_bar + state_index;
+      double *update_bar_local = update_bar + state_index;
+      double *bias_local = bias + j * img_size_sqrt;

       for (int k = 0; k < img_size_sqrt;
            k++, state_bar_local++, update_bar_local++, bias_local++) {
         /* compute the affine transformation */
-        MyReal local_update = apply_conv(state, i, j, k) + (*bias_local);
+        double local_update = apply_conv(state, i, j, k) + (*bias_local);

         /* derivative of the update, this is the contribution from old time */
         // (*update_bar_local) = dt * dactivation(local_update) *
@@ -1009,9 +1009,9 @@ void ConvLayer::applyBWD(MyReal *state, MyReal *state_bar,
     for (int j = 0; j < img_size_sqrt; j++) {
       int state_index = i * img_size + j * img_size_sqrt;
-      MyReal *state_bar_local = state_bar + state_index;
-      MyReal *update_bar_local = update_bar + state_index;
-      MyReal *bias_bar_local = bias_bar + j * img_size_sqrt;
+      double *state_bar_local = state_bar + state_index;
+      double *update_bar_local = update_bar + state_index;
+      double *bias_bar_local = bias_bar + j * img_size_sqrt;

       for (int k = 0; k < img_size_sqrt;
            k++, state_bar_local++, update_bar_local++, bias_bar_local++) {
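One detail worth calling out from the classification code above: normalize() subtracts the maximum before crossEntropy() exponentiates, which is the usual log-sum-exp guard against overflow; the loss value itself is unchanged because the label vector sums to one. A standalone sketch of the combined computation (illustration only, not the class methods themselves):

    #include <cmath>
    #include <cstdio>

    // Standalone sketch of the normalize + crossEntropy pair: shift by the
    // max (log-sum-exp trick), then loss = -label . z + log(sum_i exp(z_i)).
    double cross_entropy(int n, const double *z_in, const double *label) {
      double z[16];  // assume n <= 16 for this sketch
      double max = z_in[0];
      for (int i = 1; i < n; i++) if (z_in[i] > max) max = z_in[i];
      for (int i = 0; i < n; i++) z[i] = z_in[i] - max;  // normalize()

      double label_pr = 0.0, exp_sum = 0.0;
      for (int i = 0; i < n; i++) {
        label_pr += label[i] * z[i];  // projection onto the true label
        exp_sum += exp(z[i]);         // safe: every z[i] <= 0 after the shift
      }
      return -label_pr + log(exp_sum);
    }

    int main() {
      double z[3] = {1000.0, 1001.0, 1002.0}, label[3] = {0.0, 0.0, 1.0};
      printf("%g\n", cross_entropy(3, z, label));  // ~0.4076, no overflow
      return 0;
    }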
diff --git a/src/linalg.cpp b/src/linalg.cpp
index 0345661..2165d0b 100644
--- a/src/linalg.cpp
+++ b/src/linalg.cpp
@@ -1,24 +1,24 @@
 #include "linalg.hpp"

-MyReal vecdot_par(int dimN, MyReal *x, MyReal *y, MPI_Comm comm) {
-  MyReal localdot, globaldot;
+double vecdot_par(int dimN, double *x, double *y, MPI_Comm comm) {
+  double localdot, globaldot;

   localdot = vecdot(dimN, x, y);
-  MPI_Allreduce(&localdot, &globaldot, 1, MPI_MyReal, MPI_SUM, comm);
+  MPI_Allreduce(&localdot, &globaldot, 1, MPI_DOUBLE, MPI_SUM, comm);

   return globaldot;
 }

-MyReal vecdot(int dimN, MyReal *x, MyReal *y) {
-  MyReal dotprod = 0.0;
+double vecdot(int dimN, double *x, double *y) {
+  double dotprod = 0.0;
   for (int i = 0; i < dimN; i++) {
     dotprod += x[i] * y[i];
   }
   return dotprod;
 }

-MyReal vecmax(int dimN, MyReal *x) {
-  MyReal max = -1e+12;
+double vecmax(int dimN, double *x) {
+  double max = -1e+12;

   for (int i = 0; i < dimN; i++) {
     if (x[i] > max) {
@@ -28,8 +28,8 @@ MyReal vecmax(int dimN, MyReal *x) {
   return max;
 }

-int argvecmax(int dimN, MyReal *x) {
-  MyReal max = -1e+12;
+int argvecmax(int dimN, double *x) {
+  double max = -1e+12;
   int i_max;
   for (int i = 0; i < dimN; i++) {
     if (x[i] > max) {
@@ -40,25 +40,25 @@ int argvecmax(int dimN, MyReal *x) {
   return i_max;
 }

-MyReal vecnormsq(int dimN, MyReal *x) {
-  MyReal normsq = 0.0;
+double vecnormsq(int dimN, double *x) {
+  double normsq = 0.0;
   for (int i = 0; i < dimN; i++) {
     normsq += pow(x[i], 2);
   }
   return normsq;
 }

-MyReal vecnorm_par(int dimN, MyReal *x, MPI_Comm comm) {
-  MyReal localnorm, globalnorm;
+double vecnorm_par(int dimN, double *x, MPI_Comm comm) {
+  double localnorm, globalnorm;

   localnorm = vecnormsq(dimN, x);
-  MPI_Allreduce(&localnorm, &globalnorm, 1, MPI_MyReal, MPI_SUM, comm);
+  MPI_Allreduce(&localnorm, &globalnorm, 1, MPI_DOUBLE, MPI_SUM, comm);
   globalnorm = sqrt(globalnorm);

   return globalnorm;
 }

-int vec_copy(int N, MyReal *u, MyReal *u_copy) {
+int vec_copy(int N, double *u, double *u_copy) {
   for (int i = 0; i < N; i++) {
     u_copy[i] = u[i];
   }
@@ -66,7 +66,7 @@ int vec_copy(int N, MyReal *u, MyReal *u_copy) {
   return 0;
 }

-void vecvecT(int N, MyReal *x, MyReal *y, MyReal *XYT) {
+void vecvecT(int N, double *x, double *y, double *XYT) {
   for (int i = 0; i < N; i++) {
     for (int j = 0; j < N; j++) {
       XYT[i * N + j] = x[i] * y[j];
@@ -74,8 +74,8 @@ void vecvecT(int N, MyReal *x, MyReal *y, MyReal *XYT) {
   }
 }

-void matvec(int dimN, MyReal *H, MyReal *x, MyReal *Hx) {
-  MyReal sum_j;
+void matvec(int dimN, double *H, double *x, double *Hx) {
+  double sum_j;

   for (int i = 0; i < dimN; i++) {
     sum_j = 0.0;
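With `MPI_MyReal` gone, every reduction above pairs a `double` buffer with `MPI_DOUBLE`, so type and datatype agree by construction. A small usage sketch (hypothetical sizes; assumes MPI is initialized and linalg.hpp declares these functions):

```cpp
#include <mpi.h>
#include "linalg.hpp"

// Each rank owns a slice of a distributed vector; vecnorm_par reduces the
// local squared norms with MPI_DOUBLE and takes the square root.
void norm_demo(MPI_Comm comm) {
  const int nlocal = 4;  // hypothetical local length
  double x[nlocal] = {1.0, 2.0, 3.0, 4.0};

  double nrm = vecnorm_par(nlocal, x, comm);   // sqrt of global sum of squares
  double dot = vecdot_par(nlocal, x, x, comm); // global dot product
  // nrm * nrm equals dot up to roundoff, on every rank.
  (void)nrm; (void)dot;
}
```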
diff --git a/src/main.cpp b/src/main.cpp
index 37c0b67..3dcd5c9 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -31,7 +31,6 @@
 #include "braid_wrapper.hpp"
 #include "config.hpp"
 #include "dataset.hpp"
-#include "defs.hpp"
 #include "hessianApprox.hpp"
 #include "layer.hpp"
 #include "network.hpp"
@@ -49,14 +48,14 @@ int main(int argc, char *argv[]) {
   Network *network; /**< DNN Network architecture */
   int ilower, iupper; /**< Index of first and last layer stored on this processor */
-  MyReal accur_train = 0.0; /**< Accuracy on training data */
-  MyReal accur_val = 0.0;   /**< Accuracy on validation data */
-  MyReal loss_train = 0.0;  /**< Loss function on training data */
-  MyReal loss_val = 0.0;    /**< Loss function on validation data */
-  MyReal losstrain_out = 0.0;
-  MyReal lossval_out = 0.0;
-  MyReal accurtrain_out = 0.0;
-  MyReal accurval_out = 0.0;
+  double accur_train = 0.0; /**< Accuracy on training data */
+  double accur_val = 0.0;   /**< Accuracy on validation data */
+  double loss_train = 0.0;  /**< Loss function on training data */
+  double loss_val = 0.0;    /**< Loss function on validation data */
+  double losstrain_out = 0.0;
+  double lossval_out = 0.0;
+  double accurtrain_out = 0.0;
+  double accurval_out = 0.0;

   /* --- XBraid --- */
   myBraidApp *primaltrainapp; /**< Braid App for training data */
@@ -66,18 +65,18 @@ int main(int argc, char *argv[]) {
   /* --- Optimization --- */
   int ndesign_local;  /**< Number of local design variables on this processor */
   int ndesign_global; /**< Number of global design variables (sum of local)*/
-  MyReal *ascentdir = 0; /**< Direction for design updates */
-  MyReal objective;      /**< Optimization objective */
-  MyReal wolfe;          /**< Holding the wolfe condition value */
-  MyReal rnorm;          /**< Space-time Norm of the state variables */
-  MyReal rnorm_adj;      /**< Space-time norm of the adjoint variables */
-  MyReal gnorm;          /**< Norm of the gradient */
-  MyReal ls_param;       /**< Parameter in wolfe condition test */
-  MyReal stepsize;       /**< Stepsize used for design update */
+  double *ascentdir = 0; /**< Direction for design updates */
+  double objective;      /**< Optimization objective */
+  double wolfe;          /**< Holding the wolfe condition value */
+  double rnorm;          /**< Space-time Norm of the state variables */
+  double rnorm_adj;      /**< Space-time norm of the adjoint variables */
+  double gnorm;          /**< Norm of the gradient */
+  double ls_param;       /**< Parameter in wolfe condition test */
+  double stepsize;       /**< Stepsize used for design update */
   char optimfilename[255];
   FILE *optimfile = 0;
-  MyReal ls_stepsize;
-  MyReal ls_objective, test_obj;
+  double ls_stepsize;
+  double ls_objective, test_obj;
   int ls_iter;

   /* --- other --- */
@@ -85,8 +84,8 @@ int main(int argc, char *argv[]) {
   int myid;
   int size;
   struct rusage r_usage;
-  MyReal StartTime, StopTime, myMB, globalMB;
-  MyReal UsedTime = 0.0;
+  double StartTime, StopTime, myMB, globalMB;
+  double UsedTime = 0.0;

   /* Initialize MPI */
   MPI_Init(&argc, &argv);
@@ -168,7 +167,7 @@ int main(int argc, char *argv[]) {
   /* Allocate ascent direction for design updates */
   /* Initialize optimization parameters */
-  ascentdir = new MyReal[ndesign_local];
+  ascentdir = new double[ndesign_local];
   stepsize = config->getStepsize(0);
   gnorm = 0.0;
   objective = 0.0;
@@ -232,13 +231,13 @@ int main(int argc, char *argv[]) {
   /* Communicate loss and accuracy. This is actually only needed for output.
    * TODO: Remove it. */
-  MPI_Allreduce(&loss_train, &losstrain_out, 1, MPI_MyReal, MPI_SUM,
+  MPI_Allreduce(&loss_train, &losstrain_out, 1, MPI_DOUBLE, MPI_SUM,
                 MPI_COMM_WORLD);
-  MPI_Allreduce(&loss_val, &lossval_out, 1, MPI_MyReal, MPI_SUM,
+  MPI_Allreduce(&loss_val, &lossval_out, 1, MPI_DOUBLE, MPI_SUM,
                 MPI_COMM_WORLD);
-  MPI_Allreduce(&accur_train, &accurtrain_out, 1, MPI_MyReal, MPI_SUM,
+  MPI_Allreduce(&accur_train, &accurtrain_out, 1, MPI_DOUBLE, MPI_SUM,
                 MPI_COMM_WORLD);
-  MPI_Allreduce(&accur_val, &accurval_out, 1, MPI_MyReal, MPI_SUM,
+  MPI_Allreduce(&accur_val, &accurval_out, 1, MPI_DOUBLE, MPI_SUM,
                 MPI_COMM_WORLD);

   /* Output */
@@ -359,8 +358,8 @@ int main(int argc, char *argv[]) {
   StopTime = MPI_Wtime();
   UsedTime = StopTime - StartTime;
   getrusage(RUSAGE_SELF, &r_usage);
-  myMB = (MyReal)r_usage.ru_maxrss / 1024.0;
-  MPI_Allreduce(&myMB, &globalMB, 1, MPI_MyReal, MPI_SUM, MPI_COMM_WORLD);
+  myMB = (double)r_usage.ru_maxrss / 1024.0;
+  MPI_Allreduce(&myMB, &globalMB, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
   // printf("%d; Memory Usage: %.2f MB\n",myid, myMB);

   if (myid == MASTER_NODE) {
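main.cpp is where most of the `MPI_MyReal` call sites lived; after this change every buffer is `double` and every datatype argument `MPI_DOUBLE`. If switching precision ever becomes interesting again, a type-traits mapping would keep the two in sync without resurrecting the global typedef. A hypothetical sketch, not part of this patch:

```cpp
#include <mpi.h>

// Hypothetical helper (not in this codebase): map the scalar type to its
// MPI datatype, so a reduction can never pair a buffer with the wrong type.
template <typename T> MPI_Datatype mpi_type();
template <> inline MPI_Datatype mpi_type<float>()  { return MPI_FLOAT; }
template <> inline MPI_Datatype mpi_type<double>() { return MPI_DOUBLE; }

// Usage, mirroring the Allreduce calls above:
//   MPI_Allreduce(&loss_train, &losstrain_out, 1, mpi_type<double>(),
//                 MPI_SUM, MPI_COMM_WORLD);
```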
diff --git a/src/network.cpp b/src/network.cpp
index 82f02d0..389c115 100644
--- a/src/network.cpp
+++ b/src/network.cpp
@@ -35,7 +35,7 @@ void Network::createNetworkBlock(int StartLayerID, int EndLayerID,
   nlayers_local = endlayerID - startlayerID + 1;
   nlayers_global = config->nlayers;
   nchannels = config->nchannels;
-  dt = (config->T) / (MyReal)(config->nlayers - 2); // nlayers-2 = nhiddenlayers
+  dt = (config->T) / (double)(config->nlayers - 2); // nlayers-2 = nhiddenlayers
   comm = Comm;

   /* --- Create the layers --- */
@@ -77,8 +77,8 @@ void Network::createNetworkBlock(int StartLayerID, int EndLayerID,
   ndesign_layermax = computeLayermax();

   /* Allocate memory for network design and gradient variables */
-  design = new MyReal[ndesign_local];
-  gradient = new MyReal[ndesign_local];
+  design = new double[ndesign_local];
+  gradient = new double[ndesign_local];

   /* Set the memory locations for all layers */
   int istart = 0;
@@ -97,14 +97,14 @@ void Network::createNetworkBlock(int StartLayerID, int EndLayerID,

   /* left and right neighbouring layer design, if exists */
   if (layer_left != NULL) {
-    MyReal *left_design = new MyReal[layer_left->getnDesign()];
-    MyReal *left_gradient = new MyReal[layer_left->getnDesign()];
+    double *left_design = new double[layer_left->getnDesign()];
+    double *left_gradient = new double[layer_left->getnDesign()];
     layer_left->setMemory(left_design, left_gradient);
   }

   /* Create and initialize right neighbouring layer design, if exists */
   if (layer_right != NULL) {
-    MyReal *right_design = new MyReal[layer_right->getnDesign()];
-    MyReal *right_gradient = new MyReal[layer_right->getnDesign()];
+    double *right_design = new double[layer_right->getnDesign()];
+    double *right_gradient = new double[layer_right->getnDesign()];
     layer_right->setMemory(right_design, right_gradient);
   }
 }
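The allocation above implies one flat `design`/`gradient` block per process, with each local layer handed a window into it (the ghost copies for `layer_left`/`layer_right` are allocated separately, as shown). A sketch of how that windowing presumably proceeds from `istart = 0`, inferred from the surrounding code rather than quoted from it:

```cpp
// Inferred sketch: hand each local layer its slice of the flat arrays.
// setMemory(design_ptr, gradient_ptr) is assumed to store the two pointers.
int istart = 0;
for (int ilayer = startlayerID; ilayer <= endlayerID; ilayer++) {
  Layer *layer = layers[getLocalID(ilayer)];
  layer->setMemory(design + istart, gradient + istart);
  istart += layer->getnDesign();  // advance past this layer's window
}
```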
@@ -141,24 +141,24 @@ int Network::getnChannels() { return nchannels; }

 int Network::getnLayersGlobal() { return nlayers_global; }

-MyReal Network::getDT() { return dt; }
+double Network::getDT() { return dt; }

 int Network::getLocalID(int ilayer) {
   int idx = ilayer - startlayerID;
   return idx;
 }

-MyReal Network::getLoss() { return loss; }
+double Network::getLoss() { return loss; }

-MyReal Network::getAccuracy() { return accuracy; }
+double Network::getAccuracy() { return accuracy; }

 int Network::getnDesignLocal() { return ndesign_local; }

 int Network::getnDesignGlobal() { return ndesign_global; }

-MyReal *Network::getDesign() { return design; }
+double *Network::getDesign() { return design; }

-MyReal *Network::getGradient() { return gradient; }
+double *Network::getGradient() { return gradient; }

 int Network::getStartLayerID() { return startlayerID; }
 int Network::getEndLayerID() { return endlayerID; }
@@ -258,8 +258,8 @@ int Network::computeLayermax() {
 }

 void Network::setInitialDesign(Config *config) {
-  MyReal factor;
-  MyReal *design_init;
+  double factor;
+  double *design_init;
   char filename[255];
   int myid;
   MPI_Comm_rank(comm, &myid);
@@ -268,9 +268,9 @@ void Network::setInitialDesign(Config *config) {
    * scaling test) */
   if (myid == 0) {
     srand(1.0);
-    design_init = new MyReal[ndesign_global];
+    design_init = new double[ndesign_global];
     for (int i = 0; i < ndesign_global; i++) {
-      design_init[i] = (MyReal)rand() / ((MyReal)RAND_MAX);
+      design_init[i] = (double)rand() / ((double)RAND_MAX);
     }
   }
   /* Scatter initial design to all processors */
@@ -334,10 +334,10 @@ void Network::MPI_CommunicateNeighbours(MPI_Comm comm) {
   int size_left = -1;
   int size_right = -1;

-  MyReal *sendlast = 0;
-  MyReal *recvlast = 0;
-  MyReal *sendfirst = 0;
-  MyReal *recvfirst = 0;
+  double *sendlast = 0;
+  double *recvlast = 0;
+  double *sendfirst = 0;
+  double *recvfirst = 0;

   /* --- All but the first process receive the last layer from left neighbour
    * --- */
@@ -346,23 +346,23 @@ void Network::MPI_CommunicateNeighbours(MPI_Comm comm) {
     int source = myid - 1;
     size_left = layer_left->getnDesign();
-    recvlast = new MyReal[size_left];
+    recvlast = new double[size_left];

-    MPI_Irecv(recvlast, size_left, MPI_MyReal, source, 0, comm, &recvlastreq);
+    MPI_Irecv(recvlast, size_left, MPI_DOUBLE, source, 0, comm, &recvlastreq);
   }

   /* --- All but the last process sent their last layer to right neighbour ---
    */
   if (myid < comm_size - 1) {
     size_left = layers[getLocalID(endlayerID)]->getnDesign();
-    sendlast = new MyReal[size_left];
+    sendlast = new double[size_left];

     /* Pack the last layer into a buffer */
     layers[getLocalID(endlayerID)]->packDesign(sendlast, size_left);

     /* Send to right neighbour */
     int receiver = myid + 1;
-    MPI_Isend(sendlast, size_left, MPI_MyReal, receiver, 0, comm, &sendlastreq);
+    MPI_Isend(sendlast, size_left, MPI_DOUBLE, receiver, 0, comm, &sendlastreq);
   }

   /* --- All but the last processor recv the first layer from the right
@@ -372,9 +372,9 @@ void Network::MPI_CommunicateNeighbours(MPI_Comm comm) {
     int source = myid + 1;
     size_right = layer_right->getnDesign();
-    recvfirst = new MyReal[size_right];
+    recvfirst = new double[size_right];

-    MPI_Irecv(recvfirst, size_right, MPI_MyReal, source, 1, comm,
+    MPI_Irecv(recvfirst, size_right, MPI_DOUBLE, source, 1, comm,
               &recvfirstreq);
   }

   /* --- All but the first processor send their first layer to the left
@@ -382,14 +382,14 @@ void Network::MPI_CommunicateNeighbours(MPI_Comm comm) {
    * neighbour --- */
   if (myid > 0) {
     size_right = layers[getLocalID(startlayerID)]->getnDesign();
-    sendfirst = new MyReal[size_right];
+    sendfirst = new double[size_right];

     /* Pack the first layer into a buffer */
     layers[getLocalID(startlayerID)]->packDesign(sendfirst, size_right);

     /* Send to left neighbour */
     int receiver = myid - 1;
-    MPI_Isend(sendfirst, size_right, MPI_MyReal, receiver, 1, comm,
+    MPI_Isend(sendfirst, size_right, MPI_DOUBLE, receiver, 1, comm,
              &sendfirstreq);
   }
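`MPI_CommunicateNeighbours` is a standard nonblocking halo exchange: post the receives, pack and post the sends, then wait before unpacking (the `MPI_Wait` calls sit in the elided lines). The same skeleton in miniature, with one `double` per direction instead of packed layer buffers:

```cpp
#include <mpi.h>

// Stripped-down halo exchange mirroring the Irecv/Isend structure above:
// every rank sends its last value right (tag 0) and its first value left
// (tag 1), nonblocking so the two directions overlap.
void halo_exchange(double first, double last, double *from_left,
                   double *from_right, MPI_Comm comm) {
  int myid, size;
  MPI_Comm_rank(comm, &myid);
  MPI_Comm_size(comm, &size);

  MPI_Request reqs[4];
  int nreq = 0;
  if (myid > 0) {  // exchange with the left neighbour
    MPI_Irecv(from_left, 1, MPI_DOUBLE, myid - 1, 0, comm, &reqs[nreq++]);
    MPI_Isend(&first, 1, MPI_DOUBLE, myid - 1, 1, comm, &reqs[nreq++]);
  }
  if (myid < size - 1) {  // exchange with the right neighbour
    MPI_Irecv(from_right, 1, MPI_DOUBLE, myid + 1, 1, comm, &reqs[nreq++]);
    MPI_Isend(&last, 1, MPI_DOUBLE, myid + 1, 0, comm, &reqs[nreq++]);
  }
  MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);  // complete before unpacking
}
```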
@@ -416,8 +416,8 @@ void Network::MPI_CommunicateNeighbours(MPI_Comm comm) {
   if (recvfirst != 0) delete[] recvfirst;
 }

-void Network::evalClassification(DataSet *data, MyReal **state, int output) {
-  MyReal *tmpstate = new MyReal[nchannels];
+void Network::evalClassification(DataSet *data, double **state, int output) {
+  double *tmpstate = new double[nchannels];
   int class_id;
   int success, success_local;
@@ -454,7 +454,7 @@ void Network::evalClassification(DataSet *data, MyReal **state, int output) {
     if (output) fprintf(classfile, "%d %d\n", class_id, success_local);
   }
   loss = 1. / data->getnBatch() * loss;
-  accuracy = 100.0 * ((MyReal)success) / data->getnBatch();
+  accuracy = 100.0 * ((double)success) / data->getnBatch();
   // printf("Classification %d: %1.14e using layer %1.14e state %1.14e
   // tmpstate[0] %1.14e\n", getIndex(), loss, weights[0], state[1][1],
   // tmpstate[0]);
@@ -465,10 +465,10 @@ void Network::evalClassification(DataSet *data, MyReal **state, int output) {
   delete[] tmpstate;
 }

-void Network::evalClassification_diff(DataSet *data, MyReal **primalstate,
-                                      MyReal **adjointstate,
+void Network::evalClassification_diff(DataSet *data, double **primalstate,
+                                      double **adjointstate,
                                       int compute_gradient) {
-  MyReal *tmpstate = new MyReal[nchannels];
+  double *tmpstate = new double[nchannels];
   ClassificationLayer *classificationlayer;

   /* Get classification layer */
@@ -480,7 +480,7 @@ void Network::evalClassification_diff(DataSet *data, MyReal **primalstate,
   }
   int nbatch = data->getnBatch();
-  MyReal loss_bar = 1. / nbatch;
+  double loss_bar = 1. / nbatch;

   for (int iex = 0; iex < nbatch; iex++) {
     /* Recompute the Classification */
@@ -503,7 +503,7 @@ void Network::evalClassification_diff(DataSet *data, MyReal **primalstate,
   delete[] tmpstate;
 }

-void Network::updateDesign(MyReal stepsize, MyReal *direction, MPI_Comm comm) {
+void Network::updateDesign(double stepsize, double *direction, MPI_Comm comm) {
   /* Update design locally on this network-block */
   for (int id = 0; id < ndesign_local; id++) {
     design[id] += stepsize * direction[id];
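`evalClassification` reduces each example to a predicted class and counts hits; the `accuracy` line above is that count rescaled to a percentage. A toy version of the same bookkeeping, using `argvecmax` from linalg.cpp (shapes and one-hot labels assumed for illustration):

```cpp
#include "linalg.hpp"

// Toy accuracy bookkeeping: predict the argmax of the class scores,
// check it against the one-hot label, rescale the hit count.
double accuracy_demo(double **scores, double **labels, int nbatch,
                     int nclasses) {
  int success = 0;
  for (int iex = 0; iex < nbatch; iex++) {
    int class_id = argvecmax(nclasses, scores[iex]);  // predicted class
    if (labels[iex][class_id] > 0.5) success++;       // one-hot label hit
  }
  return 100.0 * ((double)success) / nbatch;
}
```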
diff --git a/src/util.cpp b/src/util.cpp
index f0f2f0d..e821d70 100644
--- a/src/util.cpp
+++ b/src/util.cpp
@@ -1,8 +1,8 @@
 #include "util.hpp"

-void read_matrix(char *filename, MyReal **var, int dimx, int dimy) {
+void read_matrix(char *filename, double **var, int dimx, int dimy) {
   FILE *file;
-  MyReal tmp;
+  double tmp;

   /* Open file */
   file = fopen(filename, "r");
@@ -23,9 +23,9 @@ void read_matrix(char *filename, MyReal **var, int dimx, int dimy) {
   fclose(file);
 }

-void read_vector(char *filename, MyReal *var, int dimx) {
+void read_vector(char *filename, double *var, int dimx) {
   FILE *file;
-  MyReal tmp;
+  double tmp;

   /* Open file */
   file = fopen(filename, "r");
@@ -44,7 +44,7 @@ void read_vector(char *filename, MyReal *var, int dimx) {
   fclose(file);
 }

-void write_vector(char *filename, MyReal *var, int dimN) {
+void write_vector(char *filename, double *var, int dimN) {
   FILE *file;
   int i;
@@ -65,8 +65,8 @@ void write_vector(char *filename, MyReal *var, int dimN) {
   fclose(file);
 }

-void MPI_GatherVector(MyReal *sendbuffer, int localsendcount,
-                      MyReal *recvbuffer, int rootprocessID, MPI_Comm comm) {
+void MPI_GatherVector(double *sendbuffer, int localsendcount,
+                      double *recvbuffer, int rootprocessID, MPI_Comm comm) {
   int comm_size;
   MPI_Comm_size(comm, &comm_size);
@@ -84,15 +84,15 @@ void MPI_GatherVector(MyReal *sendbuffer, int localsendcount,
   }

   /* Gatherv the vector */
-  MPI_Gatherv(sendbuffer, localsendcount, MPI_MyReal, recvbuffer, recvcount,
-              displs, MPI_MyReal, rootprocessID, comm);
+  MPI_Gatherv(sendbuffer, localsendcount, MPI_DOUBLE, recvbuffer, recvcount,
+              displs, MPI_DOUBLE, rootprocessID, comm);

   /* Clean up */
   delete[] recvcount;
   delete[] displs;
 }

-void MPI_ScatterVector(MyReal *sendbuffer, MyReal *recvbuffer,
+void MPI_ScatterVector(double *sendbuffer, double *recvbuffer,
                        int localrecvcount, int rootprocessID, MPI_Comm comm) {
   int comm_size;
   MPI_Comm_size(comm, &comm_size);
@@ -111,8 +111,8 @@ void MPI_ScatterVector(MyReal *sendbuffer, MyReal *recvbuffer,
   }

   /* Scatterv the vector */
-  MPI_Scatterv(sendbuffer, sendcount, displs, MPI_MyReal, recvbuffer,
-               localrecvcount, MPI_MyReal, rootprocessID, comm);
+  MPI_Scatterv(sendbuffer, sendcount, displs, MPI_DOUBLE, recvbuffer,
+               localrecvcount, MPI_DOUBLE, rootprocessID, comm);

   /* Clean up */
   delete[] sendcount;