From 79d54372a00dc500e86ef54e2a35b1373b36f8f7 Mon Sep 17 00:00:00 2001
From: Roland Siegbert
Date: Sun, 14 Jul 2019 10:46:15 +0200
Subject: [PATCH 1/8] Fix null pointer constant in HessianApprox

---
 src/main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main.cpp b/src/main.cpp
index 37c0b67..0eee9ca 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -149,7 +149,7 @@ int main(int argc, char *argv[]) {
                       ndesign_global);
 
   /* Initialize Hessian approximation */
-  HessianApprox *hessian = 0;
+  HessianApprox *hessian;
   switch (config->hessianapprox_type) {
     case BFGS_SERIAL:
       hessian = new BFGS(MPI_COMM_WORLD, ndesign_local);

From 63a0d5b50c58436c85d8dde3a135c4fc5bcdde3b Mon Sep 17 00:00:00 2001
From: Stefanie Guenther
Date: Mon, 22 Jul 2019 22:43:27 -0700
Subject: [PATCH 2/8] Update peaks example config.

The peaks example now runs with N=32 layers. Validation accuracy is
about 90% after 130 iterations. Uses one-shot optimization, i.e. only
2 xBraid iterations in each optimization cycle.
---
 examples/peaks/peaks.cfg | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/examples/peaks/peaks.cfg b/examples/peaks/peaks.cfg
index fb0af31..9f1d445 100644
--- a/examples/peaks/peaks.cfg
+++ b/examples/peaks/peaks.cfg
@@ -3,7 +3,7 @@
 ################################
 
 # relative data folder location
-datafolder = examples/peaks
+datafolder = ./
 # filename of training data feature vectors
 ftrain_ex = features_training.dat
 # filename of training data labels/classes
@@ -35,7 +35,7 @@ nchannels = 8
 # number of layers (including opening layer and classification layer) (nlayer >= 3 !)
 nlayers = 32
 # final time
-T = 5.0
+T = 1.0
 # Activation function ("tanh" or "ReLu" or "SmoothReLu")
 activation = SmoothReLu
 # Type of network ("dense" the default, or "convolutional")
@@ -47,7 +47,7 @@ type_openlayer = activate
 # factor for scaling initial opening layer weights and bias
 weights_open_init = 1e-3
 # factor for scaling initial weights and bias of intermediate layers
-weights_init = 0e-3
+weights_init = 1e-3
 # factor for scaling initial classification weights and bias
 weights_class_init = 1e-3
 
@@ -66,7 +66,7 @@ braid_maxlevels = 10
 # minimum allowed coarse time grid size (values in 10-30 are usually best)
 braid_mincoarse = 10
 # maximum number of iterations
-braid_maxiter = 15
+braid_maxiter = 2
 # absolute tolerance
 braid_abstol = 1e-15
 # absolute adjoint tolerance
@@ -94,7 +94,7 @@ nbatch = 5000
 # relaxation param for tikhonov term
 gamma_tik = 1e-7
 # relaxation param for time-derivative term
-gamma_ddt = 1e-7
+gamma_ddt = 1e-5
 # relaxation param for tikhonov term of classification weights
 gamma_class = 1e-7
 # stepsize selection type ("fixed" or "backtrackingLS" or "oneoverk")
@@ -106,19 +106,19 @@ stepsize_type = backtrackingLS
 # initial stepsize
 stepsize = 1.0
 # maximum number of optimization iterations
-optim_maxiter = 10
+optim_maxiter = 130
 # absolute stopping criterion for the gradient norm
 gtol = 1e-4
 # maximum number of linesearch iterations
-ls_maxiter = 20
+ls_maxiter = 15
 # factor for modifying the stepsize within a linesearch iteration
 ls_factor = 0.5
 # Hessian Approximation ("BFGS", "L-BFGS" or "Identity")
 hessian_approx = L-BFGS
 # number of stages for l-bfgs method
-lbfgs_stages = 20
+lbfgs_stages = 10
 # level for validation computation:
 #  -1 = never validate
 #   0 = validate only after optimization finishes.
 #   1 = validate in each optimization iteration
-validationlevel = 0
+validationlevel = 1
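Aside: patch 1 above removed the zero initializer, which leaves hessian
uninitialized until one of the switch cases assigns it; patch 3 below
restores a checkable initialization with NULL. A minimal standalone sketch
of why that matters, using hypothetical stand-in types (the real
HessianApprox hierarchy lives in the repo's headers):

    /* Hypothetical stand-ins for the repo's types, for illustration only. */
    #include <cstdio>

    struct HessianApprox { virtual ~HessianApprox() {} };
    struct Identity : HessianApprox {};

    enum HessianType { IDENTITY, UNKNOWN };

    HessianApprox *createHessian(HessianType t) {
      HessianApprox *hessian = NULL; /* the pattern patch 3 restores */
      switch (t) {
        case IDENTITY:
          hessian = new Identity();
          break;
        default:
          break; /* hessian stays NULL instead of pointing at garbage */
      }
      return hessian;
    }

    int main() {
      HessianApprox *h = createHessian(UNKNOWN);
      if (h == NULL) { /* testable only because of the initialization */
        printf("no Hessian approximation selected\n");
        return 1;
      }
      delete h;
      return 0;
    }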
From 85f6622556b109681944b8f5858492a95d0e2e00 Mon Sep 17 00:00:00 2001
From: Roland Siegbert
Date: Tue, 23 Jul 2019 15:12:25 +0200
Subject: [PATCH 3/8] Initialize hessian with NULL

---
 src/main.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main.cpp b/src/main.cpp
index 0eee9ca..c370df5 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -149,7 +149,7 @@ int main(int argc, char *argv[]) {
                       ndesign_global);
 
   /* Initialize Hessian approximation */
-  HessianApprox *hessian;
+  HessianApprox *hessian = NULL;
   switch (config->hessianapprox_type) {
     case BFGS_SERIAL:
       hessian = new BFGS(MPI_COMM_WORLD, ndesign_local);

From 9be5dd6bb78ee6ded78478a5efe1f11e43edbc01 Mon Sep 17 00:00:00 2001
From: steffi7574
Date: Tue, 23 Jul 2019 10:00:25 -0700
Subject: [PATCH 4/8] Update email address in README.md Contributors

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e7803d6..e7765df 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ An optimization history file 'optim.dat' will be flushed to the examples subfold
 
 ## Contributors
 
-* Stefanie Guenther
+* Stefanie Guenther
 * Eric C. Cyr
 * J.B. Schroder
 * Roland A. Siegbert

From 969d65c32b770004e3a942baa891fcda90ad152f Mon Sep 17 00:00:00 2001
From: Stefanie Guenther
Date: Wed, 24 Jul 2019 22:23:39 -0700
Subject: [PATCH 5/8] Minor comment changes in main

---
 src/main.cpp | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/main.cpp b/src/main.cpp
index ceafb6a..68965f0 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -77,15 +77,14 @@ int main(int argc, char *argv[]) {
   MyReal ls_objective, test_obj;
   int ls_iter;
 
-  /* --- other --- */
-  // TODO: What is this? Why do you need it?
-  int myid;
-  int size;
+  /* --- Time measurements --- */
   struct rusage r_usage;
   MyReal StartTime, StopTime, myMB, globalMB;
   MyReal UsedTime = 0.0;
 
   /* Initialize MPI */
+  int myid;
+  int size;
   MPI_Init(&argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &myid);
   MPI_Comm_size(MPI_COMM_WORLD, &size);
@@ -162,8 +161,6 @@ int main(int argc, char *argv[]) {
     return 0;
   }
 
-  /* Allocate ascent direction for design updates */
-
   /* Initialize optimization parameters */
   ascentdir = new MyReal[ndesign_local];
   stepsize = config->getStepsize(0);
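Aside: patch 5 groups the rank/size variables with the MPI setup they
belong to. For reference, a self-contained sketch of this standard MPI
initialization pattern (compile with mpicxx; not code from the repo):

    #include <mpi.h>
    #include <cstdio>

    int main(int argc, char *argv[]) {
      int myid, size;
      MPI_Init(&argc, &argv);
      MPI_Comm_rank(MPI_COMM_WORLD, &myid); /* rank of this process */
      MPI_Comm_size(MPI_COMM_WORLD, &size); /* total number of processes */
      printf("This is process %d of %d\n", myid, size);
      MPI_Finalize();
      return 0;
    }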
From d7c0fbfb8e99391fd03305d66e99e57be414f33d Mon Sep 17 00:00:00 2001
From: Stefanie Guenther
Date: Mon, 19 Aug 2019 22:03:48 -0700
Subject: [PATCH 6/8] Adding some comments for Network and Layer headers.

Not very much though... Sorry.
---
 include/layer.hpp   |  7 ++++---
 include/network.hpp | 18 +++++++++++++++++-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/include/layer.hpp b/include/layer.hpp
index 036ead8..bc0ea6c 100644
--- a/include/layer.hpp
+++ b/include/layer.hpp
@@ -62,7 +62,8 @@ class Layer {
   /* Set time step size */
   void setDt(MyReal DT);
 
-  /* Set design and gradient memory location */
+  /* Set design and gradient memory location.
+   * The design vector is allocated within the Network block. For each layer in the block, the local memory location within the network's design vector is passed here to the layer and stored as *weights and *bias (and their derivatives weights_bar and bias_bar). */
   void setMemory(MyReal *design_memloc, MyReal *gradient_memloc);
 
   /* Some Get..() functions */
@@ -90,13 +91,13 @@ class Layer {
   int getnConv();
   int getCSize();
 
-  /* Get the layer index (i.e. the time step) */
+  /* Get the layer's ID (i.e. the time step number) */
   int getIndex();
 
   /* Prints to screen */
   void print_data(MyReal *data_Out);
 
-  /* Activation function and derivative */
+  /* Applies the activation function and derivative */
   MyReal activation(MyReal x);
   MyReal dactivation(MyReal x);

diff --git a/include/network.hpp b/include/network.hpp
index 3b6996d..2337263 100644
--- a/include/network.hpp
+++ b/include/network.hpp
@@ -9,13 +9,20 @@
 #include "util.hpp"
 #pragma once
 
+/*
+ * The Network class logically connects the layers.
+ * Each processor instantiates one object of this class containing
+ * a sub-block of layers from [startlayerID, endlayerID], where those IDs are anything between -1 (the opening layer) and nlayers_global-1 (the classification layer). The distribution of the actual startlayerIDs and endlayerIDs over the processors comes from XBraid.
+ * All layers are stored in the vector **layer, except for the opening layer, which is stored in *openlayer.
+ * Each network block contains (and allocates!) the *design and *gradient vectors, which hold the vectorized weights and biases of each layer (see createNetworkBlock).
+ */
 class Network {
  protected:
   int nlayers_global; /* Total number of Layers of the network */
   int nlayers_local;  /* Number of Layers in this network block */
   int nchannels;      /* Width of the network */
-  MyReal dt;          /* Time step size */
+  MyReal dt;          /* Time step size (distance between two layers) */
   MyReal loss;        /* Value of the loss function */
   MyReal accuracy;    /* Accuracy of the network prediction (percentage of
                          successfully predicted classes) */
@@ -44,6 +51,10 @@ class Network {
 
   ~Network();
 
+  /*
+   * This calls the layer constructor for all layers in
+   * [StartLayerID, EndLayerID].
+   */
   void createNetworkBlock(int StartLayerID, int EndLayerID, Config *config,
                           MPI_Comm Comm);
@@ -105,6 +116,8 @@ class Network {
    */
   void setInitialDesign(Config *config);
 
+  /* Helper function for createNetworkBlock: checks which kind of layer is required at this index and calls the corresponding layer constructor.
+   */
   Layer *createLayer(int index, Config *config);
 
   /* Replace the layer with one that is received from the left neighbouring

   /**
    * Applies the classification and evaluates loss/accuracy.
+   * This routine should only be called on the last processor, which contains the classification layer.
+   * Maybe this one should not be inside the Network class?
    */
   void evalClassification(DataSet *data, MyReal **state, int output);

   /**
    * Update the network design parameters: new_design = old_design + stepsize *
    * direction
+   * I guess this might rather be a routine for the optimizer...
    */
   void updateDesign(MyReal stepsize, MyReal *direction, MPI_Comm comm);
 };
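Aside: the new setMemory comment in patch 6 describes a common layout:
the network block allocates one contiguous design vector and hands each
layer a pointer into it. A simplified, hypothetical sketch of that scheme
(names like LayerView are illustrative, not the repo's API):

    #include <cstddef>

    typedef double MyReal;

    /* Illustrative stand-in for a layer; the real Layer class stores more. */
    struct LayerView {
      MyReal *weights;
      MyReal *bias;
      void setMemory(MyReal *design_memloc, int nweights) {
        weights = design_memloc;         /* weights come first ... */
        bias = design_memloc + nweights; /* ... then the bias      */
      }
    };

    int main() {
      const int nlayers = 3, nweights = 4, nbias = 1;
      const int ndesign_local = nlayers * (nweights + nbias);
      MyReal *design = new MyReal[ndesign_local]; /* owned by the network block */

      LayerView layers[nlayers];
      int istart = 0;
      for (int i = 0; i < nlayers; i++) {
        layers[i].setMemory(design + istart, nweights); /* hand out a slice */
        istart += nweights + nbias;
      }

      delete[] design;
      return 0;
    }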
From 16f3fc50b095179788c01d5cfe0c0fe2d0dfebf4 Mon Sep 17 00:00:00 2001
From: Stefanie Guenther
Date: Mon, 19 Aug 2019 22:21:16 -0700
Subject: [PATCH 7/8] more comments

---
 include/braid_wrapper.hpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/include/braid_wrapper.hpp b/include/braid_wrapper.hpp
index 91bedb5..cf461d8 100644
--- a/include/braid_wrapper.hpp
+++ b/include/braid_wrapper.hpp
@@ -10,15 +10,14 @@
 #pragma once
 
 /**
- * Define the state vector at one time-step
+ * Defines the network state at one layer. It contains the transformed data batch in the vector **state, and a pointer to the actual layer.
  */
 class myBraidVector {
  protected:
   int nbatch;    /* Number of examples */
   int nchannels; /* Number of channels */
 
-  MyReal *
-      *state; /* Network state at one layer, dimensions: nbatch * nchannels */
+  MyReal **state; /* Network state at one layer, dimensions: nbatch * nchannels */
   Layer *layer;   /* Pointer to layer information */
 
   /* Flag that determines if the layer and state have just been received and
@@ -51,7 +50,7 @@ class myBraidVector {
 };
 
 /**
- * Wrapper for the primal braid app.
+ * Wrapper for the primal braid app. The most important routines are the Step function, which applies the layer transformation (and hence steps forward to the next layer), and the EvaluateObjective function, which (surprise!) evaluates the loss function and adds the regularization terms to get the objective function value.
  * virtual functions are overwritten from the adjoint app class
  */
 class myBraidApp : public BraidApp {

From 6978cdd28aba3db208ed3f66f6f79235683354 Mon Sep 17 00:00:00 2001
From: Stefanie Guenther
Date: Mon, 19 Aug 2019 22:22:01 -0700
Subject: [PATCH 8/8] more comments

---
 include/braid_wrapper.hpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/braid_wrapper.hpp b/include/braid_wrapper.hpp
index cf461d8..5af32a3 100644
--- a/include/braid_wrapper.hpp
+++ b/include/braid_wrapper.hpp
@@ -50,8 +50,9 @@ class myBraidVector {
 };
 
 /**
- * Wrapper for the primal braid app. The most important routines are the Step function, which applies the layer transformation (and hence steps forward to the next layer), and the EvaluateObjective function, which (surprise!) evaluates the loss function and adds the regularization terms to get the objective function value.
- * virtual functions are overwritten from the adjoint app class
+ * Wrapper for the primal braid app. The most important routines are the Step function, which applies the layer transformations (and hence steps forward to the next layer), the SetInitialCondition function, which applies the opening layer, and the EvaluateObjective function, which (surprise!) evaluates the loss function and adds the regularization terms to get the objective function value.
+ *
+ * The adjoint braid app inherits from this class and overwrites those routines with the corresponding derivatives.
  */
 class myBraidApp : public BraidApp {
 protected:
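Aside: the comments in patches 7 and 8 describe the Step function as
applying one layer transformation per time step, with dt being the distance
between two layers (patch 6). Assuming a dense residual layer of the kind
these headers suggest, one such step might look roughly like the sketch
below; the update rule is an assumption for illustration, not the repo's
actual implementation:

    #include <cmath>
    #include <vector>

    typedef double MyReal;

    /* One forward-Euler step through a dense layer for a single example:
     * u <- u + dt * tanh(W u + b), with W an n x n row-major matrix.
     * Applying this to all nbatch examples advances the whole **state. */
    void layerStep(std::vector<MyReal> &u, const std::vector<MyReal> &W,
                   const std::vector<MyReal> &b, MyReal dt) {
      const std::size_t n = u.size();
      std::vector<MyReal> update(n, 0.0);
      for (std::size_t i = 0; i < n; i++) {
        MyReal s = b[i];
        for (std::size_t j = 0; j < n; j++) s += W[i * n + j] * u[j];
        update[i] = std::tanh(s); /* the config also offers (Smooth)ReLu */
      }
      for (std::size_t i = 0; i < n; i++) u[i] += dt * update[i];
    }

    int main() {
      std::vector<MyReal> u = {1.0, -0.5};          /* nchannels = 2 */
      std::vector<MyReal> W = {0.1, 0.0, 0.0, 0.1}; /* 2x2, row-major */
      std::vector<MyReal> b = {0.0, 0.0};
      layerStep(u, W, b, 0.1); /* dt roughly T / nlayers */
      return 0;
    }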