2 changes: 1 addition & 1 deletion README.md
@@ -20,7 +20,7 @@ An optimization history file 'optim.dat' will be flushed to the examples subfold

## Contributors

-* Stefanie Guenther <stefanie.guenther@scicomp.uni-kl.de>
+* Stefanie Guenther <guenther5@llnl.gov>
* Eric C. Cyr <eccyr@sandia.gov>
* J.B. Schroder <jbschroder@unm.edu>
* Roland A. Siegbert <roland.siegbert@rwth-aachen.de>
16 changes: 8 additions & 8 deletions examples/peaks/peaks.cfg
@@ -3,7 +3,7 @@
################################

# relative data folder location
-datafolder = examples/peaks
+datafolder = ./
# filename of training data feature vectors
ftrain_ex = features_training.dat
# filename of training data labels/classes
@@ -47,7 +47,7 @@ type_openlayer = activate
# factor for scaling initial opening layer weights and bias
weights_open_init = 1e-3
# factor for scaling initial weights and bias of intermediate layers
-weights_init = 0e-3
+weights_init = 1e-3
# factor for scaling initial classification weights and bias
weights_class_init = 1e-3

@@ -66,7 +66,7 @@ braid_maxlevels = 10
# minimum allowed coarse time grid size (values in 10-30 are usually best)
braid_mincoarse = 10
# maximum number of iterations
-braid_maxiter = 15
+braid_maxiter = 2
# absolute tolerance
braid_abstol = 1e-15
# absolute adjoint tolerance
@@ -100,7 +100,7 @@ nbatch = 5000
# relaxation param for tikhonov term
gamma_tik = 1e-7
# relaxation param for time-derivative term
-gamma_ddt = 1e-7
+gamma_ddt = 1e-5
# relaxation param for tikhonov term of classification weights
gamma_class = 1e-7
# stepsize selection type ("fixed" or "backtrackingLS" or "oneoverk")
@@ -112,19 +112,19 @@ stepsize_type = backtrackingLS
# initial stepsize
stepsize = 1.0
# maximum number of optimization iterations
-optim_maxiter = 10
+optim_maxiter = 130
# absolute stopping criterion for the gradient norm
gtol = 1e-4
# maximum number of linesearch iterations
-ls_maxiter = 20
+ls_maxiter = 15
# factor for modifying the stepsize within a linesearch iteration
ls_factor = 0.5
# Hessian Approximation ("BFGS", "L-BFGS" or "Identity")
hessian_approx = L-BFGS
# number of stages for l-bfgs method
-lbfgs_stages = 20
+lbfgs_stages = 10
# level for validation computation:
# -1 = never validate
# 0 = validate only after optimization finishes.
# 1 = validate in each optimization iteration
-validationlevel = 0
+validationlevel = 1
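The changed line-search settings above interact: starting from the initial `stepsize`, a backtracking search multiplies the step by `ls_factor` (0.5) until sufficient decrease is found or `ls_maxiter` trials are exhausted. A minimal, self-contained sketch of that loop follows (illustrative only; `evalObjective` and `evalGradient` are hypothetical stand-ins, not this repo's API):

```cpp
#include <cstdio>

// Toy 1-D objective and gradient, stand-ins for the network objective.
static double evalObjective(double x) { return (x - 3.0) * (x - 3.0); }
static double evalGradient(double x) { return 2.0 * (x - 3.0); }

int main() {
  double x = 0.0;               // current design (1-D for illustration)
  double stepsize = 1.0;        // cfg: stepsize = 1.0
  const double ls_factor = 0.5; // cfg: ls_factor = 0.5
  const int ls_maxiter = 15;    // cfg: ls_maxiter = 15

  double obj = evalObjective(x);
  double grad = evalGradient(x);
  double direction = -grad;     // descent direction

  /* Backtracking: shrink the step until an Armijo-type decrease holds. */
  for (int ls_iter = 0; ls_iter < ls_maxiter; ls_iter++) {
    double trial = x + stepsize * direction;
    if (evalObjective(trial) < obj + 1e-4 * stepsize * grad * direction) {
      x = trial;
      break;
    }
    stepsize *= ls_factor;      // cfg: factor for modifying the stepsize
  }
  printf("new design: %f\n", x);
  return 0;
}
```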
10 changes: 5 additions & 5 deletions include/braid_wrapper.hpp
@@ -10,15 +10,14 @@
#pragma once

/**
- * Define the state vector at one time-step
+ * Defines the network state at one layer. It contains the transformed data
+ * batch in the vector **state, and a pointer to the corresponding layer.
 */
class myBraidVector {
protected:
int nbatch; /* Number of examples */
int nchannels; /* Number of channels */

-  MyReal *
-      *state; /* Network state at one layer, dimensions: nbatch * nchannels */
+  MyReal **state; /* Network state at one layer, dimensions: nbatch * nchannels */
Layer *layer; /* Pointer to layer information */

/* Flag that determines if the layer and state have just been received and
@@ -51,8 +50,9 @@ class myBraidVector {
};

/**
- * Wrapper for the primal braid app.
- * virtual function are overwritten from the adjoint app class
+ * Wrapper for the primal braid app. The most important routines are Step,
+ * which applies the layer transformation (and hence steps forward to the
+ * next layer); SetInitialCondition, which applies the opening layer; and
+ * EvaluateObjective, which evaluates the loss function and adds the
+ * regularization terms to obtain the objective function value.
+ *
+ * The adjoint braid app inherits from this class and overrides these
+ * routines with the corresponding derivatives.
 */
class myBraidApp : public BraidApp {
protected:
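To make the Step/state description above concrete, here is a minimal sketch of advancing an nbatch x nchannels state by one layer. Assumptions: `ToyLayer` and its `applyFWD` are invented stand-ins for illustration, not this repo's classes.

```cpp
#include <cmath>
#include <vector>

// Hypothetical residual-type layer: state <- state + dt * tanh(state).
struct ToyLayer {
  double dt;
  void applyFWD(std::vector<double>& channels) const {
    for (double& c : channels) c += dt * std::tanh(c);
  }
};

// One Step: apply the layer to every example in the batch, advancing the
// network state (dimensions: nbatch * nchannels) to the next layer.
void step(std::vector<std::vector<double>>& state, const ToyLayer& layer) {
  for (auto& example : state) layer.applyFWD(example);
}
```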
7 changes: 4 additions & 3 deletions include/layer.hpp
@@ -62,7 +62,8 @@ class Layer {
/* Set time step size */
void setDt(MyReal DT);

-  /* Set design and gradient memory location */
+  /* Set the design and gradient memory locations.
+   * The design vector is allocated within the Network block. For each layer
+   * in the block, the layer's local memory location inside the network's
+   * design vector is passed in here and stored as *weights and *bias (and
+   * their derivatives weights_bar and bias_bar). */
void setMemory(MyReal *design_memloc, MyReal *gradient_memloc);

/* Some Get..() functions */
@@ -90,13 +91,13 @@
int getnConv();
int getCSize();

-  /* Get the layer index (i.e. the time step) */
+  /* Get the layer's ID (i.e. the time step number) */
int getIndex();

/* Prints to screen */
void print_data(MyReal *data_Out);

-  /* Activation function and derivative */
+  /* Apply the activation function and its derivative */
MyReal activation(MyReal x);
MyReal dactivation(MyReal x);

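The new setMemory comment describes one contiguous design vector owned by the Network, with each layer pointing into its own slice. A small illustrative sketch of that layout, under the assumption of a fixed weights-then-bias split per layer (`ToySlice` is invented for illustration):

```cpp
#include <cstdio>

// The Network owns one contiguous design array; each layer stores pointers
// into its own slice, split into weights followed by bias.
struct ToySlice {
  double* weights;  // points into the network's design vector
  double* bias;     // located directly after the weights
};

int main() {
  const int nlayers = 3, nweights = 4, nbias = 1;
  const int ndesign_per_layer = nweights + nbias;
  double design[nlayers * ndesign_per_layer] = {0.0};

  ToySlice layer[nlayers];
  for (int l = 0; l < nlayers; l++) {
    double* memloc = design + l * ndesign_per_layer;  // layer's local offset
    layer[l].weights = memloc;                        // first nweights entries
    layer[l].bias = memloc + nweights;                // remaining nbias entries
  }

  layer[1].bias[0] = 0.5;  // writes straight into the shared design vector
  printf("design[%d] = %f\n", ndesign_per_layer + nweights,
         design[ndesign_per_layer + nweights]);
  return 0;
}
```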
18 changes: 17 additions & 1 deletion include/network.hpp
@@ -9,13 +9,20 @@
#include "util.hpp"
#pragma once

+/*
+ * The Network class logically connects the layers.
+ * Each processor instantiates one object of this class, holding a sub-block
+ * of layers [startlayerID, endlayerID], where these IDs range from -1 (the
+ * opening layer) to nlayers_global-1 (the classification layer). The
+ * distribution of startlayerID and endlayerID across the processors comes
+ * from XBraid.
+ * All layers are stored in the vector **layer, except for the opening layer,
+ * which is stored in *openlayer.
+ * Each network block contains (and allocates!) the *design and *gradient
+ * vectors, which hold the vectorized weights and biases of all its layers
+ * (see createNetworkBlock).
+ */
class Network {
protected:
int nlayers_global; /* Total number of Layers of the network */
int nlayers_local; /* Number of Layers in this network block */

int nchannels; /* Width of the network */
-  MyReal dt; /* Time step size */
+  MyReal dt; /* Time step size (distance between two layers). */
MyReal loss; /* Value of the loss function */
MyReal accuracy; /* Accuracy of the network prediction (percentage of
successfully predicted classes) */
@@ -44,6 +51,10 @@ class Network {

~Network();

+  /*
+   * Calls the layer constructor for each layer in [StartLayerID, EndLayerID].
+   */
void createNetworkBlock(int StartLayerID, int EndLayerID, Config *config,
MPI_Comm Comm);

@@ -105,6 +116,8 @@ class Network {
*/
void setInitialDesign(Config *config);

+  /* Helper function for createNetworkBlock: checks which kind of layer is
+   * required at the given index and calls the corresponding layer
+   * constructor. */
Layer *createLayer(int index, Config *config);

/* Replace the layer with one that is received from the left neighbouring
@@ -113,6 +126,8 @@

/**
* Applies the classification and evaluates loss/accuracy
+ * This routine should only be called on the last processor, which holds the
+ * classification layer.
+ * (It is debatable whether this routine belongs inside the Network class.)
*/
void evalClassification(DataSet *data, MyReal **state, int output);

@@ -125,6 +140,7 @@
/**
* Update the network design parameters: new_design = old_design + stepsize *
* direction
+ * (This might rather belong in the optimizer.)
*/
void updateDesign(MyReal stepsize, MyReal *direction, MPI_Comm comm);
};
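The updateDesign rule documented above, new_design = old_design + stepsize * direction, is a plain axpy-style update. A minimal sketch, ignoring the MPI communicator that the real routine also takes (`updateDesignToy` is a hypothetical name):

```cpp
// Sketch of new_design = old_design + stepsize * direction, applied in place
// to the local block of ndesign_local design variables. Illustrative only.
void updateDesignToy(double* design, const double* direction, double stepsize,
                     int ndesign_local) {
  for (int i = 0; i < ndesign_local; i++) {
    design[i] += stepsize * direction[i];
  }
}
```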
11 changes: 4 additions & 7 deletions src/main.cpp
@@ -77,15 +77,14 @@ int main(int argc, char *argv[]) {
MyReal ls_objective, test_obj;
int ls_iter;

-  /* --- other --- */
-  // TODO: What is this? Why do you need it?
-  int myid;
-  int size;
/* --- Time measurements --- */
struct rusage r_usage;
MyReal StartTime, StopTime, myMB, globalMB;
MyReal UsedTime = 0.0;

/* Initialize MPI */
+  int myid;
+  int size;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &size);
@@ -146,7 +145,7 @@
ndesign_global);

/* Initialize Hessian approximation */
-  HessianApprox *hessian = 0;
+  HessianApprox *hessian = NULL;
switch (config->hessianapprox_type) {
case BFGS_SERIAL:
hessian = new BFGS(MPI_COMM_WORLD, ndesign_local);
@@ -162,8 +161,6 @@
return 0;
}

-  /* Allocate ascent direction for design updates */
-
/* Initialize optimization parameters */
ascentdir = new MyReal[ndesign_local];
stepsize = config->getStepsize(0);