2 changes: 1 addition & 1 deletion README.md
@@ -20,7 +20,7 @@ An optimization history file 'optim.dat' will be flushed to the examples subfold

## Contributors

-* Stefanie Guenther <stefanie.guenther@scicomp.uni-kl.de>
+* Stefanie Guenther <guenther5@llnl.gov>
* Eric C. Cyr <eccyr@sandia.gov>
* J.B. Schroder <jbschroder@unm.edu>
* Roland A. Siegbert <roland.siegbert@rwth-aachen.de>
16 changes: 8 additions & 8 deletions examples/peaks/peaks.cfg
@@ -3,7 +3,7 @@
################################

# relative data folder location
-datafolder = examples/peaks
+datafolder = ./
# filename of training data feature vectors
ftrain_ex = features_training.dat
# filename of training data labels/classes
@@ -47,7 +47,7 @@ type_openlayer = activate
# factor for scaling initial opening layer weights and bias
weights_open_init = 1e-3
# factor for scaling initial weights and bias of intermediate layers
-weights_init = 0e-3
+weights_init = 1e-3
# factor for scaling initial classification weights and bias
weights_class_init = 1e-3

@@ -66,7 +66,7 @@ braid_maxlevels = 10
# minimum allowed coarse time grid size (values in 10-30 are usually best)
braid_mincoarse = 10
# maximum number of iterations
-braid_maxiter = 15
+braid_maxiter = 2
# absolute tolerance
braid_abstol = 1e-15
# absolute adjoint tolerance
@@ -100,7 +100,7 @@ nbatch = 5000
# relaxation param for tikhonov term
gamma_tik = 1e-7
# relaxation param for time-derivative term
-gamma_ddt = 1e-7
+gamma_ddt = 1e-5
# relaxation param for tikhonov term of classification weights
gamma_class = 1e-7
# stepsize selection type ("fixed" or "backtrackingLS" or "oneoverk")
@@ -112,19 +112,19 @@ stepsize_type = backtrackingLS
# initial stepsize
stepsize = 1.0
# maximum number of optimization iterations
-optim_maxiter = 10
+optim_maxiter = 130
# absolute stopping criterion for the gradient norm
gtol = 1e-4
# maximum number of linesearch iterations
-ls_maxiter = 20
+ls_maxiter = 15
# factor for modifying the stepsize within a linesearch iteration
ls_factor = 0.5
# Hessian Approximation ("BFGS", "L-BFGS" or "Identity")
hessian_approx = L-BFGS
# number of stages for l-bfgs method
-lbfgs_stages = 20
+lbfgs_stages = 10
# level for validation computation:
# -1 = never validate
# 0 = validate only after optimization finishes.
# 1 = validate in each optimization iteration
-validationlevel = 0
+validationlevel = 1
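The changed line-search settings above interact: starting from the initial `stepsize`, a backtracking search multiplies the step by `ls_factor` (0.5) until sufficient decrease is found or `ls_maxiter` trials are exhausted. A minimal, self-contained sketch of that loop follows (illustrative only; `evalObjective` and `evalGradient` are hypothetical stand-ins, not this repo's API):

```cpp
#include <cstdio>

// Toy 1-D objective and gradient, stand-ins for the network objective.
static double evalObjective(double x) { return (x - 3.0) * (x - 3.0); }
static double evalGradient(double x) { return 2.0 * (x - 3.0); }

int main() {
  double x = 0.0;               // current design (1-D for illustration)
  double stepsize = 1.0;        // cfg: stepsize = 1.0
  const double ls_factor = 0.5; // cfg: ls_factor = 0.5
  const int ls_maxiter = 15;    // cfg: ls_maxiter = 15

  double obj = evalObjective(x);
  double grad = evalGradient(x);
  double direction = -grad;     // descent direction

  /* Backtracking: shrink the step until an Armijo-type decrease holds. */
  for (int ls_iter = 0; ls_iter < ls_maxiter; ls_iter++) {
    double trial = x + stepsize * direction;
    if (evalObjective(trial) < obj + 1e-4 * stepsize * grad * direction) {
      x = trial;
      break;
    }
    stepsize *= ls_factor;      // cfg: factor for modifying the stepsize
  }
  printf("new design: %f\n", x);
  return 0;
}
```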
10 changes: 5 additions & 5 deletions include/braid_wrapper.hpp
@@ -10,15 +10,14 @@
#pragma once

/**
- * Define the state vector at one time-step
+ * Defines the network state at one layer. It contains the transformed data
+ * batch in the vector **state, and a pointer to the corresponding layer.
 */
class myBraidVector {
protected:
int nbatch; /* Number of examples */
int nchannels; /* Number of channels */

-  MyReal *
-      *state; /* Network state at one layer, dimensions: nbatch * nchannels */
+  MyReal **state; /* Network state at one layer, dimensions: nbatch * nchannels */
Layer *layer; /* Pointer to layer information */

/* Flag that determines if the layer and state have just been received and
@@ -51,8 +50,9 @@ class myBraidVector {
};

/**
- * Wrapper for the primal braid app.
- * virtual function are overwritten from the adjoint app class
+ * Wrapper for the primal braid app. The most important routines are Step,
+ * which applies the layer transformation (and hence steps forward to the
+ * next layer); SetInitialCondition, which applies the opening layer; and
+ * EvaluateObjective, which evaluates the loss function and adds the
+ * regularization terms to obtain the objective function value.
+ *
+ * The adjoint braid app inherits from this class and overrides these
+ * routines with the corresponding derivatives.
 */
class myBraidApp : public BraidApp {
protected:
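To make the Step/state description above concrete, here is a minimal sketch of advancing an nbatch x nchannels state by one layer. Assumptions: `ToyLayer` and its `applyFWD` are invented stand-ins for illustration, not this repo's classes.

```cpp
#include <cmath>
#include <vector>

// Hypothetical residual-type layer: state <- state + dt * tanh(state).
struct ToyLayer {
  double dt;
  void applyFWD(std::vector<double>& channels) const {
    for (double& c : channels) c += dt * std::tanh(c);
  }
};

// One Step: apply the layer to every example in the batch, advancing the
// network state (dimensions: nbatch * nchannels) to the next layer.
void step(std::vector<std::vector<double>>& state, const ToyLayer& layer) {
  for (auto& example : state) layer.applyFWD(example);
}
```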
7 changes: 4 additions & 3 deletions include/layer.hpp
@@ -62,7 +62,8 @@ class Layer {
/* Set time step size */
void setDt(MyReal DT);

-  /* Set design and gradient memory location */
+  /* Set the design and gradient memory locations.
+   * The design vector is allocated within the Network block. For each layer
+   * in the block, the layer's local memory location inside the network's
+   * design vector is passed in here and stored as *weights and *bias (and
+   * their derivatives weights_bar and bias_bar). */
void setMemory(MyReal *design_memloc, MyReal *gradient_memloc);

/* Some Get..() functions */
@@ -90,13 +91,13 @@
int getnConv();
int getCSize();

-  /* Get the layer index (i.e. the time step) */
+  /* Get the layer's ID (i.e. the time step number) */
int getIndex();

/* Prints to screen */
void print_data(MyReal *data_Out);

-  /* Activation function and derivative */
+  /* Apply the activation function and its derivative */
MyReal activation(MyReal x);
MyReal dactivation(MyReal x);

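The new setMemory comment describes one contiguous design vector owned by the Network, with each layer pointing into its own slice. A small illustrative sketch of that layout, under the assumption of a fixed weights-then-bias split per layer (`ToySlice` is invented for illustration):

```cpp
#include <cstdio>

// The Network owns one contiguous design array; each layer stores pointers
// into its own slice, split into weights followed by bias.
struct ToySlice {
  double* weights;  // points into the network's design vector
  double* bias;     // located directly after the weights
};

int main() {
  const int nlayers = 3, nweights = 4, nbias = 1;
  const int ndesign_per_layer = nweights + nbias;
  double design[nlayers * ndesign_per_layer] = {0.0};

  ToySlice layer[nlayers];
  for (int l = 0; l < nlayers; l++) {
    double* memloc = design + l * ndesign_per_layer;  // layer's local offset
    layer[l].weights = memloc;                        // first nweights entries
    layer[l].bias = memloc + nweights;                // remaining nbias entries
  }

  layer[1].bias[0] = 0.5;  // writes straight into the shared design vector
  printf("design[%d] = %f\n", ndesign_per_layer + nweights,
         design[ndesign_per_layer + nweights]);
  return 0;
}
```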
18 changes: 17 additions & 1 deletion include/network.hpp
@@ -9,13 +9,20 @@
#include "util.hpp"
#pragma once

+/*
+ * The Network class logically connects the layers.
+ * Each processor instantiates one object of this class, holding a sub-block
+ * of layers [startlayerID, endlayerID], where these IDs range from -1 (the
+ * opening layer) to nlayers_global-1 (the classification layer). The
+ * distribution of startlayerID and endlayerID across the processors comes
+ * from XBraid.
+ * All layers are stored in the vector **layer, except for the opening layer,
+ * which is stored in *openlayer.
+ * Each network block contains (and allocates!) the *design and *gradient
+ * vectors, which hold the vectorized weights and biases of all its layers
+ * (see createNetworkBlock).
+ */
class Network {
protected:
int nlayers_global; /* Total number of Layers of the network */
int nlayers_local; /* Number of Layers in this network block */

int nchannels; /* Width of the network */
-  MyReal dt; /* Time step size */
+  MyReal dt; /* Time step size (distance between two layers). */
MyReal loss; /* Value of the loss function */
MyReal accuracy; /* Accuracy of the network prediction (percentage of
successfully predicted classes) */
@@ -44,6 +51,10 @@ class Network {

~Network();

+  /*
+   * Calls the layer constructor for each layer in [StartLayerID, EndLayerID].
+   */
void createNetworkBlock(int StartLayerID, int EndLayerID, Config *config,
MPI_Comm Comm);

@@ -105,6 +116,8 @@ class Network {
*/
void setInitialDesign(Config *config);

+  /* Helper function for createNetworkBlock: checks which kind of layer is
+   * required at the given index and calls the corresponding layer
+   * constructor. */
Layer *createLayer(int index, Config *config);

/* Replace the layer with one that is received from the left neighbouring
@@ -113,6 +126,8 @@

/**
* Applies the classification and evaluates loss/accuracy
+ * This routine should only be called on the last processor, which holds the
+ * classification layer.
+ * (It is debatable whether this routine belongs inside the Network class.)
*/
void evalClassification(DataSet *data, MyReal **state, int output);

@@ -125,6 +140,7 @@
/**
* Update the network design parameters: new_design = old_design + stepsize *
* direction
+ * (This might rather belong in the optimizer.)
*/
void updateDesign(MyReal stepsize, MyReal *direction, MPI_Comm comm);
};
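The updateDesign rule documented above, new_design = old_design + stepsize * direction, is a plain axpy-style update. A minimal sketch, ignoring the MPI communicator that the real routine also takes (`updateDesignToy` is a hypothetical name):

```cpp
// Sketch of new_design = old_design + stepsize * direction, applied in place
// to the local block of ndesign_local design variables. Illustrative only.
void updateDesignToy(double* design, const double* direction, double stepsize,
                     int ndesign_local) {
  for (int i = 0; i < ndesign_local; i++) {
    design[i] += stepsize * direction[i];
  }
}
```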
11 changes: 4 additions & 7 deletions src/main.cpp
@@ -77,15 +77,14 @@ int main(int argc, char *argv[]) {
MyReal ls_objective, test_obj;
int ls_iter;

-  /* --- other --- */
-  // TODO: What is this? Why do you need it?
-  int myid;
-  int size;
/* --- Time measurements --- */
struct rusage r_usage;
MyReal StartTime, StopTime, myMB, globalMB;
MyReal UsedTime = 0.0;

/* Initialize MPI */
+  int myid;
+  int size;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
MPI_Comm_size(MPI_COMM_WORLD, &size);
@@ -146,7 +145,7 @@
ndesign_global);

/* Initialize Hessian approximation */
-  HessianApprox *hessian = 0;
+  HessianApprox *hessian = NULL;
switch (config->hessianapprox_type) {
case BFGS_SERIAL:
hessian = new BFGS(MPI_COMM_WORLD, ndesign_local);
@@ -162,8 +161,6 @@
return 0;
}

-  /* Allocate ascent direction for design updates */
-
/* Initialize optimization parameters */
ascentdir = new MyReal[ndesign_local];
stepsize = config->getStepsize(0);