diff --git a/tools/TrainLib_Deployer/TrainLib_Deployer.py b/tools/TrainLib_Deployer/TrainLib_Deployer.py index d05c4968..e16305db 100644 --- a/tools/TrainLib_Deployer/TrainLib_Deployer.py +++ b/tools/TrainLib_Deployer/TrainLib_Deployer.py @@ -1,5 +1,5 @@ ''' -Copyright (C) 2021-2022 ETH Zurich and University of Bologna +Copyright (C) 2021-2024 ETH Zurich and University of Bologna Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ ''' ''' -Authors: Davide Nadalini +Authors: Davide Nadalini, Cristian Cioflan, Axel Vanoni ''' """ @@ -41,6 +41,15 @@ 'SGD' -> Stochastic Gradient Descent """ +import argparse +import onnx +import os + +from onnx import shape_inference, numpy_helper + +import numpy as np + + import deployer_utils.DNN_Reader as reader import deployer_utils.DNN_Composer as composer @@ -48,9 +57,21 @@ # --- USER SETTINGS --- # --------------------- +parser = argparse.ArgumentParser( + prog='Deployer', + description='Generating C code for on-device training') + +parser.add_argument('--project_name', type=str, default="Test_CNN", help='Project name') +parser.add_argument('--project_path', type=str, default="./", help='Project path') +parser.add_argument('--model_path', type=str, default=None, help='Pretrained model path') +parser.add_argument('--start_at', type=str, default=None, help='At which node to start generating') +args = parser.parse_args() + + # GENERAL PROPERTIES -project_name = 'Test_CNN' -project_path = './' +project_name = args.project_name +project_path = args.project_path proj_folder = project_path + project_name + '/' @@ -87,6 +108,8 @@ # Data type list for layer-by-layer deployment (mixed precision) data_type_list = ['FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16'] #data_type_list = ['FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32'] +# Placeholder for pretrained parameters +weight_list = [] # Data layout list (CHW or HWC) data_layout_list = ['CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW'] # TO DO # Bias @@ -111,7 +134,7 @@ PRINT_TRAIN_LOSS = True # Set to true if you want to print the train loss for each epoch # OTHER PROPERTIES # Select if to read the network from an external source -READ_MODEL_ARCH = False # NOT IMPLEMENTED!! 
+READ_MODEL_ARCH = args.model_path is not None # --------------------------- # --- END OF USER SETTING --- @@ -123,9 +146,311 @@ BACKEND """ +class ONNXGraphParser: + def __init__(self, onnx_model): + onnx.checker.check_model(onnx_model) + self.model = shape_inference.infer_shapes(onnx_model) + self.graph = self.model.graph + self.value_info_lookup = {v.name: i for i, v in enumerate(self.graph.value_info)} + self.node_lookup = {v.name: i for i, v in enumerate(self.graph.node)} + self.input_lookup = {v.name: i for i, v in enumerate(self.graph.input)} + self.output_lookup = {v.name: i for i, v in enumerate(self.graph.output)} + self.init_lookup = {v.name: i for i, v in enumerate(self.graph.initializer)} + self.get_precision() # make sure precision is fine + + def _get_type(self, node_name): + if node_name in self.value_info_lookup: + return self.graph.value_info[self.value_info_lookup[node_name]].type + elif node_name in self.input_lookup: + return self.graph.input[self.input_lookup[node_name]].type + elif node_name in self.output_lookup: + return self.graph.output[self.output_lookup[node_name]].type + else: + raise KeyError(f"Node {node_name} not found") + + def _get_node_attr(self, node_name, attr): + for a in self.graph.node[self.node_lookup[node_name]].attribute: + if a.name == attr: + return a + else: + raise ValueError(f"Node {node_name} has no {attr} attribute") + + def is_pointwise(self, node_name): + hk, wk = self.get_kernel_size(node_name) + return hk == wk == 1 + + def is_depthwise(self, node_name): + node = self.graph.node[self.node_lookup[node_name]] + try: + groups = self._get_node_attr(node_name, "group").i + except ValueError: + return False + in_ch = self.get_channel_count(node.input[0]) + out_ch = self.get_channel_count(node.output[0]) + if groups <= 1: + return False + assert in_ch == out_ch == groups, "For depthwise convolutions, input and output channels must be the same as groups" + return True + + def get_channel_count(self, node_name): + tensor_type = self._get_type(node_name) + # Data layout is B, C, H, W + return tensor_type.tensor_type.shape.dim[1].dim_value + + def get_hw(self, node_name): + tensor_type = self._get_type(node_name) + shape = tensor_type.tensor_type.shape.dim + return shape[2].dim_value, shape[3].dim_value + + def get_activation_size(self, node_name): + tensor_type = self._get_type(node_name) + dims = tensor_type.tensor_type.shape.dim + # Data layout is B, C, H, W + return dims[2].dim_value, dims[3].dim_value + + def get_init(self, node_name): + index = self.init_lookup.get(node_name, -1) + if index == -1: + raise KeyError(f"Node {node_name} has no initializer") + init = self.graph.initializer[index] + return numpy_helper.to_array(init) + + def get_kernel_size(self, node_name): + ksize = self._get_node_attr(node_name, "kernel_shape") + return ksize.ints[0], ksize.ints[1] + + def get_stride(self, node_name): + stride = self._get_node_attr(node_name, "strides") + return stride.ints[0], stride.ints[1] + + def get_pad(self, node_name): + pad = self._get_node_attr(node_name, "pads").ints + assert pad[0] == pad[2] and pad[1] == pad[3], "Only symmetric padding is supported." + return pad[0], pad[1] + + def get_precision(self): + elem_type = self.graph.value_info[0].type.tensor_type.elem_type + if elem_type == onnx.TensorProto.FLOAT: + return "FP32" + elif elem_type == onnx.TensorProto.FLOAT16: + return "FP16" + elif elem_type == onnx.TensorProto.BFLOAT16: + raise NotImplementedError("Numpy does not support bfloat16 and converts it to FP32. 
We need to change how we save and load weights.") + else: + raise ValueError("Only FP32 and FP16 are supported") + + + # Call the DNN Reader and then the DNN Composer if READ_MODEL_ARCH : - pass + + + layer_list = [] + in_ch_list = [] + out_ch_list = [] + hk_list = [] + wk_list = [] + hin_list = [] + win_list = [] + h_str_list = [] + w_str_list = [] + h_pad_list = [] + w_pad_list = [] + opt_mm_fw_list = [] + opt_mm_wg_list = [] + opt_mm_ig_list = [] + data_type_list = [] + data_layout_list = [] + sumnode_connections = [] + + if (args.model_path.split('.')[-1] == "onnx"): + onnx_model = onnx.load(args.model_path) + onnx.checker.check_model(onnx_model) + graph = ONNXGraphParser(onnx_model) + found_start = args.start_at is None + + if args.start_at is not None: + node_names = [n.op_type for n in graph.graph.node if n.op_type != 'Constant'] + assert args.start_at in node_names, f"{args.start_at} is not a valid layer name. Layer names are: {node_names}" + # CIOFLANC: temporary reconciling pseudo-sparse update implementations + update_layer_list = [1] * (len(node_names) - node_names.index(args.start_at)) + # update_layer_list[node_names.index(args.start_at)] = 1 + + for onnx_node in graph.graph.node: + + if not found_start: + if onnx_node.op_type != args.start_at: + continue + else: + found_start = True + + if (onnx_node.op_type == 'Gemm') or (onnx_node.op_type == 'MatMul'): + in_ch_list.append(graph.get_channel_count(onnx_node.input[0])) + out_ch_list.append(graph.get_channel_count(onnx_node.output[0])) + layer_list.append('linear') + hk_list.append(1) + wk_list.append(1) + hin_list.append(1) + win_list.append(1) + h_str_list.append(1) + w_str_list.append(1) + h_pad_list.append(0) + w_pad_list.append(0) + opt_mm_fw_list.append(0) + opt_mm_wg_list.append(0) + opt_mm_ig_list.append(0) + # TODO: Read from file + data_type_list.append(graph.get_precision()) + # TODO: Read from file + # Note that this also determines the read position for in_ch_list and out_ch_list + data_layout_list.append('CHW') + weight_init = graph.get_init(onnx_node.input[1]) + # Gemm node does y = x*B, but torch uses y = A*x, so transpose B to get A back + # This also aligns with how trainlib does things + weight_init = weight_init.transpose(1,0) + try: + bias_init = graph.get_init(onnx_node.input[2]) + raise NotImplementedError("Biases are not implemented in trainlib") + except (KeyError, IndexError): + bias_init = [] + weight_list.append((weight_init, bias_init)) + sumnode_connections.append(0) + elif onnx_node.op_type == 'AveragePool': + in_ch_list.append(graph.get_channel_count(onnx_node.input[0])) + out_ch_list.append(graph.get_channel_count(onnx_node.output[0])) + layer_list.append('AvgPool') + (hk, wk) = graph.get_kernel_size(onnx_node.name) + hk_list.append(hk) + wk_list.append(wk) + (hin, win) = graph.get_activation_size(onnx_node.input[0]) + hin_list.append(hin) + win_list.append(win) + (hstr, wstr) = graph.get_stride(onnx_node.name) + h_str_list.append(hstr) + w_str_list.append(wstr) + (hpad, wpad) = graph.get_pad(onnx_node.name) + h_pad_list.append(hpad) + w_pad_list.append(wpad) + opt_mm_fw_list.append(0) + opt_mm_wg_list.append(0) + opt_mm_ig_list.append(0) + data_type_list.append(graph.get_precision()) + data_layout_list.append('CHW') + weight_list.append(([], [])) # kernels + sumnode_connections.append(0) + elif onnx_node.op_type == 'GlobalAveragePool': + hk, wk = graph.get_hw(onnx_node.input[0]) + if hk == 1 and wk == 1: + # There is nothing to average, skip this node + continue + in_ch_list.append(graph.get_channel_count(onnx_node.input[0])) + 
out_ch_list.append(graph.get_channel_count(onnx_node.output[0])) + layer_list.append('AvgPool') + hk_list.append(hk) + wk_list.append(wk) + (hin, win) = graph.get_activation_size(onnx_node.input[0]) + hin_list.append(hin) + win_list.append(win) + h_str_list.append(1) + w_str_list.append(1) + h_pad_list.append(0) + w_pad_list.append(0) + opt_mm_fw_list.append(0) + opt_mm_wg_list.append(0) + opt_mm_ig_list.append(0) + data_type_list.append(graph.get_precision()) + data_layout_list.append('CHW') + weight_list.append(([], [])) # kernels + sumnode_connections.append(0) + elif onnx_node.op_type == 'Conv': + in_ch_list.append(graph.get_channel_count(onnx_node.input[0])) + out_ch_list.append(graph.get_channel_count(onnx_node.output[0])) + if graph.is_pointwise(onnx_node.name): + ty = "PW" + elif graph.is_depthwise(onnx_node.name): + ty = "DW" + else: + ty = "conv2d" + layer_list.append(ty) + (hk, wk) = graph.get_kernel_size(onnx_node.name) + hk_list.append(hk) + wk_list.append(wk) + (hin, win) = graph.get_activation_size(onnx_node.input[0]) + hin_list.append(hin) + win_list.append(win) + (hstr, wstr) = graph.get_stride(onnx_node.name) + h_str_list.append(hstr) + w_str_list.append(wstr) + (hpad, wpad) = graph.get_pad(onnx_node.name) + h_pad_list.append(hpad) + w_pad_list.append(wpad) + opt_mm_fw_list.append(0) + opt_mm_wg_list.append(0) + opt_mm_ig_list.append(0) + data_type_list.append(graph.get_precision()) + # TODO: Read from file + # Note that this also determines the read position for in_ch_list and out_ch_list + data_layout_list.append('CHW') + weight_init = graph.get_init(onnx_node.input[1]) + try: + bias_init = graph.get_init(onnx_node.input[2]) + raise NotImplementedError("Biases are not implemented in trainlib") + except (KeyError, IndexError): + # Ignore missing bias + bias_init = [] + weight_list.append((weight_init, bias_init)) # kernels + sumnode_connections.append(0) + elif onnx_node.op_type == 'Clip': + # Clip (e.g. ReLU6) is mapped to a plain ReLU; the upper bound is dropped, as trainlib does not support it + layer_list.append('ReLU') + in_ch_list.append(graph.get_channel_count(onnx_node.input[0])) + out_ch_list.append(graph.get_channel_count(onnx_node.output[0])) + hk_list.append(1) + wk_list.append(1) + hin_list.append(1) + win_list.append(1) + h_str_list.append(1) + w_str_list.append(1) + h_pad_list.append(0) + w_pad_list.append(0) + opt_mm_fw_list.append(0) + opt_mm_wg_list.append(0) + opt_mm_ig_list.append(0) + data_layout_list.append('CHW') + data_type_list.append(graph.get_precision()) + weight_list.append(([], [])) + sumnode_connections.append(0) + else: + raise NotImplementedError("Model format not supported.") + + data_dir = proj_folder+'data/' + if not os.path.exists(data_dir): + os.makedirs(data_dir) + for i, (weight_init, bias_init) in enumerate(weight_list): + np.save(data_dir+f"l{i}w.npy", np.array(weight_init, dtype=("float32" if data_type_list[i] == "FP32" else "float16"))) + np.save(data_dir+f"l{i}b.npy", np.array(bias_init, dtype=("float32" if data_type_list[i] == "FP32" else "float16"))) + + print("Generating project at location "+proj_folder) + + # Check if Residual Connections are valid + sumnode_connections = composer.AdjustResConnList(sumnode_connections) + + composer.CheckResConn(layer_list, in_ch_list, out_ch_list, hin_list, win_list, sumnode_connections, update_layer_list) + + # Check if the network training fits L1 + memocc = composer.DNN_Size_Checker(layer_list, in_ch_list, out_ch_list, hk_list, wk_list, hin_list, win_list, + h_str_list, w_str_list, 
h_pad_list, w_pad_list, + data_type_list, bias_list, update_layer_list, + L1_SIZE_BYTES, USE_DMA, CONV2D_USE_IM2COL) + + print("DNN memory occupation: {} bytes of {} available L1 bytes ({}%).".format(memocc, L1_SIZE_BYTES, (memocc/L1_SIZE_BYTES)*100)) + + # Call DNN Composer on the user-provided graph + composer.DNN_Composer(proj_folder, project_name, + layer_list, in_ch_list, out_ch_list, hk_list, wk_list, + hin_list, win_list, h_str_list, w_str_list, h_pad_list, w_pad_list, + epochs, batch_size, learning_rate, optimizer, loss_fn, + NUM_CORES, data_type_list, weight_list, bias_list, update_layer_list, opt_mm_fw_list, opt_mm_wg_list, opt_mm_ig_list, sumnode_connections, + USE_DMA, PROFILE_SINGLE_LAYERS, SEPARATE_BACKWARD_STEPS, CONV2D_USE_IM2COL, PRINT_TRAIN_LOSS) + + print("PULP project generation successful!") else: @@ -151,9 +476,9 @@ layer_list, in_ch_list, out_ch_list, hk_list, wk_list, hin_list, win_list, h_str_list, w_str_list, h_pad_list, w_pad_list, epochs, batch_size, learning_rate, optimizer, loss_fn, - NUM_CORES, data_type_list, bias_list, update_layer_list, opt_mm_fw_list, opt_mm_wg_list, opt_mm_ig_list, + NUM_CORES, data_type_list, weight_list, bias_list, update_layer_list, opt_mm_fw_list, opt_mm_wg_list, opt_mm_ig_list, sumnode_connections, USE_DMA, PROFILE_SINGLE_LAYERS, SEPARATE_BACKWARD_STEPS, CONV2D_USE_IM2COL, PRINT_TRAIN_LOSS) print("PULP project generation successful!") - pass + pass \ No newline at end of file diff --git a/tools/TrainLib_Deployer/deployer_utils/DNN_Composer.py b/tools/TrainLib_Deployer/deployer_utils/DNN_Composer.py index 95b5376e..440e9a07 100644 --- a/tools/TrainLib_Deployer/deployer_utils/DNN_Composer.py +++ b/tools/TrainLib_Deployer/deployer_utils/DNN_Composer.py @@ -1,5 +1,5 @@ ''' -Copyright (C) 2021-2022 ETH Zurich and University of Bologna +Copyright (C) 2021-2024 ETH Zurich and University of Bologna Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -15,7 +15,7 @@ ''' ''' -Authors: Davide Nadalini +Authors: Davide Nadalini, Cristian Cioflan, Axel Vanoni ''' import deployer_utils.deployment_utils_single_buffer as utilsSB @@ -213,7 +213,7 @@ def DNN_Composer (proj_folder_path, project_name, layers_l, in_ch_l, out_ch_l, hk_l, wk_l, hin_l, win_l, h_str_l, w_str_l, h_pad_l, w_pad_l, epochs, batch_size, learning_rate, optimizer, loss_fn, - NUM_CORES, data_type_l, bias_l, update_layer_l, opt_mm_fw_list, opt_mm_wg_list, opt_mm_ig_list, + NUM_CORES, data_type_l, weight_l, bias_l, update_layer_l, opt_mm_fw_list, opt_mm_wg_list, opt_mm_ig_list, sumnode_connections, USE_DMA, PROFILE_SINGLE_LAYERS, SEPARATE_BACKWARD_STEPS, CONV2D_USE_IM2COL, PRINT_TRAIN_LOSS): # Initialize project (copy the prefab files and create folder) @@ -227,7 +227,7 @@ def DNN_Composer (proj_folder_path, project_name, layers_l, in_ch_l, out_ch_l, hk_l, wk_l, hin_l, win_l, h_str_l, w_str_l, h_pad_l, w_pad_l, epochs, batch_size, learning_rate, optimizer, loss_fn, - data_type_l, bias_l, update_layer_l, sumnode_connections, USE_DMA) + data_type_l, weight_l, bias_l, update_layer_l, sumnode_connections, USE_DMA) global MAX_LAYER_DIM diff --git a/tools/TrainLib_Deployer/deployer_utils/deployment_utils.py b/tools/TrainLib_Deployer/deployer_utils/deployment_utils.py index 2e302484..5f861c7b 100644 --- a/tools/TrainLib_Deployer/deployer_utils/deployment_utils.py +++ b/tools/TrainLib_Deployer/deployer_utils/deployment_utils.py @@ -1,5 +1,5 @@ ''' -Copyright (C) 2021-2022 ETH Zurich and University of Bologna +Copyright (C) 2021-2024 ETH Zurich and University of Bologna Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ ''' ''' -Authors: Davide Nadalini, Giacomo Saporetti +Authors: Davide Nadalini, Giacomo Saporetti, Cristian Cioflan, Axel Vanoni ''' import os @@ -250,8 +250,8 @@ def InitProject(proj_folder_path): utils_folder = proj_folder + 'utils/' trainlib_dest_folder = proj_folder + 'lib/' - os.mkdir(proj_folder) - os.mkdir(utils_folder) + os.makedirs(proj_folder, exist_ok = True) + os.makedirs(utils_folder, exist_ok = True) shutil.copy2('./deployer_utils/srcfiles/main.c', proj_folder) shutil.copy2('./deployer_utils/srcfiles/stats.h', proj_folder) @@ -289,7 +289,7 @@ def GenerateMakefile(proj_folder_path, project_name, layers_l, NUM_CORES, data_t f.write('MATMUL_TYPE_IG_L'+str(layer)+'?='+str(opt_mm_ig_list[layer])+' # Selects which optimized matmul to be used in IN GRAD (see mm_manager_list.txt or "MM_manager()" body to verify which one is called)' + '\n') f.write('# End of user settings\n\n') - f.write('NUM_MATMULS?=24 # Available standard matmuls in the library' + '\n') + f.write('NUM_MATMULS?=24 # Available standard matmuls in the library' + '\n') f.write('TRAIN_LIB=./lib\n') f.write('TRAIN_LIB_SRCS=$(TRAIN_LIB)/sources\n') f.write('APP_SRCS = main.c net.c\n\n') @@ -370,7 +370,7 @@ def GenerateGM(proj_folder_path, project_name, layers_l, in_ch_l, out_ch_l, hk_l, wk_l, hin_l, win_l, h_str_l, w_str_l, h_pad_l, w_pad_l, epochs, batch_size, learning_rate, optimizer, loss_fn, - data_type_l, bias_l, update_layer_l, sumnode_connections, USE_DMA): + data_type_l, weight_l, bias_l, update_layer_l, sumnode_connections, USE_DMA): # Check if GPU is available, else keep fake FP16 cuda_is_on = torch.cuda.is_available() @@ -390,6 +390,7 @@ def GenerateGM(proj_folder_path, project_name, f.write("import torch.optim as optim\n") f.write("import dump_utils as dump\n") f.write("import math\n") + 
f.write("import numpy\n") f.write("\n") f.write("# Set device\n") @@ -574,7 +575,18 @@ def GenerateGM(proj_folder_path, project_name, f.write("\n# Initialize network\n") f.write("net = DNN().to(device)\n") f.write("for p in net.parameters():\n") - f.write("\tnn.init.normal_(p, mean=0.0, std=0.01)\n") + f.write("\tnn.init.normal_(p, mean=0.0, std=1.0)\n") + if (weight_l): + f.write("from pathlib import Path\n") + f.write("basedir = Path(__file__).resolve().parent.parent\n") + for layer in range(len(layers_l)): + if data_type_l[layer] == 'FP16': + to = ".to(device).half()" + else: + to = ".to(device)" + f.write(f"net.l{layer}.weight = torch.nn.Parameter(torch.from_numpy(numpy.load(basedir / 'data/l{layer}w.npy')){to}, requires_grad=True)\n") + # TODO: uncomment once biases are implemented in trainlib + # f.write(f"net.l{layer}.bias = torch.nn.Parameter(torch.from_numpy(numpy.load(basedir / 'data/l{layer}b.npy')){to}, requires_grad=True)\n") f.write("net.zero_grad()\n\n") # Freeze layers excluded from sparse update @@ -1805,5 +1817,4 @@ def GenerateNet(proj_folder_path, project_name, f.close() - return - + return \ No newline at end of file diff --git a/tools/TrainLib_Deployer/deployer_utils/deployment_utils_double_buffer.py b/tools/TrainLib_Deployer/deployer_utils/deployment_utils_double_buffer.py index 11c7963f..d8725182 100644 --- a/tools/TrainLib_Deployer/deployer_utils/deployment_utils_double_buffer.py +++ b/tools/TrainLib_Deployer/deployer_utils/deployment_utils_double_buffer.py @@ -81,7 +81,7 @@ def GenerateNet(proj_folder_path, project_name, layers_l, in_ch_l, out_ch_l, hk_l, wk_l, hin_l, win_l, h_str_l, w_str_l, h_pad_l, w_pad_l, epochs, batch_size, learning_rate, optimizer, loss_fn, - data_type_l, bias_l, update_layer_l, sumnode_connections, MAX_LAYER_DIM, + data_type_l, weight_l, bias_l, update_layer_l, sumnode_connections, MAX_LAYER_DIM, PROFILE_SINGLE_LAYERS, SEPARATE_BACKWARD_STEPS): @@ -159,7 +159,8 @@ def GenerateNet(proj_folder_path, project_name, f.write("\n// Define structures and pointers to data in L1 memory\n") if data_type == 'FP32': #f.write("PI_L1 float * D0, * d0, * W0, * w0, * D1, * d1, * W1, *w1;\n") - f.write("PI_L1 float BUFF[MAX_SIZE];\n") + # f.write("PI_L1 float BUFF[MAX_SIZE];\n") + f.write("PI_L1 float * BUFF;\n") f.write("PI_L1 struct blob d1_blob;\n") f.write("PI_L1 struct blob w1_blob;\n") f.write("PI_L1 struct blob b1_blob;\n") @@ -182,7 +183,8 @@ def GenerateNet(proj_folder_path, project_name, #f.write("PI_L1 float * t;\n") elif data_type == 'FP16': f.write("PI_L1 fp16 * D1, * d1, * W1, * w1, * D0, * d0, * W0, *w0;\n") - f.write("PI_L1 fp16 BUFF[MAX_SIZE];\n") + # f.write("PI_L1 fp16 BUFF[MAX_SIZE];\n") + f.write("PI_L1 fp16 * BUFF;\n") f.write("PI_L1 struct blob_fp16 d1_blob;\n") f.write("PI_L1 struct blob_fp16 w1_blob;\n") f.write("PI_L1 struct blob_fp16 d0_blob;\n") @@ -398,7 +400,7 @@ def GenerateNet(proj_folder_path, project_name, im2col_byte_length = 0 im2col_max_data_type = 'FP32' for layer in range(len(layers_l)): - if layers_l[layer] == 'conv2d' and CONV2D_USE_IM2COL == True: # or layers_l[layer] == 'DW': + if layers_l[layer] == 'conv2d': # or layers_l[layer] == 'DW': if data_type_l[layer] == 'FP32': im2col_byte_length = 4 elif data_type_l[layer] == 'FP16': @@ -436,6 +438,7 @@ def GenerateNet(proj_folder_path, project_name, else: print("[deployment_utils.GenerateNet] Invalid data type for im2col!!") exit() + # No im2col buffer allocate_no_im2col = False for layer in range(len(layers_l)): @@ -453,10 +456,10 @@ def GenerateNet(proj_folder_path, 
project_name, for layer in range(len(layers_l)): # Check layer data layout data_layout = 'CHW' # Change to input list of data layouts - if ((layers_l[layer] == 'conv2d' and CONV2D_USE_IM2COL == True) or layers_l[layer] == 'PW') and layer == 0: + if (layers_l[layer] == 'conv2d' or layers_l[layer] == 'PW') and layer == 0: bt_flag = True bt_layer_index = 0 - elif ((layers_l[layer] == 'conv2d' and CONV2D_USE_IM2COL == True) or layers_l[layer] == 'PW') and layer > 0: + elif (layers_l[layer] == 'conv2d' or layers_l[layer] == 'PW') and layer > 0: bt_flag = True bt_mem = in_ch_l[layer] * hk_l[layer] * wk_l[layer] * out_ch_l[layer] if bt_mem > bt_max_memocc: @@ -497,13 +500,7 @@ def GenerateNet(proj_folder_path, project_name, else: print("[deployment_utils.GenerateNet] Invalid data type for pw transp buffer definition!\n") exit() - # No blocktranspose buffer - if (bt_flag == False): - print("No blockstranspose buffer detected\n") - f.write("PI_L1 float bt_buffer[1];\n") - # Define label buffer - f.write("PI_L1 float label_temp[Tout_C_l"+str(len(layers_l)-1)+"*Tout_H_l"+str(len(layers_l)-1)+"*Tout_W_l"+str(len(layers_l)-1)+"];\n") # Define tensors to backpropagate the output error f.write("\n// Define error propagation tensors\n") @@ -594,6 +591,10 @@ def GenerateNet(proj_folder_path, project_name, f.write("\n// DNN initialization function\n") f.write("void DNN_init()\n{\n") f.write("\n// Assign pointers in L1\n") + if (data_type == "FP32"): + f.write(" BUFF = (float *) pi_l1_malloc(NULL, MAX_SIZE*sizeof(float));\n") + elif (data_type == "FP16"): + f.write(" BUFF = (fp16 *) pi_l1_malloc(NULL, MAX_SIZE*sizeof(float));\n") f.write(" d0_blob.data = BUFF;\n") f.write(" d0_blob.diff = BUFF;\n") f.write(" w0_blob.data = BUFF;\n") @@ -872,16 +873,16 @@ def GenerateNet(proj_folder_path, project_name, skip_inputgrad = 0 # Write configuration templates if layers_l[layer] == 'linear': - f.write(ntemp.linear_config_template(layer, skip_inputgrad, data_type_l[layer], 1)) + f.write(ntemp.linear_config_template(layer, skip_inputgrad, data_type_l[layer])) elif layers_l[layer] == 'conv2d': IM2COL_USEIT = 1 if CONV2D_USE_IM2COL == False: IM2COL_USEIT = 0 f.write(ntemp.conv2d_config_template(layer, h_pad_l[layer], w_pad_l[layer], h_str_l[layer], w_str_l[layer], skip_inputgrad, data_type_l[layer], bias_l[layer], IM2COL_USEIT, 1)) elif layers_l[layer] == 'PW': - f.write(ntemp.PW_config_template(layer, skip_inputgrad, data_type_l[layer], 1)) + f.write(ntemp.PW_config_template(layer, skip_inputgrad, data_type_l[layer])) elif layers_l[layer] == 'DW': - f.write(ntemp.DW_config_template(layer, h_pad_l[layer], w_pad_l[layer], h_str_l[layer], w_str_l[layer], skip_inputgrad, data_type_l[layer], 1)) + f.write(ntemp.DW_config_template(layer, h_pad_l[layer], w_pad_l[layer], h_str_l[layer], w_str_l[layer], skip_inputgrad, data_type_l[layer])) elif layers_l[layer] == 'ReLU': f.write(ntemp.ReLU_config_template(layer, data_type_l[layer])) elif layers_l[layer] == 'MaxPool': @@ -1312,25 +1313,7 @@ def GenerateNet(proj_folder_path, project_name, elif data_type_l[-1] == 'FP16': f.write(" pulp_MSELoss_fp16(&loss_args);\n") else: - print("[deployment_utils.GenerateNet]: Invalid loss type!") - exit() - f.write(f"\tstore((uint32_t) out.diff, (uint32_t) layer{len(layers_l)-1}_out.diff, {float_size}*OUT_SIZE);\n") - elif loss_fn == "CrossEntropyLoss": - float_size = 2 - if data_type_l[0] == 'FP32': - float_size = 4 - f.write("\tloss_args.output = &out;\n") - f.write("\tloss_args.target = out.diff;\n") - f.write("\tloss_args.wr_loss = &loss;\n") 
- f.write(f"\tload((uint32_t) LABEL, (uint32_t) out.diff, {float_size}*OUT_SIZE);\n") - f.write("\tpi_cl_dma_cmd_wait(cmd_load);\n") - - if data_type_l[-1] == 'FP32': - f.write(" pulp_CrossEntropyLoss(&loss_args);\n") - elif data_type_l[-1] == 'FP16': - f.write(" pulp_CrossEntropyLoss_fp16(&loss_args);\n") - else: - print("[deployment_utils.GenerateNet]: Invalid loss type!") + print("[deplyment_utils.GenerateNet]: Invalid loss type!") exit() f.write(f"\tstore((uint32_t) out.diff, (uint32_t) layer{len(layers_l)-1}_out.diff, {float_size}*OUT_SIZE);\n") else: @@ -1520,11 +1503,6 @@ def GenerateNet(proj_folder_path, project_name, f.write(" for (int epoch=0; epoch>> EPOCH %d: train_loss = %f (GM: %f)\\n\", epoch, loss, TRAIN_LOSS[epoch]);\n") - f.write(" /* Continue profiling */ pi_perf_start();\n") f.write(" backward();\n") f.write(" update_weights();\n") f.write(" }\n\n") @@ -1541,6 +1519,9 @@ def GenerateNet(proj_folder_path, project_name, f.write(" check_post_training_output();\n") f.write(" print_output();\n") + f.write(" // Free l1 buffer\n") + f.write(" pi_l1_free(NULL, BUFF, MAX_SIZE*sizeof(float));\n") + f.write("}\n") data_size = 0 diff --git a/tools/TrainLib_Deployer/deployer_utils/deployment_utils_single_buffer.py b/tools/TrainLib_Deployer/deployer_utils/deployment_utils_single_buffer.py index faf7677f..ce4252de 100644 --- a/tools/TrainLib_Deployer/deployer_utils/deployment_utils_single_buffer.py +++ b/tools/TrainLib_Deployer/deployer_utils/deployment_utils_single_buffer.py @@ -1798,5 +1798,4 @@ def GenerateNet(proj_folder_path, project_name, - return - + return \ No newline at end of file