341 changes: 333 additions & 8 deletions tools/TrainLib_Deployer/TrainLib_Deployer.py
@@ -1,5 +1,5 @@
'''
Copyright (C) 2021-2022 ETH Zurich and University of Bologna
Copyright (C) 2021-2024 ETH Zurich and University of Bologna

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
'''

'''
Authors: Davide Nadalini
Authors: Davide Nadalini, Cristian Cioflan, Axel Vanoni
'''

"""
@@ -41,16 +41,37 @@
'SGD' -> Stochastic Gradient Descent
"""

import argparse
import onnx
import os

from onnx import shape_inference, numpy_helper

import numpy as np


import deployer_utils.DNN_Reader as reader
import deployer_utils.DNN_Composer as composer

# ---------------------
# --- USER SETTINGS ---
# ---------------------

parser = argparse.ArgumentParser(
prog='Deployer',
description='Generating C code for on-device training')

parser.add_argument('--project_name', type=str, default="Test_CNN", help='Project name')
parser.add_argument('--project_path', type=str, default="./", help='Project path')
parser.add_argument('--model_path', type=str, default=None, help='Pretrained model path')
parser.add_argument('--start_at', type=str, default=None, help='At which node to start generating')
args = parser.parse_args()
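# Example invocation (the model path and layer name below are hypothetical):
#   python TrainLib_Deployer.py --project_name Test_CNN --project_path ./ \
#       --model_path ./model.onnx --start_at Conv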


# GENERAL PROPERTIES
project_name = 'Test_CNN'
project_path = './'
project_name = args.project_name
project_path = args.project_path
proj_folder = project_path + project_name + '/'


@@ -87,6 +108,8 @@
# Data type list for layer-by-layer deployment (mixed precision)
data_type_list = ['FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16', 'FP16']
#data_type_list = ['FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32', 'FP32']
# Placeholder for pretrained parameters
weight_list = []
# Data layout list (CHW or HWC)
data_layout_list = ['CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW', 'CHW'] # TO DO
# Bias
@@ -111,7 +134,7 @@
PRINT_TRAIN_LOSS = True # Set to true if you want to print the train loss for each epoch
# OTHER PROPERTIES
# Select if to read the network from an external source
READ_MODEL_ARCH = False # NOT IMPLEMENTED!!
READ_MODEL_ARCH = args.model_path is not None

# ---------------------------
# --- END OF USER SETTING ---
@@ -123,9 +146,311 @@
BACKEND
"""

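# ONNXGraphParser wraps a checked, shape-inferred ONNX graph and exposes the
# per-node lookups (shapes, attributes, initializers) used by the deployer below.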
class ONNXGraphParser:
def __init__(self, onnx_model):
onnx.checker.check_model(onnx_model)
self.model = shape_inference.infer_shapes(onnx_model)
self.graph = self.model.graph
self.value_info_lookup = {v.name: i for i, v in enumerate(self.graph.value_info)}
self.node_lookup = {v.name: i for i, v in enumerate(self.graph.node)}
self.input_lookup = {v.name: i for i, v in enumerate(self.graph.input)}
self.output_lookup = {v.name: i for i, v in enumerate(self.graph.output)}
self.init_lookup = {v.name: i for i, v in enumerate(self.graph.initializer)}
self.get_precision() # make sure precision is fine

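# Resolve a tensor name to its type proto, searching value_info, graph inputs
# and graph outputs in that order.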
def _get_type(self, node_name):
if node_name in self.value_info_lookup:
return self.graph.value_info[self.value_info_lookup[node_name]].type
elif node_name in self.input_lookup:
return self.graph.input[self.input_lookup[node_name]].type
elif node_name in self.output_lookup:
return self.graph.output[self.output_lookup[node_name]].type
else:
raise KeyError(f"Node {node_name} not found")

def _get_node_attr(self, node_name, attr):
for a in self.graph.node[self.node_lookup[node_name]].attribute:
if a.name == attr:
return a
else:
raise ValueError(f"Node {node_name} has no {attr} attribute")

def is_pointwise(self, node_name):
hk, wk = self.get_kernel_size(node_name)
return hk == wk == 1

def is_depthwise(self, node_name):
node = self.graph.node[self.node_lookup[node_name]]
try:
groups = self._get_node_attr(node_name, "group").i
except ValueError:
return False
in_ch = self.get_channel_count(node.input[0])
out_ch = self.get_channel_count(node.output[0])
if groups <= 1:
return False
assert in_ch == out_ch == groups, "For depthwise convolutions, input and output channels must be the same as groups"
return True

def get_channel_count(self, node_name):
tensor_type = self._get_type(node_name)
# Data layout is B, C, H, W
return tensor_type.tensor_type.shape.dim[1].dim_value

def get_hw(self, node_name):
tensor_type = self._get_type(node_name)
shape = tensor_type.tensor_type.shape.dim
return shape[2].dim_value, shape[3].dim_value

def get_activation_size(self, node_name):
tensor_type = self._get_type(node_name)
dims = tensor_type.tensor_type.shape.dim
# Data layout is B, C, H, W
return dims[2].dim_value, dims[3].dim_value

def get_init(self, node_name):
index = self.init_lookup.get(node_name, -1)
if index == -1:
raise KeyError(f"Node {node_name} has no initializer")
init = self.graph.initializer[index]
return numpy_helper.to_array(init)

def get_kernel_size(self, node_name):
ksize = self._get_node_attr(node_name, "kernel_shape")
return ksize.ints[0], ksize.ints[1]

def get_stride(self, node_name):
stride = self._get_node_attr(node_name, "strides")
return stride.ints[0], stride.ints[1]

def get_pad(self, node_name):
pad = self._get_node_attr(node_name, "pads").ints
assert pad[0] == pad[2] and pad[1] == pad[3], "Only symmetric padding is supported."
return pad[0], pad[1]

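# Precision is taken from the first value_info entry; only FP32 and FP16 are
# supported, and bfloat16 is rejected explicitly.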
def get_precision(self):
elem_type = self.graph.value_info[0].type.tensor_type.elem_type
if elem_type == onnx.TensorProto.FLOAT:
return "FP32"
elif elem_type == onnx.TensorProto.FLOAT16:
return "FP16"
elif elem_type == onnx.TensorProto.BFLOAT16:
raise NotImplementedError("Numpy does not support bfloat16 and converts it to FP32. We need to change how we save and load weights.")
else:
raise ValueError("Only FP32 and FP16 are supported")

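# Minimal usage sketch of the parser (illustrative only; "model.onnx" is a
# hypothetical path):
#   onnx_graph = ONNXGraphParser(onnx.load("model.onnx"))
#   for node in onnx_graph.graph.node:
#       if node.op_type != 'Constant':
#           in_ch = onnx_graph.get_channel_count(node.input[0])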

# Call the DNN Reader and then the DNN Composer
if READ_MODEL_ARCH :
pass


layer_list = []
in_ch_list = []
out_ch_list = []
hk_list = []
wk_list = []
hin_list = []
win_list = []
h_str_list = []
w_str_list = []
h_pad_list = []
w_pad_list = []
opt_mm_fw_list = []
opt_mm_wg_list = []
opt_mm_ig_list = []
data_type_list = []
data_layout_list = []

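# Translate each supported ONNX node into the per-layer descriptor lists
# consumed by the DNN Composer below.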
if (args.model_path.split('.')[-1] == "onnx"):
onnx_model = onnx.load(args.model_path)
onnx.checker.check_model(onnx_model)
graph = ONNXGraphParser(onnx_model)
found_start = args.start_at is None

if args.start_at is not None:
node_names = [n.op_type for n in graph.graph.node if n.op_type != 'Constant']
assert args.start_at in node_names, f"{args.start_at} is not a valid layer name. Layer names are: {node_names}"
# CIOFLANC: temporary reconciling pseudo-sparse update implementations
update_layer_list = [1] * (len(node_names) - node_names.index(args.start_at))
# update_layer_list[node_names.index(args.start_at)] = 1

for onnx_node in graph.graph.node:

if not found_start:
if onnx_node.op_type != args.start_at:
continue
else:
found_start = True

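# Fully-connected layers: Gemm / MatMul nodes become trainlib 'linear' layers
# with unit spatial dimensions.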
if (onnx_node.op_type == 'Gemm') or (onnx_node.op_type == 'MatMul'):
in_ch_list.append(graph.get_channel_count(onnx_node.input[0]))
out_ch_list.append(graph.get_channel_count(onnx_node.output[0]))
layer_list.append('linear')
hk_list.append(1)
wk_list.append(1)
hin_list.append(1)
win_list.append(1)
h_str_list.append(1)
w_str_list.append(1)
h_pad_list.append(0)
w_pad_list.append(0)
opt_mm_fw_list.append(0)
opt_mm_wg_list.append(0)
opt_mm_ig_list.append(0)
# TODO: Read from file
data_type_list.append(graph.get_precision())
# TODO: Read from file
# Note that this also determines the read position for in_ch_list and out_ch_list
data_layout_list.append('CHW')
weight_init = graph.get_init(onnx_node.input[1])
# Gemm node does y = x*B, but torch uses y = A*x, so transpose B to get A back
# This also aligns with how trainlib does things
weight_init = weight_init.transpose(1,0)
try:
bias_init = graph.get_init(onnx_node.input[2])
raise NotImplementedError("Biases are not implemented in trainlib")
except (KeyError, IndexError):
bias_init = []
weight_list.append((weight_init, bias_init))
sumnode_connections.append(0)
elif onnx_node.op_type == 'AveragePool':
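# Average pooling maps to trainlib's 'AvgPool'; kernel, stride and padding are
# read from the node attributes.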
in_ch_list.append(graph.get_channel_count(onnx_node.input[0]))
out_ch_list.append(graph.get_channel_count(onnx_node.output[0]))
layer_list.append('AvgPool')
(hk, wk) = graph.get_kernel_size(onnx_node.op_type)
hk_list.append(hk)
wk_list.append(wk)
(hin, win) = graph.get_activation_size(onnx_node.input[0])
hin_list.append(hin)
win_list.append(win)
(hstr, wstr) = graph.get_stride(onnx_node.op_type)
h_str_list.append(hstr)
w_str_list.append(wstr)
(hpad, wpad) = graph.get_pad(onnx_node.op_type)
h_pad_list.append(hpad)
w_pad_list.append(wpad)
opt_mm_fw_list.append(0)
opt_mm_wg_list.append(0)
opt_mm_ig_list.append(0)
data_type_list.append(graph.get_precision())
data_layout_list.append('CHW')
weight_list.append(([], [])) # kernels
sumnode_connections.append(0)
elif onnx_node.op_type == 'GlobalAveragePool':
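# Global average pooling becomes an 'AvgPool' whose kernel covers the entire
# input feature map; 1x1 inputs are skipped since there is nothing to average.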
hk, wk = graph.get_hw(onnx_node.input[0])
if hk == 1 and wk == 1:
# There is nothing to average, skip this node
continue
in_ch_list.append(graph.get_channel_count(onnx_node.input[0]))
out_ch_list.append(graph.get_channel_count(onnx_node.output[0]))
layer_list.append('AvgPool')
hk_list.append(hk)
wk_list.append(wk)
(hin, win) = graph.get_activation_size(onnx_node.input[0])
hin_list.append(hin)
win_list.append(win)
h_str_list.append(1)
w_str_list.append(1)
h_pad_list.append(0)
w_pad_list.append(0)
opt_mm_fw_list.append(0)
opt_mm_wg_list.append(0)
opt_mm_ig_list.append(0)
data_type_list.append(graph.get_precision())
data_layout_list.append('CHW')
weight_list.append(([], [])) # kernels
sumnode_connections.append(0)
elif onnx_node.op_type == 'Conv':
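# Convolutions are classified as pointwise (1x1 kernel), depthwise
# (groups == in_ch == out_ch) or generic conv2d.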
in_ch_list.append(graph.get_channel_count(onnx_node.input[0]))
out_ch_list.append(graph.get_channel_count(onnx_node.output[0]))
if graph.is_pointwise(onnx_node.op_type):
ty = "PW"
elif graph.is_depthwise(onnx_node.op_type):
ty = "DW"
else:
ty = "conv2d"
layer_list.append(ty)
(hk, wk) = graph.get_kernel_size(onnx_node.op_type)
hk_list.append(hk)
wk_list.append(wk)
(hin, win) = graph.get_activation_size(onnx_node.input[0])
hin_list.append(hin)
win_list.append(win)
(hstr, wstr) = graph.get_stride(onnx_node.op_type)
h_str_list.append(hstr)
w_str_list.append(wstr)
(hpad, wpad) = graph.get_pad(onnx_node.op_type)
h_pad_list.append(hpad)
w_pad_list.append(wpad)
opt_mm_fw_list.append(0)
opt_mm_wg_list.append(0)
opt_mm_ig_list.append(0)
data_type_list.append(graph.get_precision())
# TODO: Read from file
# Note that this also determines the read position for in_ch_list and out_ch_list
data_layout_list.append('CHW')
weight_init = graph.get_init(onnx_node.input[1])
try:
bias_init = graph.get_init(onnx_node.input[2])
raise NotImplementedError("Biases are not implemented in trainlib")
except (KeyError, IndexError):
# Ignore missing bias
bias_init = []
pass
weight_list.append((weight_init, bias_init)) # kernels
sumnode_connections.append(0)
elif onnx_node.op_type == 'Clip':
# This does not handle ReLU6, as it is not supported by trainlib
layer_list.append('ReLU')
in_ch_list.append(graph.get_channel_count(onnx_node.input[0]))
out_ch_list.append(graph.get_channel_count(onnx_node.output[0]))
hk_list.append(1)
wk_list.append(1)
hin_list.append(1)
win_list.append(1)
h_str_list.append(1)
w_str_list.append(1)
h_pad_list.append(0)
w_pad_list.append(0)
opt_mm_fw_list.append(0)
opt_mm_wg_list.append(0)
opt_mm_ig_list.append(0)
data_layout_list.append('CHW')
data_type_list.append(graph.get_precision())
weight_list.append(([], []))
sumnode_connections.append(0)
else:
raise NotImplementedError("Model format not supported.")

data_dir = proj_folder+'data/'
if not os.path.exists(data_dir):
os.makedirs(data_dir)
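# Save the extracted parameters per layer as .npy files in the project's data/ folder.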
for i, (weight_init, bias_init) in enumerate(weight_list):
np.save(data_dir+f"l{i}w.npy", np.array(weight_init, dtype=("float32" if data_type_list[i] == "FP32" else "float16")))
np.save(data_dir+f"l{i}b.npy", np.array(bias_init, dtype=("float32" if data_type_list[i] == "FP32" else "float16")))

print("Generating project at location "+proj_folder)

# Check if Residual Connections are valid
sumnode_connections = composer.AdjustResConnList(sumnode_connections)

composer.CheckResConn(layer_list, in_ch_list, out_ch_list, hin_list, win_list, sumnode_connections, update_layer_list)

# Check if the network training fits L1
memocc = composer.DNN_Size_Checker(layer_list, in_ch_list, out_ch_list, hk_list, wk_list, hin_list, win_list,
h_str_list, w_str_list, h_pad_list, w_pad_list,
data_type_list, bias_list, update_layer_list,
L1_SIZE_BYTES, USE_DMA, CONV2D_USE_IM2COL)

print("DNN memory occupation: {} bytes of {} available L1 bytes ({}%).".format(memocc, L1_SIZE_BYTES, (memocc/L1_SIZE_BYTES)*100))

# Call DNN Composer on the user-provided graph
composer.DNN_Composer(proj_folder, project_name,
layer_list, in_ch_list, out_ch_list, hk_list, wk_list,
hin_list, win_list, h_str_list, w_str_list, h_pad_list, w_pad_list,
epochs, batch_size, learning_rate, optimizer, loss_fn,
NUM_CORES, data_type_list, weight_list, bias_list, update_layer_list, opt_mm_fw_list, opt_mm_wg_list, opt_mm_ig_list, sumnode_connections,
USE_DMA, PROFILE_SINGLE_LAYERS, SEPARATE_BACKWARD_STEPS, CONV2D_USE_IM2COL, PRINT_TRAIN_LOSS)

print("PULP project generation successful!")


else:
@@ -151,9 +476,9 @@
layer_list, in_ch_list, out_ch_list, hk_list, wk_list,
hin_list, win_list, h_str_list, w_str_list, h_pad_list, w_pad_list,
epochs, batch_size, learning_rate, optimizer, loss_fn,
NUM_CORES, data_type_list, bias_list, update_layer_list, opt_mm_fw_list, opt_mm_wg_list, opt_mm_ig_list,
NUM_CORES, data_type_list, weight_list, bias_list, update_layer_list, opt_mm_fw_list, opt_mm_wg_list, opt_mm_ig_list,
sumnode_connections, USE_DMA, PROFILE_SINGLE_LAYERS, SEPARATE_BACKWARD_STEPS, CONV2D_USE_IM2COL, PRINT_TRAIN_LOSS)

print("PULP project generation successful!")

pass
pass