Source code for mlonmcu.flow.tvm.backend.wrapper

#
# Copyright (c) 2022 TUM Department of Electrical and Computer Engineering.
#
# This file is part of MLonMCU.
# See https://github.com/tum-ei-eda/mlonmcu.git for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""TODO"""

import string
from datetime import datetime
from math import ceil, log2

# TODO: use this
# from tvm.relay.backend.utils import mangle_module_name


[docs] def calc_pages(workspace_size, page_size=2**10): # Determine the number of required pages assert workspace_size >= 0, "Workspace size can not be negative" crtPageSizeLog2 = log2(page_size) assert crtPageSizeLog2 == int(crtPageSizeLog2), "Page size has to be a power of two" crtNumPages = ceil(workspace_size / page_size) return int(crtNumPages), int(crtPageSizeLog2)
[docs] def generate_wrapper_header(): out = """#ifndef TVM_WRAPPER_H #define TVM_WRAPPER_H #include <stddef.h> int TVMWrap_Init(); void *TVMWrap_GetInputPtr(int index); size_t TVMWrap_GetInputSize(int index); size_t TVMWrap_GetNumInputs(); int TVMWrap_Run(); void *TVMWrap_GetOutputPtr(int index); size_t TVMWrap_GetOutputSize(int index); size_t TVMWrap_GetNumOutputs(); #endif // TVM_WRAPPER_H """ return out
[docs] def generate_header(): time = datetime.now() header = f"""// This file is generated. Do not edit. // Generated on: {time} """ return header
[docs] def generate_common_includes(): return """ #include <stdlib.h> #include <stdarg.h> #include <dlpack/dlpack.h> #include "tvm/runtime/crt/error_codes.h" #include "tvm/runtime/c_runtime_api.h" #include "printing.h" #include "exit.h" """
[docs] def generate_graph_includes(): out = generate_common_includes() out += """ #include "tvm/runtime/crt/packed_func.h" #include "tvm/runtime/crt/crt.h" #include "tvm/runtime/crt/graph_executor.h" #include "tvm/runtime/crt/page_allocator.h" """ return out
[docs] def generate_aot_includes(allocator): out = generate_common_includes() if allocator: out += '#include "tvm/runtime/crt/stack_allocator.h"\n' return out
[docs] def fill(template, **kwargs): return string.Template(template).substitute(**kwargs)
[docs] def getSizes(tensors): out = "size_t sizes[] = { " for t in tensors: out += str(t.size) + ", " out += "};" return out
[docs] def write_tvmrt_wrapper(path, graph, params, model_info, workspace_size): with open(path, "w") as f: text = generate_tvmrt_wrapper(graph, params, model_info, workspace_size) f.write(text)
[docs] def generate_tvmrt_wrapper(graph, params, model_info, workspace_size, debug_arena=False): crtNumPages, crtPageSizeLog2 = calc_pages(workspace_size) def escapeJson(j): return j.replace('"', '\\"').replace("\n", "\\\n") def toCArray(bin): result = "" for c in bin: result += hex(c) + ", " return result def getMeta(tensors, withNames=False): out = "" if withNames: out = "const char *names[] = { " for t in tensors: out += '"' + t.name + '", ' out += "};\n " out += "DLDataType dtypes[] = {" for t in tensors: if t.dtype == "float32": out += "{kDLFloat, 32, 1}" elif t.dtype == "uint8": out += "{kDLUInt, 8, 1}" elif t.dtype == "int8": out += "{kDLInt, 8, 1}" elif t.dtype == "uint64": out += "{kDLUInt, 64, 1}" elif t.dtype == "int64": out += "{kDLInt, 64, 1}" else: raise RuntimeError(f"Invalid type: {t.dtype}") out += ", " out += "};\n " for i, t in enumerate(tensors): out += "int64_t shape_" + str(i) + "[] = { " for s in t.shape: out += str(s) + ", " out += "};\n " out += "int64_t *shapes[] = { " for i, t in enumerate(tensors): out += "shape_" + str(i) + ", " out += "};\n" out += " size_t ndims[] = { " for i, t in enumerate(tensors): out += str(len(t.shape)) + ", " out += "};\n " for i, t in enumerate(tensors): out += "static uint8_t data_" + str(i) + "[" + str(t.size) + "];\n" out += " uint8_t *data[] = { " for i, t in enumerate(tensors): out += "data_" + str(i) + ", " out += "};" return out out = "" out += generate_header() out += generate_graph_includes() out += 'const char * const g_graph = "' + escapeJson(graph) + '";\n' out += "const char g_params[] = { " + toCArray(params) + "\n};\n" out += "const uint64_t g_params_size = " + str(len(params)) + ";\n" mainCode = """ #define CRT_MEMORY_NUM_PAGES ${numPages} #define CRT_MEMORY_PAGE_SIZE_LOG2 ${pageSizeLog2} """ if debug_arena: # This will enable the feature only if it is not overwritten by the user/compiler mainCode += """ #ifndef DEBUG_ARENA_USAGE #define DEBUG_ARENA_USAGE 1 #endif """ mainCode += """ #ifdef DEBUG_ARENA_USAGE size_t max_arena_usage = 0; #endif static uint8_t g_crt_memory[CRT_MEMORY_NUM_PAGES * (1 << CRT_MEMORY_PAGE_SIZE_LOG2)]; static MemoryManagerInterface* g_memory_manager; /*! \\brief macro to do C API call */ #define TVM_CCALL(func) \\ do { \\ tvm_crt_error_t ret = (func); \\ if (ret != kTvmErrorNoError) { \\ TVMLogf("%s: %d: error: %s\\n", __FILE__, __LINE__, TVMGetLastError()); \\ TVMPlatformAbort(ret); \\ } \\ } while (0) TVMModuleHandle TVMArgs_AsModuleHandle(const TVMArgs* args, size_t index); void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t code) { mlonmcu_exit(1); } void TVMLogf(const char* msg, ...) { va_list args; va_start(args, msg); DBGPRINTF(msg, args); va_end(args); } tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { tvm_crt_error_t ret = g_memory_manager->Allocate(g_memory_manager, num_bytes, dev, out_ptr); #ifdef DEBUG_ARENA_USAGE // Use this to estimate the required number of pages // Run in DEBUG mode in insert value of MAX printed last into the following equation: // (This will round to the next power of 2 which might not be wanted!) // num_pages = 2**ceil(log2(MAX/page_size)) size_t end = (size_t)(*out_ptr-(void*)g_crt_memory)+num_bytes; if (end > max_arena_usage) { max_arena_usage = end; } #endif return ret; } tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { return g_memory_manager->Free(g_memory_manager, ptr, dev); } tvm_crt_error_t TVMPlatformTimerStart() { return kTvmErrorFunctionCallNotImplemented; } tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) { return kTvmErrorFunctionCallNotImplemented; } void *g_handle = NULL; int TVMWrap_Init() { int64_t device_type = kDLCPU; int64_t device_id = 0; TVMByteArray params; params.data = g_params; params.size = g_params_size; DLDevice dev; dev.device_type = (DLDeviceType)device_type; dev.device_id = device_id; // get pointers TVM_CCALL(PageMemoryManagerCreate(&g_memory_manager, g_crt_memory, sizeof(g_crt_memory), CRT_MEMORY_PAGE_SIZE_LOG2)); TVM_CCALL(TVMInitializeRuntime()); TVMPackedFunc pf; TVMArgs args = TVMArgs_Create(NULL, NULL, 0); TVM_CCALL(TVMPackedFunc_InitGlobalFunc(&pf, "runtime.SystemLib", &args)); TVM_CCALL(TVMPackedFunc_Call(&pf)); TVMModuleHandle mod_syslib = TVMArgs_AsModuleHandle(&pf.ret_value, 0); // run modules TVMGraphExecutor* graph_executor = NULL; TVM_CCALL(TVMGraphExecutor_Create(g_graph, mod_syslib, &dev, &graph_executor)); TVM_CCALL(TVMGraphExecutor_LoadParams(graph_executor, params.data, params.size)); //return graph_executor; g_handle = graph_executor; return 0; // TODO } void *TVMWrap_GetInputPtr(int index) { ${inMeta} DLTensor input; input.data = (void*)data[index]; DLDevice device = {kDLCPU, 0}; input.device = device; input.ndim = ndims[index]; input.dtype = dtypes[index]; input.shape = shapes[index]; input.strides = NULL; input.byte_offset = 0; TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)g_handle; TVMGraphExecutor_SetInput(graph_executor, names[index], &input); return data[index]; } size_t TVMWrap_GetInputSize(int index) { ${inSizes} return sizes[index]; } size_t TVMWrap_GetNumInputs() { return ${numInputs}; } int TVMWrap_Run() { TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)g_handle; TVMGraphExecutor_Run(graph_executor); #if DEBUG_ARENA_USAGE DBGPRINTF("\\nGraph executor arena max usage after model invocation: %lu bytes\\n", max_arena_usage); #endif // DEBUG_ARENA_USAGE return 0; // TODO } void *TVMWrap_GetOutputPtr(int index) { ${outMeta} DLTensor output; output.data = (void*)data[index]; DLDevice device = {kDLCPU, 0}; output.device = device; output.ndim = ndims[index]; output.dtype = dtypes[index]; output.shape = shapes[index]; output.strides = NULL; output.byte_offset = 0; TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)g_handle; TVMGraphExecutor_GetOutput(graph_executor, index, &output); return data[index]; } size_t TVMWrap_GetOutputSize(int index) { ${outSizes} return sizes[index]; } size_t TVMWrap_GetNumOutputs() { return ${numOutputs}; } """ out += fill( mainCode, inMeta=getMeta(model_info.in_tensors, True), outMeta=getMeta(model_info.out_tensors), inSizes=getSizes(model_info.in_tensors), outSizes=getSizes(model_info.out_tensors), numInputs=len(model_info.in_tensors), numOutputs=len(model_info.out_tensors), numPages=crtNumPages, pageSizeLog2=crtPageSizeLog2, ) return out
[docs] def generate_tvmaot_wrapper(model_info, workspace_size, mod_name, api="c", debug_arena=False): modPrefix = f"tvmgen_{mod_name}" def writeTensors(in_tensors, out_tensors, modPrefix, api): if api == "c": retStr = """ // Define data for input and output tensors """ def writeTensorsHelper(tensors, prefix, out=False): lenTensors = len(tensors) direction = "out" if out else "in" ret = "" names = [f"{direction}put{i}_data" for i in range(lenTensors)] for i, t in enumerate(tensors): ret += "char " + names[i] + "[" + str(t.size) + "];\n" ret += f"void* {direction}puts[] = {{" + ", ".join(names) + "};\n" ret += f"struct {prefix}_{direction}puts {prefix}_{direction}puts = {{" + "\n" for i, t in enumerate(tensors): tensor_name = t.name.replace(":", "_").replace("/", "_").replace(".", "_").replace(";", "_") ret += f" .{tensor_name} = {names[i]}," + "\n" ret += "};\n" return ret retStr += writeTensorsHelper(in_tensors, modPrefix, False) retStr += writeTensorsHelper(out_tensors, modPrefix, True) return retStr elif api == "packed": retStr = """ // Define data for input and output tensors """ def writeTensorsHelper(tensors, prefix, out=False): lenTensors = len(tensors) direction = "out" if out else "in" ret = "" names = [f"{direction}put{i}_data" for i in range(lenTensors)] for i, t in enumerate(tensors): ret += "char " + names[i] + "[" + str(t.size) + "];\n" ret += f"void* {direction}puts[] = {{" + ", ".join(names) + "};\n" return ret retStr += writeTensorsHelper(in_tensors, modPrefix, False) retStr += writeTensorsHelper(out_tensors, modPrefix, True) return retStr else: raise RuntimeError("api has to be either 'c' or 'packed'") out = "" out += generate_header() includes = generate_aot_includes(workspace_size > 0) if api == "c": includes += '#include "${modPrefix}.h"\n' out += fill(includes, modPrefix=modPrefix) out += "\n" out += writeTensors(model_info.in_tensors, model_info.out_tensors, modPrefix, api) logging_code = """ void TVMLogf(const char* msg, ...) { va_list args; va_start(args, msg); DBGPRINTF(msg, args); va_end(args); } """ out += logging_code if workspace_size > 0: workspace_code = """ #define WORKSPACE_SIZE (${workspaceBytes}) static uint8_t g_aot_memory[WORKSPACE_SIZE]; tvm_workspace_t app_workspace; """ if debug_arena: # This will enable the feature only if it is not overwritten by the user/compiler workspace_code += """ #ifndef DEBUG_ARENA_USAGE #define DEBUG_ARENA_USAGE 1 #endif #ifndef TVMAOT_DEBUG_ALLOCATIONS #define TVMAOT_DEBUG_ALLOCATIONS 1 #endif """ workspace_code += """ #ifdef DEBUG_ARENA_USAGE size_t max_arena_usage = 0; #endif tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { #ifdef TVMAOT_DEBUG_ALLOCATIONS if (num_bytes > (app_workspace.workspace + app_workspace.workspace_size - app_workspace.next_alloc)) { TVMLogf("TVMPlatformMemoryAllocate(%lu): Allocation would overflow arena!\\n", num_bytes); return kTvmErrorPlatformNoMemory; } #endif tvm_crt_error_t ret = StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr); #ifdef DEBUG_ARENA_USAGE // Use this to estimate the required number of bytes for the arena size_t end = app_workspace.next_alloc-app_workspace.workspace; if (end > max_arena_usage) { max_arena_usage = end; } #endif return ret; } tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { #ifdef TVMAOT_DEBUG_ALLOCATIONS if ((uint8_t*)ptr < app_workspace.workspace || (uint8_t*)ptr >= app_workspace.next_alloc) { TVMLogf("TVMPlatformMemoryFree(%p): Invalid Memory region to be free'd!\\n", ptr); return kTvmErrorPlatformNoMemory; } #endif return StackMemoryManager_Free(&app_workspace, ptr); } """ out += fill(workspace_code, workspaceBytes=int(workspace_size)) else: workspace_code = """ tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { return kTvmErrorFunctionCallNotImplemented; } tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { return kTvmErrorFunctionCallNotImplemented; } """ out += workspace_code mainCode = "" if api == "packed": mainCode += ( "int32_t ${modPrefix}_run(void* args, void* type_code, int num_args," + " void* out_value, void* out_type_code, void* resource_handle);\n" ) mainCode += """ void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t code) { mlonmcu_exit(1); } TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { return 0; } int TVMWrap_Init() { """ if workspace_size > 0: mainCode += " StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);" mainCode += """ return 0; // TODO } void *TVMWrap_GetInputPtr(int index) { return inputs[index]; } size_t TVMWrap_GetInputSize(int index) { ${inSizes} return sizes[index]; } size_t TVMWrap_GetNumInputs() { return ${numInputs}; } int TVMWrap_Run() {""" if api == "c": mainCode += """ int ret_val = ${modPrefix}_run(&${modPrefix}_inputs, &${modPrefix}_outputs); if (ret_val) { TVMPlatformAbort(kTvmErrorPlatformCheckFailure); } """ elif api == "packed": mainCode += """ static DLDevice fake_device = {kDLCPU, 0}; static int64_t fake_dims = 0; static int64_t fake_shape = {0}; DLTensor tensors[${numInputs} + ${numOutputs}]; TVMValue values[${numInputs} + ${numOutputs}]; int32_t typeids[${numInputs} + ${numOutputs}]; for (size_t i = 0; i < ${numInputs}+${numOutputs}; i++) { tensors[i].device = fake_device; tensors[i].data = (i < ${numInputs}) ? inputs[i] : outputs[i - ${numInputs}]; tensors[i].shape = &fake_shape; tensors[i].ndim = fake_dims; tensors[i].byte_offset = 0; tensors[i].strides = NULL; values[i].v_handle = &tensors[i]; } int ret_val = ${modPrefix}_run(values, typeids, 0, NULL, 0, NULL); if (ret_val) { TVMPlatformAbort(kTvmErrorPlatformCheckFailure); } return 0; """ else: raise RuntimeError("api can only be 'c' or 'packed'") if workspace_size > 0: mainCode += """ #if DEBUG_ARENA_USAGE DBGPRINTF("\\nAoT executor arena max usage after model invocation: %lu bytes\\n", max_arena_usage); #endif // DEBUG_ARENA_USAGE """ mainCode += """ return 0; // TODO } void *TVMWrap_GetOutputPtr(int index) { return outputs[index]; } size_t TVMWrap_GetOutputSize(int index) { ${outSizes} return sizes[index]; } size_t TVMWrap_GetNumOutputs() { return ${numOutputs}; } """ out += fill( mainCode, inSizes=getSizes(model_info.in_tensors), outSizes=getSizes(model_info.out_tensors), numInputs=len(model_info.in_tensors), numOutputs=len(model_info.out_tensors), modPrefix=modPrefix, ) return out
[docs] def write_tvmaot_wrapper(path, model_info, workspace_size, mod_name, api="c"): with open(path, "w") as f: text = generate_tvmaot_wrapper(model_info, workspace_size, mod_name, api=api) f.write(text)