#
# Copyright (c) 2022 TUM Department of Electrical and Computer Engineering.
#
# This file is part of MLonMCU.
# See https://github.com/tum-ei-eda/mlonmcu.git for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""TODO"""
import string
from datetime import datetime
from math import ceil, log2
# TODO: use this
# from tvm.relay.backend.utils import mangle_module_name
[docs]
def calc_pages(workspace_size, page_size=2**10):
    """Return how many pages are needed to hold ``workspace_size`` bytes.

    Args:
        workspace_size: Required workspace size; must not be negative.
        page_size: Size of a single page; must be a power of two.

    Returns:
        A tuple ``(num_pages, page_size_log2)`` of two ints, where ``num_pages``
        is ``workspace_size / page_size`` rounded up.

    Raises:
        AssertionError: If ``workspace_size`` is negative or ``page_size`` is
            not a power of two.
    """
    assert workspace_size >= 0, "Workspace size can not be negative"
    # log2 yields a float; it is integral exactly when page_size is a power of two.
    exponent = log2(page_size)
    assert exponent == int(exponent), "Page size has to be a power of two"
    return int(ceil(workspace_size / page_size)), int(exponent)
[docs]
def generate_common_includes():
    """Return the ``#include`` lines shared by every generated wrapper.

    The result starts with a newline and contains one ``#include`` directive
    per line, each terminated by a newline.
    """
    headers = (
        "<stdlib.h>",
        "<stdarg.h>",
        "<dlpack/dlpack.h>",
        '"tvm/runtime/crt/error_codes.h"',
        '"tvm/runtime/c_runtime_api.h"',
        '"printing.h"',
        '"exit.h"',
    )
    return "\n" + "".join("#include " + header + "\n" for header in headers)
[docs]
def generate_graph_includes():
    """Return the ``#include`` lines for the graph-executor wrapper.

    These are the common includes followed by a newline and the CRT headers
    for packed functions, the CRT itself, the graph executor and the page
    allocator.
    """
    graph_headers = (
        "tvm/runtime/crt/packed_func.h",
        "tvm/runtime/crt/crt.h",
        "tvm/runtime/crt/graph_executor.h",
        "tvm/runtime/crt/page_allocator.h",
    )
    extra = "\n" + "".join('#include "' + header + '"\n' for header in graph_headers)
    return generate_common_includes() + extra
[docs]
def generate_aot_includes(allocator):
    """Return the ``#include`` lines for the AoT wrapper.

    Args:
        allocator: If truthy, the stack allocator header is appended to the
            common includes.
    """
    common = generate_common_includes()
    if not allocator:
        return common
    return common + '#include "tvm/runtime/crt/stack_allocator.h"\n'
[docs]
def fill(template, **kwargs):
    """Substitute ``$name`` / ``${name}`` placeholders in ``template``.

    Args:
        template: Text using ``string.Template`` placeholder syntax.
        **kwargs: Values for the placeholders.

    Returns:
        The text with every placeholder replaced.

    Raises:
        KeyError: If a placeholder has no matching keyword argument.
    """
    compiled = string.Template(template)
    return compiled.substitute(kwargs)
[docs]
def getSizes(tensors):
    """Return the C definition of a ``sizes`` array for ``tensors``.

    Args:
        tensors: Iterable of objects with a ``size`` attribute.

    Returns:
        A string of the form ``size_t sizes[] = { <size>, <size>, };``.
    """
    entries = "".join(str(tensor.size) + ", " for tensor in tensors)
    return "size_t sizes[] = { " + entries + "};"
[docs]
def write_tvmrt_wrapper(path, graph, params, model_info, workspace_size, debug_arena=False):
    """Generate the graph-executor wrapper and write it to ``path``.

    Args:
        path: Destination file; it is created or overwritten.
        graph: Forwarded to ``generate_tvmrt_wrapper``.
        params: Forwarded to ``generate_tvmrt_wrapper``.
        model_info: Forwarded to ``generate_tvmrt_wrapper``.
        workspace_size: Forwarded to ``generate_tvmrt_wrapper``.
        debug_arena: Forwarded to ``generate_tvmrt_wrapper``.
    """
    # Generate before opening the file: opening in "w" mode truncates it, so a
    # failure during generation would otherwise leave an empty file behind.
    text = generate_tvmrt_wrapper(graph, params, model_info, workspace_size, debug_arena=debug_arena)
    with open(path, "w") as f:
        f.write(text)
[docs]
def generate_tvmrt_wrapper(graph, params, model_info, workspace_size, debug_arena=False):
    """Generate the C source of the wrapper around the TVM graph executor.

    Args:
        graph: Graph JSON as a string; embedded as the C string literal ``g_graph``.
        params: Sequence of byte values (e.g. ``bytes``); embedded as ``g_params``.
        model_info: Object providing ``in_tensors`` and ``out_tensors``. Each tensor
            is read for ``dtype``, ``shape`` and ``size``; input tensors also for ``name``.
        workspace_size: Workspace size in bytes used to dimension the CRT page memory.
        debug_arena: If true, the generated code defines ``DEBUG_ARENA_USAGE``
            unless it is already defined by the user/compiler.

    Returns:
        The generated C source as a string.

    Raises:
        RuntimeError: If a tensor uses a dtype without a known DLDataType mapping.
    """
    crtNumPages, crtPageSizeLog2 = calc_pages(workspace_size)

    def escapeJson(j):
        # Backslashes have to be escaped first: otherwise JSON escape sequences
        # (e.g. \" or \n inside a JSON string) would be consumed by the C compiler
        # and the embedded JSON would be corrupted or the literal terminated early.
        escaped = j.replace("\\", "\\\\").replace('"', '\\"')
        # Backslash-newline continues the C string literal on the next source line.
        return escaped.replace("\n", "\\\n")

    def toCArray(data):
        # One hex literal per byte, each followed by ", ".
        return "".join(hex(c) + ", " for c in data)

    # (dtype name, DLDataType initializer) pairs; compared with == so that any
    # dtype object that compares equal to the name is accepted.
    dtypeTable = (
        ("float32", "{kDLFloat, 32, 1}"),
        ("uint8", "{kDLUInt, 8, 1}"),
        ("int8", "{kDLInt, 8, 1}"),
        ("uint64", "{kDLUInt, 64, 1}"),
        ("int64", "{kDLInt, 64, 1}"),
    )

    def dtypeInitializer(dtype):
        for name, initializer in dtypeTable:
            if dtype == name:
                return initializer
        raise RuntimeError(f"Invalid type: {dtype}")

    def getMeta(tensors, withNames=False):
        # Emits the C arrays (names, dtypes, shapes, ndims, data buffers) that the
        # TVMWrap_Get{Input,Output}Ptr functions index into.
        parts = []
        if withNames:
            parts.append("const char *names[] = { ")
            parts.extend('"' + t.name + '", ' for t in tensors)
            parts.append("};\n ")
        parts.append("DLDataType dtypes[] = {")
        parts.extend(dtypeInitializer(t.dtype) + ", " for t in tensors)
        parts.append("};\n ")
        for i, t in enumerate(tensors):
            parts.append("int64_t shape_" + str(i) + "[] = { ")
            parts.extend(str(s) + ", " for s in t.shape)
            parts.append("};\n ")
        parts.append("int64_t *shapes[] = { ")
        parts.extend("shape_" + str(i) + ", " for i, _ in enumerate(tensors))
        parts.append("};\n")
        parts.append(" size_t ndims[] = { ")
        parts.extend(str(len(t.shape)) + ", " for t in tensors)
        parts.append("};\n ")
        parts.extend("static uint8_t data_" + str(i) + "[" + str(t.size) + "];\n" for i, t in enumerate(tensors))
        parts.append(" uint8_t *data[] = { ")
        parts.extend("data_" + str(i) + ", " for i, _ in enumerate(tensors))
        parts.append("};")
        return "".join(parts)

    out = ""
    out += generate_header()
    out += generate_graph_includes()
    out += 'const char * const g_graph = "' + escapeJson(graph) + '";\n'
    out += "const char g_params[] = { " + toCArray(params) + "\n};\n"
    out += "const uint64_t g_params_size = " + str(len(params)) + ";\n"
    mainCode = """
#define CRT_MEMORY_NUM_PAGES ${numPages}
#define CRT_MEMORY_PAGE_SIZE_LOG2 ${pageSizeLog2}
"""
    if debug_arena:  # This will enable the feature only if it is not overwritten by the user/compiler
        mainCode += """
#ifndef DEBUG_ARENA_USAGE
#define DEBUG_ARENA_USAGE 1
#endif
"""
    # NOTE(review): the generated TVMLogf forwards a va_list to DBGPRINTF; confirm
    # that DBGPRINTF (declared outside this file) accepts a va_list argument.
    mainCode += """
#ifdef DEBUG_ARENA_USAGE
size_t max_arena_usage = 0;
#endif
static uint8_t g_crt_memory[CRT_MEMORY_NUM_PAGES * (1 << CRT_MEMORY_PAGE_SIZE_LOG2)];
static MemoryManagerInterface* g_memory_manager;
/*! \\brief macro to do C API call */
#define TVM_CCALL(func) \\
do { \\
tvm_crt_error_t ret = (func); \\
if (ret != kTvmErrorNoError) { \\
TVMLogf("%s: %d: error: %s\\n", __FILE__, __LINE__, TVMGetLastError()); \\
TVMPlatformAbort(ret); \\
} \\
} while (0)
TVMModuleHandle TVMArgs_AsModuleHandle(const TVMArgs* args, size_t index);
void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t code)
{
mlonmcu_exit(1);
}
void TVMLogf(const char* msg, ...)
{
va_list args;
va_start(args, msg);
DBGPRINTF(msg, args);
va_end(args);
}
tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr)
{
tvm_crt_error_t ret = g_memory_manager->Allocate(g_memory_manager, num_bytes, dev, out_ptr);
#ifdef DEBUG_ARENA_USAGE
// Use this to estimate the required number of pages
// Run in DEBUG mode in insert value of MAX printed last into the following equation:
// (This will round to the next power of 2 which might not be wanted!)
// num_pages = 2**ceil(log2(MAX/page_size))
size_t end = (size_t)(*out_ptr-(void*)g_crt_memory)+num_bytes;
if (end > max_arena_usage)
{
max_arena_usage = end;
}
#endif
return ret;
}
tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev)
{
return g_memory_manager->Free(g_memory_manager, ptr, dev);
}
tvm_crt_error_t TVMPlatformTimerStart()
{
return kTvmErrorFunctionCallNotImplemented;
}
tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds)
{
return kTvmErrorFunctionCallNotImplemented;
}
void *g_handle = NULL;
int TVMWrap_Init()
{
int64_t device_type = kDLCPU;
int64_t device_id = 0;
TVMByteArray params;
params.data = g_params;
params.size = g_params_size;
DLDevice dev;
dev.device_type = (DLDeviceType)device_type;
dev.device_id = device_id;
// get pointers
TVM_CCALL(PageMemoryManagerCreate(&g_memory_manager, g_crt_memory, sizeof(g_crt_memory),
CRT_MEMORY_PAGE_SIZE_LOG2));
TVM_CCALL(TVMInitializeRuntime());
TVMPackedFunc pf;
TVMArgs args = TVMArgs_Create(NULL, NULL, 0);
TVM_CCALL(TVMPackedFunc_InitGlobalFunc(&pf, "runtime.SystemLib", &args));
TVM_CCALL(TVMPackedFunc_Call(&pf));
TVMModuleHandle mod_syslib = TVMArgs_AsModuleHandle(&pf.ret_value, 0);
// run modules
TVMGraphExecutor* graph_executor = NULL;
TVM_CCALL(TVMGraphExecutor_Create(g_graph, mod_syslib, &dev, &graph_executor));
TVM_CCALL(TVMGraphExecutor_LoadParams(graph_executor, params.data, params.size));
//return graph_executor;
g_handle = graph_executor;
return 0; // TODO
}
void *TVMWrap_GetInputPtr(int index)
{
${inMeta}
DLTensor input;
input.data = (void*)data[index];
DLDevice device = {kDLCPU, 0};
input.device = device;
input.ndim = ndims[index];
input.dtype = dtypes[index];
input.shape = shapes[index];
input.strides = NULL;
input.byte_offset = 0;
TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)g_handle;
TVMGraphExecutor_SetInput(graph_executor, names[index], &input);
return data[index];
}
size_t TVMWrap_GetInputSize(int index)
{
${inSizes}
return sizes[index];
}
size_t TVMWrap_GetNumInputs()
{
return ${numInputs};
}
int TVMWrap_Run()
{
TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)g_handle;
TVMGraphExecutor_Run(graph_executor);
#if DEBUG_ARENA_USAGE
DBGPRINTF("\\nGraph executor arena max usage after model invocation: %lu bytes\\n", max_arena_usage);
#endif // DEBUG_ARENA_USAGE
return 0; // TODO
}
void *TVMWrap_GetOutputPtr(int index)
{
${outMeta}
DLTensor output;
output.data = (void*)data[index];
DLDevice device = {kDLCPU, 0};
output.device = device;
output.ndim = ndims[index];
output.dtype = dtypes[index];
output.shape = shapes[index];
output.strides = NULL;
output.byte_offset = 0;
TVMGraphExecutor* graph_executor = (TVMGraphExecutor*)g_handle;
TVMGraphExecutor_GetOutput(graph_executor, index, &output);
return data[index];
}
size_t TVMWrap_GetOutputSize(int index)
{
${outSizes}
return sizes[index];
}
size_t TVMWrap_GetNumOutputs()
{
return ${numOutputs};
}
"""
    out += fill(
        mainCode,
        inMeta=getMeta(model_info.in_tensors, True),
        outMeta=getMeta(model_info.out_tensors),
        inSizes=getSizes(model_info.in_tensors),
        outSizes=getSizes(model_info.out_tensors),
        numInputs=len(model_info.in_tensors),
        numOutputs=len(model_info.out_tensors),
        numPages=crtNumPages,
        pageSizeLog2=crtPageSizeLog2,
    )
    return out
[docs]
def generate_tvmaot_wrapper(model_info, workspace_size, mod_name, api="c", debug_arena=False):
    """Generate the C source of the wrapper around a TVM AoT module.

    Args:
        model_info: Object providing ``in_tensors`` and ``out_tensors``. Each tensor
            is read for ``size``; with ``api="c"`` also for ``name``.
        workspace_size: Size in bytes of the AoT workspace. If it is greater than
            zero a stack allocator backed by a static buffer is generated,
            otherwise the allocation hooks return "not implemented".
        mod_name: Module name; the generated code refers to ``tvmgen_<mod_name>``.
        api: Interface of the generated module, either ``"c"`` or ``"packed"``.
        debug_arena: If true (and a workspace is used), the generated code defines
            ``DEBUG_ARENA_USAGE`` and ``TVMAOT_DEBUG_ALLOCATIONS`` unless they are
            already defined by the user/compiler.

    Returns:
        The generated C source as a string.

    Raises:
        RuntimeError: If ``api`` is neither ``"c"`` nor ``"packed"``.
    """
    modPrefix = f"tvmgen_{mod_name}"
    # Characters of tensor names that are replaced by "_" to form C struct member names.
    sanitizeTable = str.maketrans(":/.;", "____")

    def writeTensors(in_tensors, out_tensors, modPrefix, api):
        if api not in ("c", "packed"):
            raise RuntimeError("api has to be either 'c' or 'packed'")

        def writeTensorsHelper(tensors, prefix, out=False):
            direction = "out" if out else "in"
            names = [f"{direction}put{i}_data" for i in range(len(tensors))]
            # One static buffer per tensor plus an array of pointers to them.
            parts = ["char " + name + "[" + str(t.size) + "];\n" for name, t in zip(names, tensors)]
            parts.append(f"void* {direction}puts[] = {{" + ", ".join(names) + "};\n")
            if api == "c":
                # The C interface additionally takes a struct with one member per tensor.
                parts.append(f"struct {prefix}_{direction}puts {prefix}_{direction}puts = {{" + "\n")
                for name, t in zip(names, tensors):
                    tensor_name = t.name.translate(sanitizeTable)
                    parts.append(f" .{tensor_name} = {name}," + "\n")
                parts.append("};\n")
            return "".join(parts)

        retStr = """
// Define data for input and output tensors
"""
        retStr += writeTensorsHelper(in_tensors, modPrefix, False)
        retStr += writeTensorsHelper(out_tensors, modPrefix, True)
        return retStr

    out = ""
    out += generate_header()
    includes = generate_aot_includes(workspace_size > 0)
    if api == "c":
        includes += '#include "${modPrefix}.h"\n'
    out += fill(includes, modPrefix=modPrefix)
    out += "\n"
    out += writeTensors(model_info.in_tensors, model_info.out_tensors, modPrefix, api)
    # NOTE(review): the generated TVMLogf forwards a va_list to DBGPRINTF; confirm
    # that DBGPRINTF (declared outside this file) accepts a va_list argument.
    logging_code = """
void TVMLogf(const char* msg, ...)
{
va_list args;
va_start(args, msg);
DBGPRINTF(msg, args);
va_end(args);
}
"""
    out += logging_code
    if workspace_size > 0:
        workspace_code = """
#define WORKSPACE_SIZE (${workspaceBytes})
static uint8_t g_aot_memory[WORKSPACE_SIZE];
tvm_workspace_t app_workspace;
"""
        if debug_arena:  # This will enable the feature only if it is not overwritten by the user/compiler
            workspace_code += """
#ifndef DEBUG_ARENA_USAGE
#define DEBUG_ARENA_USAGE 1
#endif
#ifndef TVMAOT_DEBUG_ALLOCATIONS
#define TVMAOT_DEBUG_ALLOCATIONS 1
#endif
"""
        workspace_code += """
#ifdef DEBUG_ARENA_USAGE
size_t max_arena_usage = 0;
#endif
tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr)
{
#ifdef TVMAOT_DEBUG_ALLOCATIONS
if (num_bytes > (app_workspace.workspace + app_workspace.workspace_size - app_workspace.next_alloc))
{
TVMLogf("TVMPlatformMemoryAllocate(%lu): Allocation would overflow arena!\\n", num_bytes);
return kTvmErrorPlatformNoMemory;
}
#endif
tvm_crt_error_t ret = StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr);
#ifdef DEBUG_ARENA_USAGE
// Use this to estimate the required number of bytes for the arena
size_t end = app_workspace.next_alloc-app_workspace.workspace;
if (end > max_arena_usage)
{
max_arena_usage = end;
}
#endif
return ret;
}
tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev)
{
#ifdef TVMAOT_DEBUG_ALLOCATIONS
if ((uint8_t*)ptr < app_workspace.workspace || (uint8_t*)ptr >= app_workspace.next_alloc)
{
TVMLogf("TVMPlatformMemoryFree(%p): Invalid Memory region to be free'd!\\n", ptr);
return kTvmErrorPlatformNoMemory;
}
#endif
return StackMemoryManager_Free(&app_workspace, ptr);
}
"""
        out += fill(workspace_code, workspaceBytes=int(workspace_size))
    else:
        workspace_code = """
tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr)
{
return kTvmErrorFunctionCallNotImplemented;
}
tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev)
{
return kTvmErrorFunctionCallNotImplemented;
}
"""
        out += workspace_code
    mainCode = ""
    if api == "packed":
        # The packed entry point has no generated header, so declare it here.
        mainCode += (
            "int32_t ${modPrefix}_run(void* args, void* type_code, int num_args,"
            + " void* out_value, void* out_type_code, void* resource_handle);\n"
        )
    mainCode += """
void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t code)
{
mlonmcu_exit(1);
}
TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override)
{
return 0;
}
int TVMWrap_Init()
{
"""
    if workspace_size > 0:
        mainCode += " StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);"
    mainCode += """
return 0; // TODO
}
void *TVMWrap_GetInputPtr(int index)
{
return inputs[index];
}
size_t TVMWrap_GetInputSize(int index)
{
${inSizes}
return sizes[index];
}
size_t TVMWrap_GetNumInputs()
{
return ${numInputs};
}
int TVMWrap_Run()
{"""
    if api == "c":
        mainCode += """
int ret_val = ${modPrefix}_run(&${modPrefix}_inputs, &${modPrefix}_outputs);
if (ret_val)
{
TVMPlatformAbort(kTvmErrorPlatformCheckFailure);
}
"""
    elif api == "packed":
        # No early "return 0;" here: the arena usage report appended below (and
        # the final return) must stay reachable, as in the "c" variant.
        mainCode += """
static DLDevice fake_device = {kDLCPU, 0};
static int64_t fake_dims = 0;
static int64_t fake_shape = {0};
DLTensor tensors[${numInputs} + ${numOutputs}];
TVMValue values[${numInputs} + ${numOutputs}];
int32_t typeids[${numInputs} + ${numOutputs}];
for (size_t i = 0; i < ${numInputs}+${numOutputs}; i++)
{
tensors[i].device = fake_device;
tensors[i].data = (i < ${numInputs}) ? inputs[i] : outputs[i - ${numInputs}];
tensors[i].shape = &fake_shape;
tensors[i].ndim = fake_dims;
tensors[i].byte_offset = 0;
tensors[i].strides = NULL;
values[i].v_handle = &tensors[i];
}
int ret_val = ${modPrefix}_run(values, typeids, 0, NULL, 0, NULL);
if (ret_val)
{
TVMPlatformAbort(kTvmErrorPlatformCheckFailure);
}
"""
    else:
        raise RuntimeError("api can only be 'c' or 'packed'")
    if workspace_size > 0:
        mainCode += """
#if DEBUG_ARENA_USAGE
DBGPRINTF("\\nAoT executor arena max usage after model invocation: %lu bytes\\n", max_arena_usage);
#endif // DEBUG_ARENA_USAGE
"""
    mainCode += """
return 0; // TODO
}
void *TVMWrap_GetOutputPtr(int index)
{
return outputs[index];
}
size_t TVMWrap_GetOutputSize(int index)
{
${outSizes}
return sizes[index];
}
size_t TVMWrap_GetNumOutputs()
{
return ${numOutputs};
}
"""
    out += fill(
        mainCode,
        inSizes=getSizes(model_info.in_tensors),
        outSizes=getSizes(model_info.out_tensors),
        numInputs=len(model_info.in_tensors),
        numOutputs=len(model_info.out_tensors),
        modPrefix=modPrefix,
    )
    return out
[docs]
def write_tvmaot_wrapper(path, model_info, workspace_size, mod_name, api="c", debug_arena=False):
    """Generate the AoT wrapper and write it to ``path``.

    Args:
        path: Destination file; it is created or overwritten.
        model_info: Forwarded to ``generate_tvmaot_wrapper``.
        workspace_size: Forwarded to ``generate_tvmaot_wrapper``.
        mod_name: Forwarded to ``generate_tvmaot_wrapper``.
        api: Forwarded to ``generate_tvmaot_wrapper``.
        debug_arena: Forwarded to ``generate_tvmaot_wrapper``.
    """
    # Generate before opening the file: opening in "w" mode truncates it, so a
    # failure during generation (e.g. an invalid api) would otherwise leave an
    # empty file behind.
    text = generate_tvmaot_wrapper(model_info, workspace_size, mod_name, api=api, debug_arena=debug_arena)
    with open(path, "w") as f:
        f.write(text)