Commit ccbfe557 authored by Cao Duc Anh

edit some things

parent 59e98787
@@ -2,4 +2,5 @@
 vietocr
 albumentations
 matplotlib
-onnxruntime
\ No newline at end of file
+onnxruntime
+onnx
\ No newline at end of file
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import numpy as np\n",
"from PIL import Image\n",
"import torchvision\n",
"import cv2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import yaml\n",
"\n",
"def load_config(config_file):\n",
" with open(config_file, encoding='utf-8') as f:\n",
" config = yaml.safe_load(f)\n",
"\n",
" return config\n",
"\n",
"class Vocab():\n",
" def __init__(self, chars):\n",
" self.pad = 0\n",
" self.go = 1\n",
" self.eos = 2\n",
" self.mask_token = 3\n",
"\n",
" self.chars = chars\n",
"\n",
" self.c2i = {c:i+4 for i, c in enumerate(chars)}\n",
"\n",
" self.i2c = {i+4:c for i, c in enumerate(chars)}\n",
" \n",
" self.i2c[0] = '<pad>'\n",
" self.i2c[1] = '<sos>'\n",
" self.i2c[2] = '<eos>'\n",
" self.i2c[3] = '*'\n",
"\n",
" def encode(self, chars):\n",
" return [self.go] + [self.c2i[c] for c in chars] + [self.eos]\n",
" \n",
" def decode(self, ids):\n",
" first = 1 if self.go in ids else 0\n",
" last = ids.index(self.eos) if self.eos in ids else None\n",
" sent = ''.join([self.i2c[i] for i in ids[first:last]])\n",
" return sent\n",
" \n",
" def __len__(self):\n",
" return len(self.c2i) + 4\n",
" \n",
" def batch_decode(self, arr):\n",
" texts = [self.decode(ids) for ids in arr]\n",
" return texts\n",
"\n",
" def __str__(self):\n",
" return self.chars"
]
},
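{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `Vocab` class reserves ids 0-3 for `<pad>`, `<sos>`, `<eos>` and the mask token, so character ids start at 4. A minimal round-trip check (a sketch, assuming `vocab` is built from the config in the next cell):\n",
"\n",
"```python\n",
"ids = vocab.encode('aA')  # [1, 4, 5, 2]: <sos>, 'a', 'A', <eos>\n",
"assert vocab.decode(ids) == 'aA'\n",
"```"
]
},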
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"config = load_config('/home/anhcd/Projects/vivas-viet-ocr/vietocr/config/base.yml')\n",
"vocab = Vocab(config['vocab'])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' '"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"config['vocab'][-1]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0: a\n",
"1: A\n",
"2: à\n",
"3: À\n",
"4: ả\n",
"5: Ả\n",
"6: ã\n",
"7: Ã\n",
"8: á\n",
"9: Á\n",
"10: ạ\n",
"11: Ạ\n",
"12: ă\n",
"13: Ă\n",
"14: ằ\n",
"15: Ằ\n",
"16: ẳ\n",
"17: Ẳ\n",
"18: ẵ\n",
"19: Ẵ\n",
"20: ắ\n",
"21: Ắ\n",
"22: ặ\n",
"23: Ặ\n",
"24: â\n",
"25: Â\n",
"26: ầ\n",
"27: Ầ\n",
"28: ẩ\n",
"29: Ẩ\n",
"30: ẫ\n",
"31: Ẫ\n",
"32: ấ\n",
"33: Ấ\n",
"34: ậ\n",
"35: Ậ\n",
"36: b\n",
"37: B\n",
"38: c\n",
"39: C\n",
"40: d\n",
"41: D\n",
"42: đ\n",
"43: Đ\n",
"44: e\n",
"45: E\n",
"46: è\n",
"47: È\n",
"48: ẻ\n",
"49: Ẻ\n",
"50: ẽ\n",
"51: Ẽ\n",
"52: é\n",
"53: É\n",
"54: ẹ\n",
"55: Ẹ\n",
"56: ê\n",
"57: Ê\n",
"58: ề\n",
"59: Ề\n",
"60: ể\n",
"61: Ể\n",
"62: ễ\n",
"63: Ễ\n",
"64: ế\n",
"65: Ế\n",
"66: ệ\n",
"67: Ệ\n",
"68: f\n",
"69: F\n",
"70: g\n",
"71: G\n",
"72: h\n",
"73: H\n",
"74: i\n",
"75: I\n",
"76: ì\n",
"77: Ì\n",
"78: ỉ\n",
"79: Ỉ\n",
"80: ĩ\n",
"81: Ĩ\n",
"82: í\n",
"83: Í\n",
"84: ị\n",
"85: Ị\n",
"86: j\n",
"87: J\n",
"88: k\n",
"89: K\n",
"90: l\n",
"91: L\n",
"92: m\n",
"93: M\n",
"94: n\n",
"95: N\n",
"96: o\n",
"97: O\n",
"98: ò\n",
"99: Ò\n",
"100: ỏ\n",
"101: Ỏ\n",
"102: õ\n",
"103: Õ\n",
"104: ó\n",
"105: Ó\n",
"106: ọ\n",
"107: Ọ\n",
"108: ô\n",
"109: Ô\n",
"110: ồ\n",
"111: Ồ\n",
"112: ổ\n",
"113: Ổ\n",
"114: ỗ\n",
"115: Ỗ\n",
"116: ố\n",
"117: Ố\n",
"118: ộ\n",
"119: Ộ\n",
"120: ơ\n",
"121: Ơ\n",
"122: ờ\n",
"123: Ờ\n",
"124: ở\n",
"125: Ở\n",
"126: ỡ\n",
"127: Ỡ\n",
"128: ớ\n",
"129: Ớ\n",
"130: ợ\n",
"131: Ợ\n",
"132: p\n",
"133: P\n",
"134: q\n",
"135: Q\n",
"136: r\n",
"137: R\n",
"138: s\n",
"139: S\n",
"140: t\n",
"141: T\n",
"142: u\n",
"143: U\n",
"144: ù\n",
"145: Ù\n",
"146: ủ\n",
"147: Ủ\n",
"148: ũ\n",
"149: Ũ\n",
"150: ú\n",
"151: Ú\n",
"152: ụ\n",
"153: Ụ\n",
"154: ư\n",
"155: Ư\n",
"156: ừ\n",
"157: Ừ\n",
"158: ử\n",
"159: Ử\n",
"160: ữ\n",
"161: Ữ\n",
"162: ứ\n",
"163: Ứ\n",
"164: ự\n",
"165: Ự\n",
"166: v\n",
"167: V\n",
"168: w\n",
"169: W\n",
"170: x\n",
"171: X\n",
"172: y\n",
"173: Y\n",
"174: ỳ\n",
"175: Ỳ\n",
"176: ỷ\n",
"177: Ỷ\n",
"178: ỹ\n",
"179: Ỹ\n",
"180: ý\n",
"181: Ý\n",
"182: ỵ\n",
"183: Ỵ\n",
"184: z\n",
"185: Z\n",
"186: 0\n",
"187: 1\n",
"188: 2\n",
"189: 3\n",
"190: 4\n",
"191: 5\n",
"192: 6\n",
"193: 7\n",
"194: 8\n",
"195: 9\n",
"196: !\n",
"197: \"\n",
"198: #\n",
"199: $\n",
"200: %\n",
"201: &\n",
"202: '\n",
"203: (\n",
"204: )\n",
"205: *\n",
"206: +\n",
"207: ,\n",
"208: -\n",
"209: .\n",
"210: /\n",
"211: :\n",
"212: ;\n",
"213: <\n",
"214: =\n",
"215: >\n",
"216: ?\n",
"217: @\n",
"218: [\n",
"219: \\\n",
"220: ]\n",
"221: ^\n",
"222: _\n",
"223: `\n",
"224: {\n",
"225: |\n",
"226: }\n",
"227: ~\n",
"228: \n"
]
}
],
"source": [
"for i, c in enumerate(config['vocab']):\n",
" print(f'{i}: {c}')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"device = 'cuda:0'"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"image_path = '/home/anhcd/Data/data_vietocr_thangdm/images/+50-100__1721908725165.jpg'\n",
"image = cv2.imread(image_path)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"ename": "RuntimeError",
"evalue": "PytorchStreamReader failed reading zip archive: failed finding central directory",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m model_cnn \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjit\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/home/anhcd/Projects/vivas-viet-ocr/weights/cnn.onnx\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto(device)\n\u001b[1;32m 2\u001b[0m model_encoder \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mjit\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/home/anhcd/Projects/vivas-viet-ocr/weights/encoder.onnx\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mto(device)\n\u001b[1;32m 3\u001b[0m model_decoder \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mjit\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/home/anhcd/Projects/vivas-viet-ocr/weights/decoder.onnx\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mto(device)\n",
"File \u001b[0;32m~/miniconda3/envs/vietocr/lib/python3.11/site-packages/torch/jit/_serialization.py:163\u001b[0m, in \u001b[0;36mload\u001b[0;34m(f, map_location, _extra_files, _restore_shapes)\u001b[0m\n\u001b[1;32m 161\u001b[0m cu \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39m_C\u001b[38;5;241m.\u001b[39mCompilationUnit()\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(f, (\u001b[38;5;28mstr\u001b[39m, os\u001b[38;5;241m.\u001b[39mPathLike)):\n\u001b[0;32m--> 163\u001b[0m cpp_module \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_C\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_ir_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcu\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfspath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_location\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_extra_files\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_restore_shapes\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[call-arg]\u001b[39;00m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 165\u001b[0m cpp_module \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39m_C\u001b[38;5;241m.\u001b[39mimport_ir_module_from_buffer(\n\u001b[1;32m 166\u001b[0m cu, f\u001b[38;5;241m.\u001b[39mread(), map_location, _extra_files, _restore_shapes\n\u001b[1;32m 167\u001b[0m ) \u001b[38;5;66;03m# type: ignore[call-arg]\u001b[39;00m\n",
"\u001b[0;31mRuntimeError\u001b[0m: PytorchStreamReader failed reading zip archive: failed finding central directory"
]
}
],
"source": [
"model_cnn = torch.jit.load(\"/home/anhcd/Projects/vivas-viet-ocr/weights/cnn.onnx\").to(device)\n",
"model_encoder = torch.jit.load(\"/home/anhcd/Projects/vivas-viet-ocr/weights/encoder.onnx\").to(device)\n",
"model_decoder = torch.jit.load(\"/home/anhcd/Projects/vivas-viet-ocr/weights/decoder.onnx\").to(device)"
]
},
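{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the decoding loop below, `translated_sentence` is a list of steps, each holding one token id per batch item; transposing it gives a batch x time array, and the loop stops once every row contains `<eos>`. A small sketch of the stopping test:\n",
"\n",
"```python\n",
"import numpy as np\n",
"steps = [[1], [10], [2]]  # <sos>, one character id, <eos> for a batch of one\n",
"print(np.any(np.asarray(steps).T == 2, axis=1))  # [ True] -> decoding is done\n",
"```"
]
},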
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'model_cnn' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[11], line 19\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m#================================================================\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;66;03m#CNN\u001b[39;00m\n\u001b[1;32m 18\u001b[0m input_cnn \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mfrom_numpy(img)\u001b[38;5;241m.\u001b[39mto(device)\n\u001b[0;32m---> 19\u001b[0m output_cnn \u001b[38;5;241m=\u001b[39m \u001b[43mmodel_cnn\u001b[49m(input_cnn)\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# print(output_cnn)\u001b[39;00m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;66;03m#================================================================\u001b[39;00m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;66;03m#ENCODER\u001b[39;00m\n\u001b[1;32m 23\u001b[0m output_encoder, hiden \u001b[38;5;241m=\u001b[39m model_encoder(output_cnn\u001b[38;5;241m.\u001b[39mto(device))\n",
"\u001b[0;31mNameError\u001b[0m: name 'model_cnn' is not defined"
]
}
],
"source": [
"sos_token = 1\n",
"eos_token = 2\n",
"max_seq_length = 128\n",
"target_height = 32\n",
"translated_sentence = [[sos_token]] \n",
"\n",
"with torch.no_grad():\n",
" aspect_ratio = image.shape[1] / image.shape[0]\n",
" target_width = int(target_height * aspect_ratio)\n",
" resized_image = cv2.resize(image, (target_width, target_height))\n",
" # Convert the image to float32 and normalize\n",
" resized_image = resized_image.astype(np.float32) / 255.0\n",
" # Convert the image to a NumPy array and transpose to the desired shape\n",
" numpy_array = np.transpose(resized_image, (2, 0, 1)) # Transpose to (3, 32, length)\n",
" img = np.expand_dims(numpy_array, axis=0) # Add batch dimension\n",
" #================================================================\n",
" #CNN\n",
" input_cnn = torch.from_numpy(img).to(device)\n",
" output_cnn = model_cnn(input_cnn)\n",
" # print(output_cnn)\n",
" #================================================================\n",
" #ENCODER\n",
" output_encoder, hiden = model_encoder(output_cnn.to(device))\n",
" # print(output_encoder)\n",
" #================================================================\n",
" #DECODER\n",
" max_length = 0\n",
" while max_length <= max_seq_length and not all(\n",
" np.any(np.asarray(translated_sentence).T == eos_token, axis=1)\n",
" ):\n",
" tgt_inp = torch.tensor(translated_sentence[-1])\n",
" # decoder_input = torch.tensor(tgt_inp.to(device), hiden.to(device), output_encoder.to(device))\n",
"\n",
" output_decoder, hiden, _ = model_decoder(tgt_inp.to(device), hiden.to(device), output_encoder.to(device))\n",
"\n",
" values, indices = torch.topk(output_decoder, 1)\n",
" indices = indices[:, 0].tolist()\n",
" translated_sentence.append(indices)\n",
"\n",
" max_length+=1\n",
" del output_decoder\n",
"\n",
"translated_sentence = np.asarray(translated_sentence).T\n",
"print(translated_sentence)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result: 7.800 8 000\n"
]
}
],
"source": [
"s = vocab.decode(translated_sentence[0].tolist())\n",
"print(\"Result: \", s)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "dacocr",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/anhcd/Projects/vivas-viet-ocr/ConvertVietOcr2Onnx\n"
]
}
],
"source": [
"%cd ConvertVietOcr2Onnx/"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"from PIL import Image\n",
"from tool.config import Cfg\n",
"from tool.translate import build_model, process_input, translate\n",
"import torch\n",
"import onnxruntime\n",
"import numpy as np\n",
"\n",
"config = Cfg.load_config_from_file('/home/anhcd/Projects/vivas-viet-ocr/vgg-seq2seq.yml')\n",
"config['cnn']['pretrained']=False\n",
"config['device'] = 'cuda:0'\n",
"weight_path = '/tmp/vgg_seq2seq.pth'"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# build model\n",
"model, vocab = build_model(config)\n",
"\n",
"# load weight\n",
"model.load_state_dict(torch.load(weight_path, map_location=torch.device(config['device'])))\n",
"model = model.eval() "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Export mô hình CNN"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Exported graph: graph(%img : Float(1, 3, 32, *, strides=[45600, 15200, 475, 1], requires_grad=0, device=cuda:0),\n",
" %model.last_conv_1x1.weight : Float(256, 512, 1, 1, strides=[512, 1, 1, 1], requires_grad=1, device=cuda:0),\n",
" %model.last_conv_1x1.bias : Float(256, strides=[1], requires_grad=1, device=cuda:0),\n",
" %onnx::Conv_180 : Float(64, 3, 3, 3, strides=[27, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_181 : Float(64, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_183 : Float(64, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_184 : Float(64, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_186 : Float(128, 64, 3, 3, strides=[576, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_187 : Float(128, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_189 : Float(128, 128, 3, 3, strides=[1152, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_190 : Float(128, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_192 : Float(256, 128, 3, 3, strides=[1152, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_193 : Float(256, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_195 : Float(256, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_196 : Float(256, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_198 : Float(256, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_199 : Float(256, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_201 : Float(256, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_202 : Float(256, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_204 : Float(512, 256, 3, 3, strides=[2304, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_205 : Float(512, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_207 : Float(512, 512, 3, 3, strides=[4608, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_208 : Float(512, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_210 : Float(512, 512, 3, 3, strides=[4608, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_211 : Float(512, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_213 : Float(512, 512, 3, 3, strides=[4608, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_214 : Float(512, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_216 : Float(512, 512, 3, 3, strides=[4608, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_217 : Float(512, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_219 : Float(512, 512, 3, 3, strides=[4608, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_220 : Float(512, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_222 : Float(512, 512, 3, 3, strides=[4608, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_223 : Float(512, strides=[1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_225 : Float(512, 512, 3, 3, strides=[4608, 9, 3, 1], requires_grad=0, device=cuda:0),\n",
" %onnx::Conv_226 : Float(512, strides=[1], requires_grad=0, device=cuda:0)):\n",
" %/model/features/features.0/Conv_output_0 : Float(1, 64, 32, *, strides=[972800, 15200, 475, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.0/Conv\"](%img, %onnx::Conv_180, %onnx::Conv_181), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.0 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.2/Relu_output_0 : Float(1, 64, 32, *, strides=[972800, 15200, 475, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.2/Relu\"](%/model/features/features.0/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.2 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.3/Conv_output_0 : Float(1, 64, 32, *, strides=[972800, 15200, 475, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.3/Conv\"](%/model/features/features.2/Relu_output_0, %onnx::Conv_183, %onnx::Conv_184), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.3 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.5/Relu_output_0 : Float(1, 64, 32, *, strides=[972800, 15200, 475, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.5/Relu\"](%/model/features/features.3/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.5 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.6/AveragePool_output_0 : Float(1, 64, 16, *, strides=[242688, 3792, 237, 1], requires_grad=0, device=cuda:0) = onnx::AveragePool[ceil_mode=0, count_include_pad=1, kernel_shape=[2, 2], pads=[0, 0, 0, 0], strides=[2, 2], onnx_name=\"/model/features/features.6/AveragePool\"](%/model/features/features.5/Relu_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.pooling.AvgPool2d::features.6 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/pooling.py:631:0\n",
" %/model/features/features.7/Conv_output_0 : Float(1, 128, 16, *, strides=[485376, 3792, 237, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.7/Conv\"](%/model/features/features.6/AveragePool_output_0, %onnx::Conv_186, %onnx::Conv_187), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.7 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.9/Relu_output_0 : Float(1, 128, 16, *, strides=[485376, 3792, 237, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.9/Relu\"](%/model/features/features.7/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.9 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.10/Conv_output_0 : Float(1, 128, 16, *, strides=[485376, 3792, 237, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.10/Conv\"](%/model/features/features.9/Relu_output_0, %onnx::Conv_189, %onnx::Conv_190), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.10 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.12/Relu_output_0 : Float(1, 128, 16, *, strides=[485376, 3792, 237, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.12/Relu\"](%/model/features/features.10/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.12 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.13/AveragePool_output_0 : Float(1, 128, 8, *, strides=[120832, 944, 118, 1], requires_grad=0, device=cuda:0) = onnx::AveragePool[ceil_mode=0, count_include_pad=1, kernel_shape=[2, 2], pads=[0, 0, 0, 0], strides=[2, 2], onnx_name=\"/model/features/features.13/AveragePool\"](%/model/features/features.12/Relu_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.pooling.AvgPool2d::features.13 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/pooling.py:631:0\n",
" %/model/features/features.14/Conv_output_0 : Float(1, 256, 8, *, strides=[241664, 944, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.14/Conv\"](%/model/features/features.13/AveragePool_output_0, %onnx::Conv_192, %onnx::Conv_193), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.14 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.16/Relu_output_0 : Float(1, 256, 8, *, strides=[241664, 944, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.16/Relu\"](%/model/features/features.14/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.16 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.17/Conv_output_0 : Float(1, 256, 8, *, strides=[241664, 944, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.17/Conv\"](%/model/features/features.16/Relu_output_0, %onnx::Conv_195, %onnx::Conv_196), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.17 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.19/Relu_output_0 : Float(1, 256, 8, *, strides=[241664, 944, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.19/Relu\"](%/model/features/features.17/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.19 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.20/Conv_output_0 : Float(1, 256, 8, *, strides=[241664, 944, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.20/Conv\"](%/model/features/features.19/Relu_output_0, %onnx::Conv_198, %onnx::Conv_199), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.20 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.22/Relu_output_0 : Float(1, 256, 8, *, strides=[241664, 944, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.22/Relu\"](%/model/features/features.20/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.22 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.23/Conv_output_0 : Float(1, 256, 8, *, strides=[241664, 944, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.23/Conv\"](%/model/features/features.22/Relu_output_0, %onnx::Conv_201, %onnx::Conv_202), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.23 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.25/Relu_output_0 : Float(1, 256, 8, *, strides=[241664, 944, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.25/Relu\"](%/model/features/features.23/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.25 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.26/AveragePool_output_0 : Float(1, 256, 4, *, strides=[120832, 472, 118, 1], requires_grad=0, device=cuda:0) = onnx::AveragePool[ceil_mode=0, count_include_pad=1, kernel_shape=[2, 1], pads=[0, 0, 0, 0], strides=[2, 1], onnx_name=\"/model/features/features.26/AveragePool\"](%/model/features/features.25/Relu_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.pooling.AvgPool2d::features.26 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/pooling.py:631:0\n",
" %/model/features/features.27/Conv_output_0 : Float(1, 512, 4, *, strides=[241664, 472, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.27/Conv\"](%/model/features/features.26/AveragePool_output_0, %onnx::Conv_204, %onnx::Conv_205), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.27 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.29/Relu_output_0 : Float(1, 512, 4, *, strides=[241664, 472, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.29/Relu\"](%/model/features/features.27/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.29 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.30/Conv_output_0 : Float(1, 512, 4, *, strides=[241664, 472, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.30/Conv\"](%/model/features/features.29/Relu_output_0, %onnx::Conv_207, %onnx::Conv_208), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.30 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.32/Relu_output_0 : Float(1, 512, 4, *, strides=[241664, 472, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.32/Relu\"](%/model/features/features.30/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.32 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.33/Conv_output_0 : Float(1, 512, 4, *, strides=[241664, 472, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.33/Conv\"](%/model/features/features.32/Relu_output_0, %onnx::Conv_210, %onnx::Conv_211), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.33 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.35/Relu_output_0 : Float(1, 512, 4, *, strides=[241664, 472, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.35/Relu\"](%/model/features/features.33/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.35 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.36/Conv_output_0 : Float(1, 512, 4, *, strides=[241664, 472, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.36/Conv\"](%/model/features/features.35/Relu_output_0, %onnx::Conv_213, %onnx::Conv_214), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.36 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.38/Relu_output_0 : Float(1, 512, 4, *, strides=[241664, 472, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.38/Relu\"](%/model/features/features.36/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.38 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.39/AveragePool_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::AveragePool[ceil_mode=0, count_include_pad=1, kernel_shape=[2, 1], pads=[0, 0, 0, 0], strides=[2, 1], onnx_name=\"/model/features/features.39/AveragePool\"](%/model/features/features.38/Relu_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.pooling.AvgPool2d::features.39 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/pooling.py:631:0\n",
" %/model/features/features.40/Conv_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.40/Conv\"](%/model/features/features.39/AveragePool_output_0, %onnx::Conv_216, %onnx::Conv_217), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.40 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.42/Relu_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.42/Relu\"](%/model/features/features.40/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.42 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.43/Conv_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.43/Conv\"](%/model/features/features.42/Relu_output_0, %onnx::Conv_219, %onnx::Conv_220), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.43 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.45/Relu_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.45/Relu\"](%/model/features/features.43/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.45 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.46/Conv_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.46/Conv\"](%/model/features/features.45/Relu_output_0, %onnx::Conv_222, %onnx::Conv_223), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.46 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.48/Relu_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.48/Relu\"](%/model/features/features.46/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.48 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.49/Conv_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[3, 3], pads=[1, 1, 1, 1], strides=[1, 1], onnx_name=\"/model/features/features.49/Conv\"](%/model/features/features.48/Relu_output_0, %onnx::Conv_225, %onnx::Conv_226), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.conv.Conv2d::features.49 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/features/features.51/Relu_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::Relu[onnx_name=\"/model/features/features.51/Relu\"](%/model/features/features.49/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.activation.ReLU::features.51 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/functional.py:1471:0\n",
" %/model/features/features.52/AveragePool_output_0 : Float(1, 512, 2, *, strides=[120832, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::AveragePool[ceil_mode=0, count_include_pad=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name=\"/model/features/features.52/AveragePool\"](%/model/features/features.51/Relu_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.container.Sequential::features/torch.nn.modules.pooling.AvgPool2d::features.52 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/pooling.py:631:0\n",
" %/model/last_conv_1x1/Conv_output_0 : Float(1, 256, 2, *, strides=[60416, 236, 118, 1], requires_grad=0, device=cuda:0) = onnx::Conv[dilations=[1, 1], group=1, kernel_shape=[1, 1], pads=[0, 0, 0, 0], strides=[1, 1], onnx_name=\"/model/last_conv_1x1/Conv\"](%/model/features/features.52/AveragePool_output_0, %model.last_conv_1x1.weight, %model.last_conv_1x1.bias), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model/torch.nn.modules.conv.Conv2d::last_conv_1x1 # /home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/nn/modules/conv.py:456:0\n",
" %/model/Transpose_output_0 : Float(1, 256, *, 2, strides=[60416, 236, 1, 118], requires_grad=0, device=cuda:0) = onnx::Transpose[perm=[0, 1, 3, 2], onnx_name=\"/model/Transpose\"](%/model/last_conv_1x1/Conv_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:40:0\n",
" %/model/Shape_output_0 : Long(4, strides=[1], device=cpu) = onnx::Shape[onnx_name=\"/model/Shape\"](%/model/Transpose_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:41:0\n",
" %/model/Constant_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={0}, onnx_name=\"/model/Constant\"](), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:41:0\n",
" %/model/Constant_1_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={0}, onnx_name=\"/model/Constant_1\"](), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:41:0\n",
" %/model/Constant_2_output_0 : Long(1, strides=[1], device=cpu) = onnx::Constant[value={2}, onnx_name=\"/model/Constant_2\"](), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:41:0\n",
" %/model/Slice_output_0 : Long(2, strides=[1], device=cpu) = onnx::Slice[onnx_name=\"/model/Slice\"](%/model/Shape_output_0, %/model/Constant_1_output_0, %/model/Constant_2_output_0, %/model/Constant_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:41:0\n",
" %/model/Constant_3_output_0 : Long(1, strides=[1], requires_grad=0, device=cpu) = onnx::Constant[value={-1}, onnx_name=\"/model/Constant_3\"](), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:41:0\n",
" %/model/Concat_output_0 : Long(3, strides=[1], device=cpu) = onnx::Concat[axis=0, onnx_name=\"/model/Concat\"](%/model/Slice_output_0, %/model/Constant_3_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:41:0\n",
" %/model/Reshape_output_0 : Float(*, *, *, strides=[60416, 236, 1], requires_grad=0, device=cuda:0) = onnx::Reshape[onnx_name=\"/model/Reshape\"](%/model/Transpose_output_0, %/model/Concat_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:41:0\n",
" %output : Float(*, *, *, strides=[1, 60416, 236], requires_grad=0, device=cuda:0) = onnx::Transpose[perm=[2, 0, 1], onnx_name=\"/model/Transpose_1\"](%/model/Reshape_output_0), scope: model.backbone.cnn.CNN::/model.backbone.vgg.Vgg::model # /home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/backbone/vgg.py:42:0\n",
" return (%output)\n",
"\n"
]
}
],
"source": [
"def convert_cnn_part(img, save_path, model): \n",
" with torch.no_grad(): \n",
" src = model.cnn(img)\n",
" torch.onnx.export(model.cnn, img, save_path, export_params=True, opset_version=12, do_constant_folding=True, verbose=True, input_names=['img'], output_names=['output'], dynamic_axes={'img': {3: 'lenght'}, 'output': {0: 'channel'}})\n",
" \n",
" return src\n",
" \n",
"img = torch.rand(1, 3, 32, 475).cuda()\n",
"src = convert_cnn_part(img, './weight/cnn.onnx', model)"
]
},
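{
"cell_type": "markdown",
"metadata": {},
"source": [
"`dynamic_axes` marks the image width (axis 3 of `img`) and the output sequence length as variable, so a single exported graph handles crops of any width. A quick sketch to confirm this, assuming the export above succeeded:\n",
"\n",
"```python\n",
"import numpy as np\n",
"import onnxruntime\n",
"\n",
"sess = onnxruntime.InferenceSession('./weight/cnn.onnx')\n",
"for w in (160, 475):\n",
"    out = sess.run(None, {'img': np.random.rand(1, 3, 32, w).astype(np.float32)})[0]\n",
"    print(w, out.shape)  # the output sequence length grows with the input width\n",
"```"
]
},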
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Export mô hình Encoder"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/anhcd/miniconda3/envs/dacocr/lib/python3.10/site-packages/torch/onnx/symbolic_opset9.py:4662: UserWarning: Exporting a model to ONNX with a batch_size other than 1, with a variable length with GRU can cause an error when running the ONNX model with a different batch size. Make sure to save the model with a batch size of 1, or define the initial states (h0/c0) as inputs of the model. \n",
" warnings.warn(\n"
]
}
],
"source": [
"def convert_encoder_part(model, src, save_path): \n",
" encoder_outputs, hidden = model.transformer.encoder(src) \n",
" torch.onnx.export(model.transformer.encoder, src, save_path, export_params=True, opset_version=11, do_constant_folding=True, input_names=['src'], output_names=['encoder_outputs', 'hidden'], dynamic_axes={'src':{0: \"channel_input\"}, 'encoder_outputs': {0: 'channel_output'}}) \n",
" return hidden, encoder_outputs\n",
" \n",
"hidden, encoder_outputs = convert_encoder_part(model, src, './weight/encoder.onnx')"
]
},
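{
"cell_type": "markdown",
"metadata": {},
"source": [
"The exporter warning above concerns GRU models traced with a batch size other than one; here `src` comes from a single image, which matches the recommendation. A one-off smoke test of the exported encoder (a sketch, assuming the export above succeeded):\n",
"\n",
"```python\n",
"import onnxruntime\n",
"\n",
"enc = onnxruntime.InferenceSession('./weight/encoder.onnx')\n",
"encoder_outputs_np, hidden_np = enc.run(None, {'src': src.cpu().numpy()})\n",
"print(encoder_outputs_np.shape, hidden_np.shape)\n",
"```"
]
},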
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Export mô hình Decoder"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/anhcd/Projects/dac_ocr/ConvertVietOcr2Onnx/model/seqmodel/seq2seq.py:93: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n",
" assert (output == hidden).all()\n"
]
}
],
"source": [
"def convert_decoder_part(model, tgt, hidden, encoder_outputs, save_path):\n",
" tgt = tgt[-1]\n",
" \n",
" torch.onnx.export(model.transformer.decoder,\n",
" (tgt, hidden, encoder_outputs),\n",
" save_path,\n",
" export_params=True,\n",
" opset_version=11,\n",
" do_constant_folding=True,\n",
" input_names=['tgt', 'hidden', 'encoder_outputs'],\n",
" output_names=['output', 'hidden_out', 'last'],\n",
" dynamic_axes={'encoder_outputs':{0: \"channel_input\"},\n",
" 'last': {0: 'channel_output'}})\n",
" \n",
"device = img.device\n",
"tgt = torch.LongTensor([[1] * len(img)]).to(device)\n",
"convert_decoder_part(model, tgt, hidden, encoder_outputs, './weight/decoder.onnx')"
]
},
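{
"cell_type": "markdown",
"metadata": {},
"source": [
"A single decoding step against the exported decoder, fed a `<sos>` token, a zero hidden state, and dummy encoder outputs (a sketch; the input shapes follow the graph printed in the next section):\n",
"\n",
"```python\n",
"import numpy as np\n",
"import onnxruntime\n",
"\n",
"dec = onnxruntime.InferenceSession('./weight/decoder.onnx')\n",
"output, hidden_out, attn = dec.run(None, {\n",
"    'tgt': np.array([1], dtype=np.int64),  # <sos>\n",
"    'hidden': np.zeros((1, 256), dtype=np.float32),\n",
"    'encoder_outputs': np.random.rand(119, 1, 512).astype(np.float32),  # dummy length\n",
"})\n",
"print(output.shape)  # (1, 233): one logit per vocabulary entry\n",
"```"
]
},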
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Kiểm tra mô hình sau khi chuyển đổi"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'graph main_graph (\\n %tgt[INT64, 1]\\n %hidden[FLOAT, 1x256]\\n %encoder_outputs[FLOAT, channel_inputx1x512]\\n) initializers (\\n %attention.attn.bias[FLOAT, 256]\\n %embedding.weight[FLOAT, 233x256]\\n %fc_out.weight[FLOAT, 233x1024]\\n %fc_out.bias[FLOAT, 233]\\n %onnx::MatMul_118[FLOAT, 768x256]\\n %onnx::MatMul_119[FLOAT, 256x1]\\n %onnx::GRU_137[FLOAT, 1x768x768]\\n %onnx::GRU_138[FLOAT, 1x768x256]\\n %onnx::GRU_139[FLOAT, 1x1536]\\n) {\\n %/Unsqueeze_output_0 = Unsqueeze[axes = [0]](%tgt)\\n %/embedding/Gather_output_0 = Gather(%embedding.weight, %/Unsqueeze_output_0)\\n %/attention/Shape_output_0 = Shape(%encoder_outputs)\\n %/attention/Constant_output_0 = Constant[value = <Scalar Tensor []>]()\\n %/attention/Gather_output_0 = Gather[axis = 0](%/attention/Shape_output_0, %/attention/Constant_output_0)\\n %/attention/Unsqueeze_output_0 = Unsqueeze[axes = [1]](%hidden)\\n %/attention/Constant_1_output_0 = Constant[value = <Tensor>]()\\n %/attention/Unsqueeze_1_output_0 = Unsqueeze[axes = [0]](%/attention/Gather_output_0)\\n %/attention/Constant_2_output_0 = Constant[value = <Tensor>]()\\n %/attention/Concat_output_0 = Concat[axis = 0](%/attention/Constant_1_output_0, %/attention/Unsqueeze_1_output_0, %/attention/Constant_2_output_0)\\n %/attention/Constant_3_output_0 = Constant[value = <Tensor>]()\\n %/attention/Unsqueeze_2_output_0 = Unsqueeze[axes = [0]](%/attention/Gather_output_0)\\n %/attention/Constant_4_output_0 = Constant[value = <Tensor>]()\\n %/attention/Concat_1_output_0 = Concat[axis = 0](%/attention/Constant_3_output_0, %/attention/Unsqueeze_2_output_0, %/attention/Constant_4_output_0)\\n %/attention/Shape_1_output_0 = Shape(%/attention/Concat_output_0)\\n %/attention/ConstantOfShape_output_0 = ConstantOfShape[value = <Tensor>](%/attention/Shape_1_output_0)\\n %/attention/Expand_output_0 = Expand(%/attention/Unsqueeze_output_0, %/attention/ConstantOfShape_output_0)\\n %/attention/Tile_output_0 = Tile(%/attention/Expand_output_0, %/attention/Concat_1_output_0)\\n %/attention/Transpose_output_0 = Transpose[perm = [1, 0, 2]](%encoder_outputs)\\n %/attention/Concat_2_output_0 = Concat[axis = 2](%/attention/Tile_output_0, %/attention/Transpose_output_0)\\n %/attention/attn/MatMul_output_0 = MatMul(%/attention/Concat_2_output_0, %onnx::MatMul_118)\\n %/attention/attn/Add_output_0 = Add(%attention.attn.bias, %/attention/attn/MatMul_output_0)\\n %/attention/Tanh_output_0 = Tanh(%/attention/attn/Add_output_0)\\n %/attention/v/MatMul_output_0 = MatMul(%/attention/Tanh_output_0, %onnx::MatMul_119)\\n %/attention/Squeeze_output_0 = Squeeze[axes = [2]](%/attention/v/MatMul_output_0)\\n %/attention/Softmax_output_0 = Softmax[axis = 1](%/attention/Squeeze_output_0)\\n %/Unsqueeze_1_output_0 = Unsqueeze[axes = [1]](%/attention/Softmax_output_0)\\n %/MatMul_output_0 = MatMul(%/Unsqueeze_1_output_0, %/attention/Transpose_output_0)\\n %/Transpose_output_0 = Transpose[perm = [1, 0, 2]](%/MatMul_output_0)\\n %/Concat_output_0 = Concat[axis = 2](%/embedding/Gather_output_0, %/Transpose_output_0)\\n %/Unsqueeze_2_output_0 = Unsqueeze[axes = [0]](%hidden)\\n %/rnn/GRU_output_0, %/rnn/GRU_output_1 = GRU[hidden_size = 256, linear_before_reset = 1](%/Concat_output_0, %onnx::GRU_137, %onnx::GRU_138, %onnx::GRU_139, %, %/Unsqueeze_2_output_0)\\n %/rnn/Squeeze_output_0 = Squeeze[axes = [1]](%/rnn/GRU_output_0)\\n %/Squeeze_output_0 = Squeeze[axes = [0]](%/embedding/Gather_output_0)\\n %/Squeeze_1_output_0 = Squeeze[axes = [0]](%/rnn/Squeeze_output_0)\\n %/Squeeze_2_output_0 = Squeeze[axes = 
[0]](%/Transpose_output_0)\\n %/Concat_1_output_0 = Concat[axis = 1](%/Squeeze_1_output_0, %/Squeeze_2_output_0, %/Squeeze_output_0)\\n %output = Gemm[alpha = 1, beta = 1, transB = 1](%/Concat_1_output_0, %fc_out.weight, %fc_out.bias)\\n %hidden_out = Squeeze[axes = [0]](%/rnn/GRU_output_1)\\n %last = Squeeze[axes = [1]](%/Unsqueeze_1_output_0)\\n return %output, %hidden_out, %last\\n}'"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import onnx\n",
"\n",
"# load model from onnx\n",
"cnn = onnx.load('./weight/cnn.onnx')\n",
"decoder = onnx.load('./weight/encoder.onnx')\n",
"encoder = onnx.load('./weight/decoder.onnx')\n",
"\n",
"# confirm model has valid schema\n",
"onnx.checker.check_model(cnn)\n",
"onnx.checker.check_model(decoder)\n",
"onnx.checker.check_model(encoder)\n",
"\n",
"# Print a human readable representation of the graph\n",
"onnx.helper.printable_graph(encoder.graph)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Dự đoán cùng với ONNX Runtime"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import torch\n",
"import torchvision.transforms as transforms\n",
"\n",
"def translate_onnx(img, session, max_seq_length=128, sos_token=1, eos_token=2):\n",
" \"\"\"data: BxCxHxW\"\"\"\n",
" cnn_session, encoder_session, decoder_session = session\n",
" # Convert image to float32 and normalize\n",
" target_height = 32\n",
" aspect_ratio = img.shape[1] / img.shape[0]\n",
" target_width = int(target_height * aspect_ratio)\n",
" resized_image = cv2.resize(img, (target_width, target_height))\n",
" # Convert the image to float32 and normalize\n",
" resized_image = resized_image.astype(np.float32) / 255.0\n",
" # Convert the image to a NumPy array and transpose to the desired shape\n",
" numpy_array = np.transpose(resized_image, (2, 0, 1)) # Transpose to (3, 32, length)\n",
" img = np.expand_dims(numpy_array, axis=0) # Add batch dimension\n",
" \n",
" # create cnn input\n",
" cnn_input = {cnn_session.get_inputs()[0].name: img}\n",
" src = cnn_session.run(None, cnn_input)\n",
" \n",
" # create encoder input\n",
" encoder_input = {encoder_session.get_inputs()[0].name: src[0]}\n",
" encoder_outputs, hidden = encoder_session.run(None, encoder_input)\n",
" translated_sentence = [[sos_token] * len(img)]\n",
" max_length = 0\n",
"\n",
" while max_length <= max_seq_length and not all(\n",
" np.any(np.asarray(translated_sentence).T == eos_token, axis=1)\n",
" ):\n",
" tgt_inp = translated_sentence\n",
" decoder_input = {decoder_session.get_inputs()[0].name: tgt_inp[-1], decoder_session.get_inputs()[1].name: hidden, decoder_session.get_inputs()[2].name: encoder_outputs}\n",
"\n",
" output, hidden, _ = decoder_session.run(None, decoder_input)\n",
" output = np.expand_dims(output, axis=1)\n",
" output = torch.Tensor(output)\n",
"\n",
" values, indices = torch.topk(output, 1)\n",
" indices = indices[:, -1, 0]\n",
" indices = indices.tolist()\n",
"\n",
" translated_sentence.append(indices)\n",
" max_length += 1\n",
"\n",
" del output\n",
"\n",
" translated_sentence = np.asarray(translated_sentence).T\n",
"\n",
" return translated_sentence\n",
"\n",
"# create inference session\n",
"cnn_session = onnxruntime.InferenceSession(\"/home/anhcd/Projects/vivas-viet-ocr/weights/cnn.onnx\")\n",
"encoder_session = onnxruntime.InferenceSession(\"/home/anhcd/Projects/vivas-viet-ocr/weights/encoder.onnx\")\n",
"decoder_session = onnxruntime.InferenceSession(\"/home/anhcd/Projects/vivas-viet-ocr/weights/decoder.onnx\")\n",
"\n",
"session = (cnn_session, encoder_session, decoder_session)\n"
]
},
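{
"cell_type": "markdown",
"metadata": {},
"source": [
"Recent ONNX Runtime releases expect an explicit `providers` list when creating a session; a CPU-only variant of the sessions above would look like this (a sketch):\n",
"\n",
"```python\n",
"cnn_session = onnxruntime.InferenceSession(\n",
"    '/home/anhcd/Projects/vivas-viet-ocr/weights/cnn.onnx',\n",
"    providers=['CPUExecutionProvider'],\n",
")\n",
"```"
]
},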
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": [
"image_path = '/home/anhcd/Data/data_vietocr_thangdm/images/+50-100__1721908725165.jpg'\n",
"image = cv2.imread(image_path)\n",
"print(image.shape)\n",
"s = translate_onnx(image, session)[0].tolist()\n",
"s = vocab.decode(s)\n",
"print(\"Result: \", s)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "dacocr",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}