Commit 7a3e3b97 authored by Cao Duc Anh's avatar Cao Duc Anh

reupdate

parent 8099e7e9
This diff is collapsed.
......@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
......@@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -67,7 +67,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
......@@ -77,7 +77,273 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"' '"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"config['vocab'][-1]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0: a\n",
"1: A\n",
"2: à\n",
"3: À\n",
"4: ả\n",
"5: Ả\n",
"6: ã\n",
"7: Ã\n",
"8: á\n",
"9: Á\n",
"10: ạ\n",
"11: Ạ\n",
"12: ă\n",
"13: Ă\n",
"14: ằ\n",
"15: Ằ\n",
"16: ẳ\n",
"17: Ẳ\n",
"18: ẵ\n",
"19: Ẵ\n",
"20: ắ\n",
"21: Ắ\n",
"22: ặ\n",
"23: Ặ\n",
"24: â\n",
"25: Â\n",
"26: ầ\n",
"27: Ầ\n",
"28: ẩ\n",
"29: Ẩ\n",
"30: ẫ\n",
"31: Ẫ\n",
"32: ấ\n",
"33: Ấ\n",
"34: ậ\n",
"35: Ậ\n",
"36: b\n",
"37: B\n",
"38: c\n",
"39: C\n",
"40: d\n",
"41: D\n",
"42: đ\n",
"43: Đ\n",
"44: e\n",
"45: E\n",
"46: è\n",
"47: È\n",
"48: ẻ\n",
"49: Ẻ\n",
"50: ẽ\n",
"51: Ẽ\n",
"52: é\n",
"53: É\n",
"54: ẹ\n",
"55: Ẹ\n",
"56: ê\n",
"57: Ê\n",
"58: ề\n",
"59: Ề\n",
"60: ể\n",
"61: Ể\n",
"62: ễ\n",
"63: Ễ\n",
"64: ế\n",
"65: Ế\n",
"66: ệ\n",
"67: Ệ\n",
"68: f\n",
"69: F\n",
"70: g\n",
"71: G\n",
"72: h\n",
"73: H\n",
"74: i\n",
"75: I\n",
"76: ì\n",
"77: Ì\n",
"78: ỉ\n",
"79: Ỉ\n",
"80: ĩ\n",
"81: Ĩ\n",
"82: í\n",
"83: Í\n",
"84: ị\n",
"85: Ị\n",
"86: j\n",
"87: J\n",
"88: k\n",
"89: K\n",
"90: l\n",
"91: L\n",
"92: m\n",
"93: M\n",
"94: n\n",
"95: N\n",
"96: o\n",
"97: O\n",
"98: ò\n",
"99: Ò\n",
"100: ỏ\n",
"101: Ỏ\n",
"102: õ\n",
"103: Õ\n",
"104: ó\n",
"105: Ó\n",
"106: ọ\n",
"107: Ọ\n",
"108: ô\n",
"109: Ô\n",
"110: ồ\n",
"111: Ồ\n",
"112: ổ\n",
"113: Ổ\n",
"114: ỗ\n",
"115: Ỗ\n",
"116: ố\n",
"117: Ố\n",
"118: ộ\n",
"119: Ộ\n",
"120: ơ\n",
"121: Ơ\n",
"122: ờ\n",
"123: Ờ\n",
"124: ở\n",
"125: Ở\n",
"126: ỡ\n",
"127: Ỡ\n",
"128: ớ\n",
"129: Ớ\n",
"130: ợ\n",
"131: Ợ\n",
"132: p\n",
"133: P\n",
"134: q\n",
"135: Q\n",
"136: r\n",
"137: R\n",
"138: s\n",
"139: S\n",
"140: t\n",
"141: T\n",
"142: u\n",
"143: U\n",
"144: ù\n",
"145: Ù\n",
"146: ủ\n",
"147: Ủ\n",
"148: ũ\n",
"149: Ũ\n",
"150: ú\n",
"151: Ú\n",
"152: ụ\n",
"153: Ụ\n",
"154: ư\n",
"155: Ư\n",
"156: ừ\n",
"157: Ừ\n",
"158: ử\n",
"159: Ử\n",
"160: ữ\n",
"161: Ữ\n",
"162: ứ\n",
"163: Ứ\n",
"164: ự\n",
"165: Ự\n",
"166: v\n",
"167: V\n",
"168: w\n",
"169: W\n",
"170: x\n",
"171: X\n",
"172: y\n",
"173: Y\n",
"174: ỳ\n",
"175: Ỳ\n",
"176: ỷ\n",
"177: Ỷ\n",
"178: ỹ\n",
"179: Ỹ\n",
"180: ý\n",
"181: Ý\n",
"182: ỵ\n",
"183: Ỵ\n",
"184: z\n",
"185: Z\n",
"186: 0\n",
"187: 1\n",
"188: 2\n",
"189: 3\n",
"190: 4\n",
"191: 5\n",
"192: 6\n",
"193: 7\n",
"194: 8\n",
"195: 9\n",
"196: !\n",
"197: \"\n",
"198: #\n",
"199: $\n",
"200: %\n",
"201: &\n",
"202: '\n",
"203: (\n",
"204: )\n",
"205: *\n",
"206: +\n",
"207: ,\n",
"208: -\n",
"209: .\n",
"210: /\n",
"211: :\n",
"212: ;\n",
"213: <\n",
"214: =\n",
"215: >\n",
"216: ?\n",
"217: @\n",
"218: [\n",
"219: \\\n",
"220: ]\n",
"221: ^\n",
"222: _\n",
"223: `\n",
"224: {\n",
"225: |\n",
"226: }\n",
"227: ~\n",
"228: \n"
]
}
],
"source": [
"for i, c in enumerate(config['vocab']):\n",
" print(f'{i}: {c}')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -185,6 +451,25 @@
"s = vocab.decode(translated_sentence[0].tolist())\n",
"print(\"Result: \", s)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Đồng Lý, Lý Nhân, Hà Nam\n"
]
}
],
"source": [
"s = [1, 47, 114, 98, 74, 232, 95, 184, 211, 232, 95, 184, 232, 99, 76, 28, 98, 211, 232, 77, 6, 232, 99, 4, 96, 2]\n",
"s = vocab.decode(s)\n",
"print(s)"
]
}
],
"metadata": {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment