From 6a4565f4bbd4ffd0cfb5bfb24d13f75ad32fe429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haoran=20S=2E=20Diao=20=28=E5=88=81=E6=B5=A9=E7=84=B6=29?= <0@hairydiode.xyz> Date: Thu, 23 Nov 2023 04:11:58 -0800 Subject: =?UTF-8?q?=E5=88=9D=E5=BF=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 README (limited to 'README') diff --git a/README b/README new file mode 100644 index 0000000..103738f --- /dev/null +++ b/README @@ -0,0 +1,30 @@ +Unihan Database from + https://www.unicode.org/Public/UNIDATA/ +Unihan_DictionaryLikeData.txt + has all the four corners info + +Logo generated here: + https://www.zhuanshuti.cn/3 + +grep kFourCornerCode +delete comment line +:%s/\(.*\s.*\s\)\(.*\)\s\(.*\)/\1\2\r\1\3/ + removes duplicate four corners + +clean it up so it's +12345 U+212121 +then turn to echo $'123445 \u12341' +using two separate substitutes for 4 and 5 char +change to 8 character length + +then use the following to convert to actual unicode +:%s/^\(.*\)\t\(.*\)$/echo -e "\1\\t$(echo \2 |xxd -r -ps -u | iconv -f UTF-32BE -t UTF-8)"/ + +xxd -r -ps -u | iconv -f UTF-32BE -t UTF-8 + converts from U+code to normal, need to pad to 32 bits + +create template according to usr/share/ibus-input/tables/template.txt + + +cat four | awk '{print $1}' | sort | uniq -c | sort -n + counts 71 conflicts at most -- cgit v1.1