From 9fde1ff772e6101ecfe29d2ecc37f084ba9d3504 Mon Sep 17 00:00:00 2001 From: chai2010 Date: Fri, 18 Dec 2015 14:26:55 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E5=96=84=20zh2tw,=20=E5=87=8F?= =?UTF-8?q?=E5=B0=91map=E7=9A=84=E4=B9=B1=E5=BA=8F=E5=BE=AA=E7=8E=AF?= =?UTF-8?q?=E5=AF=B9=E7=BB=93=E6=9E=9C=E4=BA=A7=E7=94=9F=E7=9A=84=E5=BD=B1?= =?UTF-8?q?=E5=93=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 5 +++++ zh2tw.go | 39 +++++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 9e014a5..a89c5d4 100644 --- a/Makefile +++ b/Makefile @@ -16,3 +16,8 @@ zh2tw: tw2zh: go run zh2tw.go . .md$$ tw2zh + +loop: + go run zh2tw.go . .md$$ tw2zh + go run zh2tw.go . .md$$ zh2tw + diff --git a/zh2tw.go b/zh2tw.go index 0d5a659..1288bb0 100644 --- a/zh2tw.go +++ b/zh2tw.go @@ -25,6 +25,7 @@ import ( "os" "path/filepath" "regexp" + "sort" "unicode/utf8" ) @@ -158,25 +159,24 @@ func tw2zh(s string) string { } func init() { - // 剔除出现多次的字符 - vvMap := make(map[rune]int) - for k, v := range _TSCharactersMap { - vvMap[k]++ - vvMap[v]++ + // 作为map键的繁体没有重复 + // 但一个繁体可能对应多个简体, 需要按照key字典顺序导入 + // 只保留根据key字典顺序一个出现的简体 + kkMap := make([]int, 0, len(_TSCharactersMap)) + for k, _ := range _TSCharactersMap { + kkMap = append(kkMap, int(k)) } - for k, v := range _TSCharactersMap { - if vvMap[k] > 1 { - delete(_TSCharactersMap, k) - } - if vvMap[v] > 1 { - delete(_TSCharactersMap, v) - } - } - for k, v := range _TSCharactersMap { + sort.Ints(kkMap) + + // 导入初始转换表 + for _, k := range kkMap { + k := rune(k) + v := _TSCharactersMap[k] + tw2zhMap[k] = v zh2twMap[v] = k } - // 修正错误的转换 + // 修正错误的转换(仅简体到繁体) for k, v := range zh2twMapPatch { zh2twMap[k] = v } @@ -184,7 +184,7 @@ func init() { var ( zh2twMap = make(map[rune]rune) - tw2zhMap = _TSCharactersMap + tw2zhMap = make(map[rune]rune) ) // 修正错误的转换 @@ -196,8 +196,11 @@ var zh2twMapPatch = map[rune]rune{ '同': '同', '向': '向', '合': '合', - '針': '针', - '別': '别', + '针': '針', + '别': '别', + '个': '個', + '家': '家', + '当': '當', } var _TSCharactersMap = map[rune]rune{