Skip to content

Commit 6a9b5cb

Browse files
authored
Add Gwoyeu Romatzyh (#343)
* Add Gwoyeu Romatzyh * Gwoyeu Romatzyh fix * Fix ěi, ün and ch * Test
1 parent e42dede commit 6a9b5cb

File tree

4 files changed

+90
-0
lines changed

4 files changed

+90
-0
lines changed

pypinyin/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ class Style(IntEnum):
9999
CYRILLIC_FIRST = 13
100100
#: 威妥玛拼音/韦氏拼音/威式拼音风格,无声调
101101
WADEGILES = 14
102+
#: 国语罗马字风格。如:中国 -> ``jong gwo``
103+
GWOYEU = 15
102104

103105

104106
NORMAL = STYLE_NORMAL = Style.NORMAL

pypinyin/style/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,5 @@ def auto_discover():
7070
cyrillic,
7171
wadegiles,
7272
others,
73+
gwoyeu,
7374
)

pypinyin/style/gwoyeu.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import unicode_literals
3+
import re
4+
5+
from pypinyin.constants import Style
6+
from pypinyin.style import register
7+
from pypinyin.style._constants import RE_TONE3
8+
from pypinyin.style._utils import replace_symbol_to_number
9+
10+
# Ordered spelling rules as (compiled pattern, replacement) pairs.
# ``GwoyeuConverter.to_gwoyeu`` applies every rule, one after another, to a
# syllable whose tone digit has already been moved to the end, so the order
# of the entries is significant: later rules see the output of earlier ones.
GWOYEU_REPLACE = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'^r5$', 'er5'),
        (r'iu', 'iou'),
        (r'ao', 'au'),
        (r'^yi?', 'i'),
        (r'^wu?', 'u'),
        (r'^([jqx])u', '\\1iu'),
        (r'(?<![iy])u([in])', 'ue\\1'),
        (r'v', 'iu'),
        (r'^([zcsr]h?)i', '\\1y'),
        (r'^zh', 'j'),
        (r'^z', 'tz'),
        (r'^c(?!h)', 'ts'),
        (r'^q', 'ch'),
        (r'^x', 'sh'),
        (r'er', 'el'),
        (r'5$', ''),
        (r'0$', 'q'),
        (r'^i(.*[34])$', 'yi\\1'),
        (r'^u(.*[34])$', 'wu\\1'),
        (r'^yi([aeu].*4)$', 'y\\1'),
        (r'^wu([ae].*4)$', 'w\\1'),
    )
)
33+
34+
# Ordered tone-spelling rules as (compiled pattern, replacement) pairs.
# Every pattern consumes the trailing tone digit (1-4) of the syllable.
# ``GwoyeuConverter.to_gwoyeu`` applies only the FIRST rule whose pattern
# matches, so more specific rules must stay ahead of the generic fallbacks.
TONE_REPLACE = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        # Syllables ending in tone digit 1.
        (r'^([lmnr])(.+)1$', '\\1h\\2'),
        (r'1$', ''),
        # Syllables ending in tone digit 2.
        (r'^([lmnr])(.+)2$', '\\1\\2'),
        (r'^([^ae]*)i(ng?)*2$', '\\1yi\\2'),
        (r'^([^ao]*)u2$', '\\1wu'),
        (r'^([^ae]*)i(.+)2$', '\\1y\\2'),
        (r'^([^ao]*)u(.+)2$', '\\1w\\2'),
        (r'([aeiouy]+)(.*)2$', '\\1r\\2'),
        # Syllables ending in tone digit 3.
        (r'^([^aeiou]*)([iu])(ng?)?3$', '\\1\\2\\2\\3'),
        (r'^([^eu]*)i(.*)3$', '\\1e\\2'),
        (r'^(.*)u(.*)3$', '\\1o\\2'),
        (r'([aeiouy])(.*)3$', '\\1\\1\\2'),
        # Syllables ending in tone digit 4.
        (r'^([^ae]*)i4$', '\\1ih'),
        (r'^([^ao]*)u4$', '\\1uh'),
        (r'i4$', 'y'),
        (r'u4$', 'w'),
        (r'l4$', 'll'),
        (r'ng4$', 'nq'),
        (r'n4$', 'nn'),
        (r'4$', 'h'),
    )
)
56+
57+
58+
class GwoyeuConverter(object):
    """Convert a pinyin syllable to its Gwoyeu Romatzyh spelling."""

    def _pre_convert(self, pinyin):
        """Return *pinyin* with its tone written as a trailing digit.

        :param pinyin: a single pinyin syllable
        :return: the syllable after tone normalization
        """
        # Represent the tone with a number instead of a diacritic.
        numbered = replace_symbol_to_number(pinyin)
        # Move the tone digit to the end of the syllable.
        return RE_TONE3.sub(r'\1\3\2', numbered)

    def to_gwoyeu(self, pinyin, **kwargs):
        """Return the Gwoyeu Romatzyh form of *pinyin*.

        :param pinyin: a single pinyin syllable
        :param kwargs: accepted but not used by this method
        :return: the converted syllable
        """
        spelled = self._pre_convert(pinyin)

        # Spelling rules are cumulative: every rule is applied in order.
        for pattern, replacement in GWOYEU_REPLACE:
            spelled = pattern.sub(replacement, spelled)

        # Tone rules are exclusive: only the first matching rule is applied.
        for pattern, replacement in TONE_REPLACE:
            toned, hits = pattern.subn(replacement, spelled)
            if hits:
                return toned

        # No tone rule matched; return the syllable as spelled so far.
        return spelled
73+
74+
75+
# Module-level converter instance; its ``to_gwoyeu`` method is registered below.
converter = GwoyeuConverter()
76+
77+
# Register ``converter.to_gwoyeu`` as the conversion function for Style.GWOYEU.
register(Style.GWOYEU, func=converter.to_gwoyeu)

pypinyin/style/gwoyeu.pyi

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from typing import Any, Text, Tuple
2+
3+
# Both tables are variable-length tuples of (compiled pattern, replacement)
# pairs, matching the module-level constants defined in gwoyeu.py.
GWOYEU_REPLACE = ...  # type: Tuple[Tuple[Any, Text], ...]
TONE_REPLACE = ...  # type: Tuple[Tuple[Any, Text], ...]
4+
5+
class GwoyeuConverter(object):
    # Public entry point defined in gwoyeu.py and registered for Style.GWOYEU.
    # (The stub previously declared a non-existent ``to_wade_glides`` method.)
    def to_gwoyeu(self, pinyin: Text, **kwargs: Any) -> Text: ...

    # Internal normalization step that to_gwoyeu runs before its rule tables.
    def _pre_convert(self, pinyin: Text) -> Text: ...
9+
10+
# Module-level GwoyeuConverter instance created in gwoyeu.py.
converter = ... # type: GwoyeuConverter

0 commit comments

Comments
 (0)