|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +from __future__ import unicode_literals |
| 3 | +import re |
| 4 | + |
| 5 | +from pypinyin.constants import Style |
| 6 | +from pypinyin.style import register |
| 7 | +from pypinyin.style._constants import RE_TONE3 |
| 8 | +from pypinyin.style._utils import replace_symbol_to_number |
| 9 | + |
# Spelling rewrite rules used by ``GwoyeuConverter.to_gwoyeu``.
#
# Each entry is a ``(compiled pattern, replacement)`` pair.  The converter runs
# EVERY rule, in this order, on a syllable whose tone digit has already been
# moved to the end of the string.  Later rules see the output of earlier ones,
# so the order of the pairs is significant.
GWOYEU_REPLACE = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        # A syllable that is exactly "r5" is expanded to "er5".
        (r'^r5$', 'er5'),
        # Vowel-cluster respellings.
        (r'iu', 'iou'),
        (r'ao', 'au'),
        # Leading "y"/"yi" collapses to "i", leading "w"/"wu" to "u".
        (r'^yi?', 'i'),
        (r'^wu?', 'u'),
        # "u" directly after a leading j/q/x becomes "iu".
        (r'^([jqx])u', '\\1iu'),
        # "ui"/"un" not preceded by "i" or "y" gain an "e": "uei"/"uen".
        (r'(?<![iy])u([in])', 'ue\\1'),
        # "v" is rewritten as "iu".
        (r'v', 'iu'),
        # "i" directly after a leading z/c/s/r (optionally + "h") becomes "y".
        (r'^([zcsr]h?)i', '\\1y'),
        # Initial-consonant respellings.  "zh" is handled before the bare
        # "z" rule, and the "c" rule skips "ch" via the negative lookahead.
        (r'^zh', 'j'),
        (r'^z', 'tz'),
        (r'^c(?!h)', 'ts'),
        (r'^q', 'ch'),
        (r'^x', 'sh'),
        # "er" becomes "el".
        (r'er', 'el'),
        # A trailing tone digit 5 is dropped; a trailing 0 becomes "q".
        (r'5$', ''),
        (r'0$', 'q'),
        # For syllables ending in 3 or 4, a leading "i"/"u" is written
        # "yi"/"wu" ...
        (r'^i(.*[34])$', 'yi\\1'),
        (r'^u(.*[34])$', 'wu\\1'),
        # ... and for those ending in 4, "yi" before a/e/u and "wu" before
        # a/e are then shortened to "y"/"w".
        (r'^yi([aeu].*4)$', 'y\\1'),
        (r'^wu([ae].*4)$', 'w\\1'),
    )
)
| 33 | + |
# Tone-digit rewrite rules used by ``GwoyeuConverter.to_gwoyeu``.
#
# Each entry is a ``(compiled pattern, replacement)`` pair.  Unlike
# ``GWOYEU_REPLACE``, the converter tries these in order and applies only the
# FIRST pattern that matches, so the more specific patterns must stay ahead of
# the generic fallbacks for the same tone digit.
#
# Optional captures are written as ``((?:...)?)`` instead of ``(...)?`` so the
# group always takes part in the match (as an empty string when absent).
# Referencing a group that did not participate from a replacement template
# raises "unmatched group" on Python 2 and on Python 3 before 3.5.
TONE_REPLACE = (
    # --- syllables ending in tone digit 1 ---
    # After a leading l/m/n/r an "h" is inserted; otherwise the digit is
    # simply dropped.
    (re.compile(r'^([lmnr])(.+)1$'), '\\1h\\2'),
    (re.compile(r'1$'), ''),

    # --- syllables ending in tone digit 2 ---
    # After a leading l/m/n/r the digit is simply dropped.
    (re.compile(r'^([lmnr])(.+)2$'), '\\1\\2'),
    # "i" (optionally followed by n/ng) at the end becomes "yi"; a final "u"
    # becomes "wu".
    (re.compile(r'^([^ae]*)i((?:ng?)*)2$'), '\\1yi\\2'),
    (re.compile(r'^([^ao]*)u2$'), '\\1wu'),
    # "i"/"u" followed by more letters becomes "y"/"w".
    (re.compile(r'^([^ae]*)i(.+)2$'), '\\1y\\2'),
    (re.compile(r'^([^ao]*)u(.+)2$'), '\\1w\\2'),
    # Fallback: insert "r" after the first run of vowel letters.
    (re.compile(r'([aeiouy]+)(.*)2$'), '\\1r\\2'),

    # --- syllables ending in tone digit 3 ---
    # A lone "i"/"u" vowel (optionally followed by n/ng) is doubled.
    (re.compile(r'^([^aeiou]*)([iu])((?:ng?)?)3$'), '\\1\\2\\2\\3'),
    # Otherwise "i" becomes "e" and "u" becomes "o".
    (re.compile(r'^([^eu]*)i(.*)3$'), '\\1e\\2'),
    (re.compile(r'^(.*)u(.*)3$'), '\\1o\\2'),
    # Fallback: double the first vowel letter.
    (re.compile(r'([aeiouy])(.*)3$'), '\\1\\1\\2'),

    # --- syllables ending in tone digit 4 ---
    # A final "i"/"u" with no a/e (resp. a/o) before it gets an "h" appended.
    (re.compile(r'^([^ae]*)i4$'), '\\1ih'),
    (re.compile(r'^([^ao]*)u4$'), '\\1uh'),
    # Any other final "i"/"u" becomes "y"/"w".
    (re.compile(r'i4$'), 'y'),
    (re.compile(r'u4$'), 'w'),
    # Final "l" and "n" are doubled; final "ng" becomes "nq" ("ng4" must be
    # tested before "n4" cannot matter here, but it keeps the endings grouped).
    (re.compile(r'l4$'), 'll'),
    (re.compile(r'ng4$'), 'nq'),
    (re.compile(r'n4$'), 'nn'),
    # Fallback: replace the digit with "h".
    (re.compile(r'4$'), 'h'),
)
| 56 | + |
| 57 | + |
class GwoyeuConverter(object):
    """Convert pinyin into the spelling produced by the module's rule tables.

    The conversion is driven entirely by ``GWOYEU_REPLACE`` (every rule is
    applied, in order) and ``TONE_REPLACE`` (only the first matching rule is
    applied).
    """

    def _pre_convert(self, pinyin):
        """Return ``pinyin`` with its tone written as a trailing digit.

        Tone marks are first turned into tone numbers, then the tone number
        is moved to the end of the string.
        """
        numbered = replace_symbol_to_number(pinyin)
        return RE_TONE3.sub(r'\1\3\2', numbered)

    def to_gwoyeu(self, pinyin, **kwargs):
        """Return the converted spelling of ``pinyin``.

        :param pinyin: pinyin text to convert (presumably a single syllable
            with tone marks; confirm against the callers).
        :param kwargs: accepted but not used by this method.
        :return: the converted string.  When no tone rule matches, the
            result of the spelling rules alone is returned.
        """
        syllable = self._pre_convert(pinyin)

        # Spelling rules are cumulative: each one sees the previous output.
        for pattern, replacement in GWOYEU_REPLACE:
            syllable = pattern.sub(replacement, syllable)

        # Tone rules are exclusive: stop at the first one that replaces
        # anything.
        for pattern, replacement in TONE_REPLACE:
            converted, hits = pattern.subn(replacement, syllable)
            if hits:
                return converted
        return syllable
| 73 | + |
| 74 | + |
# Module-level converter instance whose bound method is registered below.
converter = GwoyeuConverter()

# Register ``converter.to_gwoyeu`` as the function for ``Style.GWOYEU``.
register(Style.GWOYEU, func=converter.to_gwoyeu)
0 commit comments