# -*- coding: utf-8 -*-
#
# This file is part of ibus-bogo project.
#
# Copyright (C) 2012 Long T. Dam <longdt90@gmail.com>
# Copyright (C) 2012-2013 Trung Ngo <ndtrung4419@gmail.com>
# Copyright (C) 2013 Duong H. Nguyen <cmpitg@gmail.com>
#
# ibus-bogo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ibus-bogo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ibus-bogo. If not, see <http://www.gnu.org/licenses/>.
#
from __future__ import unicode_literals
# Lowercase vowel letters, plain and with diacritics/tone marks. Kept as one
# flat string so that single characters can be tested with the `in` operator.
VOWELS = (
    "àáảãạaằắẳẵặăầấẩẫậâèéẻẽẹeềếểễệêìíỉĩịi"
    "òóỏõọoồốổỗộôờớởỡợơùúủũụuừứửữựưỳýỷỹỵy"
)
def join(alist):
    """
    Concatenate every string in `alist`, in order, with nothing in between.

    Args
        alist - an iterable of strings

    >>> join(['c', 'o', 'n'])
    'con'
    """
    glue = ""
    return glue.join(alist)
def is_vowel(char):
    """
    Tell whether `char` is a vowel, ignoring its case.

    The argument is lower-cased and then looked up in the module-level
    `VOWELS` string.
    """
    return char.lower() in VOWELS
def change_case(string, case):
    """
    Helper: Return a copy of the given string converted to the desired case.

    Args
        string - the text to convert
        case   - falsy (e.g. 0): lower case, truthy (e.g. 1): upper case

    >>> change_case('aBc', 1)
    'ABC'
    >>> change_case('aBc', 0)
    'abc'
    """
    if case:
        return string.upper()
    return string.lower()
def append_comps(comps, char):
    """
    Append a character to `comps` following this rule: a vowel is added to the
    vowel part if there is no last consonant, else to the last consonant part;
    a consonant is added to the first consonant part if there is no vowel and
    no last consonant, and to the last consonant part otherwise.

    Args
        comps - a 3-item sequence: [first consonant, vowel, last consonant]
        char  - the character to append

    Returns a new list; `comps` itself is not modified.

    >>> append_comps(['', '', ''], 'c')
    ['c', '', '']
    >>> append_comps(['c', '', ''], 'o')
    ['c', 'o', '']
    >>> append_comps(['c', 'o', ''], 'n')
    ['c', 'o', 'n']
    >>> append_comps(['c', 'o', 'n'], 'o')
    ['c', 'o', 'no']
    """
    # Work on a copy so the caller's sequence is left untouched.
    c = list(comps)
    if is_vowel(char):
        # Once a last consonant exists, everything piles up behind it.
        pos = 2 if c[2] else 1
    else:
        # A consonant only belongs to the head while nothing follows the head.
        pos = 2 if (c[1] or c[2]) else 0
    c[pos] += char
    return c
# def gibberish_split(head, tail=""):
# """
# Try to split a string into two parts: the alphabetic part at the end and the
# rest.
# >>> gibberish_split("aoeu")
# ("", "aoeu")
# >>> gibberish_split("ao.eu")
# ("ao.", "eu")
# >>> gibberish_split("aoeu.")
# ("aoeu.", "")
# """
# if head == "" or not head[-1].isalpha():
# return (head, tail)
# else:
# return gibberish_split(head[:-1], head[-1] + tail)
def separate(string):
    """
    Separate a string into smaller parts: first consonant (or head), vowel,
    last consonant (if any).

    The trailing run of consonants becomes the last consonant, the run of
    vowels right before it becomes the vowel, and whatever remains in front
    is the head. A string made only of consonants is returned as the head.

    Returns a list of three strings.

    >>> separate('tuong')
    ['t', 'uo', 'ng']
    >>> separate('ohmyfkinggod')
    ['ohmyfkingg', 'o', 'd']
    >>> separate('gia')
    ['gi', 'a', '']
    >>> separate('qua')
    ['qu', 'a', '']
    """
    def split_trailing(text, want_vowel):
        # Split `text` into (rest, tail), where `tail` is the longest suffix
        # whose characters all satisfy is_vowel(ch) == want_vowel.
        # Scans by index rather than recursing, so long inputs neither hit
        # the recursion limit nor re-slice the string at every step.
        cut = len(text)
        while cut > 0 and is_vowel(text[cut - 1]) == want_vowel:
            cut -= 1
        return text[:cut], text[cut:]

    head, last_consonant = split_trailing(string, False)
    first_consonant, vowel = split_trailing(head, True)

    if last_consonant and not (vowel or first_consonant):
        # Consonants only: ['', '', 'b'] -> ['b', '', '']
        comps = [last_consonant, '', '']
    else:
        comps = [first_consonant, vowel, last_consonant]

    # 'gi' and 'qu' are considered qualified consonants.
    # We want something like this:
    #     ['g', 'ia', ''] -> ['gi', 'a', '']
    #     ['q', 'ua', ''] -> ['qu', 'a', '']
    # A lone 'gi' (vowel part of length 1) is left as ['g', 'i', ...].
    initial, nucleus = comps[0], comps[1]
    if initial and nucleus:
        # Exact match on the head: a substring test against 'gG' / 'qQ'
        # would also accept the two-character heads 'gG' and 'qQ'.
        is_gi = (initial in ('g', 'G') and nucleus[0] in 'iI'
                 and len(nucleus) > 1)
        is_qu = initial in ('q', 'Q') and nucleus[0] in 'uU'
        if is_gi or is_qu:
            comps[0] = initial + nucleus[:1]
            comps[1] = nucleus[1:]

    return comps