# Source code for annotations.Syll.rules
# -*- coding: UTF-8 -*-
"""
:filename: sppas.src.annotations.Syll.rules.py
:author: Brigitte Bigi
:contact: develop@sppas.org
:summary: Rules of the syllabification system.
.. _This file is part of SPPAS: http://www.sppas.org/
..
-------------------------------------------------------------------------
___ __ __ __ ___
/ | \ | \ | \ / the automatic
\__ |__/ |__/ |___| \__ annotation and
\ | | | | \ analysis
___/ | | | | ___/ of speech
Copyright (C) 2011-2021 Brigitte Bigi
Laboratoire Parole et Langage, Aix-en-Provence, France
Use of this software is governed by the GNU Public License, version 3.
SPPAS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SPPAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SPPAS. If not, see <http://www.gnu.org/licenses/>.
This banner notice must not be removed.
-------------------------------------------------------------------------
"""
from sppas.src.config import symbols
from sppas.src.config import separators
from sppas.src.config import sppasUnicode
# ----------------------------------------------------------------------------
class SyllRules(object):
    """Manager of a set of rules for syllabification.

    The rules we propose follow usual phonological statements for most of the
    corpus. A configuration file indicates phonemes, classes and rules.
    This file can be edited and modified to adapt the syllabification.

    The syllable configuration file is a simple ASCII text file that the user
    can change as needed. Each meaningful line is made of whitespace-separated
    fields and starts with one of these keywords:

        - ``PHONCLASS phoneme class``: class identifier of a phoneme;
        - ``GENRULE pattern index``: general rule;
        - ``EXCRULE pattern index``: exception rule;
        - ``OTHRULE p1 p2 p3 p4 p5 shift``: gap rule on 5 items, each of
          which is matched literally or with the wildcard ``ANY``.

    Lines that do not match one of these shapes are ignored.

    """

    # Class assigned to any phoneme that has no known class.
    BREAK_SYMBOL = "#"

    # -----------------------------------------------------------------------

    def __init__(self, filename=None):
        """Create a new SyllRules instance.

        :param filename: (str) Name of the file with the rules. If None,
            only the default rules are set.

        """
        self.general = dict()    # general rules: class pattern -> boundary
        self.exception = dict()  # exception rules: class pattern -> boundary
        self.gap = dict()        # gap rules: "p1 p2 p3 p4 p5" -> shift
        self.phonclass = dict()  # phoneme -> class identifier

        if filename is not None:
            self.load(filename)
        else:
            self.reset()

    # ------------------------------------------------------------------------

    def reset(self):
        """Reset the set of rules to the default ones.

        Exception and gap rules are emptied, general rules are set to the
        built-in defaults and each symbol of ``symbols.all`` is assigned the
        break symbol as class.

        """
        # NOTE(review): the original code assigned "VXXXXXV" twice (2, then
        # 3), so only 3 was ever effective. The effective value is kept here;
        # confirm whether 2 was the intended boundary for this pattern.
        self.general = {
            "VV": 0,
            "VXV": 0,
            "VXXV": 1,
            "VXXXV": 1,
            "VXXXXV": 1,
            "VXXXXXV": 3,
            "VXXXXXXV": 3,
        }
        self.exception = dict()
        self.gap = dict()
        self.phonclass = dict()
        for phone in symbols.all:
            self.phonclass[phone] = SyllRules.BREAK_SYMBOL

    # ------------------------------------------------------------------------

    def load(self, filename):
        """Load the rules from a file.

        The current rules are reset to the defaults first, then each rule of
        the file is added (or overrides a default one).

        :param filename: (str) Name of the file with the rules.
        :raises: IOError/OSError if the file can't be read
        :raises: ValueError if the value of a rule is not an integer

        """
        self.reset()

        # The context manager closes the file: no explicit close() needed.
        with open(filename, "r") as f:
            lines = f.readlines()

        for line_nb, line in enumerate(lines, 1):
            wds = sppasUnicode(line).to_strip().split()
            if not wds:
                continue
            keyword = wds[0]

            try:
                if len(wds) == 3:
                    if keyword == "PHONCLASS":
                        self.phonclass[wds[1]] = wds[2]
                    elif keyword == "GENRULE":
                        self.general[wds[1]] = int(wds[2])
                    elif keyword == "EXCRULE":
                        self.exception[wds[1]] = int(wds[2])
                elif len(wds) == 7 and keyword == "OTHRULE":
                    # The key is the 5 items of the rule, space-separated.
                    self.gap[" ".join(wds[1:6])] = int(wds[6])
            except ValueError:
                # Same exception type as int() raises, but with the location
                # of the faulty rule so the user can fix the file.
                raise ValueError(
                    "Invalid integer value in rule at line {:d} of file "
                    "{!s}: {!s}".format(line_nb, filename, " ".join(wds)))

    # ------------------------------------------------------------------------

    def get_class(self, phoneme):
        """Return the class identifier of the phoneme.

        If the phoneme is unknown, the break symbol is returned.

        :param phoneme: (str) A phoneme
        :returns: class of the phoneme or break symbol

        """
        return self.phonclass.get(phoneme, SyllRules.BREAK_SYMBOL)

    # ------------------------------------------------------------------------

    def is_exception(self, rule):
        """Return True if the rule is an exception rule.

        :param rule: (str) A sequence of classes
        :returns: (bool)

        """
        return rule in self.exception

    # ------------------------------------------------------------------------

    def get_boundary(self, phonemes):
        """Get the index of the syllable boundary (EXCRULES or GENRULES).

        Phonemes are separated with the symbol defined by separators.phonemes
        variable. An exception rule must match the whole sequence of classes
        of the phonemes; a general rule is selected only on its length, which
        must be the number of phonemes.

        :param phonemes: (str) Sequence of phonemes to syllabify
        :returns: (int) boundary index or -1 if phonemes don't match any rule.

        """
        phonemes = sppasUnicode(phonemes).to_strip()
        phon_list = phonemes.split(separators.phonemes)
        classes = "".join(self.get_class(phon) for phon in phon_list)

        # search into exception
        if classes in self.exception:
            return self.exception[classes]

        # search into general
        return self._general_boundary(len(phon_list), -1)

    # ------------------------------------------------------------------------

    def get_class_rules_boundary(self, classes):
        """Get the index of the syllable boundary (EXCRULES or GENRULES).

        An exception rule must match the whole sequence of classes; a general
        rule is selected only on its length, which must be the length of the
        given sequence.

        :param classes: (str) The class sequence to syllabify
        :returns: (int) boundary index or 0 if it does not match any rule.

        """
        # search into exception
        if classes in self.exception:
            return self.exception[classes]

        # search into general
        return self._general_boundary(len(classes), 0)

    # ------------------------------------------------------------------------

    def get_gap(self, phonemes):
        """Return the shift to apply (OTHRULES).

        Gap rules are tested in turn. A rule matches if it is exactly the
        given string, or if it has the same number of space-separated items
        and each of its items is either the wildcard "ANY" or the item at
        the same position in the given string.

        :param phonemes: (str) Phonemes to syllabify, separated by whitespace
        :returns: (int) boundary shift, or 0 if no gap rule matches

        """
        phons = phonemes.split()

        for gp in self.gap:
            if gp == phonemes:
                return self.gap[gp]

            # Search by replacing a phoneme by "ANY"
            if "ANY" not in gp:
                continue
            rule_items = gp.split()
            if len(rule_items) != len(phons):
                continue

            # Put the wildcard at its positions of the rule, then compare.
            masked = " ".join(
                "ANY" if item == "ANY" else phon
                for item, phon in zip(rule_items, phons)
            )
            if gp == masked:
                return self.gap[gp]

        return 0

    # ------------------------------------------------------------------------
    # Private
    # ------------------------------------------------------------------------

    def _general_boundary(self, size, default):
        """Return the boundary of the first general rule of the given length.

        :param size: (int) Expected length of the pattern of the rule
        :param default: (int) Value returned if no general rule has this length
        :returns: (int)

        """
        for key, val in self.general.items():
            if len(key) == size:
                return val
        return default