Source code for annotations.Align.models.acm.phoneset

"""
:filename: sppas.src.annotations.Align.models.acm.phoneset.py
:author:   Brigitte Bigi
:contact:  develop@sppas.org
:summary:  Data structure for the list of phonemes of an acm.

.. _This file is part of SPPAS: http://www.sppas.org/
..
    -------------------------------------------------------------------------

     ___   __    __    __    ___
    /     |  \  |  \  |  \  /              the automatic
    \__   |__/  |__/  |___| \__             annotation and
       \  |     |     |   |    \             analysis
    ___/  |     |     |   | ___/              of speech

    Copyright (C) 2011-2021  Brigitte Bigi
    Laboratoire Parole et Langage, Aix-en-Provence, France

    Use of this software is governed by the GNU Public License, version 3.

    SPPAS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    SPPAS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SPPAS. If not, see <http://www.gnu.org/licenses/>.

    This banner notice must not be removed.

    -------------------------------------------------------------------------

"""

from sppas.src.config import symbols
from sppas.src.config import separators
from sppas.src.resources.dictpron import sppasDictPron
from sppas.src.resources.vocab import sppasVocabulary

# ---------------------------------------------------------------------------


class sppasPhoneSet(sppasVocabulary):
    """Manager of the list of phonemes.

    This class allows to manage the list of phonemes:

        - get it from a pronunciation dictionary,
        - read it from a file,
        - write it into a file,
        - check if a phone string is valid to be used with HTK toolkit.

    """

    def __init__(self, filename=None):
        """Create a sppasPhoneSet instance.

        Add events to the list: laugh, dummy, noise, silence.

        :param filename: (str) A file with 1 column containing the list
            of phonemes.

        """
        super(sppasPhoneSet, self).__init__(filename,
                                            nodump=True,
                                            case_sensitive=True)

        # Add every key of symbols.phone, except the ones whose value
        # is "pause".
        for key in symbols.phone:
            if symbols.phone[key] != "pause":
                self.add(key)

    # -----------------------------------------------------------------------

    def add_from_dict(self, dict_filename):
        """Add the list of phones from a pronunciation dictionary.

        Each pronunciation is split into its variants, each variant is
        split into its phones, and each phone is added to this set.

        :param dict_filename: (str) Name of an HTK-ASCII pronunciation dict

        """
        d = sppasDictPron(dict_filename)
        for key in d:
            value = d.get_pron(key)
            for variant in value.split(separators.variants):
                for phone in variant.split(separators.phonemes):
                    self.add(phone)

    # -----------------------------------------------------------------------

    @staticmethod
    def check_as_htk_phone(phone):
        """Check if a phone is correct to be used with HTK toolkit.

        A phone can't start by a digit nor '-' nor '+', and must be ASCII.
        It also must not be empty nor have leading/trailing whitespace.

        :param phone: (str) Phone to be checked
        :returns: (bool) True if the phone fulfills all the rules above

        """
        try:
            phone = str(phone)
            # str() never fails on a Python 3 string, so the ASCII
            # requirement is verified explicitly. UnicodeError covers both
            # UnicodeEncodeError and UnicodeDecodeError.
            phone.encode("ascii")
        except UnicodeError:
            return False

        # Must contain characters!
        if len(phone) == 0:
            return False

        # Must not start nor end with whitespace
        if phone != phone.strip():
            return False

        # Must not start by minus or plus
        if phone[0] in ('-', '+'):
            return False

        # Must not start by a digit
        return not phone[0].isdigit()