Source code for annotations.Align.tracksgmt

"""
:filename: sppas.src.annotations.Align.tracksgmt.py
:author:   Brigitte Bigi
:contact:  develop@sppas.org
:summary:  Automatic segmentation of the data into track segments

.. _This file is part of SPPAS: http://www.sppas.org/
..
    -------------------------------------------------------------------------

     ___   __    __    __    ___
    /     |  \  |  \  |  \  /              the automatic
    \__   |__/  |__/  |___| \__             annotation and
       \  |     |     |   |    \             analysis
    ___/  |     |     |   | ___/              of speech

    Copyright (C) 2011-2021  Brigitte Bigi
    Laboratoire Parole et Langage, Aix-en-Provence, France

    Use of this software is governed by the GNU Public License, version 3.

    SPPAS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    SPPAS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SPPAS. If not, see <http://www.gnu.org/licenses/>.

    This banner notice must not be removed.

    -------------------------------------------------------------------------

"""

import os
import codecs

from sppas.src.config import sg
from sppas.src.config import info
from sppas.src.config import sppasUnicode

from .aligners import sppasAligners

# ---------------------------------------------------------------------------


class TrackSegmenter(object):
    """Automatic segmentation of a unit of speech.

    Speech segmentation of a unit of speech (IPU/utterance/sentence/segment)
    at phones and tokens levels.
    This class is mainly an interface with external automatic aligners.

    It is expected that all the following data were previously properly
    fixed:

        - audio file: 1 channel, 16000 Hz, 16 bits;
        - tokenization: UTF-8 encoding file (optional);
        - phonetization: UTF-8 encoding file;
        - acoustic model: HTK-ASCII (Julius or HVite expect this format);

    and that:

        - both the AC and phonetization are based on the same phone set
        - both the tokenization and phonetization contain the same nb of words

    """

    # Shared factory of aligners, and the name of the one used by default.
    aligners = sppasAligners()
    DEFAULT_ALIGNER = aligners.default_aligner_name()

    # -----------------------------------------------------------------------

    def __init__(self, model=None, aligner_name=DEFAULT_ALIGNER):
        """Create a TrackSegmenter instance.

        It is expected that the AC model contains at least a file with name
        "hmmdefs", and a file with name "monophones" for HVite command.
        It can also contain:

            - tiedlist file;
            - monophones.repl file;
            - config file.

        Any other file will be ignored.

        :param model: (str) Name of the directory of the acoustic model.
        :param aligner_name: (str) The identifier name of the aligner.

        """
        # The acoustic model directory
        self._model_dir = None

        # The automatic alignment system, and the "basic".
        # The basic aligner is used:
        #   - when the track segment contains only one phoneme;
        #   - when the track segment does not contain phonemes.
        self._aligner = None
        self.set_aligner(aligner_name)
        self._basic_aligner = TrackSegmenter.aligners.instantiate(None)

        # The model is fixed last: set_model() re-instantiates the aligner
        # that was just created, this time with the model directory.
        if model is not None:
            self.set_model(model)

    # -----------------------------------------------------------------------

    def set_model(self, model):
        """Fix an acoustic model to perform time-alignment.

        :param model: (str) Name of the directory of the acoustic model.

        """
        self._model_dir = model

        # re-instantiate the same aligner with the appropriate model
        self._instantiate_aligner(self._aligner.name())

    # -----------------------------------------------------------------------

    def set_aligner(self, aligner_name):
        """Fix the name of the aligner, one of aligners.ALIGNERS_TYPES.

        :param aligner_name: (str) Case-insensitive name of an aligner system.

        """
        self._instantiate_aligner(aligner_name)

    # -----------------------------------------------------------------------

    def get_aligner_name(self):
        """Return the name of the instantiated aligner."""
        return self._aligner.name()

    # -----------------------------------------------------------------------

    def get_aligner_ext(self):
        """Return the output file extension the aligner will use."""
        return self._aligner.get_outext()

    # -----------------------------------------------------------------------

    def set_aligner_ext(self, ext):
        """Fix the output file extension the aligner will use.

        :param ext: (str) The extension, forwarded as-is to the aligner.

        """
        self._aligner.set_outext(ext)

    # -----------------------------------------------------------------------

    def get_model(self):
        """Return the model directory name."""
        return self._model_dir

    # -----------------------------------------------------------------------

    def segment(self, audio_filename, phon_name, token_name, align_name):
        """Call an aligner to perform speech segmentation and manage errors.

        The basic aligner is used instead of the selected one when there is
        nothing to align, when the audio file does not exist, or when there
        is at most one phoneme to align.

        :param audio_filename: (str) the audio file name of an IPU
        :param phon_name: (str) file name with the phonetization
        :param token_name: (str) file name with the tokenization
        :param align_name: (str) file name to save the result WITHOUT ext.
        :returns: A message of the aligner in case of any problem, or
            an empty string if success.

        """
        # Get the phonetization and tokenization strings to time-align.
        # Both aligners always receive them, even if they are empty strings.
        phones = ""
        tokens = ""

        if phon_name is not None:
            phones = self._readline(phon_name)
        self._aligner.set_phones(phones)
        self._basic_aligner.set_phones(phones)

        if token_name is not None:
            tokens = self._readline(token_name)
        self._aligner.set_tokens(tokens)
        self._basic_aligner.set_tokens(tokens)

        # Do not align nothing!
        # NOTE(review): the basic aligner is given a number here instead of
        # an audio file name -- presumably a duration; confirm against the
        # basic aligner implementation.
        if len(phones) == 0:
            self._basic_aligner.run_alignment(0., align_name)
            return info(1222, "annotations")

        # If no audio available...
        if not os.path.exists(audio_filename):
            return self._basic_aligner.run_alignment(1., align_name)

        # Do not align only one phoneme!
        if len(phones.split()) <= 1 and "-" not in phones:
            self._basic_aligner.run_alignment(audio_filename, align_name)
            return ""

        # Execute Alignment: the diagnosis of the data check comes first,
        # followed by the message of the alignment itself.
        ret = self._aligner.check_data()
        ret += self._aligner.run_alignment(audio_filename, align_name)
        return ret

    # -----------------------------------------------------------------------
    # Private
    # -----------------------------------------------------------------------

    def _instantiate_aligner(self, name):
        """Instantiate self._aligner to the appropriate Aligner system.

        :param name: (str) Name of the aligner to create; it is given the
            current model directory.

        """
        self._aligner = TrackSegmenter.aligners.instantiate(
            self._model_dir, name)

    # -----------------------------------------------------------------------

    def _readline(self, filename):
        """Return the first line of a file as a unicode formatted string.

        Reading is best-effort: any error while opening, decoding or
        cleaning the line results in an empty string.

        :param filename: (str) Name of the file to read, opened with the
            encoding sg.__encoding__.
        :returns: The stripped first line, or an empty string on failure.

        """
        try:
            # The context manager closes the file: no explicit close needed.
            with codecs.open(filename, 'r', sg.__encoding__) as fp:
                first_line = fp.readline()
            return sppasUnicode(first_line).to_strip()
        except Exception:
            # Deliberately not a bare "except": KeyboardInterrupt and
            # SystemExit must not be silently turned into an empty line.
            return ""