SPPAS 4.22

https://sppas.org/

Module sppas.src.annotations

Class sppasSyll

Description

SPPAS integration of the automatic syllabification annotation.

Constructor

Create a new sppasSyll instance with only the general rules.

Log is used for a better communication of the annotation process and its results. If None, logs are redirected to the default logging system.

Parameters
  • log: (sppasLog) Human-readable logs.
View Source
def __init__(self, log=None):
    """Build a syllabification annotation holding only the general rules.

    The parent constructor receives the 'syllabify.json' configuration
    name together with the log.

    :param log: (sppasLog) Human-readable logs, used for a better
    communication of the annotation process and its results. If None,
    logs are redirected to the default logging system.

    """
    super(sppasSyll, self).__init__('syllabify.json', log)
    # Until load_resources() is called, the language is undetermined and
    # the syllabifier is created without any configuration file.
    self.__lang = 'und'
    self.__syllabifier = Syllabifier()

Public functions

load_resources

Fix the syllabification rules from a configuration file.

Parameters
  • config_filename: Name of the configuration file with the rules
  • lang: (str) Iso639-3 of the language or "und" if unknown.
View Source
def load_resources(self, config_filename, lang='und', **kwargs):
    """Set the syllabification rules from a configuration file.

    :param config_filename: Name of the configuration file with the rules
    :param lang: (str) Iso639-3 of the language or "und" if unknown.
    :param kwargs: Extra keyword arguments are accepted but not used here.

    """
    # The language is stored first, then the current syllabifier is
    # replaced by a new one created from the given file.
    self.__lang = lang
    rules = Syllabifier(config_filename)
    self.__syllabifier = rules

fix_options

Fix all options.

Available options are:

  • usesintervals
  • usesphons
  • tiername
  • createclasses
  • createstructs
Parameters
  • options: (sppasOption)
View Source
def fix_options(self, options):
    """Apply each of the given options to this annotation.

    Available options are:

        - usesintervals
        - usesphons
        - tiername
        - createclasses
        - createstructs

    Any option whose key contains 'pattern' is stored as it is.

    :param options: (sppasOption) Iterable of options; each one gives
    its key with get_key() and its value with get_value().
    :raises: AnnotationOptionError if a key matches none of the above.

    """
    # Key of an option -> name of the method in charge of fixing it.
    setter_names = {
        'usesintervals': 'set_usesintervals',
        'usesphons': 'set_usesphons',
        'tiername': 'set_tiername',
        'createclasses': 'set_create_tier_classes',
        'createstructs': 'set_create_tier_structures',
    }
    for option in options:
        name = option.get_key()
        if name in setter_names:
            getattr(self, setter_names[name])(option.get_value())
        elif 'pattern' in name:
            self._options[name] = option.get_value()
        else:
            raise AnnotationOptionError(name)

set_usesintervals

Fix the usesintervals option.

Parameters
  • mode: (bool) If mode is set to True, the syllabification operates inside specific (given) intervals.
View Source
def set_usesintervals(self, mode):
    """Fix the usesintervals option.

    The value is converted with bool() before being stored, like the
    'createclasses' and 'createstructs' options. run() enables this mode
    only if the stored value is exactly True, so a truthy value which is
    not a bool (1 for example) would otherwise be silently ignored.

    :param mode: (bool) If mode is set to True, the syllabification
    operates inside specific (given) intervals.

    """
    self._options['usesintervals'] = bool(mode)

set_usesphons

Fix the usesphons option.

Parameters
  • mode: (bool) If mode is set to True, the syllabification operates by using only the tier with phonemes.
View Source
def set_usesphons(self, mode):
    """Fix the usesphons option.

    The value is converted with bool() before being stored, like the
    'createclasses' and 'createstructs' options. run() enables this mode
    only if the stored value is exactly True, so a truthy value which is
    not a bool (1 for example) would otherwise be silently ignored.

    :param mode: (bool) If mode is set to True, the syllabification
    operates by using only the tier with phonemes.

    """
    self._options['usesphons'] = bool(mode)

set_create_tier_classes

Fix the createclasses option.

Parameters
  • create: (bool)
View Source
def set_create_tier_classes(self, create=True):
    """Enable or disable the createclasses option.

    :param create: (bool) Converted with bool() before being stored.

    """
    enabled = bool(create)
    self._options['createclasses'] = enabled

set_create_tier_structures

Fix the createstructs option.

Parameters
  • create: (bool)
View Source
def set_create_tier_structures(self, create=True):
    """Enable or disable the createstructs option.

    :param create: (bool) Converted with bool() before being stored.

    """
    enabled = bool(create)
    self._options['createstructs'] = enabled

set_tiername

Fix the tiername option.

Parameters
  • tier_name: (str)
View Source
def set_tiername(self, tier_name):
    """Fix the tiername option.

    The given name is passed through sppasUnicode(...).to_strip() and the
    result is what gets stored.

    :param tier_name: (str)

    """
    stripped_name = sppasUnicode(tier_name).to_strip()
    self._options['tiername'] = stripped_name

convert

Syllabify labels of a time-aligned phones tier.

Parameters
  • phonemes: (sppasTier) time-aligned phonemes tier
  • intervals: (sppasTier)
Returns
  • (sppasTier)
View Source
def convert(self, phonemes, intervals=None):
    """Syllabify labels of a time-aligned phones tier.

    The result is a new tier named 'SyllAlign'. Its metadata store the
    name of the phonemes tier and the filename of the rules in use.

    :param phonemes: (sppasTier) time-aligned phonemes tier
    :param intervals: (sppasTier) Intervals inside which the phonemes are
    syllabified. If None, they are created from the phonemes tier.
    :returns: (sppasTier)

    """
    if intervals is None:
        intervals = sppasSyll._phon_to_intervals(phonemes)

    syllables = sppasTier('SyllAlign')
    syllables.set_meta('syllabification_of_tier', phonemes.get_name())
    rules_filename = self.__syllabifier.get_rules_filename()
    syllables.set_meta('linguistic_resource_rules', rules_filename)

    for interval in intervals:
        # Index of the first phoneme: lindex() is tried first and mindex()
        # is the fallback when lindex() returns -1.
        first_idx = phonemes.lindex(interval.get_lowest_localization())
        if first_idx == -1:
            first_idx = phonemes.mindex(interval.get_lowest_localization(), bound=-1)

        # Index of the last phoneme: same strategy with rindex().
        last_idx = phonemes.rindex(interval.get_highest_localization())
        if last_idx == -1:
            last_idx = phonemes.mindex(interval.get_highest_localization(), bound=1)

        if first_idx == -1 or last_idx == -1:
            # No phoneme index for one of the bounds: the interval is
            # reported with a warning and skipped.
            self.logfile.print_message(
                info(1224, 'annotations').format(interval),
                indent=2,
                status=annots.warning)
            continue

        self.syllabify_interval(phonemes, first_idx, last_idx, syllables)

    return syllables

make_classes

Create the tier with syllable classes.

Parameters
  • syllables: (sppasTier)
View Source
def make_classes(self, syllables):
    """Create the tier with syllable classes.

    Each annotation of the given tier produces one annotation in the new
    'SyllClassAlign' tier, with a copy of its location and a label made
    of what the syllabifier's classes_phonetized() returns for the
    content of its best tag.

    :param syllables: (sppasTier)
    :returns: (sppasTier)

    """
    classes_tier = sppasTier('SyllClassAlign')
    classes_tier.set_meta('classes_of_tier', syllables.get_name())
    for ann in syllables:
        loc = ann.get_location().copy()
        content = ann.get_best_tag().get_typed_content()
        tag = sppasTag(self.__syllabifier.classes_phonetized(content))
        classes_tier.create_annotation(loc, sppasLabel(tag))
    return classes_tier

make_structures

Create the tier with syllable structures.

Parameters
  • syllables: (sppasTier)
View Source
def make_structures(self, syllables):
    """Create the tier with syllable structures.

    Each annotation of the given tier produces one annotation in the new
    'SyllStructsAlign' tier, with a copy of its location and a label made
    of what the syllabifier's structures_phonetized() returns for the
    content of its best tag.

    :param syllables: (sppasTier)
    :returns: (sppasTier)

    """
    structs_tier = sppasTier('SyllStructsAlign')
    structs_tier.set_meta('structures_of_tier', syllables.get_name())
    for ann in syllables:
        loc = ann.get_location().copy()
        content = ann.get_best_tag().get_typed_content()
        tag = sppasTag(self.__syllabifier.structures_phonetized(content))
        structs_tier.create_annotation(loc, sppasLabel(tag))
    return structs_tier

syllabify_interval

Perform the syllabification of one interval.

Parameters
  • phonemes: (sppasTier)
  • from_p: (int) index of the first phoneme to be syllabified
  • to_p: (int) index of the last phoneme to be syllabified
  • syllables: (sppasTier)
View Source
def syllabify_interval(self, phonemes, from_p, to_p, syllables):
    """Perform the syllabification of one interval.

    The created syllables are appended to the given syllables tier;
    nothing is returned.

    :param phonemes: (sppasTier)
    :param from_p: (int) index of the first phoneme to be syllabified
    :param to_p: (int) index of the last phoneme to be syllabified
    :param syllables: (sppasTier)

    """
    # Content of the best tag of each phoneme of the interval (to_p included).
    phones = [
        ann.get_best_tag().get_typed_content()
        for ann in phonemes[from_p:to_p + 1]
    ]

    for boundaries in self.__syllabifier.annotate(phones):
        # The boundaries are indexes in 'phones': from_p is added to get
        # the matching indexes in the phonemes tier.
        first, last = boundaries
        begin = phonemes[first + from_p].get_lowest_localization().copy()
        end = phonemes[last + from_p].get_highest_localization().copy()
        location = sppasLocation(sppasInterval(begin, end))

        text = Syllabifier.phonetize_syllables(phones, [boundaries])
        syllables.create_annotation(location, sppasLabel(sppasTag(text)))

get_inputs

Return the tier with time-aligned phonemes and the tier with intervals.

Parameters
  • input_files: (list)
Raises

NoTierInputError

Returns
  • (sppasTier)
View Source
def get_inputs(self, input_files):
    """Return the tier with time-aligned phonemes and the intervals tier.

    Each file is read in turn. The intervals tier is searched by the name
    stored in the 'tiername' option until a search result is not None.
    The phonemes tier is searched with sppasFindTier.aligned_phones()
    in the same way.

    :param input_files: (list) Filenames; None entries are ignored.
    :raise: NoTierInputError if no phonemes tier was found
    :return: (tuple) The phonemes tier, and the intervals tier or None

    """
    phon_tier = None
    intervals_tier = None

    for filename in input_files:
        if filename is not None:
            trs_input = sppasTrsRW(filename).read()
            if intervals_tier is None:
                intervals_tier = trs_input.find(self._options['tiername'])
            if phon_tier is None:
                phon_tier = sppasFindTier.aligned_phones(trs_input)

    if phon_tier is not None:
        return (phon_tier, intervals_tier)

    logging.error('A tier with a time-aligned phonemes was not found.')
    raise NoTierInputError

run

Run the automatic annotation process on an input.

Parameters
  • input_files: (list of str) Time-aligned phonemes
  • output: (str) the output file name
Returns
  • (sppasTranscription)
View Source
def run(self, input_files, output=None):
    """Run the automatic annotation process on an input.

    :param input_files: (list of str) Time-aligned phonemes
    :param output: (str) the output file name
    :returns: (sppasTranscription) if output is None; otherwise a list
    with the name of the file which was written.
    :raises: EmptyOutputError if output is given but no tier was created.

    """
    phon_tier, intervals = self.get_inputs(input_files)

    # The result, with its metadata
    trs_output = sppasTranscription(self.name)
    trs_output.set_meta('annotation_result_of', input_files[0])
    trs_output.set_meta('language_iso', 'iso639-3')
    trs_output.set_meta('language_name_0', 'Undetermined')
    # A language of 3 characters is used as code; anything else is 'und'.
    lang_code = self.__lang if len(self.__lang) == 3 else 'und'
    trs_output.set_meta('language_code_0', lang_code)
    trs_output.set_meta('language_url_0', 'https://iso639-3.sil.org/code/' + lang_code)

    # Syllabification based on the phonemes tier only
    if self._options['usesphons'] is True:
        syll_tier = self.convert(phon_tier)
        trs_output.append(syll_tier)
        if self._options['createclasses']:
            trs_output.append(self.make_classes(syll_tier))
        if self._options['createstructs']:
            trs_output.append(self.make_structures(syll_tier))

    # Syllabification inside the given intervals
    if self._options['usesintervals'] is True:
        if intervals is not None:
            syll_int_tier = self.convert(phon_tier, intervals)
            syll_int_tier.set_name('SyllAlign-Intervals')
            syll_int_tier.set_meta('syllabification_used_intervals', intervals.get_name())
            trs_output.append(syll_int_tier)
            if self._options['createclasses']:
                extra = self.make_classes(syll_int_tier)
                extra.set_name('SyllClassAlign-Intervals')
                trs_output.append(extra)
            if self._options['createstructs']:
                extra = self.make_structures(syll_int_tier)
                extra.set_name('SyllStructsAlign-Intervals')
                trs_output.append(extra)
        else:
            # The tier of intervals was not found: only a warning is logged.
            self.logfile.print_message(
                info(1264, 'annotations').format(tiername=self._options['tiername']),
                indent=2,
                status=annots.warning)

    # Without an output filename, the transcription itself is the result.
    if output is None:
        return trs_output

    if len(trs_output) == 0:
        raise EmptyOutputError

    output_file = self.fix_out_file_ext(output)
    sppasTrsRW(output_file).write(trs_output)
    return [output_file]

get_output_pattern

Pattern this annotation uses in an output filename.

View Source
def get_output_pattern(self):
    """Return the 'outputpattern' option, or '-syll' if it is not set."""
    default_pattern = '-syll'
    return self._options.get('outputpattern', default_pattern)

get_input_pattern

Pattern this annotation expects for its input filename.

View Source
def get_input_pattern(self):
    """Return the 'inputpattern' option, or '-palign' if it is not set."""
    default_pattern = '-palign'
    return self._options.get('inputpattern', default_pattern)

Private functions

_phon_to_intervals

Create the intervals to be syllabified.

We could use symbols.phone only, but for backward compatibility we hard-code the symbols that were previously used in SPPAS.

Parameters
  • phonemes
View Source
@staticmethod
def _phon_to_intervals(phonemes):
    """Create the intervals to be syllabified.

    The keys of symbols.phone could be used alone, but for backward
    compatibility the symbols previously used in SPPAS are hard-coded
    in addition.

    :param phonemes: Object on which export_to_intervals() is called with
    the list of symbols.
    :returns: The result of phonemes.export_to_intervals()

    """
    separators = list(symbols.phone.keys())
    separators.extend(['#', '@@', '+', 'gb', 'lg'])
    return phonemes.export_to_intervals(separators)