Source code for annotations.Align.models.tiermapping

# -*- coding: UTF-8 -*-
:author:   Brigitte Bigi
:summary:  Map tags of annotated tiers.

.. _This file is part of SPPAS:

     ___   __    __    __    ___
    /     |  \  |  \  |  \  /              the automatic
    \__   |__/  |__/  |___| \__             annotation and
       \  |     |     |   |    \             analysis
    ___/  |     |     |   | ___/              of speech

    Copyright (C) 2011-2021  Brigitte Bigi
    Laboratoire Parole et Langage, Aix-en-Provence, France

    Use of this software is governed by the GNU Public License, version 3.

    SPPAS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    SPPAS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SPPAS. If not, see <http://www.gnu.org/licenses/>.

    This banner notice must not be removed.



from sppas.src.config import u
from sppas.src.resources import sppasMapping
from sppas.src.anndata import sppasTag
from sppas.src.anndata import sppasLabel
from sppas.src.anndata import sppasAnnotation
from sppas.src.anndata import sppasTier

# ---------------------------------------------------------------------------

class sppasMappingTier(sppasMapping):
    """Map content of annotations of a tier from a mapping table.

    A conversion table is used to map the symbols of the labels of a tier
    to new symbols. This class can convert either individual symbols or
    strings of symbols (syllables, words, ...) if a separator is given.

    Any symbol of the transcription tier which is not in the conversion
    table is replaced by a specific symbol (by default '*').

    """

    def __init__(self, dict_name=None):
        """Create a sppasMappingTier instance.

        :param dict_name: (str) The mapping dictionary.

        """
        super(sppasMappingTier, self).__init__(dict_name)
        # Characters used to split a tag content into symbols
        self._delimiters = sppasMapping.DEFAULT_SEP
        # By default only speech tags are mapped (see map_tag)
        self._map_symbols = False

    # -----------------------------------------------------------------------

    def get_delimiters(self):
        """Return the list of delimiters."""
        return self._delimiters

    # -----------------------------------------------------------------------

    def set_delimiters(self, delimit_list):
        """Fix the list of characters to be used as symbol delimiters.

        If delimit_list is an empty list, the mapping system will map with
        a longest matching algorithm.

        :param delimit_list: List of characters, for example [" ", ".", "-"]
        :raises: IndexError if an element of the list is an empty string

        """
        # Each delimiter must be a single character: only the first one of
        # each element is kept. The result is stored as an immutable tuple,
        # not as a list.
        self._delimiters = tuple(u(c)[0] for c in delimit_list)

    # -----------------------------------------------------------------------

    def set_map_symbols(self, value):
        """Define if symbols have to be mapped or not.

        :param value: (bool) True to map all non-empty string tags,
            False to map only the tags for which is_speech() is True.

        """
        self._map_symbols = value

    # -----------------------------------------------------------------------

    def map_tier(self, tier):
        """Run the mapping process on an input tier.

        The given tier is not modified. The returned tier is named after
        the input one with the "-map" suffix, shares its media and
        receives a copy of its metadata, except 'id'.

        :param tier: (sppasTier) The tier instance to map label symbols.
        :returns: a new tier

        """
        # Create the output tier
        new_tier = sppasTier(tier.get_name() + "-map")
        new_tier.set_media(tier.get_media())
        for key in tier.get_meta_keys():
            # the 'id' of the original tier is not propagated
            if key != 'id':
                new_tier.set_meta(key, tier.get_meta(key))
        new_tier.set_meta('tier_was_mapped_from', tier.get_name())

        # if no annotations
        if len(tier) == 0:
            return new_tier

        # always map, even if empty mapping table: it will copy annotations
        for ann in tier:
            new_tier.add(self.map_annotation(ann))

        return new_tier

    # -----------------------------------------------------------------------

    def map_annotation(self, annotation):
        """Run the mapping process on an annotation.

        The given annotation is not modified.

        :param annotation: (sppasAnnotation) annotation with symbols to map
        :returns: a new annotation. Its metadata are those of the given
            one, except 'id' which is never copied.

        """
        # Annotation without label: nothing to map, return a copy on which
        # gen_id() is called so that it does not keep the copied 'id'.
        if annotation.is_labelled() is False:
            a = annotation.copy()
            a.gen_id()
            return a

        # Map all tags of all labels, if tags are strings
        new_labels = [self.map_label(label)
                      for label in annotation.get_labels()]

        # Create an annotation with the new version of labels
        a = sppasAnnotation(annotation.get_location().copy(), new_labels)
        for m in annotation.get_meta_keys():
            if m != 'id':
                a.set_meta(m, annotation.get_meta(m))
        return a

    # -----------------------------------------------------------------------

    def map_label(self, label):
        """Run the mapping process on a label.

        Each (tag, score) of the label is replaced by the tags returned
        by map_tag(). When a score is defined, it is equally shared
        between these tags.

        :param label: (sppasLabel) label with symbols to map
        :returns: sppasLabel()

        """
        new_label = sppasLabel(None)

        for tag, score in label:
            new_tags = self.map_tag(tag)
            # Share the score of the original tag between its alternatives.
            # The division is skipped when there is no mapped tag.
            shared = None
            if score is not None and len(new_tags) > 0:
                shared = float(score) / float(len(new_tags))
            for new_tag in new_tags:
                new_label.append(new_tag, shared)

        return new_label

    # -----------------------------------------------------------------------

    def map_tag(self, tag):
        """Run the mapping process on a tag.

        A tag is mapped only if it is a non-empty string and either
        is_speech() is True or the mapping of symbols was enabled with
        set_map_symbols(True). Any other tag is simply copied.

        :param tag: (sppasTag) tag with symbols to map
        :returns: List of sppasTag()

        """
        # only non-empty strings can be mapped
        if tag.get_type() == 'str' and tag.is_empty() is False:

            # only speech can be mapped, not the symbols.
            if tag.is_speech() is True or self._map_symbols is True:
                content = tag.get_content()
                # remove the surrounding braces, if any
                if content.startswith('{') and content.endswith('}'):
                    content = content[1:-1]

                # NOTE(review): the call target was lost in this listing;
                # restored as the map() method inherited from sppasMapping
                # - confirm against the upstream file.
                mapped_content = self.map(content, self._delimiters)

                # each '|'-separated part of the result becomes a tag
                return [sppasTag(alt) for alt in mapped_content.split('|')]

        return [tag.copy()]