Source code for annotations.LexMetric.occrank

# -*- coding : UTF-8 -*-
"""
:filename: sppas.src.annotations.LexMetric.occrank.py
:author:   Brigitte Bigi
:contact:  develop@sppas.org
:summary:  Part of the LexMetric automatic annotation for occurrences and ranks.

.. _This file is part of SPPAS: http://www.sppas.org/
..
    -------------------------------------------------------------------------

     ___   __    __    __    ___
    /     |  \  |  \  |  \  /              the automatic
    \__   |__/  |__/  |___| \__             annotation and
       \  |     |     |   |    \             analysis
    ___/  |     |     |   | ___/              of speech

    Copyright (C) 2011-2021  Brigitte Bigi
    Laboratoire Parole et Langage, Aix-en-Provence, France

    Use of this software is governed by the GNU Public License, version 3.

    SPPAS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    SPPAS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SPPAS. If not, see <http://www.gnu.org/licenses/>.

    This banner notice must not be removed.

    -------------------------------------------------------------------------

"""

from sppas.src.config import sppasTypeError
from sppas.src.anndata import sppasTier
from sppas.src.anndata import sppasTag
from sppas.src.anndata import sppasLabel
from sppas.src.resources import sppasUnigram

# ---------------------------------------------------------------------------


class OccRank(object):
    """Estimate occurrences and ranks of the items of a tier.

    The tag contents of the labels of the given tier are counted once and
    stored in a unigram. Two tiers can then be derived from it:

        - occ(): each tag is replaced by its total number of occurrences;
        - rank(): each tag is replaced by the number of times its content
          was seen so far, in tier order.

    """

    def __init__(self, tier, alt=True):
        """Create an instance of OccRank.

        :param tier: (sppasTier) The tier whose tags are counted
        :param alt: (bool) Use alternative tags to estimate counts and rank
        :raises: sppasTypeError if tier is not a sppasTier

        """
        if not isinstance(tier, sppasTier):
            raise sppasTypeError(tier, "sppasTier")

        self.__tier = tier
        # Assign the option directly: going through set_use_alt() here would
        # trigger a first counting pass whenever alt is False, immediately
        # followed by the second one below.
        self.__alt = bool(alt)
        self.__unigram = sppasUnigram()
        self.__estimate_counts()

    # -----------------------------------------------------------------------

    def __estimate_counts(self):
        """Re-create the unigram with the tag contents of the tier."""
        self.__unigram = sppasUnigram()
        for ann in self.__tier:
            for label in ann.get_labels():
                if self.__alt:
                    # Every alternative tag counts for one occurrence.
                    for tag, score in label:
                        self.__unigram.add(tag.get_content())
                else:
                    best = label.get_best()
                    # NOTE(review): get_best() presumably returns None for a
                    # label without any tag -- confirm against sppasLabel.
                    if best is not None:
                        self.__unigram.add(best.get_content())

    # -----------------------------------------------------------------------

    def get_use_alt(self):
        """Return True if alternative tags are used."""
        return self.__alt

    # -----------------------------------------------------------------------

    def set_use_alt(self, value):
        """Either alternative tags are used or not.

        The counts are estimated again only if the option really changed.

        :param value: (bool)

        """
        alt = bool(value)
        if alt != self.__alt:
            self.__alt = alt
            self.__estimate_counts()

    # -----------------------------------------------------------------------

    def occ(self):
        """Return a tier with occurrences of all labels.

        :Example:

            input:   the | little | little | cat
            output:   1  |   2    |   2    |  1

        :returns: (sppasTier) Tier named "LM-Occ-" followed by the name of
        the input tier, with "int" tags

        """
        return self.__counted_tier(
            "LM-Occ-%s" % self.__tier.get_name(),
            "occurrences_of_tier",
            self.__unigram.get_count)

    # -----------------------------------------------------------------------

    def rank(self):
        """Return a tier with the rank of each label.

        The rank of a tag is the number of times its content was
        encountered so far, the current one included.

        :Example:

            input:   the | little | little | cat
            output:   1  |   1    |   2    |  1

        :returns: (sppasTier) Tier named "LM-Rank-" followed by the name of
        the input tier, with "int" tags

        """
        # A local unigram is filled progressively while browsing the tier.
        seen = sppasUnigram()

        def next_rank(content):
            seen.add(content)
            return seen.get_count(content)

        return self.__counted_tier(
            "LM-Rank-%s" % self.__tier.get_name(),
            "rank_of_tier",
            next_rank)

    # -----------------------------------------------------------------------

    def __counted_tier(self, name, meta_key, count_of):
        """Return a copy of the tier in which tags are replaced by a count.

        :param name: (str) Name of the tier to create
        :param meta_key: (str) Metadata key storing the input tier name
        :param count_of: (callable) Return the int value of a tag content
        :returns: (sppasTier)

        """
        new_tier = sppasTier(name)
        new_tier.set_meta(meta_key, self.__tier.get_name())

        for ann in self.__tier:
            location = ann.get_location().copy()
            labels = [self.__counted_label(label, count_of)
                      for label in ann.get_labels()]
            # An annotation without label is not reported in the result.
            if labels:
                new_tier.create_annotation(location, labels)

        return new_tier

    # -----------------------------------------------------------------------

    def __counted_label(self, label, count_of):
        """Return a label with the "int" count of the tags of a label.

        :param label: (sppasLabel) Label of the input tier
        :param count_of: (callable) Return the int value of a tag content
        :returns: (sppasLabel)

        """
        if self.__alt:
            counted = sppasLabel(tag=None)
            # Each alternative tag keeps its own score.
            for tag, score in label:
                value = count_of(tag.get_content())
                counted.append(sppasTag(value, "int"), score)
            return counted

        best = label.get_best()
        if best is None:
            # NOTE(review): assumes get_best() returns None for a label
            # without tag. Such a label then gives an empty label, like it
            # does when alternative tags are used.
            return sppasLabel(tag=None)

        return sppasLabel(sppasTag(count_of(best.get_content()), "int"))