Source code for anndata.ann.annlabel.label

# -*- coding: UTF-8 -*-
"""
:filename: sppas.src.anndata.annlabel.label.py
:author: Brigitte Bigi
:contact: develop@sppas.org
:summary: Represent one of the labels of an annotation.

.. _This file is part of SPPAS: http://www.sppas.org/
..
    -------------------------------------------------------------------------

     ___   __    __    __    ___
    /     |  \  |  \  |  \  /              the automatic
    \__   |__/  |__/  |___| \__             annotation and
       \  |     |     |   |    \             analysis
    ___/  |     |     |   | ___/              of speech

    Copyright (C) 2011-2021  Brigitte Bigi
    Laboratoire Parole et Langage, Aix-en-Provence, France

    Use of this software is governed by the GNU Public License, version 3.

    SPPAS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    SPPAS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SPPAS. If not, see <http://www.gnu.org/licenses/>.

    This banner notice must not be removed.

    -------------------------------------------------------------------------

"""

import copy

from ...anndataexc import AnnDataTypeError

from .tag import sppasTag

# ----------------------------------------------------------------------------


class sppasLabel(object):
    """Represent the content of an annotation.

    sppasLabel allows to store a set of sppasTags with their scores.
    This class is using a list of lists, i.e. a list of pairs (tag, score).
    This is the best compromise between memory usage, speed and
    readability.

    A label is a list of possible sppasTag(), represented as a UNICODE
    string. A data type can be associated, as sppasTag() can be 'int',
    'float' or 'bool'.

    Internally, the list of pairs is None as long as no tag was appended.

    """

    def __init__(self, tag, score=None):
        """Create a new sppasLabel instance.

        If a list of tags is given with a list of scores of the same
        length, tags and scores are paired. If the lengths differ, or if
        score is not a list, the scores are ignored. If a single tag is
        given with a list of scores, the scores are ignored too.

        :param tag: (sppasTag or list of sppasTag or None)
        :param score: (float or list of float or None)
        :raises: AnnDataTypeError (raised by append)

        """
        self.__tags = None

        if tag is None:
            return

        if isinstance(tag, list):
            if isinstance(score, list) and len(tag) == len(score):
                for t, s in zip(tag, score):
                    self.append(t, s)
            else:
                # scores are ignored
                for t in tag:
                    self.append(t)
        else:
            # a list of scores can't be assigned to a single tag
            if isinstance(score, list):
                score = None
            self.append(tag, score)

    # -----------------------------------------------------------------------
    # Setters
    # -----------------------------------------------------------------------

    def append_content(self, content, data_type="str", score=None):
        """Add a text into the list.

        A sppasTag is created from the given content and data type, then
        it is appended like with append().

        :param content: (str)
        :param data_type: (str) The type of this text content.
            One of: (str, int, float, bool)
        :param score: (float)

        """
        tag = sppasTag(content, data_type)
        self.append(tag, score)

    # -----------------------------------------------------------------------

    def append(self, tag, score=None):
        """Add a sppasTag into the list.

        Do not add the tag if this alternative is already inside the list,
        but add the scores.

        :param tag: (sppasTag)
        :param score: (float)
        :returns: (bool) True if the tag was added as a new alternative,
            False if an alternative with the same typed content already
            existed (its score is then updated if score is not None).
        :raises: AnnDataTypeError if tag is not a sppasTag or if its type
            differs from the type of the tags already in the label.

        """
        if not isinstance(tag, sppasTag):
            raise AnnDataTypeError(tag, "sppasTag")

        if self.__tags is None:
            self.__tags = list()

        # check types consistency: all the tags share the type of the 1st.
        if len(self.__tags) > 0:
            expected_type = self.__tags[0][0].get_type()
            if expected_type != tag.get_type():
                raise AnnDataTypeError(tag, expected_type)

        # check if this tag content is already in the list
        for pair in self.__tags:
            current_tag, current_score = pair
            if tag.get_typed_content() == current_tag.get_typed_content():
                if score is not None:
                    if current_score is None:
                        pair[1] = score
                    else:
                        # scores of a same alternative are cumulated
                        pair[1] += score
                return False

        self.__tags.append([tag, score])
        return True

    # -----------------------------------------------------------------------

    def remove(self, tag):
        """Remove a tag of the list.

        Nothing is done if the tag is not in the label. The label turns
        back to the un-tagged state when its last tag is removed.

        :param tag: (sppasTag) the tag to be removed of the list.
        :raises: AnnDataTypeError if tag is not a sppasTag.

        """
        if not isinstance(tag, sppasTag):
            raise AnnDataTypeError(tag, "sppasTag")

        if self.__tags is None:
            return

        # Build the list of the pairs to keep instead of removing items
        # while iterating, and only drop pairs which are really matching
        # the given tag (a single non-matching tag must be preserved).
        kept = [pair for pair in self.__tags if not (pair[0] == tag)]
        if len(kept) > 0:
            self.__tags = kept
        else:
            self.__tags = None

    # -----------------------------------------------------------------------

    def get_score(self, tag):
        """Return the score of a tag or None if tag is not in the label.

        :param tag: (sppasTag)
        :returns: score: (float)
        :raises: AnnDataTypeError if tag is not a sppasTag.

        """
        if not isinstance(tag, sppasTag):
            raise AnnDataTypeError(tag, "sppasTag")

        if self.__tags is not None:
            for current_tag, current_score in self.__tags:
                if current_tag == tag:
                    return current_score

        return None

    # -----------------------------------------------------------------------

    def set_score(self, tag, score):
        """Set a score to a given tag.

        Nothing is done if the tag is not in the label.

        :param tag: (sppasTag)
        :param score: (float)
        :raises: AnnDataTypeError if tag is not a sppasTag.

        """
        if not isinstance(tag, sppasTag):
            raise AnnDataTypeError(tag, "sppasTag")

        if self.__tags is not None:
            for pair in self.__tags:
                if pair[0] == tag:
                    pair[1] = score

    # -----------------------------------------------------------------------

    def get_best(self):
        """Return the best sppasTag, i.e. the one with the better score.

        A score which is None is never better than a numerical score.
        If no tag has a score, the first tag is returned.

        :returns: (sppasTag or None)

        """
        if not self.__tags:
            return None

        if len(self.__tags) == 1:
            return self.__tags[0][0]

        _max_tag = self.__tags[0][0]
        _max_score = self.__tags[0][1]
        # Browse in reverse order: as long as no score was found, each
        # tag replaces the candidate, so the first tag is the last
        # candidate when all the scores are None.
        for (t, s) in reversed(self.__tags):
            if _max_score is None or (s is not None and s > _max_score):
                _max_score = s
                _max_tag = t

        return _max_tag

    # -----------------------------------------------------------------------

    def get_type(self):
        """Return the type of the tags content.

        :returns: (str) The type of the first tag, or "str" if no tag
            is set.

        """
        if not self.__tags:
            return "str"

        return self.__tags[0][0].get_type()

    # -----------------------------------------------------------------------

    def is_tagged(self):
        """Return False if no tag is set."""
        if self.__tags is None:
            return False
        return len(self.__tags) > 0

    # -----------------------------------------------------------------------

    def is_string(self):
        """Return True if tags are string or unicode.

        Return False if no tag is set.

        """
        if self.is_tagged() is False:
            return False

        return self.__tags[0][0].get_type() == "str"

    # -----------------------------------------------------------------------

    def is_float(self):
        """Return True if tags are of type "float".

        Return False if no tag is set.

        """
        if self.is_tagged() is False:
            return False

        return self.__tags[0][0].get_type() == "float"

    # -----------------------------------------------------------------------

    def is_int(self):
        """Return True if tags are of type "int".

        Return False if no tag is set.

        """
        if self.is_tagged() is False:
            return False

        return self.__tags[0][0].get_type() == "int"

    # -----------------------------------------------------------------------

    def is_bool(self):
        """Return True if tags are of type "bool".

        Return False if no tag is set.

        """
        if self.is_tagged() is False:
            return False

        return self.__tags[0][0].get_type() == "bool"

    # -----------------------------------------------------------------------

    def is_point(self):
        """Return True if tags are of type "point".

        Return False if no tag is set.

        """
        if self.is_tagged() is False:
            return False

        return self.__tags[0][0].get_type() == "point"

    # -----------------------------------------------------------------------

    def copy(self):
        """Return a deep copy of the label."""
        return copy.deepcopy(self)

    # -----------------------------------------------------------------------

    def match(self, tag_functions, logic_bool="and"):
        """Return True if a tag matches all or any of the functions.

        Return False if no tag is set.

        :param tag_functions: list of (function, value, logical_not)
        :param logic_bool: (str) Apply a logical "and" or a logical "or"
            between the functions. Any other value than "and" is
            applied like "or".
        :returns: (bool)

        - function: a function in python with 2 arguments: tag/value
        - value: the expected value for the tag
        - logical_not: boolean

        :Example: Search if a tag is exactly matching "R":

            >>> l.match([(exact, "R", False)])

        :Example: Search if a tag is starting with "p" or starting with "t":

            >>> l.match([(startswith, "p", False),
            >>>          (startswith, "t", False), ], logic_bool="or")

        """
        # an un-tagged label can't match anything
        if self.__tags is None:
            return False

        # any tag can match
        for tag, score in self.__tags:
            # all the functions are evaluated, then results are combined
            matches = list()
            for func, value, logical_not in tag_functions:
                if logical_not is True:
                    matches.append(not func(tag, value))
                else:
                    matches.append(func(tag, value))

            if logic_bool == "and":
                is_matching = all(matches)
            else:
                is_matching = any(matches)

            # no need to test the next tags if the current one is matching.
            if is_matching is True:
                return True

        return False

    # -----------------------------------------------------------------------

    def serialize(self, empty="", alt=True):
        """Convert the label into a string, include or not alternative tags.

        @DeprecationWarning

        Use aioutils.serialize_label() instead.

        :param empty: (str) The string to return if the label is not
            tagged, and to use instead of the content of an empty tag.
        :param alt: (bool) Include the alternative tags. If False, or if
            the label has only one tag, only the content of the best tag
            is returned.
        :returns: (str) The content of the best tag, or the contents of
            all the tags like "{content1|content2|...}".

        """
        if self.is_tagged() is False:
            return empty

        best = self.get_best()
        if best is None:
            return empty

        if alt is False or len(self.__tags) == 1:
            if best.is_empty():
                return empty
            return best.get_content()

        # we store the alternative tags into a list.
        # empty tags are replaced by the empty item.
        tag_contents = list()
        for tag, score in self.__tags:
            content = tag.get_content()
            if len(content) > 0:
                tag_contents.append(content)
            else:
                tag_contents.append(empty)

        return "{" + "|".join(tag_contents) + "}"

    # -----------------------------------------------------------------------
    # Overloads
    # -----------------------------------------------------------------------

    def __format__(self, fmt):
        """Format the string representation of the label with fmt."""
        return str(self).__format__(fmt)

    # -----------------------------------------------------------------------

    def __repr__(self):
        """Return each pair like 'sppasTag(tag, score=score), '."""
        if self.__tags is None:
            return ""
        return "".join(
            "sppasTag({!s:s}, score={!s:s}), ".format(t, s)
            for t, s in self.__tags
        )

    # -----------------------------------------------------------------------

    def __str__(self):
        """Return each pair like 'tag, score ; '."""
        if self.__tags is None:
            return ""
        return "".join(
            "{!s:s}, {!s:s} ; ".format(t, s)
            for t, s in self.__tags
        )

    # -----------------------------------------------------------------------

    def __iter__(self):
        """Iterate over the [tag, score] pairs of the label."""
        if self.__tags is not None:
            for t in self.__tags:
                yield t

    # -----------------------------------------------------------------------

    def __getitem__(self, i):
        """Return the i-th [tag, score] pair.

        :raises: IndexError if no tag is set or if i is out of range.

        """
        if self.__tags is None:
            raise IndexError(i)
        return self.__tags[i]

    # -----------------------------------------------------------------------

    def __len__(self):
        """Return the number of tags of the label."""
        if self.__tags is None:
            return 0
        return len(self.__tags)

    # -----------------------------------------------------------------------

    def __eq__(self, other):
        """Return True if other has the same tags with the same scores.

        A label without tag is equal to None and to any other label
        without tag.

        """
        if self.__tags is None:
            # self has no tag: it is equal to None, or to another label
            # without tag (so that an un-tagged label is equal to itself).
            if other is None:
                return True
            return isinstance(other, sppasLabel) and len(other) == 0

        if other is None:
            return False
        if isinstance(other, sppasLabel) is False:
            return False
        if len(self.__tags) != len(other):
            return False

        for (pair1, pair2) in zip(self.__tags, other):
            # compare the typed content of the tags and
            # also compare the scores...
            if pair1[0] != pair2[0]:
                return False
            if pair1[1] != pair2[1]:
                return False

        return True

    # -----------------------------------------------------------------------

    def __ne__(self, other):
        """Return True if the labels are not equal."""
        return not self == other