Source code for annotations.OtherRepet.rules

"""
:filename: sppas.src.annotations.OtherRepet.rules.py
:author:   Brigitte Bigi
:contact:  develop@sppas.org
:summary:  Other-Repetitions rules to accept/reject a repetition.

.. _This file is part of SPPAS: http://www.sppas.org/
..
    -------------------------------------------------------------------------

     ___   __    __    __    ___
    /     |  \  |  \  |  \  /              the automatic
    \__   |__/  |__/  |___| \__             annotation and
       \  |     |     |   |    \             analysis
    ___/  |     |     |   | ___/              of speech

    Copyright (C) 2011-2021  Brigitte Bigi
    Laboratoire Parole et Langage, Aix-en-Provence, France

    Use of this software is governed by the GNU Public License, version 3.

    SPPAS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    SPPAS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SPPAS. If not, see <http://www.gnu.org/licenses/>.

    This banner notice must not be removed.

    -------------------------------------------------------------------------

"""

from ..SelfRepet.rules import SelfRules

# ----------------------------------------------------------------------------


class OtherRules(SelfRules):
    """Rules to select other-repetitions.

    Proposed rules deal with the number of words, the word frequencies and
    distinguish whether the repetition is strict or not.

    The following rules are proposed for other-repetitions:

        - Rule 1: A source is accepted if it contains one or more relevant
          token. Relevance depends on the speaker producing the echo;
        - Rule 2: A source which contains at least K tokens is accepted if
          the repetition is strict.

    Rule number 1 needs a clear definition of the relevance of a token.
    Irrelevant tokens are then stored in a stop-list. The stop-list should
    also contain very frequent tokens of the given language like adjectives,
    pronouns, etc.

    """

    def __init__(self, stop_list=None):
        """Create an OtherRules instance.

        :param stop_list: (sppasVocabulary or list) Irrelevant tokens.
            It is forwarded unchanged to the SelfRules constructor.

        """
        super(OtherRules, self).__init__(stop_list)

    # -----------------------------------------------------------------------

    def rule_strict(self, start, end, speaker1, speaker2):
        """Apply rule 2 to decide if selection is a repetition or not.

        Rule 2: The selection is a repetition only if it respects both of
        the following criteria:

            - the selection contains at least 3 tokens;
            - the repetition is strict: the tokens of the source appear
              contiguously, in the same order and as whole tokens, in the
              echo.

        :param start: (int) Index of the first token of the selection
        :param end: (int) Index of the last token of the selection (included)
        :param speaker1: (DataSpeaker) Tokens of the source speaker. It is
            indexed with integers and each entry must be a string.
        :param speaker2: (DataSpeaker) Tokens of the echoing speaker. It must
            support len() and integer indexing; each entry must be a string.
        :returns: (bool) True if the selection is a strict repetition

        """
        # Fewer than 3 tokens (i.e. end - start < 2) is never accepted.
        if (end - start) < 2:
            return False

        # One string with the selected tokens of the source speaker,
        # and one string with all the tokens of the echoing speaker.
        source = " ".join(speaker1[i] for i in range(start, end + 1))
        echo = " ".join(speaker2[i] for i in range(len(speaker2)))

        # Surround both strings with the token delimiter so that a match can
        # only start and end on a token boundary. Without it, the source
        # "a b c" would wrongly be found in the echo "xa b cy".
        return (" " + source + " ") in (" " + echo + " ")