Source code for annotations.SearchIPUs.searchipus

# -*- coding: UTF-8 -*-
"""
:filename: sppas.src.annotations.SearchIPUs.searchipus.py
:author:   Brigitte Bigi
:contact:  develop@sppas.org
:summary:  Silences vs sounding segments segmentation.

.. _This file is part of SPPAS: http://www.sppas.org/
..
    -------------------------------------------------------------------------

     ___   __    __    __    ___
    /     |  \  |  \  |  \  /              the automatic
    \__   |__/  |__/  |___| \__             annotation and
       \  |     |     |   |    \             analysis
    ___/  |     |     |   | ___/              of speech

    Copyright (C) 2011-2021  Brigitte Bigi
    Laboratoire Parole et Langage, Aix-en-Provence, France

    Use of this software is governed by the GNU Public License, version 3.

    SPPAS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    SPPAS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SPPAS. If not, see <http://www.gnu.org/licenses/>.

    This banner notice must not be removed.

    -------------------------------------------------------------------------

"""

from .silences import sppasSilences

# ---------------------------------------------------------------------------


class SearchIPUs(sppasSilences):
    """An automatic silence/tracks segmentation system.

    Silence/tracks segmentation aims at finding IPUs.
    IPUs - Inter-Pausal Units are blocks of speech bounded by silent pauses
    of more than X ms, and time-aligned on the speech signal.

    """

    # Lower bounds applied by set_min_sil() and set_min_ipu(), in seconds.
    MIN_SIL_DUR = 0.06
    MIN_IPU_DUR = 0.06

    # Values assigned to the members of a new instance by __init__().
    DEFAULT_MIN_SIL_DUR = 0.250
    DEFAULT_MIN_IPU_DUR = 0.300
    DEFAULT_VOL_THRESHOLD = 0
    DEFAULT_SHIFT_START = 0.02
    DEFAULT_SHIFT_END = 0.02

    def __init__(self, channel, win_len=0.02):
        """Create a new SearchIPUs instance.

        :param channel: (sppasChannel)
        :param win_len: (float) Window length. It is forwarded to the parent
            class, together with a quarter of its value as third argument.

        """
        super(SearchIPUs, self).__init__(channel, win_len, win_len / 4.)
        self._win_length = win_len
        self._min_sil_dur = SearchIPUs.DEFAULT_MIN_SIL_DUR
        self._min_ipu_dur = SearchIPUs.DEFAULT_MIN_IPU_DUR
        self._vol_threshold = SearchIPUs.DEFAULT_VOL_THRESHOLD
        # Until get_tracks() is called, the effective threshold is the default.
        self._auto_threshold = SearchIPUs.DEFAULT_VOL_THRESHOLD
        self._shift_start = SearchIPUs.DEFAULT_SHIFT_START
        self._shift_end = SearchIPUs.DEFAULT_SHIFT_END

    # -----------------------------------------------------------------------
    # Manage Channel
    # -----------------------------------------------------------------------

    def get_track_data(self, tracks):
        """Return the audio data of tracks.

        :param tracks: List of tracks. A track is a tuple (start, end).
        :returns: List of audio data

        """
        return self.track_data(tracks)

    # -----------------------------------------------------------------------

    def get_channel(self):
        """Return the channel."""
        return self._channel

    # -----------------------------------------------------------------------
    # Getters for members
    # -----------------------------------------------------------------------

    def get_win_length(self):
        """Return the windows length used to estimate the RMS."""
        return self._win_length

    def get_vol_threshold(self):
        """Return the initial volume threshold used to search for silences."""
        return self._vol_threshold

    def get_effective_threshold(self):
        """Return the threshold volume estimated automatically to search for silences."""
        return self._auto_threshold

    def get_min_sil_dur(self):
        """Return the minimum duration of a silence."""
        return self._min_sil_dur

    def get_min_ipu_dur(self):
        """Return the minimum duration of a track."""
        return self._min_ipu_dur

    def get_shift_start(self):
        """Return the shift value applied to the start boundary of the tracks."""
        return self._shift_start

    def get_shift_end(self):
        """Return the shift value applied to the end boundary of the tracks."""
        return self._shift_end

    # -----------------------------------------------------------------------
    # Setters for members
    # -----------------------------------------------------------------------

    def set_win_length(self, w):
        """Set a new length of window for the estimation of volume values.

        TAKE CARE:
        it cancels any previous estimation of volume and silence search.

        :param w: (float) Window length; 0.01 to 0.04 is the suggested range,
            but only a lower bound of 0.002 is enforced.

        """
        self._win_length = max(float(w), 0.002)
        if self._channel is not None:
            # The channel is set again so the parent class restarts from it.
            # NOTE(review): confirm that the parent's set_channel() picks up
            # the new window length stored in self._win_length.
            self.set_channel(self._channel)

    # -----------------------------------------------------------------------

    def set_vol_threshold(self, vol_threshold):
        """Fix the default minimum volume value to find silences.

        It won't affect the current list of silence values: the new value is
        passed to search_silences() at the next call of get_tracks().

        :param vol_threshold: (int) RMS value. A negative value is replaced
            by DEFAULT_VOL_THRESHOLD.

        """
        self._vol_threshold = int(vol_threshold)
        if self._vol_threshold < 0:
            self._vol_threshold = SearchIPUs.DEFAULT_VOL_THRESHOLD

    # -----------------------------------------------------------------------

    def set_min_sil(self, min_sil_dur):
        """Fix the default minimum duration of a silence.

        :param min_sil_dur: (float) Duration in seconds. A value lower than
            MIN_SIL_DUR is replaced by MIN_SIL_DUR.

        """
        self._min_sil_dur = max(float(min_sil_dur), SearchIPUs.MIN_SIL_DUR)

    # -----------------------------------------------------------------------

    def set_min_ipu(self, min_ipu_dur):
        """Fix the default minimum duration of an IPU.

        :param min_ipu_dur: (float) Duration in seconds. A value lower than
            MIN_IPU_DUR is replaced by MIN_IPU_DUR.

        """
        self._min_ipu_dur = max(float(min_ipu_dur), SearchIPUs.MIN_IPU_DUR)

    # -----------------------------------------------------------------------

    def _accepts_shift(self, s):
        """Return True if s is strictly between -min_ipu_dur and min_sil_dur.

        Shared by set_shift_start() and set_shift_end() so that both setters
        apply exactly the same range rule.

        :param s: (float) Candidate shift value.

        """
        return -self._min_ipu_dur < s < self._min_sil_dur

    # -----------------------------------------------------------------------

    def set_shift_start(self, s):
        """Fix the default minimum boundary shift value of the start.

        The value is ignored, and the current one is kept, if it is not
        strictly between -min_ipu_dur and min_sil_dur.

        :param s: (float) Duration in seconds.

        """
        s = float(s)
        if self._accepts_shift(s):
            self._shift_start = s

    # -----------------------------------------------------------------------

    def set_shift_end(self, s):
        """Fix the default minimum boundary shift value of the end.

        The value is ignored, and the current one is kept, if it is not
        strictly between -min_ipu_dur and min_sil_dur.

        :param s: (float) Duration in seconds.

        """
        s = float(s)
        if self._accepts_shift(s):
            self._shift_end = s

    # -----------------------------------------------------------------------

    def min_channel_duration(self):
        """Return the minimum duration we expect for a channel.

        It is the largest of the minimum silence and minimum IPU durations,
        to which both shift values are added.

        """
        d = max(self._min_sil_dur, self._min_ipu_dur)
        return d + self._shift_start + self._shift_end

    # -----------------------------------------------------------------------

    def get_rms_stats(self):
        """Return min, max, mean, median, coefficient of variation of the RMS.

        :returns: (list) The five values, in this order.

        """
        vs = self.get_volstats()
        return [vs.min(), vs.max(), vs.mean(), vs.median(), vs.coefvariation()]

    # -----------------------------------------------------------------------
    # Silence/Speech segmentation
    # -----------------------------------------------------------------------

    def get_tracks(self, time_domain=False):
        """Return a list of tuples (from,to) of tracks.

        (from,to) values are converted, or not, into the time-domain.

        The tracks are found from the current list of silences, which is
        firstly filtered with the min_sil_dur.

        This method requires the following members to be fixed:
            - the volume threshold
            - the minimum duration for a silence,
            - the minimum duration for a track,
            - the duration to remove to the start boundary,
            - the duration to add to the end boundary.

        As a side effect, the effective threshold returned by
        get_effective_threshold() is updated.

        :param time_domain: (bool) Convert from/to values in seconds.
            Only the value True enables the conversion.
        :returns: (list of tuples) with (from,to) of the tracks

        """
        # Search for the silences, comparing each rms to the threshold
        self._auto_threshold = self.search_silences(self._vol_threshold)

        # Keep only silences during more than a given duration
        # remove silences first because we are interested in finding tracks
        # from sppas-4.1, the min sil value is taking into account the
        # future shift values applied to 'enlarge' the IPUs
        msd = self._min_sil_dur + self._shift_start + self._shift_end
        thr = self._auto_threshold // 2
        self.filter_silences(thr, msd)

        # Get the (from_pos, to_pos) of the tracks during more than
        # a given duration and shift these values (from-start; to+end)
        tracks = self.extract_tracks(self._min_ipu_dur,
                                     self._shift_start,
                                     self._shift_end)

        # Convert the (from_pos, to_pos) of tracks into (from_time, to_time)
        if time_domain is True:
            # The framerate is the same for every track: read it only once.
            framerate = float(self._channel.get_framerate())
            return [(float(from_pos) / framerate, float(to_pos) / framerate)
                    for from_pos, to_pos in tracks]

        return tracks