Source code for annotations.TGA.timegroupanalysis
# -*- coding: UTF-8 -*-
"""
:filename: sppas.src.annotations.TGA.timegroupanalysis.py
:author: Brigitte Bigi
:contact: develop@sppas.org
:summary: The time group analyzer partly re-implemented
.. _This file is part of SPPAS: <http://www.sppas.org/>
..
---------------------------------------------------------------------
___ __ __ __ ___
/ | \ | \ | \ / the automatic
\__ |__/ |__/ |___| \__ annotation and
\ | | | | \ analysis
___/ | | | | ___/ of speech
Copyright (C) 2011-2021 Brigitte Bigi
Laboratoire Parole et Langage, Aix-en-Provence, France
Use of this software is governed by the GNU Public License, version 3.
SPPAS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SPPAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SPPAS. If not, see <http://www.gnu.org/licenses/>.
This banner notice must not be removed.
---------------------------------------------------------------------
"""
from sppas.src.calculus.stats import variability
from sppas.src.calculus.stats.linregress import tga_linear_regression
from sppas.src.calculus.stats.descriptivesstats import sppasDescriptiveStatistics
# ----------------------------------------------------------------------------
[docs]class TimeGroupAnalysis(sppasDescriptiveStatistics):
u"""Time Group Analyzer estimator class.
TGA: Time Group Analyzer is an online tool for speech annotation mining
written by Dafydd Gibbon (Universität Bielefeld).
See: <http://wwwhomes.uni-bielefeld.de/gibbon/TGA/>
This class estimates TGA on a set of data values, stored in a dictionary:
- key is the name of the time group;
- value is the list of durations of each segments in the time group.
>>> d = {'tg1':[1.0, 1.2, 3.2, 4.1] , 'tg2':[2.9, 3.3, 3.6, 5.8]}
>>> tga = TimeGroupAnalysis(d)
>>> total = tga.total()
>>> intercept, slope = tga.intercept_slope()
>>> print(slope['tg_1'])
>>> print(slope['tg_2'])
"""
[docs] def __init__(self, dict_items):
"""TGA - The Time Group Analyzer.
:param dict_items: (dict) a dict of a list of durations.
"""
super(TimeGroupAnalysis, self).__init__(dict_items)
# -----------------------------------------------------------------------
# Specific estimators for speech rythm analysis
# -----------------------------------------------------------------------
[docs] def rPVI(self):
"""Estimate the Raw Pairwise Variability Index of data values.
:returns: (dict) a dictionary of (key, nPVI) of float values
"""
return dict((key, variability.rPVI(values))
for key, values in self._items.items())
# -----------------------------------------------------------------------
[docs] def nPVI(self):
"""Estimate the Normalized Pairwise Variability Index of data values.
:returns: (dict) a dictionary of (key, nPVI) of float values
"""
return dict((key, variability.nPVI(values))
for key, values in self._items.items())
# -----------------------------------------------------------------------
[docs] def intercept_slope_original(self):
"""Estimate the intercept like the original TGA of data values.
Create the list of points (x,y) of each TG where:
- x is the position
- y is the duration
:returns: (dict) a dict of (key, (intercept,slope)) of float values
"""
lin_reg = list()
for key, values in self._items.items():
points = [(pos, dur) for pos, dur in enumerate(values)]
lin_reg.append((key, (tga_linear_regression(points))))
return dict(lin_reg)
# -----------------------------------------------------------------------
[docs] def intercept_slope(self):
"""Estimate the intercept like AnnotationPro of data values.
Create the list of points (x,y) of each TG where:
- x is the timestamps
- y is the duration
:returns: (dict) a dict of (key, (intercept, slope)) of float values
"""
lin_reg = list()
for key, values in self._items.items():
points = list()
timestamp = 0.
for duration in values:
points.append((timestamp, duration))
timestamp += duration
lin_reg.append((key, (tga_linear_regression(points))))
return dict(lin_reg)