Estimate descriptive statistics of annotations of a tier.
Map a tier into a dictionary where:
- key is a tag
- value is the list of observed durations of this tag in annotations
Estimate descriptive statistics of annotations of a tier.
Map a tier into a dictionary where:
Create a new TierStats instance.
def __init__(self, tier=None, n=1, with_radius=0, with_alt=False):
    """Build a statistics estimator for one or several tiers.

    :param tier: (either sppasTier or list of them)
    :param n: (int) n-gram value, passed to set_ngram()
    :param with_radius: (int) 0 to use Midpoint, negative value
    to use R-, positive value to use R+
    :param with_alt: (bool) Use or not use of alternative labels

    """
    # These options are stored exactly as given (no conversion here).
    self._with_alt = with_alt
    self._with_radius = with_radius
    self.tier = tier

    # Default n-gram value, then let the setter handle the requested n.
    self._ngram = 1
    self.set_ngram(n)
Returns the n-gram value.
def get_ngram(self):
    """Return the n value currently used to build the n-grams.

    :returns: the stored n-gram value

    """
    return self._ngram
Returns how to use the radius in duration estimations.
0 means to use Midpoint, negative value means to use R-, and positive value means to use R+.
def get_with_radius(self):
    """Return the radius option used in duration estimations.

    - 0 means to use Midpoint;
    - a negative value means to use R-;
    - a positive value means to use R+.

    :returns: the stored with_radius value

    """
    return self._with_radius
Return whether alternative labels are used.
def get_with_alt(self):
    """Return whether alternative labels are used.

    :returns: the stored with_alt value

    """
    return self._with_alt
Return the tier to estimate stats.
def get_tier(self):
    """Return the tier, or list of tiers, used to estimate stats.

    :returns: the stored tier attribute, as it was given

    """
    return self.tier
Set the with_radius option, used to estimate the duration.
with_radius can take the following values:
def set_with_radius(self, with_radius):
    """Fix the with_radius option, used to estimate the duration.

    The given value is converted with int() before being stored:

    - 0 means to use midpoint;
    - negative value means to use (midpoint-radius);
    - positive radius means to use (midpoint+radius).

    :param with_radius: (int) Fix the with_radius option

    """
    radius_option = int(with_radius)
    self._with_radius = radius_option
Set the withalt option, used to select the labels.
def set_withalt(self, withalt):
    """Fix the withalt option, used to select the labels.

    - False means to use only the label with the higher score
    of each annotation
    - True means to use all labels of each annotation

    :param withalt: (bool) value stored as it is given

    """
    self._with_alt = withalt
Set the n value of the n-grams.
It fixes the history size (must be at least 1).
def set_ngram(self, n):
    """Fix the n value of the n-grams.

    It is used to fix the history size. The value is converted with
    int() and must range from 1 to MAX_NGRAM.

    :param n: (int) n-gram value
    :raises: InsideIntervalError if n is out of [1, MAX_NGRAM]

    """
    n = int(n)
    # Reject out-of-range values first; nothing is stored in that case.
    if n <= 0 or n > MAX_NGRAM:
        raise InsideIntervalError(n, 1, MAX_NGRAM)
    self._ngram = n
Create a DescriptiveStatistic object for the given tier.
def ds(self):
    """Create a DescriptiveStatistic object for the given tier.

    The n-grams of each tier are gathered into one list, which is then
    mapped into a dictionary of durations with tuple_to_dict().

    :returns: (sppasDescriptiveStatistics)

    """
    all_ngrams = [
        ngram
        for pairs in self.__tiers_to_tuple()
        for ngram in self.__ngrams(pairs)
    ]
    durations = sppasTierStats.tuple_to_dict(all_ngrams)
    return sppasDescriptiveStatistics(durations)
Convert into a dictionary.
@staticmethod
def tuple_to_dict(items):
"""Convert into a dictionary.
:param items: (tuple) the ngram items
:returns: dictionary key=text, value=list of durations.
"""
d = dict()
for item in items:
dur = sum([i[1] for i in item])
text = ' '.join([i[0] for i in item])
if text not in d:
d[text] = []
d[text].append(dur)
return d
Return a list of tuples of label/duration pairs.
def __tiers_to_tuple(self):
    """Return a list of tuples of label/duration pairs.

    One tuple is created for each tier. A tier which is not given into
    a list is handled like a list with this single tier.

    :returns: (list) one tuple of label/duration pairs per tier; an
    empty list if no tier was assigned (tier is None)

    """
    # None is the default value of the constructor: there's nothing to
    # convert, and iterating over it would raise a TypeError.
    if self.tier is None:
        return []

    tiers = self.tier
    if not isinstance(tiers, list):
        tiers = [tiers]
    return [self.__tier_to_tuple(tier) for tier in tiers]
Return a tuple of label/duration pairs for a given tier.
def __tier_to_tuple(self, tier):
    """Return a tuple of label/duration pairs for a given tier.

    Annotations without labels are ignored. For each remaining one,
    either the best tag of each label or all the tags of each label are
    selected, depending on the with_alt option. A None tag is ignored.

    The duration is estimated only if the annotation has exactly one
    label; otherwise it is 0. The margin of the duration is added if
    with_radius is negative and subtracted if it is positive.

    :param tier: (Tier)
    :returns: tuple of (tag content, duration) pairs

    """
    pairs = list()
    for ann in tier:
        labels = ann.get_labels()
        if labels is None or len(labels) == 0:
            continue

        # Select the tags of this annotation.
        tags = list()
        for label in labels:
            # Truthiness test: any false value (False, 0, None) selects
            # the best tag only.
            if not self._with_alt:
                best = label.get_best()
                # Same guard as for the alternative tags below.
                if best is not None:
                    tags.append(best)
            else:
                for tag, _score in label:
                    if tag is not None:
                        tags.append(tag)

        # Estimate the duration shared by all the tags.
        dur = 0.0
        if len(labels) == 1:
            duration = ann.get_location().get_best().duration()
            dur = duration.get_value()
            # NOTE(review): a negative option makes the duration longer
            # and a positive one makes it shorter -- confirm it matches
            # the intended meaning of R- and R+.
            if self._with_radius < 0:
                dur += duration.get_margin()
            elif self._with_radius > 0:
                dur -= duration.get_margin()

        for tag in tags:
            pairs.append((tag.get_content(), dur))

    return tuple(pairs)
Return a list of n-grams.
def __ngrams(self, items):
    """Return the list of n-grams of the given items.

    Each n-gram is a slice of n consecutive items, n being the current
    n-gram value. If there are exactly n items, they make one n-gram.
    If there are fewer than n items, the list is empty.

    :param items: (tuple) the ngram items
    :returns: (list) slices of items, in their order of appearance

    """
    n = self._ngram
    # There are len(items)-n+1 start positions; range() is empty if
    # this number is zero or negative.
    nb_starts = len(items) - n + 1
    return [items[i:i + n] for i in range(nb_starts)]