# Source code for annotations.Intsint.intsint
# -*- coding: UTF-8 -*-
"""
:filename: sppas.src.annotations.Intsint.intsint.py
:author: Tatsuya Watanabe, Brigitte Bigi
:contact: develop@sppas.org
:summary: Algorithm of INTSINT automatic annotation.
.. _This file is part of SPPAS: http://www.sppas.org/
..
-------------------------------------------------------------------------
___ __ __ __ ___
/ | \ | \ | \ / the automatic
\__ |__/ |__/ |___| \__ annotation and
\ | | | | \ analysis
___/ | | | | ___/ of speech
Copyright (C) 2011-2021 Brigitte Bigi
Laboratoire Parole et Langage, Aix-en-Provence, France
Use of this software is governed by the GNU Public License, version 3.
SPPAS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SPPAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SPPAS. If not, see <http://www.gnu.org/licenses/>.
This banner notice must not be removed.
-------------------------------------------------------------------------
"""
import math
from ..annotationsexc import SmallSizeInputError
# ----------------------------------------------------------------------------
BIG_NUMBER = 32764
# ----------------------------------------------------------------------------
def octave(value):
    """Convert a value from the linear scale to the octave scale.

    The octave scale is the base-2 logarithm of the value.

    :param value: (float) Strictly positive value (e.g. a pitch in Hz).
    :returns: (float) Base-2 logarithm of the given value.
    :raises ValueError: raised by math.log if value is not strictly positive.

    """
    return math.log(value) / math.log(2)
# ----------------------------------------------------------------------------
def linear(value):
    """Convert a value from the octave scale to the linear scale.

    :param value: (float) Value expressed in octaves.
    :returns: (int, float) 2 raised to the power of the given value.

    """
    return 2 ** value
# -------------------------------------------------------------------
class Intsint(object):
    """Provide optimal INTSINT coding for anchor points.

    Anchors are converted to the octave scale. A grid search over the
    key (mid) and the range keeps the sequence of tones whose estimated
    values give the lowest sum of squared errors against the targets.

    """

    # parameters for data checking.
    MIN_F0 = 60  # (Hz)
    MAX_F0 = 600  # (Hz)

    # parameters for optimization.
    MIN_PAUSE = 0.5  # seconds
    MIN_RANGE = 0.5  # octaves
    MAX_RANGE = 2.5  # octaves
    STEP_RANGE = 0.1  # octaves
    MEAN_SHIFT = 50  # (Hz)
    STEP_SHIFT = 1  # (Hz)

    # parameters for target estimation.
    HIGHER = 0.5
    LOWER = 0.5
    UP = 0.25
    DOWN = 0.25

    # List of "absolute" tones
    TONES_ABSOLUTE = ['T', 'M', 'B']
    # List of "relative" tones
    TONES_RELATIVE = ['H', 'L', 'U', 'D', 'S']
    # All tones
    TONES = TONES_ABSOLUTE + TONES_RELATIVE

    # -------------------------------------------------------------------

    def __init__(self):
        """Create a new Intsint instance."""
        # reset() assigns every member: single place for initial values.
        self.reset()

    # -------------------------------------------------------------------

    def reset(self):
        """Set all members to their initial value."""
        # Best solution recorded by optimise()
        self.best_intsint = None
        self.best_estimate = None
        self.best_mid = 0
        self.best_range = 0
        self.min_ss_error = 0

        # Working data: one entry per anchor
        self.intsint = []
        self.estimates = []
        self.targets = []
        self.time = []

        # Parameters of the solution currently evaluated (octave scale)
        self.mid = 0
        self.top = 0
        self.bottom = 0
        self.last_estimate = 0

        # Bounds of the key search space (Hz), fixed by init()
        self.min_mean = 0
        self.max_mean = 0

    # -------------------------------------------------------------------

    def adjust_f0(self, f0):
        """Return F0 value within self range of values.

        :param f0: (float) Input pitch value.
        :returns: (float) The given value, or MIN_F0/MAX_F0 if it is
            outside of this range.

        """
        if f0 < Intsint.MIN_F0:
            return Intsint.MIN_F0
        if f0 > Intsint.MAX_F0:
            return Intsint.MAX_F0
        return f0

    # -------------------------------------------------------------------

    def init(self, momel_anchors):
        """Initialize INTSINT attributes from a list of targets.

        All members are reset, then targets, times and the bounds of
        the key search space are assigned.

        :param momel_anchors: (list of tuple) List of time
            (in seconds) and anchors (Hz).
        :raises ZeroDivisionError: if the given list is empty.

        """
        self.reset()
        for (time, target) in momel_anchors:
            # Convert f0 to octave scale
            self.targets.append(octave(self.adjust_f0(target)))
            self.time.append(time)

        nb_targets = len(self.targets)
        self.intsint = [""] * nb_targets
        self.estimates = [0] * nb_targets

        # The mean is estimated on the octave scale then converted to Hz
        mean_f0 = float(sum(self.targets)) / float(nb_targets)
        linear_mean_f0 = round(linear(mean_f0))
        self.min_mean = linear_mean_f0 - Intsint.MEAN_SHIFT
        self.max_mean = linear_mean_f0 + Intsint.MEAN_SHIFT

        # Infinity: the first evaluated solution is always accepted,
        # whatever its error.
        self.min_ss_error = float("inf")

    # -------------------------------------------------------------------

    def optimise(self, mid, _range):
        """Fix tones for a given key and range, and keep the best coding.

        A tone is assigned to each target. The sum of the squared errors
        between estimates and targets is compared to the lowest one
        observed so far: if lower, the solution is stored in the
        "best" members.

        The members targets, time, intsint and estimates must have the
        same length.

        :param mid: (float) Key, in octaves.
        :param _range: (float) Range, in octaves.

        """
        # estimate() reads self.mid for the tone "M": keep it in sync
        # with the key used here to choose the tones.
        self.mid = mid
        self.top = mid + _range / 2
        self.bottom = mid - _range / 2

        ss_error = 0.
        for i, target in enumerate(self.targets):
            after_pause = \
                i == 0 or \
                self.time[i] - self.time[i - 1] > Intsint.MIN_PAUSE

            if after_pause:
                # first anchor or after a pause: choose from (MTB)
                tone = self._closest_absolute_tone(target, mid)
                estimated = self.estimate(tone, self.last_estimate)
            else:
                # elsewhere any tone except M
                tone, estimated = self._closest_tone(target)

            self.intsint[i] = tone
            self.estimates[i] = estimated
            error = math.fabs(estimated - target)
            ss_error += error * error
            self.last_estimate = estimated

        if ss_error < self.min_ss_error:
            self.min_ss_error = ss_error
            self.best_range = _range
            self.best_mid = mid
            # copies: the working lists are overwritten by the next call
            self.best_intsint = self.intsint[:]
            self.best_estimate = self.estimates[:]

    # -------------------------------------------------------------------

    def _closest_absolute_tone(self, target, mid):
        """Return the tone among T, B, M for a target, given the key.

        T or B is chosen, in this order, if the target is strictly
        closer to top or bottom than to the key. M is returned otherwise.

        :param target: (float) Target value, in octaves.
        :param mid: (float) Key, in octaves.
        :returns: (str) One of "T", "B", "M".

        """
        distance_to_mid = math.fabs(target - mid)
        if self.top - target < distance_to_mid:
            return "T"
        if target - self.bottom < distance_to_mid:
            return "B"
        return "M"

    # -------------------------------------------------------------------

    def _closest_tone(self, target):
        """Return the tone, except M, whose estimate is closest to target.

        Estimates are relative to the last estimate. If several tones
        are at the same distance, the first one in TONES is chosen.

        :param target: (float) Target value, in octaves.
        :returns: (tuple) The tone (str) and its estimate (float).

        """
        candidates = [
            (tone, self.estimate(tone, self.last_estimate))
            for tone in Intsint.TONES if tone != "M"
        ]
        # min() returns the first of equally-distant candidates
        return min(candidates, key=lambda c: math.fabs(target - c[1]))

    # -------------------------------------------------------------------

    def estimate(self, tone, last_anchor):
        """Estimate f0 from current tone and last target.

        :param tone: (str) One of the tones of TONES.
        :param last_anchor: (float) Previous estimate, in octaves.
            It is used by the relative tones only.
        :returns: (float) Estimated value, or an empty string if the
            tone is not one of TONES.

        """
        estimated = ""
        if tone == "M":
            estimated = self.mid
        elif tone == "S":
            estimated = last_anchor
        elif tone == "T":
            estimated = self.top
        elif tone == "H":
            estimated = last_anchor + \
                (self.top - last_anchor) * Intsint.HIGHER
        elif tone == "U":
            estimated = last_anchor + \
                (self.top - last_anchor) * Intsint.UP
        elif tone == "B":
            estimated = self.bottom
        elif tone == "L":
            estimated = last_anchor - \
                (last_anchor - self.bottom) * Intsint.LOWER
        elif tone == "D":
            estimated = last_anchor - \
                (last_anchor - self.bottom) * Intsint.DOWN
        return estimated

    # -------------------------------------------------------------------

    def recode(self):
        """Recode within the parameters space.

        mean +/- 50 Hz for key and [0.5..2.5 octaves] for range.
        Both loops stop with a strict comparison on their upper bound.
        Each (key, range) pair is evaluated with optimise().

        """
        _range = Intsint.MIN_RANGE
        while _range < Intsint.MAX_RANGE:
            lm = self.min_mean
            while lm < self.max_mean:
                # the key is explored in Hz but evaluated in octaves
                self.mid = octave(lm)
                self.optimise(self.mid, _range)
                lm += Intsint.STEP_SHIFT
            _range += Intsint.STEP_RANGE

    # -------------------------------------------------------------------

    def annotate(self, momel_anchors):
        """Provide optimal INTSINT coding for sequence of target points.

        :param momel_anchors: (list of tuple) List of time (in seconds)
            and anchors (Hz).
        :returns: (list of str) The best sequence of tones, one per anchor.
        :raises SmallSizeInputError: if less than 2 anchors are given.

        """
        if len(momel_anchors) < 2:
            raise SmallSizeInputError(2)

        self.init(momel_anchors)
        self.recode()
        return self.best_intsint