Source code for calculus.stats.variability
# -*- coding: UTF-8 -*-
"""
:filename: sppas.src.calculus.stats.variability.py
:author: Brigitte Bigi
:contact: develop@sppas.org
:summary: variance estimators.
.. _This file is part of SPPAS: http://www.sppas.org/
..
-------------------------------------------------------------------------
___ __ __ __ ___
/ | \ | \ | \ / the automatic
\__ |__/ |__/ |___| \__ annotation and
\ | | | | \ analysis
___/ | | | | ___/ of speech
Copyright (C) 2011-2021 Brigitte Bigi
Laboratoire Parole et Langage, Aix-en-Provence, France
Use of this software is governed by the GNU Public License, version 3.
SPPAS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SPPAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SPPAS. If not, see <http://www.gnu.org/licenses/>.
This banner notice must not be removed.
-------------------------------------------------------------------------
"""
import math
from .central import fmean
from .central import fsum
# ----------------------------------------------------------------------------
[docs]def lunbiasedvariance(items):
"""Calculate the unbiased sample variance of the data values, for a sample.
It means that the estimation is using N-1 for the denominator.
The variance is a measure of dispersion near the mean.
:param items: (list) list of data values
:returns: (float)
"""
if len(items) < 2:
return 0.
mn = fmean(items)
return fsum(pow(i-mn, 2) for i in items) / (len(items)-1)
# ----------------------------------------------------------------------------
[docs]def lvariance(items):
"""Calculate the variance of the data values, for a population.
It means that the estimation is using N for the denominator.
The variance is a measure of dispersion near the mean.
:param items: (list) list of data values
:returns: (float)
"""
if len(items) < 2:
return 0.
mn = fmean(items)
return fsum(pow(i-mn, 2) for i in items) / (len(items))
# ----------------------------------------------------------------------------
[docs]def lunbiasedstdev(items):
"""Calculate the standard deviation of the data values, for a sample.
The standard deviation is the positive square root of the variance.
:param items: (list) list of data values
:returns: (float)
"""
if len(items) < 2:
return 0.
return math.sqrt(lunbiasedvariance(items))
# ----------------------------------------------------------------------------
[docs]def lstdev(items):
"""Calculate the standard deviation of the data values, for a population.
The standard deviation is the positive square root of the variance.
:param items: (list) list of data values
:returns: (float)
"""
if len(items) < 2:
return 0.
return math.sqrt(lvariance(items))
# ----------------------------------------------------------------------------
[docs]def lsterr(items):
"""Calculate the standard error of the data values.
:param items: (list) list of data values
:returns: (float)
"""
return lstdev(items) / float(math.sqrt(len(items)))
# ----------------------------------------------------------------------------
[docs]def lz(items, score):
"""Calculate the z-score for a given input score.
given that score and the data values from which that score came.
The z-score determines the relative location of a data value.
:param items: (list) list of data values
:param score: (float) a score of any items
:returns: (float)
"""
if len(items) < 2:
return 0.
return (score - fmean(items)) / lstdev(items)
# ----------------------------------------------------------------------------
[docs]def lzs(items):
"""Calculate a list of z-scores, one for each score in the data values.
:param items: (list) list of data values
:returns: (list)
"""
return [lz(items, i) for i in items]
# ----------------------------------------------------------------------------
[docs]def rPVI(items):
"""Calculate the Raw Pairwise Variability Index.
:param items: (list) list of data values
:returns: (float)
"""
if len(items) < 2:
return 0.
n = len(items) - 1
sumd = fsum([math.fabs(items[i]-items[i+1]) for i in range(n)])
return sumd / n
# ----------------------------------------------------------------------------
[docs]def nPVI(items):
"""Calculate the Normalized Pairwise Variability Index.
:param items: (list) list of data values
:returns: (float)
"""
if len(items) < 2:
return 0.
n = len(items) - 1
sumd = 0.
for i in range(n):
d1 = items[i]
d2 = items[i+1]
delta = math.fabs(d1 - d2)
meand = (d1 + d2) / 2.
sumd += delta / meand
return 100. * sumd / n