```
"""
:filename: sppas.src.calculus.infotheory.entropy.py
:author: Brigitte Bigi
:contact: develop@sppas.org
:summary: Entropy estimator.
from ..calculusexc import EmptyError, InsideIntervalError
from .utilit import log2
from .utilit import MAX_NGRAM
from .utilit import symbols_to_items
# ----------------------------------------------------------------------------
class sppasEntropy(object):
    """Entropy estimation.

    Entropy is a measure of unpredictability of information content.
    Entropy is one of several ways to measure diversity.

    If we want to look at the entropy on a large series, we could also compute
    the entropy for windows to measure the evenness or uncertainties.
    By looking at the definition, one could predict the areas that have a
    lot of variance would result in a higher entropy and the areas that have
    lower variance would result in lower entropy.

    """

    def __init__(self, symbols, n=1):
        """Create a sppasEntropy instance with a list of symbols.

        :param symbols: (list) a vector of symbols of any type.
        :param n: (int) n value for n-gram estimation. n ranges 1..MAX_NGRAM
        :raises: EmptyError if symbols is empty
        :raises: InsideIntervalError if n is not in the range 1..MAX_NGRAM

        """
        # Safe defaults first, then validate through the setters.
        self._symbols = list()
        self._ngram = 1

        self.set_symbols(symbols)
        self.set_ngram(n)

    # -----------------------------------------------------------------------

    def set_symbols(self, symbols):
        """Set the list of symbols.

        The given object is stored as-is: no copy is made.

        :param symbols: (list) a vector of symbols of any type.
        :raises: EmptyError if symbols is empty

        """
        if len(symbols) == 0:
            raise EmptyError

        self._symbols = symbols

    # -----------------------------------------------------------------------

    def set_ngram(self, n):
        """Set the n value of n-grams.

        :param n: (int) n value for n-gram estimation. n ranges 1..MAX_NGRAM
        :raises: InsideIntervalError if n is not in the range 1..MAX_NGRAM

        """
        # int() lets its own error propagate if n is not convertible.
        n = int(n)
        if 0 < n <= MAX_NGRAM:
            self._ngram = n
        else:
            raise InsideIntervalError(n, 1, MAX_NGRAM)

    # -----------------------------------------------------------------------

    def eval(self):
        """Estimate the Shannon entropy of a vector of symbols.

        Shannon's entropy measures the information contained in a message as
        opposed to the portion of the message that is determined
        (or predictable).

        The value is the sum, over each distinct n-gram item, of
        p * log2(1/p) where p is the number of occurrences of the item
        divided by the number of n-gram positions in the symbols.

        :returns: (float) entropy value
        :raises: EmptyError if the list of symbols is empty

        """
        # The symbols are stored by reference, so they may have been emptied
        # since set_symbols() accepted them.
        if len(self._symbols) == 0:
            raise EmptyError

        # Occurrences of each n-gram item, as returned by symbols_to_items.
        exr = symbols_to_items(self._symbols, self._ngram)

        # Number of n-gram positions in the sequence.
        # NOTE(review): this is <= 0 when there are fewer symbols than n;
        # the result then depends on what symbols_to_items returns -- confirm.
        total = len(self._symbols) - self._ngram + 1

        result = 0.
        # Only the counts are needed, not the items themselves.
        for occurrences in exr.values():
            # "1.0 *" forces a float division.
            probability = 1.0 * occurrences / total
            self_information = log2(1.0 / probability)
            result += (probability * self_information)

        return result
```