Source code for calculus.infotheory.utilit
# -*- coding: UTF-8 -*-
"""
:filename: sppas.src.calculus.infotheory.utilit.py
:author: Brigitte Bigi
:contact: develop@sppas.org
:summary: Utilities for the information theory package.
.. _This file is part of SPPAS: http://www.sppas.org/
..
-------------------------------------------------------------------------
___ __ __ __ ___
/ | \ | \ | \ / the automatic
\__ |__/ |__/ |___| \__ annotation and
\ | | | | \ analysis
___/ | | | | ___/ of speech
Copyright (C) 2011-2021 Brigitte Bigi
Laboratoire Parole et Langage, Aix-en-Provence, France
Use of this software is governed by the GNU Public License, version 3.
SPPAS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SPPAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SPPAS. If not, see <http://www.gnu.org/licenses/>.
This banner notice must not be removed.
-------------------------------------------------------------------------
"""
import math
# Largest n-gram order; not referenced by the functions visible in this
# module -- presumably used as an upper bound by callers (TODO confirm).
MAX_NGRAM = 8
# ----------------------------------------------------------------------------
def log2(x):
    """Estimate log in base 2.

    The value is converted to a float first, so any argument accepted by
    ``float()`` is allowed.

    :param x: (int, float) value
    :returns: (float)
    :raises ValueError: if x is zero or negative (math domain error)

    """
    x = float(x)
    # math.log2 is more accurate than the quotient of two natural logs:
    # it returns exact integers for exact powers of two, which the
    # quotient does not guarantee because of rounding.
    if hasattr(math, "log2"):
        return math.log2(x)
    # Fallback for interpreters without math.log2.
    return math.log(x) / math.log(2)
# ----------------------------------------------------------------------------
def find_ngrams(symbols, ngram):
    """Return a list of n-grams from a list of symbols.

    Each n-gram is a tuple of ``ngram`` consecutive symbols, given in
    order of appearance. For ``ngram >= 1``, a sequence shorter than
    ``ngram`` yields an empty list.

    :param symbols: (list) sequence of symbols; it must support slicing
    :param ngram: (int) n value for the ngrams
    :returns: list of tuples

    Example:

        >>> symbols = [0, 1, 0, 1, 1, 1, 0]
        >>> find_ngrams(symbols, 2)
        [(0, 1), (1, 0), (0, 1), (1, 1), (1, 1), (1, 0)]

    """
    # Build ``ngram`` copies of the sequence, the i-th one shifted by i
    # positions. Zipping them aligns every symbol with its successors and
    # stops at the shortest copy, which drops the incomplete tail.
    shifted = [symbols[i:] for i in range(ngram)]
    # zip() is lazy under Python 3: materialize it so the result really is
    # a list, as documented, and can be indexed or iterated more than once.
    return list(zip(*shifted))
# ----------------------------------------------------------------------------
def symbols_to_items(symbols, ngram):
    """Count the occurrences of each n-gram of a list of symbols.

    The n-grams are extracted with find_ngrams(); each distinct one
    becomes a key of the returned dictionary.

    Example:

        >>> symbols = [0, 1, 0, 1, 1, 1, 0]
        >>> symbols_to_items(symbols, 2)
        {(0, 1): 2, (1, 0): 2, (1, 1): 2}

    :param symbols: (list) sequence of symbols
    :param ngram: (int) n value for the ngrams
    :returns: dictionary with key=tuple of symbols, value=number of occurrences

    """
    counts = {}
    for item in find_ngrams(symbols, ngram):
        if item in counts:
            counts[item] += 1
        else:
            # First time this n-gram is seen.
            counts[item] = 1
    return counts