# Source code for annotations.Align.models.slm.statlangmodel
"""
:filename: sppas.src.annotations.Align.models.slm.statlangmodel.py
:author: Brigitte Bigi
:contact: develop@sppas.org
:summary: Statistical language model representation and use.
.. _This file is part of SPPAS: http://www.sppas.org/
..
-------------------------------------------------------------------------
 ___   __    __    __    ___
/     |  \  |  \  |  \  /              the automatic
\__   |__/  |__/  |___| \__             annotation and
   \  |     |     |   |    \             analysis
___/  |     |     |   | ___/              of speech
Copyright (C) 2011-2021 Brigitte Bigi
Laboratoire Parole et Langage, Aix-en-Provence, France
Use of this software is governed by the GNU Public License, version 3.
SPPAS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SPPAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SPPAS. If not, see <http://www.gnu.org/licenses/>.
This banner notice must not be removed.
-------------------------------------------------------------------------
"""
from ..modelsexc import ModelsDataTypeError
from .arpaio import sppasArpaIO
# ---------------------------------------------------------------------------
class sppasSLM(object):
    """Statistical language model representation.

    The model itself is kept in the public attribute ``model``. It is
    ``None`` until a model is assigned with ``set()`` or read from a file
    with ``load_from_arpa()``.

    """

    def __init__(self):
        """Create a sppasSLM instance without model."""
        self.model = None

    # -----------------------------------------------------------------------

    def set(self, model):
        """Set the language model.

        Only the two outer container levels are verified: the given model
        must be a list and each of its items must be a list too. The content
        of the inner lists is not inspected. The given list is stored as is,
        it is not copied.

        :param model: (list) List of lists of tuples for 1-gram, 2-grams, ...
        :raises: ModelsDataTypeError if model is not a list of lists

        """
        # A generator lets all() stop at the first item which is not a list,
        # without building an intermediate list of booleans.
        is_list_of_lists = isinstance(model, list) and \
            all(isinstance(ngrams, list) for ngrams in model)

        if not is_list_of_lists:
            raise ModelsDataTypeError("slm",
                                      "list of lists of tuples",
                                      type(model))

        self.model = model

    # -----------------------------------------------------------------------

    def load_from_arpa(self, filename):
        """Load the model from an ARPA-ASCII file.

        The current model is replaced by the value returned by the
        ``load()`` method of a new sppasArpaIO instance. This value is
        stored without any verification.

        :param filename: (str) Filename from which to read the model.

        """
        arpa_io = sppasArpaIO()
        self.model = arpa_io.load(filename)

    # -----------------------------------------------------------------------

    def save_as_arpa(self, filename):
        """Save the model into an ARPA-ASCII file.

        The current model is given to a new sppasArpaIO instance, which is
        in charge of writing the file.

        :param filename: (str) Filename in which to write the model.

        """
        arpa_io = sppasArpaIO()
        # NOTE(review): the model is forwarded even if it is still None;
        # whether sppasArpaIO.set() accepts it is not visible from here.
        arpa_io.set(self.model)
        arpa_io.save(filename)

    # -----------------------------------------------------------------------

    def evaluate(self, filename):
        """Evaluate a model on a file (perplexity).

        Not implemented yet.

        :param filename: (str) Unused for now.
        :raises: NotImplementedError always

        """
        raise NotImplementedError("The method 'evaluate' of sppasSLM is "
                                  "not implemented yet. Any help is welcome!")

    # -----------------------------------------------------------------------

    def interpolate(self, other):
        """Interpolate the model with another one.

        Not implemented yet.

        An N-Gram language model can be constructed from a linear
        interpolation of several models. In this case, the overall
        likelihood P(w|h) of a word w occurring after the history h is
        computed as the arithmetic average of P(w|h) for each of the models.

        The default interpolation method is linear interpolation. In
        addition, log-linear interpolation of models is possible.

        :param other: (sppasSLM)
        :raises: NotImplementedError always

        """
        raise NotImplementedError("The method 'interpolate' of sppasSLM is "
                                  "not implemented yet. Any help is welcome!")