Class to represent a list of words.
Module sppas.src.resources
Class sppasVocabulary
Description
Constructor
Create a sppasVocabulary instance.
Parameters
- filename: (str) Name of the file with the list of words.
- nodump: (bool) Set to True to disable the creation of a dump file.
- case_sensitive: (bool) Whether the list of words is case-sensitive.
View Source
def __init__(self, filename=None, nodump=False, case_sensitive=False):
    """Build a vocabulary, optionally filled from a file of words.

    When a file name is given, the entries are the data returned by
    ``load_from_dump()`` if it is not None. Otherwise the file itself
    is read with ``load_from_ascii()`` and, if ``nodump`` is False, the
    entries are then saved as a dump.

    :param filename: (str) Name of the file with the list of words.
    :param nodump: (bool) True to disable the creation of a dump file.
    :param case_sensitive: (bool) Whether the list of words is case-sensitive.

    """
    self.__case_sensitive = case_sensitive
    self.__filename = ''
    self.__entries = dict()

    # No file given: the vocabulary starts empty.
    if filename is None:
        return

    self.__filename = filename
    dump_file = sppasDumpFile(filename)
    dumped = dump_file.load_from_dump()

    # A dump was loaded: use it as is, the file itself is not read.
    if dumped is not None:
        self.__entries = dumped
        return

    # No dump available: read the file, then dump the entries if allowed.
    self.load_from_ascii(filename)
    if nodump is False:
        dump_file.save_as_dump(self.__entries)
Public functions
get_filename
Return the name of the file the vocabulary comes from.
View Source
def get_filename(self):
    """Return the file name stored for this vocabulary.

    :returns: (str) The stored file name; the constructor sets it to an
        empty string when no file name is given.

    """
    source_name = self.__filename
    return source_name
add
Add an entry into the list except if the entry is already inside.
Parameters
- entry: (str) The entry to add in the word list
Returns
- (bool)
View Source
def add(self, entry):
    """Insert an entry in the vocabulary unless it is already there.

    The entry is first passed through ``sppasUnicode.to_strip()`` and,
    when the vocabulary is not case-sensitive, through
    ``sppasUnicode.to_lower()``; the result is what is looked up and
    stored.

    :param entry: (str) The entry to add in the word list
    :returns: (bool) True if the entry was added, False if it was
        already in the list.

    """
    word = sppasUnicode(entry).to_strip()
    if self.__case_sensitive is False:
        word = sppasUnicode(word).to_lower()

    # Already known: leave the vocabulary untouched.
    if word in self.__entries:
        return False

    self.__entries[word] = None
    return True
get_list
Return the list of entries, sorted in alpha-numeric order.
View Source
def get_list(self):
    """Return the entries as a new list, sorted in ascending order.

    :returns: (list) The sorted entries.

    """
    words = list(self.__entries.keys())
    words.sort()
    return words
is_in
Return True if entry is in the list.
Parameters
- entry: (str)
View Source
def is_in(self, entry):
    """Tell whether an entry belongs to the vocabulary.

    The entry is looked up exactly as given.

    :param entry: (str)
    :returns: (bool) True if entry is in the list.

    """
    known_entries = self.__entries
    return entry in known_entries
is_unk
Return True if entry is unknown (not in the list).
Parameters
- entry: (str)
View Source
def is_unk(self, entry):
    """Tell whether an entry is unknown, i.e. absent from the vocabulary.

    The entry is looked up exactly as given.

    :param entry: (str)
    :returns: (bool) True if entry is not in the list.

    """
    is_known = entry in self.__entries
    return not is_known
clear
Remove all entries of the vocabulary.
View Source
def clear(self):
    """Empty the vocabulary.

    The entries are replaced by a new empty dictionary; the previous
    dictionary object itself is not modified.

    """
    self.__entries = {}
copy
Make a deep copy of the instance.
Returns
- sppasVocabulary
View Source
def copy(self):
    """Make a copy of the instance holding the same entries.

    The copy is created with the same case-sensitivity as this
    instance. Otherwise, re-adding the entries of a case-sensitive
    vocabulary to a default (case-insensitive) one would lower-case
    them. The file name is not transmitted to the copy.

    :returns: sppasVocabulary

    """
    duplicate = sppasVocabulary(case_sensitive=self.__case_sensitive)
    for entry in self.__entries:
        duplicate.add(entry)
    return duplicate
load_from_ascii
Read words from a file: one per line.
Parameters
- filename: (str)
View Source
def load_from_ascii(self, filename):
    """Read words from a file: one per line.

    Each line of the file is given to ``add()``. The file name is
    stored as soon as the file is opened.

    :param filename: (str)
    :raises: FileIOError if an IOError occurs while opening or reading
    :raises: FileUnicodeError if the content can't be decoded
    :raises: FileFormatError if adding a line fails

    """
    try:
        # The context manager closes the file when the block is left,
        # including on error: no explicit close() is needed.
        with codecs.open(filename, 'r', sg.__encoding__) as fd:
            self.__filename = filename
            for nbl, line in enumerate(fd, 1):
                try:
                    self.add(line)
                except Exception:
                    # NOTE(review): if FileFormatError derives from IOError,
                    # the outer handler turns it into a FileIOError - confirm.
                    raise FileFormatError(nbl, line)
    except IOError:
        raise FileIOError(filename)
    except UnicodeDecodeError:
        raise FileUnicodeError(filename)
save
Save the list of words in a file.
Parameters
- filename: (str)
Returns
- (bool)
View Source
def save(self, filename):
    """Write the sorted entries into a file, one per line.

    Any exception raised while opening or writing the file is logged
    at the info level and reported through the returned value instead
    of being propagated.

    :param filename: (str)
    :returns: (bool) True if the file was written, False otherwise.

    """
    try:
        with codecs.open(filename, 'w', sg.__encoding__) as out:
            words = sorted(self.__entries.keys())
            for entry in words:
                out.write('{:s}\n'.format(entry))
    except Exception as err:
        message = 'Save file failed due to the following error: {:s}'.format(str(err))
        logging.info(message)
        return False
    else:
        return True
Overloads
__len__
View Source
def __len__(self):
    """Return the number of entries of the vocabulary."""
    nb_entries = len(self.__entries)
    return nb_entries
__contains__
View Source
def __contains__(self, item):
    """Return True if item is one of the entries, looked up as given."""
    known_entries = self.__entries
    return item in known_entries
__iter__
View Source
def __iter__(self):
    """Yield the entries one by one, in the order the dictionary iterates."""
    for entry in self.__entries:
        yield entry