SPPAS 4.22

https://sppas.org/

Module sppas.src.resources

Class sppasVocabulary

Description

Class to represent a list of words.

Constructor

Create a sppasVocabulary instance.

Parameters
  • filename: (str) Name of the file with the list of words.
  • nodump: (bool) Disable the creation of a dump file.
  • case_sensitive: (bool) Whether the list of words is case-sensitive.
View Source
def __init__(self, filename=None, nodump=False, case_sensitive=False):
    """Create a sppasVocabulary instance.

    When a filename is given, the entries are taken from its dump file
    if load_from_dump() returns some data. Otherwise they are read with
    load_from_ascii() and, unless nodump is set, saved as a dump.

    :param filename: (str) Name of the file with the list of words.
    :param nodump: (bool) Disable the creation of a dump file.
    :param case_sensitive: (bool) Whether the list of words is case-sensitive.

    """
    self.__filename = ''
    self.__case_sensitive = case_sensitive
    self.__entries = dict()

    # Nothing to load: the vocabulary stays empty.
    if filename is None:
        return

    self.__filename = filename
    dump = sppasDumpFile(filename)
    cached = dump.load_from_dump()

    # Dumped data is used as-is when it is available.
    if cached is not None:
        self.__entries = cached
        return

    # No dump available: read the text file, then optionally dump it.
    self.load_from_ascii(filename)
    if nodump is False:
        dump.save_as_dump(self.__entries)

Public functions

get_filename

Return the name of the file from which the vocabulary comes.

View Source
def get_filename(self):
    """Return the name of the file from which the vocabulary comes.

    :returns: (str) The stored file name; an empty string if none was set.

    """
    name = self.__filename
    return name

add

Add an entry into the list except if the entry is already inside.

Parameters
  • entry: (str) The entry to add in the word list
Returns
  • (bool)
View Source
def add(self, entry):
    """Insert a word into the list unless it is already inside.

    The entry is first normalized with sppasUnicode.to_strip() and, when
    the vocabulary is not case-sensitive, with sppasUnicode.to_lower().

    :param entry: (str) The entry to add in the word list
    :returns: (bool) True if the entry was added, False if it was
        already in the list.

    """
    word = sppasUnicode(entry).to_strip()
    if self.__case_sensitive is False:
        word = sppasUnicode(word).to_lower()

    # Already known: nothing to do.
    if word in self.__entries:
        return False

    self.__entries[word] = None
    return True

get_list

Return the list of entries, sorted in alpha-numeric order.

View Source
def get_list(self):
    """Return the entries as a new list, sorted in alpha-numeric order.

    :returns: (list) Sorted entries

    """
    ordered = list(self.__entries)
    ordered.sort()
    return ordered

is_in

Return True if entry is in the list.

Parameters
  • entry: (str)
View Source
def is_in(self, entry):
    """Return True if entry is in the list.

    The entry is looked up exactly as given.

    :param entry: (str) The entry to look for
    :returns: (bool)

    """
    known = entry in self.__entries
    return known

is_unk

Return True if entry is unknown (not in the list).

Parameters
  • entry: (str)
View Source
def is_unk(self, entry):
    """Return True if entry is unknown (not in the list).

    The entry is looked up exactly as given.

    :param entry: (str) The entry to look for
    :returns: (bool)

    """
    return not (entry in self.__entries)

clear

Remove all entries of the vocabulary.

View Source
def clear(self):
    """Remove all entries of the vocabulary.

    The entries are replaced by a new empty dict; the previous dict
    object is left untouched.

    """
    self.__entries = {}

copy

Make a deep copy of the instance.

Returns
  • sppasVocabulary
View Source
def copy(self):
    """Make a deep copy of the instance.

    The copy is created with the same case-sensitivity as this instance,
    so entries that differ only by their case are preserved instead of
    being lowered and merged. The copy is created without a file name.

    :returns: sppasVocabulary

    """
    # Propagate the case-sensitivity: the default (False) would make
    # add() lower every entry of a case-sensitive vocabulary.
    other = sppasVocabulary(case_sensitive=self.__case_sensitive)
    for entry in self.__entries:
        other.add(entry)
    return other

load_from_ascii

Read words from a file: one per line.

Parameters
  • filename: (str)
View Source
def load_from_ascii(self, filename):
    """Read words from a file: one per line.

    Each line of the file is given to add(). The name of the file is
    stored as soon as the file is opened.

    :param filename: (str) Name of the file to read
    :raises: FileIOError if an IOError occurs while opening or reading
    :raises: FileUnicodeError if the content can't be decoded
    :raises: FileFormatError if add() fails on a line

    """
    try:
        # The context manager closes the stream on exit, including when
        # an exception is raised: no explicit close() is needed.
        with codecs.open(filename, 'r', sg.__encoding__) as fd:
            self.__filename = filename
            for nbl, line in enumerate(fd, 1):
                try:
                    self.add(line)
                except Exception:
                    # Report the 1-based line number and the faulty line.
                    raise FileFormatError(nbl, line)
    except IOError:
        raise FileIOError(filename)
    except UnicodeDecodeError:
        raise FileUnicodeError(filename)

save

Save the list of words in a file.

Parameters

  • filename: (str)

Returns
  • (bool)
View Source
def save(self, filename):
    """Write the list of words in a file, one per line, in sorted order.

    Any error is logged and reported through the returned value instead
    of being raised.

    :param filename: (str) Name of the file to write
    :returns: (bool) True if the file was written, False if an error
        occurred.

    """
    try:
        with codecs.open(filename, 'w', sg.__encoding__) as fd:
            for entry in sorted(self.__entries):
                fd.write('{:s}\n'.format(entry))
    except Exception as err:
        logging.info('Save file failed due to the following error: {:s}'.format(str(err)))
        return False
    else:
        return True

Overloads

__len__

View Source
def __len__(self):
    """Return the number of entries of the vocabulary."""
    size = len(self.__entries)
    return size

__contains__

View Source
def __contains__(self, item):
    """Return True if item is one of the entries, looked up as given."""
    found = item in self.__entries
    return found

__iter__

View Source
def __iter__(self):
    """Yield the entries one by one, in the iteration order of the dict."""
    for entry in self.__entries:
        yield entry