Public functions
get_filename
    
Return the name of the file from which the vocab comes.
    
    
    
View Source
    
def get_filename(self):
    """Return the file name stored in the ``_filename`` member.

    :returns: the value of ``self._filename``, returned unchanged.

    """
    filename = self._filename
    return filename
 
is_key
    
Return True if entry is exactly a key in the dictionary.
Parameters
- entry: (str) Unicode string.
    
View Source
    
def is_key(self, entry):
    """Tell whether entry is exactly one of the dictionary keys.

    The entry is only converted with ``u()`` before the lookup; it is not
    passed through ``format_token``.

    :param entry: (str) Unicode string.
    :returns: (bool) True if the converted entry is a key.

    """
    candidate = u(entry)
    return candidate in self._dict
 
is_value
    
Return True if entry is a value in the dictionary.
Parameters
- entry: (str) Unicode string.
    
View Source
    
def is_value(self, entry):
    """Tell whether entry is one of the values stored in the dictionary.

    Each stored value is split on ``REPLACE_SEPARATOR`` and the formatted
    entry is compared to every resulting item.

    :param entry: (str) Unicode string.
    :returns: (bool) True if at least one item equals the formatted entry.

    """
    target = sppasDictRepl.format_token(entry)
    separator = sppasDictRepl.REPLACE_SEPARATOR
    return any(
        target in stored.split(separator)
        for stored in self._dict.values()
    )
 
is_value_of
    
Return True if entry is a value of a given key in the dictionary.
Parameters
- key: (str) Unicode string.
- entry: (str) Unicode string.
    
View Source
    
def is_value_of(self, key, entry):
    """Return True if entry is a value of a given key in the dictionary.

    The value stored for the key is split on ``REPLACE_SEPARATOR`` and the
    formatted entry is compared to each resulting item.

    :param key: (str) Unicode string.
    :param entry: (str) Unicode string.
    :returns: (bool) False if the key is unknown or if the entry is not
        one of its values.

    """
    target = sppasDictRepl.format_token(entry)
    stored = self.get(key, None)
    if stored is None:
        # Unknown key: nothing can be one of its values. Without this guard
        # the '' fallback would be split into [''] and an entry formatted
        # to an empty string would wrongly match.
        return False
    return target in stored.split(sppasDictRepl.REPLACE_SEPARATOR)
 
is_unk
    
Return True if entry is not a key in the dictionary.
Parameters
- entry: (str) Unicode string.
    
View Source
    
def is_unk(self, entry):
    """Tell whether the formatted entry is missing from the dictionary keys.

    :param entry: (str) Unicode string.
    :returns: (bool) True if the formatted entry is not a key.

    """
    token = sppasDictRepl.format_token(entry)
    if token in self._dict:
        return False
    return True
 
is_empty
    
Return True if there is no entry in the dictionary.
    
    
    
View Source
    
def is_empty(self):
    """Tell whether the dictionary holds no entry at all.

    :returns: (bool) True if the length of the dictionary is zero.

    """
    size = len(self._dict)
    return size == 0
 
get
    
Return the value of a key of the dictionary or substitution.
Parameters
- entry: (str) A token to find in the dictionary
- substitution: (str) String to return if the token is missing from the dict
Returns
- unicode of the replacement or the substitution.
    
View Source
    
def get(self, entry, substitution=''):
    """Look up the formatted entry and return its value, or substitution.

    :param entry: (str) A token to find in the dictionary
    :param substitution: (str) String returned when the formatted token
        is not a key of the dictionary
    :returns: the stored replacement, or the given substitution.

    """
    token = sppasDictRepl.format_token(entry)
    found = self._dict.get(token, substitution)
    return found
 
replace
    
Return the value of a key, or an empty string if the key has no replacement.
Parameters
    
    
    
View Source
    
def replace(self, key):
    """Return the replacement of a key, as given by ``get(key)``.

    No substitution is passed to ``get``, so its default one applies when
    the key has no replacement.

    :param key: (str) A token to find in the dictionary
    :returns: the result of ``self.get(key)``.

    """
    replacement = self.get(key)
    return replacement
 
replace_reversed
    
Return the key(s) of a value or an empty string.
Parameters
- value: (str) value to search
Returns
- a unicode string with all matching keys, joined with the replacement separator (REPLACE_SEPARATOR), or an empty string if the value does not exist.
    
View Source
    
def replace_reversed(self, value):
    """Return the key(s) whose stored values contain the given value.

    Each stored value is split on ``REPLACE_SEPARATOR``; a key is collected
    once for every item equal to the formatted value.

    :param value: (str) value to search
    :returns: the collected keys joined with ``REPLACE_SEPARATOR``, or an
        empty string if no key matches.

    """
    target = sppasDictRepl.format_token(value)
    separator = sppasDictRepl.REPLACE_SEPARATOR
    matching = [
        key
        for key, stored in self._dict.items()
        for candidate in stored.split(separator)
        if candidate == target
    ]
    if not matching:
        return ''
    return separator.join(matching)
 
format_token
    
Remove the CR/LF, tabs, multiple spaces and others... and lower.
Parameters
Returns
    
    
    
View Source
    
@staticmethod
def format_token(entry):
    """Format a token by delegating to ``sppasUnicode(entry).to_strip()``.

    NOTE(review): the former description also mentioned lower-casing; no
    explicit lowering is done here, so it only happens if ``to_strip()``
    performs it -- to be confirmed against sppasUnicode.

    :param entry: (str) a token
    :returns: formatted token

    """
    wrapped = sppasUnicode(entry)
    return wrapped.to_strip()
 
add
    
Add a new key,value into the dict.
Add as a new pair or append the value to the existing one with
a "|" used as separator.
Parameters
- token: (str) string of the token to add
- repl: (str) the replacement token
Both token and repl are converted to unicode (if needed) and stripped.
    
    
    
View Source
    
def add(self, token, repl):
    """Add a new key,value into the dict.

    Add as a new pair, or append the value to the existing one with
    ``REPLACE_SEPARATOR`` used as separator. If the value is already
    registered for the key, the dictionary is left unchanged.

    Both token and repl are passed through ``format_token`` first.

    :param token: (str) string of the token to add
    :param repl: (str) the replacement token

    """
    key = sppasDictRepl.format_token(token)
    value = sppasDictRepl.format_token(repl)

    if key in self._dict:
        if self.is_value_of(key, value):
            # Already one of the replacements of this key. Assigning it
            # again would overwrite the whole stored string and drop every
            # other replacement registered for the key.
            return
        # Use the same separator the reader methods split on, rather than
        # a hard-coded character.
        value = sppasDictRepl.REPLACE_SEPARATOR.join((self._dict[key], value))

    self._dict[key] = value
 
pop
    
Remove an entry, as key.
Parameters
- entry: (str) unicode string of the entry to remove
    
View Source
    
def pop(self, entry):
    """Remove the formatted entry from the keys, if it is one.

    Nothing happens, and nothing is raised, when the entry is not a key.

    :param entry: (str) unicode string of the entry to remove

    """
    token = sppasDictRepl.format_token(entry)
    # The default value makes the removal silent for an unknown key.
    self._dict.pop(token, None)
 
remove
    
Remove an entry, as key or value.
Parameters
- entry: (str) unicode string of the entry to remove
    
View Source
    
def remove(self, entry):
    """Remove every pair in which entry is the key or one of the values.

    :param entry: (str) unicode string of the entry to remove

    """
    token = sppasDictRepl.format_token(entry)
    # Collect first, delete afterwards: the dictionary must not change
    # size while it is being iterated.
    doomed = [
        key for key in self._dict
        if key == token or self.is_value_of(key, entry)
    ]
    for key in doomed:
        del self._dict[key]
 
load_from_ascii
    
Load a replacement dictionary from an ascii file.
Parameters
- filename: (str) Replacement dictionary file name
    
View Source
    
def load_from_ascii(self, filename):
    """Load a replacement dictionary from an ascii file.

    Each line is split on whitespace: the first column is the key and the
    remaining columns, joined with ``REPLACE_SEPARATOR``, are its value.
    Lines with fewer than two columns (blank lines included) are ignored.
    Entries are registered with ``add()``; the file name is stored once the
    content has been read.

    :param filename: (str) Replacement dictionary file name
    :raises: FileUnicodeError if the content can't be decoded with the
        ``sg.__encoding__`` encoding.

    """
    # The context manager closes the file, including when decoding fails,
    # so no explicit close() is needed.
    with codecs.open(filename, 'r', sg.__encoding__) as fd:
        try:
            lines = fd.readlines()
        except UnicodeDecodeError:
            raise FileUnicodeError(filename=filename)

    self._filename = filename
    for line in lines:
        # split() without argument already drops leading/trailing blanks
        # and collapses runs of whitespace.
        columns = line.split()
        if len(columns) < 2:
            continue
        key = columns[0]
        value = sppasDictRepl.REPLACE_SEPARATOR.join(columns[1:])
        self.add(key, value)
 
save_as_ascii
    
Save the replacement dictionary.
Parameters
Returns
    
    
    
View Source
    
def save_as_ascii(self, filename):
    """Write the replacement dictionary into a file, one pair per line.

    Keys are written in sorted order. A key whose value holds several
    items (split on ``REPLACE_SEPARATOR``) is written on several lines,
    one per item. Any error is logged and reported through the returned
    value instead of being raised.

    :param filename: (str) name of the file to write
    :returns: (bool) True if the file was written, False otherwise

    """
    separator = sppasDictRepl.REPLACE_SEPARATOR
    try:
        with codecs.open(filename, 'w', encoding=sg.__encoding__) as output:
            for entry in sorted(self._dict):
                for item in self._dict[entry].split(separator):
                    output.write('{:s} {:s}\n'.format(entry, item.strip()))
    except Exception as e:
        message = 'Saving file failed due to the following error: {:s}'.format(str(e))
        logging.info(message)
        return False
    return True