Public functions
get_filename
Return the name of the file from which the vocab comes.
View Source
def get_filename(self):
    """Return the name of the file the vocab was loaded from.

    :returns: the current value of the ``_filename`` member.

    """
    return self._filename
is_key
Return True if entry is exactly a key in the dictionary.
Parameters
- entry: (str) Unicode string.
View Source
def is_key(self, entry):
    """Tell whether the entry is exactly one of the dictionary keys.

    The entry is only converted with ``u()`` before the lookup; it is
    not normalized with ``format_token()``.

    :param entry: (str) Unicode string.
    :returns: (bool)

    """
    candidate = u(entry)
    return candidate in self._dict
is_value
Return True if entry is a value in the dictionary.
Parameters
- entry: (str) Unicode string.
View Source
def is_value(self, entry):
    """Tell whether the entry is a value of any key of the dictionary.

    The entry is normalized with ``format_token()`` and compared to each
    part of each stored value, the parts being obtained by splitting the
    stored value on ``REPLACE_SEPARATOR``.

    :param entry: (str) Unicode string.
    :returns: (bool)

    """
    wanted = sppasDictRepl.format_token(entry)
    return any(
        wanted in stored.split(sppasDictRepl.REPLACE_SEPARATOR)
        for stored in self._dict.values()
    )
is_value_of
Return True if entry is a value of a given key in the dictionary.
Parameters
- key: (str) Unicode string.
- entry: (str) Unicode string.
View Source
def is_value_of(self, key, entry):
    """Tell whether the entry is one of the values stored for a key.

    The stored value is fetched with ``get(key, '')``, so a missing key
    is handled like a key whose value is the empty string.

    :param key: (str) Unicode string.
    :param entry: (str) Unicode string.
    :returns: (bool)

    """
    wanted = sppasDictRepl.format_token(entry)
    stored = self.get(key, '')
    return wanted in stored.split(sppasDictRepl.REPLACE_SEPARATOR)
is_unk
Return True if entry is not a key in the dictionary.
Parameters
- entry: (str) Unicode string.
View Source
def is_unk(self, entry):
    """Tell whether the entry is unknown, i.e. not a dictionary key.

    The entry is normalized with ``format_token()`` before the lookup.

    :param entry: (str) Unicode string.
    :returns: (bool)

    """
    token = sppasDictRepl.format_token(entry)
    is_known = token in self._dict
    return not is_known
is_empty
Return True if there is no entry in the dictionary.
View Source
def is_empty(self):
    """Tell whether the dictionary holds no entry at all.

    :returns: (bool)

    """
    return not self._dict
get
Return the value of a key of the dictionary or substitution.
Parameters
- entry: (str) A token to find in the dictionary
- substitution: (str) String to return if the token is missing from the dict
Returns
- unicode of the replacement or the substitution.
View Source
def get(self, entry, substitution=''):
    """Return the value stored for a key, or the given substitution.

    The entry is normalized with ``format_token()`` before the lookup.

    :param entry: (str) A token to find in the dictionary
    :param substitution: (str) String to return if the token is missing
    from the dict
    :returns: unicode of the replacement or the substitution.

    """
    token = sppasDictRepl.format_token(entry)
    if token in self._dict:
        return self._dict[token]
    return substitution
replace
Return the value of a key or an empty string if key has no replacement.
Parameters
View Source
def replace(self, key):
    """Return the value of a key, or an empty string if it has none.

    This is a shorthand for ``get(key)``: when the key has no
    replacement, the default substitution of ``get()`` is returned,
    which is an empty string (not None).

    :param key: (str) A token to find in the dictionary
    :returns: unicode of the replacement or an empty string.

    """
    return self.get(key)
replace_reversed
Return the key(s) of a value or an empty string.
Parameters
- value: (str) value to search
Returns
- a unicode string with all keys, joined with REPLACE_SEPARATOR, or an empty string if value does not exist.
View Source
def replace_reversed(self, value):
    """Return the key(s) a value is attached to, or an empty string.

    The value is normalized with ``format_token()``, then compared to
    each part of each stored value (split on ``REPLACE_SEPARATOR``).
    A key is collected once per matching part.

    :param value: (str) value to search
    :returns: a unicode string with all matching keys joined with
    ``REPLACE_SEPARATOR``, or an empty string if the value does not exist.

    """
    wanted = sppasDictRepl.format_token(value)
    matching = [
        key
        for key, stored in self._dict.items()
        for part in stored.split(sppasDictRepl.REPLACE_SEPARATOR)
        if part == wanted
    ]
    if not matching:
        return ''
    return sppasDictRepl.REPLACE_SEPARATOR.join(matching)
format_token
Remove the CR/LF, tabs, multiple spaces and others... and lower.
Parameters
Returns
View Source
@staticmethod
def format_token(entry):
    """Normalize a token before it is used as a key or as a value.

    The entry is wrapped into a ``sppasUnicode`` and the result of its
    ``to_strip()`` method is returned.
    NOTE(review): the former summary said CR/LF, tabs and multiple spaces
    are removed and the token is lowered; what ``to_strip()`` actually
    does is not visible here -- confirm against sppasUnicode.

    :param entry: (str) a token
    :returns: formatted token

    """
    wrapped = sppasUnicode(entry)
    return wrapped.to_strip()
add
Add a new key,value into the dict.
Add as a new pair or append the value to the existing one with
a "|" used as separator.
Parameters
- token: (str) string of the token to add
- repl: (str) the replacement token
Both token and repl are converted to unicode (if needed) and stripped.
View Source
def add(self, token, repl):
    """Add a new key,value into the dict.

    Add as a new pair, or append the value to the existing one(s) of
    the key with ``REPLACE_SEPARATOR`` used as separator. If the value
    is already one of the values of the key, nothing is changed.

    :param token: (str) string of the token to add
    :param repl: (str) the replacement token

    Both token and repl are normalized with ``format_token()``.

    """
    key = sppasDictRepl.format_token(token)
    value = sppasDictRepl.format_token(repl)

    if key in self._dict:
        if self.is_value_of(key, value):
            # Already registered for this key. Assigning here would
            # replace the whole multi-valued entry by this single value
            # and silently drop the other replacements.
            return
        # Join with the same separator is_value_of() splits on, so that
        # the duplicate check above and the stored format always agree.
        value = sppasDictRepl.REPLACE_SEPARATOR.join((self._dict[key], value))

    self._dict[key] = value
pop
Remove an entry, as key.
Parameters
- entry: (str) unicode string of the entry to remove
View Source
def pop(self, entry):
    """Remove an entry from the dictionary, the entry being a key.

    The entry is normalized with ``format_token()``. Nothing happens if
    the resulting key is not in the dictionary.

    :param entry: (str) unicode string of the entry to remove

    """
    token = sppasDictRepl.format_token(entry)
    # The default value makes the removal of an unknown key a no-op.
    self._dict.pop(token, None)
remove
Remove an entry, as key or value.
Parameters
- entry: (str) unicode string of the entry to remove
View Source
def remove(self, entry):
    """Remove every item in which the entry is the key or one of the values.

    :param entry: (str) unicode string of the entry to remove

    """
    token = sppasDictRepl.format_token(entry)

    # Collect the keys first: the dict can't be modified while iterating.
    doomed = [
        key for key in self._dict.keys()
        if key == token or self.is_value_of(key, entry)
    ]
    for key in doomed:
        del self._dict[key]
load_from_ascii
Load a replacement dictionary from an ascii file.
Parameters
- filename: (str) Replacement dictionary file name
View Source
def load_from_ascii(self, filename):
    """Load a replacement dictionary from an ascii file.

    The file is read with the ``sg.__encoding__`` encoding. Each line is
    split on whitespace: the first column is the key and the remaining
    columns, joined with ``REPLACE_SEPARATOR``, are the value. Lines
    with less than two columns are ignored. Entries are stored with
    ``add()``, and the file name is memorized once the file was read.

    :param filename: (str) Replacement dictionary file name
    :raises: FileUnicodeError if the content can't be decoded

    """
    # The context manager closes the file: no explicit close() needed.
    with codecs.open(filename, 'r', sg.__encoding__) as fd:
        try:
            lines = fd.readlines()
        except UnicodeDecodeError:
            raise FileUnicodeError(filename=filename)

    self._filename = filename
    for line in lines:
        # split() without argument already discards leading/trailing
        # blanks and collapses runs of whitespace.
        columns = line.split()
        if len(columns) < 2:
            continue
        key = columns[0]
        value = sppasDictRepl.REPLACE_SEPARATOR.join(columns[1:])
        self.add(key, value)
save_as_ascii
Save the replacement dictionary.
Parameters
Returns
View Source
def save_as_ascii(self, filename):
    """Save the replacement dictionary into an ascii file.

    Keys are written in sorted order, with one "key value" line for each
    part of the stored value (split on ``REPLACE_SEPARATOR``). Any error
    is logged and reported through the returned value instead of being
    raised.

    :param filename: (str)
    :returns: (bool) False if saving failed, True otherwise

    """
    try:
        with codecs.open(filename, 'w', encoding=sg.__encoding__) as output:
            for entry in sorted(self._dict):
                stored = self._dict[entry]
                for part in stored.split(sppasDictRepl.REPLACE_SEPARATOR):
                    output.write('{:s} {:s}\n'.format(entry, part.strip()))
    except Exception as e:
        logging.info('Saving file failed due to the following error: {:s}'.format(str(e)))
        return False

    return True