Class to manage mapping tables.
A mapping is an extended replacement dictionary. sppasMapping is used for the management of a mapping table of any set of strings.
Class to manage mapping tables.
A mapping is an extended replacement dictionary. sppasMapping is used for the management of a mapping table of any set of strings.
Create a new sppasMapping instance.
def __init__(self, dict_name=None):
    """Build a mapping table, optionally filled from a file.

    The parent constructor receives the file name and ``nodump=True``.
    The options then get their default values: missing entries are kept
    unchanged, the mapping is not reversed, and the symbol for missing
    entries is the empty string.

    :param dict_name: (str) file name with the mapping data (2 columns)

    """
    super(sppasMapping, self).__init__(dict_name, nodump=True)

    # Default options of the mapping process.
    self._miss_symbol = ''
    self._reverse = False
    self._keep_miss = True
Return the boolean value of reverse member.
def get_reverse(self):
    """Tell whether the mapping is applied in reverse.

    :returns: the current value of the reverse option

    """
    return self._reverse
Return the symbol used to replace missing entries.
def get_miss_symbol(self):
    """Return the symbol used to replace entries missing from the table.

    :returns: the replacement symbol currently stored in this mapping

    """
    return self._miss_symbol
Fix the keep_miss option.
def set_keep_miss(self, keep_miss):
    """Set the keep_miss option.

    :param keep_miss: (bool) If True, each entry missing from the mapping
    table is kept without change; otherwise each missing entry is
    replaced by the symbol for missing entries.

    """
    # Normalize to a real bool so that falsy values such as 0 or None
    # reliably disable the option instead of being stored as they are.
    self._keep_miss = bool(keep_miss)
Fix the reverse option.
def set_reverse(self, reverse):
    """Set the reverse option.

    :param reverse: (bool) If True, the mapping replaces a value by its
    key instead of replacing a key by its value.

    """
    # Normalize to a real bool so that falsy values such as 0 or None
    # reliably disable the option instead of being stored as they are.
    self._reverse = bool(reverse)
Fix the symbol to be used if keep_miss is False.
def set_miss_symbol(self, symbol):
    """Set the symbol that replaces missing entries if keep_miss is False.

    The given value is converted with ``str()`` before being stored.

    :param symbol: (str) US-ASCII symbol to be used when an entry is
    missing from the mapping table.

    """
    as_text = str(symbol)
    self._miss_symbol = as_text
Map an entry (a key or a value).
def map_entry(self, entry):
    """Map a single entry (a key, or a value if the mapping is reversed).

    If the table is empty, the entry is returned unchanged. Otherwise,
    the entry is replaced by its value (or, in reverse mode, by the
    non-empty result of ``replace_reversed``). An entry that cannot be
    mapped is either returned unchanged (keep_miss enabled) or replaced
    by the symbol for missing entries.

    :param entry: (str) input string to map
    :returns: mapped entry is a string

    """
    # Nothing to map with: the entry is returned as it is.
    if self.is_empty():
        return entry

    # The options are tested by truth value, not by identity with
    # True/False, so that values like 0, 1 or None behave as expected.
    if self._reverse:
        replaced = self.replace_reversed(entry)
        if replaced:
            return replaced
    elif self.is_key(entry):
        return self.get(entry)

    # The entry was not found in the mapping table.
    if self._keep_miss:
        return entry
    return self._miss_symbol
Run the Mapping process on an input string.
def map(self, mstr, delimiters=DEFAULT_SEP, separator=''):
    """Run the mapping process on an input string.

    If the table is empty, the input string is returned unchanged.

    With delimiters, the string is split on them: each delimiter is kept
    as it is (whitespace following a delimiter is dropped) and each other
    part is mapped with ``map_entry``. Without delimiters, the string is
    cut from left to right into the longest parts that can be mapped
    (a part is at least one character long), then each part is mapped.

    :param mstr: (str) input string to map
    :param delimiters: (list) list of character delimiters. Default is
    DEFAULT_SEP, documented as [';', ',', ' ', '.', '|', '+', '-'].
    An empty list or None enables the longest matching algorithm.
    :param separator: (char) used to separate parts of the mapped result
    (when longest matching algorithm was used to map a string)
    :returns: a string

    """
    if self.is_empty():
        return mstr

    # None is accepted as "no delimiters".
    if delimiters is None:
        delimiters = []

    if len(delimiters) > 0:
        # The capturing group makes re.split keep the delimiters in the
        # result; the trailing \s* swallows whitespace after a delimiter.
        pattern = '(' + '|'.join(re.escape(d) for d in delimiters) + r')\s*'
        tab = re.split(pattern, mstr)
    else:
        tab = []
        # Longest matching: map_entry is temporarily forced to answer
        # 'UNKNOWN' for any entry it cannot map, which is how a failed
        # match is detected below.
        saved_symbol = self._miss_symbol
        saved_keep = self._keep_miss
        self._miss_symbol = 'UNKNOWN'
        self._keep_miss = False
        try:
            start = 0
            end = 0
            total = len(mstr)
            while end < total:
                # Try the whole remaining string first, then shorten it
                # from the right until it maps or is one character long.
                end = total
                mapped = self.map_entry(mstr[start:end])
                while mapped == self._miss_symbol and start < end - 1:
                    end -= 1
                    mapped = self.map_entry(mstr[start:end])
                tab.append(mstr[start:end])
                start = end
        finally:
            # Always restore the user's options, even if map_entry raised.
            self._miss_symbol = saved_symbol
            self._keep_miss = saved_keep

    map_tab = []
    for v in tab:
        if v in delimiters:
            # Delimiters are copied to the result without being mapped.
            map_tab.append(v)
            continue
        mapped = self.map_entry(v)
        if mapped == self._miss_symbol:
            logging.debug('In {:s}, missing symbol {:s}. Mapped into {:s}.'.format(mstr, v, mapped))
        map_tab.append(mapped)

    return separator.join(map_tab)