SPPAS integration of the automatic syllabification annotation.
Module sppas.src.annotations
Class sppasSyll
Description
Constructor
Create a new sppasSyll instance with only the general rules.
Log is used for a better communication of the annotation process and its results. If None, logs are redirected to the default logging system.
Parameters
- log: (sppasLog) Human-readable logs.
View Source
def __init__(self, log=None):
    """Build a sppasSyll instance that only knows the general rules.

    The parent annotation is initialised with the 'syllabify.json'
    configuration name and the given log. The syllabifier is created
    without any rules file and the language is set to 'und'.

    :param log: (sppasLog) Human-readable logs. It is forwarded as-is
        to the parent constructor.

    """
    super(sppasSyll, self).__init__('syllabify.json', log)
    # Until load_resources() is called the language is undetermined.
    self.__lang = 'und'
    self.__syllabifier = Syllabifier()
Public functions
load_resources
Fix the syllabification rules from a configuration file.
Parameters
- config_filename: Name of the configuration file with the rules
- lang: (str) Iso639-3 of the language or "und" if unknown.
View Source
def load_resources(self, config_filename, lang='und', **kwargs):
    """Set the syllabification rules from a configuration file.

    The language is stored first, then a new Syllabifier is created
    from the given file and replaces the current one. Extra keyword
    arguments are accepted but not used by this method.

    :param config_filename: Name of the configuration file with the rules
    :param lang: (str) Iso639-3 of the language or "und" if unknown.

    """
    # Keep this order: the language is updated even if loading the
    # rules raises.
    self.__lang = lang
    rules = Syllabifier(config_filename)
    self.__syllabifier = rules
fix_options
Fix all options.
Available options are:
- usesintervals
- usesphons
- tiername
- createclasses
- createstructs
Parameters
- options: (sppasOption)
View Source
def fix_options(self, options):
    """Fix all options.

    Recognised option keys are:

        - usesintervals
        - usesphons
        - tiername
        - createclasses
        - createstructs
        - any key containing 'pattern' (stored as-is)

    :param options: (sppasOption) iterable of options; each one must
        provide get_key() and get_value()
    :raise: AnnotationOptionError for any other key

    """
    # Each known key is routed to its dedicated setter.
    setters = {
        'usesintervals': self.set_usesintervals,
        'usesphons': self.set_usesphons,
        'tiername': self.set_tiername,
        'createclasses': self.set_create_tier_classes,
        'createstructs': self.set_create_tier_structures,
    }

    for option in options:
        name = option.get_key()
        setter = setters.get(name)
        if setter is not None:
            setter(option.get_value())
        elif 'pattern' in name:
            # Input/output pattern options are stored without conversion.
            self._options[name] = option.get_value()
        else:
            raise AnnotationOptionError(name)
set_usesintervals
Fix the usesintervals option.
Parameters
- mode: (bool) If mode is set to True, the syllabification operates inside specific (given) intervals.
View Source
def set_usesintervals(self, mode):
    """Fix the usesintervals option.

    The value is converted to a real boolean, like the createclasses
    and createstructs setters do. run() tests this option with
    ``is True``, so a truthy non-bool value such as 1 would otherwise
    silently disable the interval-based syllabification.

    :param mode: (bool) If mode is set to True, the syllabification
        operates inside specific (given) intervals.

    """
    self._options['usesintervals'] = bool(mode)
set_usesphons
Fix the usesphons option.
Parameters
- mode: (bool) If mode is set to True, the syllabification operates by using only the tier with phonemes.
View Source
def set_usesphons(self, mode):
    """Fix the usesphons option.

    The value is converted to a real boolean, like the createclasses
    and createstructs setters do. run() tests this option with
    ``is True``, so a truthy non-bool value such as 1 would otherwise
    silently disable the phonemes-only syllabification.

    :param mode: (bool) If mode is set to True, the syllabification
        operates by using only the tier with phonemes.

    """
    self._options['usesphons'] = bool(mode)
set_create_tier_classes
Fix the createclasses option.
Parameters
- create: (bool)
View Source
def set_create_tier_classes(self, create=True):
    """Fix the createclasses option.

    :param create: (bool) stored after conversion with bool()

    """
    enabled = bool(create)
    self._options['createclasses'] = enabled
set_create_tier_structures
Fix the createstructs option.
Parameters
- create: (bool)
View Source
def set_create_tier_structures(self, create=True):
    """Fix the createstructs option.

    :param create: (bool) stored after conversion with bool()

    """
    enabled = bool(create)
    self._options['createstructs'] = enabled
set_tiername
Fix the tiername option.
Parameters
- tier_name: (str)
View Source
def set_tiername(self, tier_name):
    """Fix the tiername option.

    The name is stored after being passed through
    sppasUnicode(...).to_strip().

    :param tier_name: (str)

    """
    stripped = sppasUnicode(tier_name).to_strip()
    self._options['tiername'] = stripped
convert
Syllabify labels of a time-aligned phones tier.
Parameters
- phonemes: (sppasTier) time-aligned phonemes tier
- intervals: (sppasTier)
Returns
- (sppasTier)
View Source
def convert(self, phonemes, intervals=None):
    """Syllabify labels of a time-aligned phones tier.

    When no intervals are given, they are built from the phonemes tier
    with _phon_to_intervals(). For each interval, the indexes of its
    first and last phonemes are searched in the phonemes tier; if both
    are found the range is syllabified, otherwise a warning is printed
    in the log and the interval is skipped.

    :param phonemes: (sppasTier) time-aligned phonemes tier
    :param intervals: (sppasTier) intervals to syllabify, or None
    :returns: (sppasTier) the tier 'SyllAlign' with the syllables

    """
    if intervals is None:
        intervals = sppasSyll._phon_to_intervals(phonemes)

    result = sppasTier('SyllAlign')
    result.set_meta('syllabification_of_tier', phonemes.get_name())
    result.set_meta('linguistic_resource_rules', self.__syllabifier.get_rules_filename())

    for span in intervals:
        # Index of the first phoneme: lindex() first, mindex() as fallback.
        first = phonemes.lindex(span.get_lowest_localization())
        if first == -1:
            first = phonemes.mindex(span.get_lowest_localization(), bound=-1)

        # Index of the last phoneme: rindex() first, mindex() as fallback.
        last = phonemes.rindex(span.get_highest_localization())
        if last == -1:
            last = phonemes.mindex(span.get_highest_localization(), bound=1)

        if first == -1 or last == -1:
            # No phoneme index was found for this interval: report and skip.
            self.logfile.print_message(info(1224, 'annotations').format(span), indent=2, status=annots.warning)
            continue

        self.syllabify_interval(phonemes, first, last, result)

    return result
make_classes
Create the tier with syllable classes.
Parameters
- syllables: (sppasTier)
View Source
def make_classes(self, syllables):
    """Create the tier with syllable classes.

    Each annotation of the result gets a copy of the location of a
    syllable; its label is the result of classes_phonetized() on the
    content of the best tag of that syllable.

    :param syllables: (sppasTier)
    :returns: (sppasTier) the tier 'SyllClassAlign'

    """
    class_tier = sppasTier('SyllClassAlign')
    class_tier.set_meta('classes_of_tier', syllables.get_name())

    for annotation in syllables:
        where = annotation.get_location().copy()
        content = annotation.get_best_tag().get_typed_content()
        label = sppasLabel(sppasTag(self.__syllabifier.classes_phonetized(content)))
        class_tier.create_annotation(where, label)

    return class_tier
make_structures
Create the tier with syllable structures.
Parameters
- syllables: (sppasTier)
View Source
def make_structures(self, syllables):
    """Create the tier with syllable structures.

    Each annotation of the result gets a copy of the location of a
    syllable; its label is the result of structures_phonetized() on
    the content of the best tag of that syllable.

    :param syllables: (sppasTier)
    :returns: (sppasTier) the tier 'SyllStructsAlign'

    """
    struct_tier = sppasTier('SyllStructsAlign')
    struct_tier.set_meta('structures_of_tier', syllables.get_name())

    for annotation in syllables:
        where = annotation.get_location().copy()
        content = annotation.get_best_tag().get_typed_content()
        label = sppasLabel(sppasTag(self.__syllabifier.structures_phonetized(content)))
        struct_tier.create_annotation(where, label)

    return struct_tier
syllabify_interval
Perform the syllabification of one interval.
Parameters
- phonemes: (sppasTier)
- from_p: (int) index of the first phoneme to be syllabified
- to_p: (int) index of the last phoneme to be syllabified
- syllables: (sppasTier)
View Source
def syllabify_interval(self, phonemes, from_p, to_p, syllables):
    """Perform the syllabification of one interval.

    The content of the best tag of each phoneme in [from_p, to_p] is
    given to the syllabifier. Every (start, end) pair it returns is
    relative to from_p; it is turned into an annotation that is
    appended to the given syllables tier. Nothing is returned.

    :param phonemes: (sppasTier)
    :param from_p: (int) index of the first phoneme to be syllabified
    :param to_p: (int) index of the last phoneme to be syllabified
    :param syllables: (sppasTier) tier in which syllables are created

    """
    phones = [
        ann.get_best_tag().get_typed_content()
        for ann in phonemes[from_p:to_p + 1]
    ]

    for boundaries in self.__syllabifier.annotate(phones):
        first, last = boundaries

        # Indexes are shifted by from_p to address the phonemes tier.
        begin = phonemes[first + from_p].get_lowest_localization().copy()
        end = phonemes[last + from_p].get_highest_localization().copy()
        location = sppasLocation(sppasInterval(begin, end))

        text = Syllabifier.phonetize_syllables(phones, [boundaries])
        syllables.create_annotation(location, sppasLabel(sppasTag(text)))
get_inputs
Return the tier with time-aligned phonemes, together with the intervals tier.
Parameters
- input_files: (list)
Raises
NoTierInputError
Returns
- (sppasTier)
View Source
def get_inputs(self, input_files):
    """Return the time-aligned phonemes tier and the intervals tier.

    Every file name that is not None is read. The intervals tier is
    searched by the name stored in the 'tiername' option, and the
    phonemes tier with sppasFindTier.aligned_phones(); each search is
    repeated only while its result is still None.

    :param input_files: (list) file names; None entries are ignored
    :raise: NoTierInputError if no phonemes tier was found
    :return: (tuple) the phonemes tier, then the intervals tier

    """
    phon_tier = None
    interval_tier = None

    for name in input_files:
        if name is not None:
            transcription = sppasTrsRW(name).read()
            if interval_tier is None:
                interval_tier = transcription.find(self._options['tiername'])
            if phon_tier is None:
                phon_tier = sppasFindTier.aligned_phones(transcription)

    if phon_tier is not None:
        return (phon_tier, interval_tier)

    logging.error('A tier with a time-aligned phonemes was not found.')
    raise NoTierInputError
run
Run the automatic annotation process on an input.
Parameters
- input_files: (list of str) Time-aligned phonemes
- output: (str) the output file name
Returns
- (sppasTranscription) if no output file name is given; otherwise a list with the name of the written file
View Source
def run(self, input_files, output=None):
    """Run the automatic annotation process on an input.

    Depending on the 'usesphons' and 'usesintervals' options, the
    phonemes are syllabified on their own and/or inside the intervals
    tier. The 'createclasses' and 'createstructs' options add the
    matching classes and structures tiers.

    :param input_files: (list of str) Time-aligned phonemes
    :param output: (str) the output file name
    :returns: (sppasTranscription) if output is None, otherwise a list
        with the name of the file that was written
    :raise: EmptyOutputError if output is given but no tier was created

    """
    phonemes, intervals = self.get_inputs(input_files)

    # Result transcription and its metadata.
    result = sppasTranscription(self.name)
    result.set_meta('annotation_result_of', input_files[0])
    result.set_meta('language_iso', 'iso639-3')
    result.set_meta('language_name_0', 'Undetermined')
    # Any language that is not a 3-character code is reported as 'und'.
    code = self.__lang if len(self.__lang) == 3 else 'und'
    result.set_meta('language_code_0', code)
    result.set_meta('language_url_0', 'https://iso639-3.sil.org/code/' + code)

    # Syllabification of the phonemes tier alone.
    if self._options['usesphons'] is True:
        syll_tier = self.convert(phonemes)
        result.append(syll_tier)
        if self._options['createclasses']:
            result.append(self.make_classes(syll_tier))
        if self._options['createstructs']:
            result.append(self.make_structures(syll_tier))

    # Syllabification inside the given intervals.
    if self._options['usesintervals'] is True:
        if intervals is not None:
            syll_int_tier = self.convert(phonemes, intervals)
            syll_int_tier.set_name('SyllAlign-Intervals')
            syll_int_tier.set_meta('syllabification_used_intervals', intervals.get_name())
            result.append(syll_int_tier)
            if self._options['createclasses']:
                derived = self.make_classes(syll_int_tier)
                derived.set_name('SyllClassAlign-Intervals')
                result.append(derived)
            if self._options['createstructs']:
                derived = self.make_structures(syll_int_tier)
                derived.set_name('SyllStructsAlign-Intervals')
                result.append(derived)
        else:
            # The intervals tier is missing: warn in the log only.
            self.logfile.print_message(info(1264, 'annotations').format(tiername=self._options['tiername']), indent=2, status=annots.warning)

    # Without an output file name, the transcription itself is returned.
    if output is None:
        return result

    if len(result) == 0:
        raise EmptyOutputError

    out_name = self.fix_out_file_ext(output)
    sppasTrsRW(out_name).write(result)
    return [out_name]
get_output_pattern
Pattern this annotation uses in an output filename.
View Source
def get_output_pattern(self):
    """Pattern this annotation uses in an output filename.

    :returns: the 'outputpattern' option, or '-syll' if it is not set

    """
    pattern = self._options.get('outputpattern', '-syll')
    return pattern
get_input_pattern
Pattern this annotation expects for its input filename.
View Source
def get_input_pattern(self):
    """Pattern this annotation expects for its input filename.

    :returns: the 'inputpattern' option, or '-palign' if it is not set

    """
    pattern = self._options.get('inputpattern', '-palign')
    return pattern
Private functions
_phon_to_intervals
Create the intervals to be syllabified.
We could use symbols.phone only, but for backward compatibility we hard-code the symbols previously used in SPPAS and add them to the list.
Parameters
- phonemes
View Source
@staticmethod
def _phon_to_intervals(phonemes):
    """Create the intervals to be syllabified.

    The list of symbols given to export_to_intervals() is made of the
    keys of symbols.phone. Using them alone could be enough, but for
    backward compatibility the symbols previously used in SPPAS are
    hard-coded and appended.

    :param phonemes: tier providing export_to_intervals()
    :returns: the result of phonemes.export_to_intervals()

    """
    separators = list(symbols.phone.keys())
    # Legacy symbols, kept in this order after the current ones.
    separators.extend(['#', '@@', '+', 'gb', 'lg'])
    return phonemes.export_to_intervals(separators)