SPPAS integration of the occ and rank estimator.
Module sppas.src.annotations
Class sppasLexMetric
Description
Constructor
Create a new sppasLexMetric instance.
Parameters
- log: (sppasLog) Human-readable logs.
View Source
def __init__(self, log=None):
    """Build a sppasLexMetric annotation.

    The parent initializer is called with the 'lexmetric.json'
    configuration name and the given log.

    :param log: (sppasLog) Human-readable logs.

    """
    super(sppasLexMetric, self).__init__('lexmetric.json', log)

    # Default separators; tier_to_segment_occ() passes them to
    # export_to_intervals() to create the segments.
    default_separators = ('#', '+', 'dummy')
    self._separators = list(default_separators)
Public functions
fix_options
Fix all options.
Parameters
- options: list of sppasOption instances
View Source
def fix_options(self, options):
    """Apply each given option to this annotation.

    The keys 'alt', 'tiername' and 'separators' are forwarded to their
    dedicated setter. Any other key containing 'pattern' is stored
    as-is in the options dictionary.

    :param options: list of sppasOption instances
    :raises: AnnotationOptionError if a key is none of the above

    """
    # Exact option key -> name of the setter method handling it.
    setter_names = {
        'alt': 'set_alt',
        'tiername': 'set_tiername',
        'separators': 'set_segments_separators',
    }

    for option in options:
        name = option.get_key()

        if name in setter_names:
            # Resolve the setter lazily, only for the option at hand.
            getattr(self, setter_names[name])(option.get_value())
            continue

        if 'pattern' not in name:
            raise AnnotationOptionError(name)

        self._options[name] = option.get_value()
set_alt
Fix the alt option, used to estimate occ and rank.
Parameters
- alt: (bool)
View Source
def set_alt(self, alt):
    """Set the 'alt' option, used to estimate occ and rank.

    The given value is converted to a boolean before being stored.

    :param alt: (bool)

    """
    enabled = bool(alt)
    self._options['alt'] = enabled
set_tiername
Fix the tiername option.
Parameters
- tier_name: (str)
View Source
def set_tiername(self, tier_name):
    """Set the 'tiername' option.

    The name is stored after being passed through
    sppasUnicode.to_strip().

    :param tier_name: (str)

    """
    cleaner = sppasUnicode(tier_name)
    self._options['tiername'] = cleaner.to_strip()
set_segments_separators
Fix the separators to create segments.
Parameters
- entry: (str) Entries separated by whitespace.
View Source
def set_segments_separators(self, entry):
    """Set the separators used to create segments.

    The entry is stripped then split on whitespace. An entry which is
    empty once stripped results in an empty list of separators.

    :param entry: (str) Entries separated by whitespace.

    """
    stripped = sppasUnicode(entry).to_strip()

    # Nothing left after stripping: no separator at all.
    if len(stripped) == 0:
        self._separators = list()
        return

    self._separators = stripped.split()
tier_to_segment_occ
Create segment intervals and evaluate the number of occurrences.
Parameters
- input_tier: (sppasTier)
Returns
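- (tuple of sppasTier) The tier with the number of annotations and the tier with the number of labels, for each segment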
View Source
def tier_to_segment_occ(self, input_tier):
    """Create segment intervals and count the occurrences in each one.

    Segments are obtained by exporting the input tier to intervals
    with the current separators, or by copying the input tier if no
    separator is defined. Two tiers are then filled, segment by
    segment, from the annotations of the input tier found between the
    lowest and the highest localization of the segment:

        - 'LM-OccAnnInSegments': number of annotations found;
        - 'LM-OccLabInSegments': total number of labels of these
          annotations.

    :param input_tier: (sppasTier)
    :returns: (sppasTier, sppasTier) the tier of annotation counts and
        the tier of label counts

    """
    def count_label(count):
        # A count is stored as a single label holding an 'int' tag.
        return sppasLabel(sppasTag(str(count), 'int'))

    # Tier of segments holding the number of annotations.
    if len(self._separators) == 0:
        ann_count_tier = input_tier.copy()
    else:
        ann_count_tier = input_tier.export_to_intervals(self._separators)
    ann_count_tier.gen_id()
    ann_count_tier.set_name('LM-OccAnnInSegments')

    # Same segments, holding the number of labels.
    lab_count_tier = ann_count_tier.copy()
    lab_count_tier.gen_id()
    lab_count_tier.set_name('LM-OccLabInSegments')

    for ann_segment, lab_segment in zip(ann_count_tier, lab_count_tier):
        segment_start = ann_segment.get_lowest_localization()
        segment_end = ann_segment.get_highest_localization()
        covered = input_tier.find(segment_start, segment_end)

        ann_segment.set_labels([count_label(len(covered))])

        nb_labels = sum(len(ann.get_labels()) for ann in covered)
        lab_segment.set_labels([count_label(nb_labels)])

    return (ann_count_tier, lab_count_tier)
get_input_tier
Return the input tier from the inputs.
Parameters
- input_files: (list)
View Source
def get_input_tier(self, input_files):
    """Return the input tier from the inputs.

    Each file is read in turn and searched, case-insensitively, for a
    tier named like the 'tiername' option. The first tier found is
    returned.

    :param input_files: (list)
    :returns: the first matching tier
    :raises: NoTierInputError if none of the files has such a tier

    """
    for input_name in input_files:
        transcription = sppasTrsRW(input_name).read()
        found = transcription.find(self._options['tiername'],
                                   case_sensitive=False)
        if found is None:
            continue
        return found

    # No file contains the expected tier: log it, then fail.
    message = "Tier with name '{:s}' not found in input file."
    logging.error(message.format(self._options['tiername']))
    raise NoTierInputError
run
Run the automatic annotation process on an input.
Parameters
- input_files: (list of str) Time-aligned tokens, or other
- output: (str) the output file name
Returns
- (sppasTranscription) if no output is given, or (list of str) the name of the written file otherwise
View Source
def run(self, input_files, output=None):
    """Run the automatic annotation process on an input.

    The occ and rank tiers and the two per-segment occurrence tiers
    are estimated from the input tier, then gathered into a new
    transcription.

    :param input_files: (list of str) Time-aligned tokens, or other
    :param output: (str) the output file name
    :returns: (sppasTranscription) if output is None, or (list of str)
        the name of the written file otherwise
    :raises: NoTierInputError if the input tier is not found
    :raises: EmptyOutputError if an output is given but the result is
        empty

    """
    # The tier from which all quantities are estimated.
    source_tier = self.get_input_tier(input_files)

    # Occ and rank, then the occurrences in segments.
    estimator = OccRank(source_tier)
    occ_tier = estimator.occ()
    rank_tier = estimator.rank()
    segment_tiers = self.tier_to_segment_occ(source_tier)
    sgmt_occ_ann_tier, sgmt_occ_lab_tier = segment_tiers

    # Gather the results into a transcription.
    trs_output = sppasTranscription(self.name)
    trs_output.set_meta('token_lexmetric_result_of', input_files[0])
    for result_tier in (occ_tier, rank_tier,
                        sgmt_occ_ann_tier, sgmt_occ_lab_tier):
        trs_output.append(result_tier)

    # Without an output file name, the transcription is the result.
    if output is None:
        return trs_output

    if len(trs_output) == 0:
        raise EmptyOutputError

    # Save into a file and return its name.
    output_file = self.fix_out_file_ext(output)
    writer = sppasTrsRW(output_file)
    writer.write(trs_output)
    return [output_file]
get_output_pattern
Pattern this annotation uses in an output filename.
View Source
def get_output_pattern(self):
    """Return the pattern this annotation uses in an output filename.

    It is the value of the 'outputpattern' option, or '-lexm' if this
    option is not defined.

    """
    default_pattern = '-lexm'
    return self._options.get('outputpattern', default_pattern)