SPPAS integration of the automatic re-occurrences annotation.
Module sppas.src.annotations
Class sppasReOcc
Description
Constructor
Create a new sppasReOcc instance with only the general rules.
Parameters
- log: (sppasLog) Human-readable logs.
View Source
def __init__(self, log=None):
    """Instantiate the re-occurrences annotation with its default setup.

    The parent initializer is given the 'reoccurrences.json' name and
    the log. No option is fixed here: see fix_options().

    :param log: (sppasLog) Human-readable logs.

    """
    super(sppasReOcc, self).__init__('reoccurrences.json', log)

    # Largest value accepted by set_span().
    self.max_span = 20

    # Evaluator called by detection() on each annotation of speaker 1.
    self.__reocc = ReOccurences()
Public functions
fix_options
Fix all options.
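Available options are: 'tiername', 'span', and any option whose key contains 'pattern'. Any other key raises AnnotationOptionError.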
Parameters
- options: (sppasOption)
View Source
def fix_options(self, options):
    """Fix all options.

    Available options are:

        - tiername: forwarded to set_tiername()
        - span: forwarded to set_span()
        - any key containing 'pattern': stored as-is in the options

    :param options: (sppasOption) Iterable of options, each one giving
        access to its key and its value.
    :raise: AnnotationOptionError if a key is none of the above

    """
    for option in options:
        name = option.get_key()

        if name == 'tiername':
            self.set_tiername(option.get_value())
            continue

        if name == 'span':
            self.set_span(option.get_value())
            continue

        # Only the pattern-like options remain acceptable at this point.
        if 'pattern' not in name:
            raise AnnotationOptionError(name)
        self._options[name] = option.get_value()
set_tiername
Fix the tiername option.
Parameters
- tier_name: (str)
View Source
def set_tiername(self, tier_name):
    """Fix the tiername option.

    The given name is stored after being passed through
    sppasUnicode(...).to_strip().

    :param tier_name: (str) Name of the tier to search for in the inputs

    """
    cleaned_name = sppasUnicode(tier_name).to_strip()
    self._options['tiername'] = cleaned_name
set_span
Fix the span option.
Span is the maximum number of annotations to search for re-occ. A value of 1 means to search only in the next annotation.
Parameters
- span: (int) Value between 1 and 20
View Source
def set_span(self, span):
    """Fix the span option.

    Span is the maximum number of annotations to search for
    re-occurrences. A value of 1 means to search only in the next
    annotation.

    :param span: (int) Value between 1 and max_span (20 by default);
        anything accepted by int() is converted first.
    :raise: IndexRangeException if the value is out of range

    """
    value = int(span)

    # Reject anything outside ]0; max_span] before touching the options.
    if value <= 0 or value > self.max_span:
        raise IndexRangeException(value, 0, self.max_span)

    self._options['span'] = value
detection
Search for the re-occurrences of annotations.
Parameters
- tier_spk1: (sppasTier)
- tier_spk2: (sppasTier)
View Source
def detection(self, tier_spk1, tier_spk2):
    """Search for the re-occurrences of annotations.

    Each annotation of the first tier is compared to a window of
    annotations of the second tier. The window holds at most 'span'
    annotations, found between the highest localization of the
    current annotation and the highest localization of the last
    annotation of the second tier.

    :param tier_spk1: (sppasTier) Annotations of the first speaker
    :param tier_spk2: (sppasTier) Annotations of the second speaker
    :returns: the result of sppasAnnReOccSet.to_tier() for all the
        annotations of tier_spk1 with at least one re-occurrence

    """
    annset = sppasAnnReOccSet()

    # Each tier gets a radius only if its own points are floats.
    # The second test used to check tier_spk1 instead of tier_spk2.
    if tier_spk1.is_float():
        tier_spk1.set_radius(0.04)
    if tier_spk2.is_float():
        tier_spk2.set_radius(0.04)

    # NOTE(review): tier_spk2[-1] assumes tier_spk2 is not empty -- confirm
    # that callers never give an empty tier.
    end_loc = tier_spk2[-1].get_highest_localization()

    for ann1 in tier_spk1:
        # Candidate annotations of speaker 2, from the highest
        # localization of ann1 up to the end of the second tier.
        cur_loc = ann1.get_highest_localization()
        all_anns2 = tier_spk2.find(cur_loc, end_loc, overlaps=False)

        # Keep no more than 'span' candidates.
        window_size = min(len(all_anns2), self._options['span'])
        anns2 = all_anns2[:window_size]

        reoccs = self.__reocc.eval(ann1, anns2)
        if len(reoccs) > 0:
            annset.append(ann1, reoccs)

    return annset.to_tier()
get_inputs
Return 2 tiers with name given in options.
Parameters
- input_files: (list)
Raises
NoTierInputError
Returns
- (tuple of sppasTier) The source tier and the echo tier
View Source
def get_inputs(self, input_files):
    """Return the 2 tiers whose name is given in the options.

    The files of each speaker are read in the given order until one of
    them contains a tier with the expected name (case-insensitive).
    The remaining files of that speaker are not parsed.

    :param input_files: (list) Two lists of file names: the files of
        the source speaker then the files of the echoing speaker
    :raise: Exception if input_files does not have exactly 2 entries
    :raise: NoTierInputError if the tier is not found for a speaker
    :return: (tuple) The source sppasTier and the echo sppasTier

    """
    if len(input_files) != 2:
        raise Exception('Invalid format of input files.')

    # One error message per speaker, in the same order as input_files.
    not_found_messages = (
        'A source tier with time-aligned items was expected but not found.',
        'An echo tier with time-aligned items was expected but not found.',
    )

    tiers = []
    for filenames, message in zip(input_files, not_found_messages):
        tier = None
        for filename in filenames:
            trs_input = sppasTrsRW(filename).read()
            tier = trs_input.find(self._options['tiername'],
                                  case_sensitive=False)
            if tier is not None:
                # Only the first match is used: do not parse the
                # remaining files for nothing.
                break

        if tier is None:
            logging.error(message)
            raise NoTierInputError
        tiers.append(tier)

    return tuple(tiers)
run
Run the automatic annotation process on an input.
The input is a pair of file lists: the files of the main speaker and the files of the echoing speaker.
Parameters
- input_files: (list of list of str) Time-aligned items, Time-aligned items
- output: (str) the output name
Returns
- (sppasTranscription) when no output name is given; otherwise a list containing the name of the written file
View Source
def run(self, input_files, output=None):
    """Run the automatic annotation process on an input.

    The input is made of 2 lists of files: the ones of the main
    speaker and the ones of the echoing speaker.

    :param input_files: (list of list of str) Time-aligned items,
        Time-aligned items
    :param output: (str) the output name
    :raise: EmptyOutputError if an output name is given but no tier
        was created
    :returns: (sppasTranscription) if output is None, or a list with
        the name of the written file

    """
    # Get the tier of each speaker, then search for the re-occurrences.
    src_tier, echo_tier = self.get_inputs(input_files)
    detected = self.detection(src_tier, echo_tier)

    # Gather the detected tiers into a transcription.
    result = sppasTranscription(self.name)
    result.set_meta('annotation_result_of', input_files[0][0])
    for detected_tier in detected:
        result.append(detected_tier)

    # Nothing to save: the caller gets the transcription itself.
    if output is None:
        return result

    if len(result) == 0:
        raise EmptyOutputError

    out_name = self.fix_out_file_ext(output)
    writer = sppasTrsRW(out_name)
    writer.write(result)
    return [out_name]
get_output_pattern
Pattern this annotation uses in an output filename.
View Source
def get_output_pattern(self):
    """Pattern this annotation uses in an output filename.

    :returns: (str) The 'outputpattern' option if it was fixed,
        '-reocc' otherwise.

    """
    fallback = '-reocc'
    return self._options.get('outputpattern', fallback)