Base class for any automatic annotation integrated into SPPAS.
Module sppas.src.annotations
Class sppasBaseAnnotation
Description
Constructor
Base class for any SPPAS automatic annotation.
Load default options/member values from a configuration file. This file must be in paths.etc
The log is used to report the annotation process and its results to the user. If None, a new sppasAnnReport() is created to receive the messages.
Parameters
- config: (str) Name of the JSON configuration file, without path.
- log: (sppasLog) Human-readable logs.
View Source
def __init__(self, config, log=None):
    """Create the annotation and fill its members from a configuration file.

    The options and the member values are read from a JSON configuration
    file. This file must be in paths.etc

    The log is the report in which the annotation process and its results
    are described. If None, a new sppasAnnReport() is created and used.

    :param config: (str) Name of the JSON configuration file, without path.
    :param log: (sppasLog) Human-readable logs.

    """
    # The report used to communicate with the user
    self.logfile = sppasAnnReport() if log is None else log

    # Members. The name and the types are assigned again when loading
    # the configuration file.
    self.__types = list()
    self._options = dict()
    self.name = self.__class__.__name__

    # The extension of the output files, for each format
    self._out_extensions = dict()
    self.set_default_out_extensions()

    # Fix the options, the name and the types from the configuration
    self.__load(config)
Public functions
set_default_out_extensions
Reset the output extension of each format to its default value.
The default extension of each format is defined in the config.
View Source
def set_default_out_extensions(self):
    """Reset the output extension of each format to its default value.

    Any extension previously assigned is discarded. The formats are the
    ones of sppasFiles.OUT_FORMATS and the default extension of each of
    them is read from sppasFiles.DEFAULT_EXTENSIONS. Nothing is returned.

    """
    self._out_extensions = dict()
    self._out_extensions.update(
        (fmt, sppasFiles.DEFAULT_EXTENSIONS[fmt])
        for fmt in sppasFiles.OUT_FORMATS
    )
set_out_extension
Set the extension for a specific out format.
Parameters
- extension: (str) File extension for created files
- out_format: (str) One of ANNOT, IMAGE, VIDEO
View Source
def set_out_extension(self, extension, out_format='ANNOT'):
    """Fix the file extension to use for a given output format.

    A dot is prepended to the extension if it does not start with one.
    The result is then compared to the extensions known for the format,
    unless no extension at all is known for it.

    :param extension: (str) File extension for created files
    :param out_format: (str) One of ANNOT, IMAGE, VIDEO
    :raises: sppasExtensionWriteError if the extension is not one of the
    known extensions of the format

    """
    known = sppasFiles.get_outformat_extensions(out_format)
    if not extension.startswith('.'):
        extension = '.' + extension

    # An empty list of known extensions disables the verification
    if len(known) > 0 and extension not in known:
        logging.error('Extension {} is not in the {} list.'.format(extension, out_format))
        raise sppasExtensionWriteError(extension)

    self._out_extensions[out_format] = extension
fix_out_file_ext
Return the output with an appropriate file extension.
If the output already has an extension, it is not changed.
Parameters
- output: (str) Base name or filename
- out_format: (str) One of ANNOT, IMAGE, VIDEO
Returns
- (str) filename
View Source
def fix_out_file_ext(self, output, out_format='ANNOT'):
    """Return the output name, completed with an extension if it has none.

    When the given output has no extension, the one currently assigned to
    the given format is appended. An output which already has an extension
    is returned unchanged. If the resulting file already exists and a
    logfile is defined, a warning message is printed in it.

    :param output: (str) Base name or filename
    :param out_format: (str) One of ANNOT, IMAGE, VIDEO
    :return: (str) filename

    """
    current_ext = os.path.splitext(output)[1]
    if len(current_ext) == 0:
        output = output + self._out_extensions[out_format]

    # Warn the user: a file with this name already exists
    if os.path.exists(output) and self.logfile is not None:
        warn_msg = info(1300, 'annotations').format(output)
        self.logfile.print_message(warn_msg, indent=2, status=annots.warning)

    return output
get_output_pattern
Pattern that the annotation uses for its output filename.
View Source
def get_output_pattern(self):
    """Return the pattern this annotation uses for its output filename.

    :return: (str) Value of the 'outputpattern' option, or an empty
    string if this option is not defined.

    """
    if 'outputpattern' in self._options:
        return self._options['outputpattern']
    return ''
get_input_patterns
List of patterns that the annotation expects for the input filenames.
Returns
- (list of str)
View Source
def get_input_patterns(self):
    """Return the patterns this annotation expects for its input filenames.

    The patterns are the values of the options whose key starts with
    'inputpattern', in the sorted order of these keys. If there is no
    such option, the list contains a single empty string.

    :return: (list of str)

    """
    keys = sorted(self._options)
    patterns = [self._options[k] for k in keys if k.startswith('inputpattern')]
    return patterns if len(patterns) > 0 else ['']
get_input_extensions
Extensions that the annotation expects for its input filename.
By default, the extensions are the annotated files. Can be overridden to change the list of supported extensions: they must contain the dot.
Returns
- (list of list)
View Source
@staticmethod
def get_input_extensions():
    """Return the extensions this annotation expects for its input file.

    By default, there is a single list of extensions: the one given by
    sppasFiles for the 'ANNOT_ANNOT' input format. Can be overridden to
    change the list of supported extensions: they must contain the dot.

    :return: (list of list)

    """
    annot_extensions = sppasFiles.get_informat_extensions('ANNOT_ANNOT')
    return [annot_extensions]
get_opt_input_extensions
Extensions that the annotation expects for its optional input filename.
View Source
@staticmethod
def get_opt_input_extensions():
    """Return the extensions expected for the optional input filename.

    :return: (tuple) An empty tuple: there is no optional input here.

    """
    return tuple()
get_out_name
Return the output filename from the input one.
Output filename is created from the given filename, the annotation output pattern and the given output format (if any).
Parameters
- filename: (str) Name of the input file
- output_format: (str) Extension of the output file with the dot
Returns
- (str)
View Source
def get_out_name(self, filename, output_format=''):
    """Return the output filename corresponding to an input one.

    The extension of the given filename is removed, then each of the
    input patterns found at the end of the remaining name is removed in
    turn. The output pattern of the annotation and the given output
    format (if any) are finally appended.

    :param filename: (str) Name of the input file
    :param output_format: (str) Extension of the output file with the dot
    :returns: (str)

    """
    base = os.path.splitext(filename)[0]
    for pattern in self.get_input_patterns():
        if len(pattern) > 0 and base.endswith(pattern):
            # cut the pattern off the end of the name
            base = base[:len(base) - len(pattern)]
    return base + self.get_output_pattern() + output_format
get_out_name_old
Return the output filename from the input one.
Output filename is created from the given filename, the annotation output pattern and the given output format (if any).
Parameters
- filename: (str) Name of the input file
- output_format: (str) Extension of the output file with the dot
Returns
- (str)
View Source
def get_out_name_old(self, filename, output_format=''):
    """Return the output filename corresponding to an input one.

    The extension of the given filename is removed, then the first of the
    input patterns is removed if the remaining name ends with it; the
    other input patterns are ignored. The output pattern of the annotation
    and the given output format (if any) are finally appended.

    :param filename: (str) Name of the input file
    :param output_format: (str) Extension of the output file with the dot
    :returns: (str)

    """
    base = os.path.splitext(filename)[0]
    first_pattern = self.get_input_patterns()[0]
    if len(first_pattern) > 0 and base.endswith(first_pattern):
        # cut the pattern off the end of the name
        base = base[:len(base) - len(first_pattern)]
    return base + self.get_output_pattern() + output_format
get_option
Return the option value of a given key or raise KeyError.
Parameters
- key: (str) Name of the option whose value is returned.
Raises
KeyError
View Source
def get_option(self, key):
    """Return the value of an option.

    :param key: (str) Name of the option
    :return: The value currently assigned to the option
    :raises: KeyError if there is no option with the given key

    """
    if key in self._options:
        return self._options[key]

    # '!s' converts the key into a string before it is formatted. With
    # '{:s}' alone, a key which is not a string made format() fail, and
    # the caller got a formatting error instead of the expected KeyError.
    raise KeyError('{!s:s} is not a valid option for the automatic annotation.'.format(key))
fix_options
Fix all options of the annotation from a list of sppasOption().
Parameters
- options: (list of sppasOption)
View Source
def fix_options(self, options):
    """Fix the options of the annotation from a list of sppasOption().

    Only the options whose key contains 'pattern' are accepted here. The
    options preceding an invalid one in the list are assigned before the
    exception is raised.

    :param options: (list of sppasOption)
    :raises: AnnotationOptionError if the key of an option does not
    contain 'pattern'

    """
    for option in options:
        name = option.get_key()
        if 'pattern' not in name:
            raise AnnotationOptionError(name)
        self._options[name] = option.get_value()
get_types
Return the list of types this annotation can perform.
If this annotation is expecting another file, the type allows finding it by using the references of the workspace (if any).
View Source
def get_types(self):
    """Return the list of types this annotation can perform.

    If this annotation is expecting another file, the type allows to
    find it by using the references of the workspace (if any).

    :return: (list) The list itself is returned, not a copy of it.

    """
    ann_types = self.__types
    return ann_types
load_resources
Load the linguistic resources.
View Source
def load_resources(self, *args, **kwargs):
    """Load the linguistic resources.

    Nothing is loaded here: any argument is accepted and ignored.
    Presumably overridden by the annotations which need resources.

    """
    return None
run
Run the automatic annotation process on a given input.
The input is a list of files the annotation needs: audio, video, transcription, pitch, etc.
Either returns the list of created files if the given output is not none, or the created object (often a sppasTranscription) if no output was given.
Parameters
- input_files: (list of str) The required and optional input(s)
- output: (str) The output name with or without extension
Returns
- (sppasTranscription OR list of created file names)
View Source
def run(self, input_files, output=None):
    """Run the automatic annotation process on a given input.

    Not implemented here: this method always raises NotImplementedError.

    The expected contract is the following. The input is the list of
    files the annotation needs: audio, video, transcription, pitch, etc.
    If an output is given, the list of created files is returned. If
    not, the created object (often a sppasTranscription) is returned.

    :param input_files: (list of str) The required and optional input(s)
    :param output: (str) The output name with or without extension
    :returns: (sppasTranscription OR list of created file names)
    :raises: NotImplementedError

    """
    raise NotImplementedError()
run_for_batch_processing
Perform the annotation on a file.
This method is called by 'batch_processing'. It fixes the name of the output file, and calls the run method.
Parameters
- input_files: (list of str) the required input(s) for a run
Returns
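- the value returned by the run method (expected to be the list of created file names), or an empty list if there is no input or if the run failed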
View Source
def run_for_batch_processing(self, input_files):
    """Perform the annotation on a file.

    This method is called by 'batch_processing'. It fixes the name of the
    output file from the first of the inputs, and calls the run method.
    An exception raised by the run is not propagated: its message is
    printed in the logfile. The options are set back to a copy made
    before the run.

    :param input_files: (list of str) the required input(s) for a run
    :returns: the value returned by the run method, or an empty list if
    there is no input or if the run raised an exception

    """
    if len(input_files) == 0:
        return list()

    # Keep the current options: they are assigned back after the run
    saved_options = self._options.copy()

    # The output name is fixed from the first of the inputs. If this one
    # is itself a list, its first item is used.
    first = input_files[0]
    if isinstance(first, (list, tuple)):
        first = first[0]
    out_name = self.get_out_name(first)

    try:
        created = self.run(input_files, out_name)
    except Exception as e:
        created = list()
        self.logfile.print_message('{:s}\n'.format(str(e)), indent=2, status=annots.error)

    self._options = saved_options
    return created
batch_processing
Perform the annotation on a bunch of files.
Can be used by an annotation manager to launch all the annotations on all checked files of a workspace in a single process.
The given list of inputs can then be either: - a list of file names: [file1, file2, ...], or - a list of lists of file names: [(file1a, file1b), (file2_a,)], or - a list of mixed files/list of files: [file1, (file2a, file2b), ...].
Parameters
- file_names: (list) List of inputs
- progress: ProcessProgressTerminal() or ProcessProgressDialog()
Returns
- (list of str) List of created files
View Source
def batch_processing(self, file_names, progress=None):
"""Perform the annotation on a bunch of files.
Can be used by an annotation manager to launch all the annotations on
all checked files of a workspace in a single process.
The given list of inputs can then be either:
- a list of file names: [file1, file2, ...], or
- a list of lists of file names: [(file1_a, file1_b), (file2_a,)], or
- a list of mixed files/list of files: [file1, (file2_a, file2_b), ...].
The options are printed first, if any. Each entry of the list is turned
into a list of inputs with _fix_inputs(), and a failure of this step is
logged as critical. The annotation itself is performed by
run_for_batch_processing().
:param file_names: (list) List of inputs
:param progress: ProcessProgressTerminal() or ProcessProgressDialog()
:return: (list of str) List of created files; an empty list if no input
is given
"""
# Print the options in the user log, if there are any
if len(self._options) > 0:
self.print_options()
total = len(file_names)
# Nothing to annotate
if total == 0:
return list()
# Names of the created files, gathered over all the entries
files_processed_success = list()
if progress:
progress.update(0, '')
for i, input_files in enumerate(file_names):
try:
inputs = self._fix_inputs(input_files)
except Exception as e:
logging.critical(e)
else:
self.print_diagnosis(*inputs)
if progress:
# Fraction of the entries already handled, rounded to 2 decimals
progress.set_fraction(round(float(i) / float(total), 2))
# The format has a single field: only the first input is displayed
progress.set_text('{!s:s}'.format(*inputs))
out_names = self.run_for_batch_processing(inputs)
# None or an empty list/tuple: nothing was created for this entry
if out_names is None or (isinstance(out_names, (list, tuple)) and len(out_names) == 0):
self.logfile.print_message(info(1306, 'annotations'), indent=1, status=annots.info)
else:
# The None items are not kept in the result
files_processed_success.extend([f for f in out_names if f is not None])
# Only the first created name is printed in the report
self.logfile.print_message(out_names[0], indent=1, status=annots.ok)
self.logfile.print_newline()
if progress:
# The message is formatted with the number of created files and the
# number of entries
progress.update(1, info(9000, 'ui').format(len(files_processed_success), total))
return files_processed_success
transfer_metadata
Transfer the metadata from a sppasTranscription to another one.
The identifier is not copied and any already existing metadata is not copied.
Parameters
- from_trs
- to_trs
View Source
@staticmethod
def transfer_metadata(from_trs, to_trs):
    """Transfer the metadata from a sppasTranscription to another one.

    Only the metadata for which the target has no value are copied: any
    metadata already existing in the target is kept as it is. This is
    presumably how its identifier is preserved.

    :param from_trs: (sppasTranscription) The source of the metadata
    :param to_trs: (sppasTranscription) The target, modified in place

    """
    for meta_key in from_trs.get_meta_keys():
        already_set = to_trs.get_meta(meta_key, default=None) is not None
        if already_set:
            continue
        to_trs.set_meta(meta_key, from_trs.get_meta(meta_key))
print_filename
Print the annotation name applied on a filename in the user log.
Parameters
- filename: (str) Name of the file to annotate.
View Source
def print_filename(self, filename):
    """Print the annotation name applied on a filename in the user log.

    If a logfile is defined, the message is printed in it with the base
    name of the file. If not, the message is sent to the logging system
    with the filename as it is given.

    :param filename: (str) Name of the file to annotate.

    """
    if not self.logfile:
        logging.info(info(1056, 'annotations').format(filename))
        return

    short_name = os.path.basename(filename)
    self.logfile.print_message(info(1056, 'annotations').format(short_name), indent=0, status=None)
print_options
Print the list of options in the user log.
View Source
def print_options(self):
    """Print the list of options in the user log.

    A title is printed first, then one message for each option with its
    key and its value, and finally a newline.

    """
    report = self.logfile
    report.print_message(info(1050, 'annotations') + ': ', indent=0, status=None)

    for key, value in self._options.items():
        line = ' ... {!s:s}: {!s:s}'.format(key, value)
        report.print_message(line, indent=0, status=None)

    report.print_newline()
print_diagnosis
Print the diagnosis of a list of files in the user report.
Parameters
- filenames: (list) List of files.
View Source
def print_diagnosis(self, *filenames):
    """Print the diagnosis of a list of files in the user report.

    The None entries and the files which do not exist are ignored. An
    entry which is a list or a tuple is diagnosed recursively. For any
    other entry, the message returned by sppasDiagnosis.check_file() is
    printed in the logfile, after the base name of the file.

    :param filenames: (list) List of files.

    """
    for entry in filenames:
        if entry is None:
            continue

        if isinstance(entry, (list, tuple)):
            self.print_diagnosis(*entry)
            continue

        if not os.path.exists(entry):
            continue

        base = os.path.basename(entry)
        # the first returned value is not used, only the message is
        _, diag_msg = sppasDiagnosis.check_file(entry)
        text = info(1056, 'annotations').format(base) + ': {!s:s}'.format(diag_msg)
        self.logfile.print_message(text, indent=0, status=None)
Private functions
_fix_inputs
Return a list of input files.
The given input files can be:
- a single input: file1
- several inputs: (file1a, file1b)
Parameters
- input_files: (str, list of str)
Returns
- a list of files
View Source
def _fix_inputs(self, input_files):
    """Return the given input file(s) as a list of input files.

    The given input files can be:
    - a single input: file1
    - several inputs: (file1_a, file1_b)

    A list or a tuple is returned as it is; any other non-empty input is
    returned into a new list.

    :param input_files: (str, list of str)
    :returns: a list of files
    :raises: Exception if the given input is empty

    """
    if len(input_files) == 0:
        raise Exception(' ******* A non-empty list of input files was expected.')

    if isinstance(input_files, (list, tuple)):
        return input_files
    return [input_files]
_get_filename
Return a filename corresponding to one of the extensions.
Parameters
- filename: input file name
- extensions: the list of expected extension
Returns
- a file name of the first existing file with an expected extension or None
View Source
@staticmethod
def _get_filename(filename, extensions):
    """Return a filename corresponding to one of the extensions.

    The extension of the given filename is replaced by each of the
    expected ones, in the given order, and the resulting name is searched
    with sppasFileUtils().exists(). The search stops at the first result
    which is an existing file.

    :param filename: input file name
    :param extensions: the list of expected extension
    :returns: a file name of the first existing file with an expected
    extension or None

    """
    root = os.path.splitext(filename)[0]
    for extension in extensions:
        found = sppasFileUtils(root + extension).exists()
        if found is not None and os.path.isfile(found):
            return found
    return None
Protected functions
__load
Fix members from a configuration file.
Parameters
- filename: (str) Name of the configuration file (json) The filename must NOT contain the path. This file must be in paths.etc
View Source
def __load(self, filename):
    """Fix the members from a configuration file.

    Each entry of the 'options' list of the file is turned into a
    sppasOption() in order to assign its value to the options of the
    annotation. The name and the types of the annotation are also read
    from the file. If 'name' is missing, the name of the class is used.
    If 'anntype' is missing, the first of annots.types is used.

    :param filename: (str) Name of the configuration file (json)
    The filename must NOT contain the path. This file must be in
    paths.etc
    :raises: IOError if the configuration file does not exist

    """
    cfg_path = os.path.join(paths.etc, filename)
    if not os.path.exists(cfg_path):
        raise IOError('Installation error: the file {:s} to configure the automatic annotations does not exist.'.format(cfg_path))

    # Read the whole content of the JSON file
    with open(cfg_path) as fd:
        content = json.load(fd)

    # The value goes through a sppasOption(): it is given as a string and
    # the option returns it back with get_value().
    for entry in content['options']:
        option = sppasOption(entry['id'])
        option.set_type(entry['type'])
        option.set_value(str(entry['value']))
        self._options[option.get_key()] = option.get_value()

    self.name = content.get('name', self.__class__.__name__)
    self.__types = content.get('anntype', [annots.types[0]])