SPPAS 4.22

https://sppas.org/

Module sppas.src.annotations

Class sppasBaseAnnotation

Description

Base class for any automatic annotation integrated into SPPAS.

Constructor

Base class for any SPPAS automatic annotation.

Load default options/member values from a configuration file. This file must be in paths.etc

Log is used for a better communication of the annotation process and its results. If None, logs are redirected to the default logging system.

Parameters
  • config: (str) Name of the JSON configuration file, without path.
  • log: (sppasLog) Human-readable logs.
View Source
def __init__(self, config, log=None):
    """Create the base of a SPPAS automatic annotation.

    The default options and member values are loaded from a configuration
    file, which must be in paths.etc.

    The log is the report used to communicate on the annotation process
    and its results. If None, a sppasAnnReport() is created and used.

    :param config: (str) Name of the JSON configuration file, without path.
    :param log: (sppasLog) Human-readable logs.

    """
    # Fall back on a default report when no log is given
    self.logfile = sppasAnnReport() if log is None else log

    # Members that are overridden when the configuration file is loaded
    self.__types = list()
    self._options = dict()
    self.name = self.__class__.__name__

    # Extension of the created files, for each output format
    self._out_extensions = dict()
    self.set_default_out_extensions()

    self.__load(config)

Public functions

set_default_out_extensions

Set the output extension of each format to its default value.

The default extension of each format is defined in the config.

View Source
def set_default_out_extensions(self):
    """Reset the output extension of each format to its default value.

    The formats are the ones of sppasFiles.OUT_FORMATS and the default
    extension of each of them is read in sppasFiles.DEFAULT_EXTENSIONS.

    """
    self._out_extensions = dict()
    self._out_extensions.update(
        (fmt, sppasFiles.DEFAULT_EXTENSIONS[fmt])
        for fmt in sppasFiles.OUT_FORMATS
    )

set_out_extension

Set the extension for a specific out format.

Parameters
  • extension: (str) File extension for created files
  • out_format: (str) One of ANNOT, IMAGE, VIDEO
View Source
def set_out_extension(self, extension, out_format='ANNOT'):
    """Fix the extension of the files created for a given out format.

    The leading dot is added to the extension if it is missing. The
    extension is checked against the ones known for the format, except
    if no extension is known for this format.

    :param extension: (str) File extension for created files
    :param out_format: (str) One of ANNOT, IMAGE, VIDEO
    :raises: sppasExtensionWriteError if the extension is not one of the
    extensions known for the given format

    """
    known_extensions = sppasFiles.get_outformat_extensions(out_format)

    if not extension.startswith('.'):
        extension = '.' + extension

    # An empty list of known extensions disables the verification
    if len(known_extensions) > 0 and extension not in known_extensions:
        logging.error('Extension {} is not in the {} list.'.format(extension, out_format))
        raise sppasExtensionWriteError(extension)

    self._out_extensions[out_format] = extension

fix_out_file_ext

Return the output with an appropriate file extension.

If the output already has an extension, it is not changed.

Parameters
  • output: (str) Base name or filename
  • out_format: (str) One of ANNOT, IMAGE, VIDEO
Returns
  • (str) filename
View Source
def fix_out_file_ext(self, output, out_format='ANNOT'):
    """Return the output name, completed with a file extension if needed.

    An output which already has an extension is returned unchanged.
    Otherwise, the extension currently set for the given out format is
    appended. If the resulting file already exists and a log is
    available, the message 1300 is printed in the log as a warning.

    :param output: (str) Base name or filename
    :param out_format: (str) One of ANNOT, IMAGE, VIDEO
    :return: (str) filename

    """
    current_ext = os.path.splitext(output)[1]
    if not current_ext:
        output += self._out_extensions[out_format]

    already_there = os.path.exists(output)
    if already_there and self.logfile is not None:
        msg = info(1300, 'annotations').format(output)
        self.logfile.print_message(msg, indent=2, status=annots.warning)

    return output

get_output_pattern

Pattern that the annotation uses for its output filename.

View Source
def get_output_pattern(self):
    """Return the pattern the annotation uses for its output filename.

    :return: (str) Value of the 'outputpattern' option, or an empty
    string if this option is not defined.

    """
    if 'outputpattern' in self._options:
        return self._options['outputpattern']
    return ''

get_input_patterns

List of patterns that the annotation expects for the input filenames.

Returns
  • (list of str)
View Source
def get_input_patterns(self):
    """Return the patterns the annotation expects for its input filenames.

    The patterns are the values of the options whose key starts with
    'inputpattern', ranked in the sorted order of these keys. A list
    with an empty string is returned if there is no such option.

    :return: (list of str)

    """
    patterns = [
        self._options[name]
        for name in sorted(self._options)
        if name.startswith('inputpattern')
    ]
    return patterns if patterns else ['']

get_input_extensions

Extensions that the annotation expects for its input filename.

By default, the extensions are the annotated files. Can be overridden to change the list of supported extensions: they must contain the dot.

Returns
  • (list of list)
View Source
@staticmethod
def get_input_extensions():
    """Return the extensions the annotation expects for its input filename.

    By default, there is a single input and its extensions are the ones
    sppasFiles gives for the 'ANNOT_ANNOT' input format. Can be overridden
    to change the list of supported extensions: they must contain the dot.

    :return: (list of list)

    """
    annot_extensions = sppasFiles.get_informat_extensions('ANNOT_ANNOT')
    return [annot_extensions]

get_opt_input_extensions

Extensions that the annotation expects for its optional input filename.

View Source
@staticmethod
def get_opt_input_extensions():
    """Return the extensions expected for the optional input filename.

    There is none in this base class: an empty tuple is returned.

    """
    return tuple()

get_out_name

Return the output filename from the input one.

Output filename is created from the given filename, the annotation output pattern and the given output format (if any).

Parameters
  • filename: (str) Name of the input file
  • output_format: (str) Extension of the output file with the dot
Returns
  • (str)
View Source
def get_out_name(self, filename, output_format=''):
    """Return the output filename from the input one.

        Output filename is created from the given filename, the annotation
        output pattern and the given output format (if any).

        :param filename: (str) Name of the input file
        :param output_format: (str) Extension of the output file with the dot
        :returns: (str)

        """
    fn, _ = os.path.splitext(filename)
    for r in self.get_input_patterns():
        if len(r) > 0 and fn.endswith(r):
            fn = fn[:-len(r)]
    return fn + self.get_output_pattern() + output_format

get_out_name_old

Return the output filename from the input one.

Output filename is created from the given filename, the annotation output pattern and the given output format (if any).

Parameters
  • filename: (str) Name of the input file
  • output_format: (str) Extension of the output file with the dot
Returns
  • (str)
View Source
def get_out_name_old(self, filename, output_format=''):
    """Return the output filename deduced from the input one.

    The extension of the given filename is removed, then the first input
    pattern of the annotation is removed if the name ends with it. The
    other input patterns are ignored. The output pattern and the given
    output format (if any) are finally appended.

    :param filename: (str) Name of the input file
    :param output_format: (str) Extension of the output file with the dot
    :returns: (str)

    """
    base = os.path.splitext(filename)[0]

    # Only the first of the input patterns is considered
    first_pattern = self.get_input_patterns()[0]
    has_pattern = len(first_pattern) > 0 and base.endswith(first_pattern)
    if has_pattern:
        base = base[:-len(first_pattern)]

    return base + self.get_output_pattern() + output_format

get_option

Return the option value of a given key or raise KeyError.

Parameters
  • key: (str) Identifier of the option whose value is returned.
Raises

KeyError

View Source
def get_option(self, key):
    """Return the option value of a given key or raise KeyError.

    :param key: (str) Identifier of the option
    :return: The value stored for this option
    :raises: KeyError if the key is not one of the options

    """
    if key in self._options:
        return self._options[key]
    # The '!s' conversion turns the key into a string before formatting:
    # a key which is not a string then still leads to the documented
    # KeyError, not to a formatting error.
    raise KeyError('{!s:s} is not a valid option for the automatic annotation.'.format(key))

fix_options

Fix all options of the annotation from a list of sppasOption().

Parameters
  • options: (list of sppasOption)
View Source
def fix_options(self, options):
    """Fix the options of the annotation from a list of sppasOption().

    This base class only accepts the options whose key contains
    'pattern'. The options are set in the given order, so the ones
    preceding an invalid option are already set when the error is raised.

    :param options: (list of sppasOption)
    :raises: AnnotationOptionError if the key of an option does not
    contain 'pattern'

    """
    for option in options:
        name = option.get_key()
        if 'pattern' not in name:
            raise AnnotationOptionError(name)
        self._options[name] = option.get_value()

get_types

Return the list of types this annotation can perform.

If this annotation is expecting another file, the type allows finding it by using the references of the workspace (if any).

View Source
def get_types(self):
    """Return the list of types this annotation can perform.

    If this annotation is expecting another file, the type allows
    finding it by using the references of the workspace (if any).

    The list which is returned is the member itself, not a copy.

    """
    annotation_types = self.__types
    return annotation_types

load_resources

Load the linguistic resources.

View Source
def load_resources(self, *args, **kwargs):
    """Load the linguistic resources.

    Nothing is loaded by this base class: any argument is accepted and
    ignored.

    """
    return None

run

Run the automatic annotation process on a given input.

The input is a list of files the annotation needs: audio, video, transcription, pitch, etc.

Either returns the list of created files if the given output is not none, or the created object (often a sppasTranscription) if no output was given.

Parameters
  • input_files: (list of str) The required and optional input(s)
  • output: (str) The output name with or without extension
Returns
  • (sppasTranscription OR list of created file names)
View Source
def run(self, input_files, output=None):
    """Run the automatic annotation process on a given input.

    The input is the list of the files the annotation needs: audio,
    video, transcription, pitch, etc.

    The list of created files is returned if an output is given, and
    the created object (often a sppasTranscription) is returned if not.

    Not implemented in this base class.

    :param input_files: (list of str) The required and optional input(s)
    :param output: (str) The output name with or without extension
    :returns: (sppasTranscription OR list of created file names)
    :raises: NotImplementedError

    """
    raise NotImplementedError()

run_for_batch_processing

Perform the annotation on a file.

This method is called by 'batch_processing'. It fixes the name of the output file, and calls the run method.

Parameters
  • input_files: (list of str) the required input(s) for a run
Returns
  • the value returned by the run method (the list of created file names), or an empty list if there is no input or if the run failed
View Source
def run_for_batch_processing(self, input_files):
    """Perform the annotation on a file.

    This method is called by 'batch_processing'. It fixes the name of the
    output file from the first input, and calls the run method. The
    options are restored to their values of before the run.

    An exception raised by the run is not propagated: its message is
    printed in the log with the error status.

    :param input_files: (list of str) the required input(s) for a run
    :returns: the value returned by run(), or an empty list if there is
    no input or if the run raised an exception

    """
    if len(input_files) == 0:
        return list()

    # Keep a copy of the options, to restore them after the run
    saved_options = self._options.copy()

    # The output name is deduced from the first (required) input file
    first = input_files[0]
    if isinstance(first, (list, tuple)):
        first = first[0]
    out_name = self.get_out_name(first)

    try:
        created = self.run(input_files, out_name)
    except Exception as e:
        created = list()
        self.logfile.print_message('{:s}\n'.format(str(e)), indent=2, status=annots.error)

    self._options = saved_options
    return created

batch_processing

Perform the annotation on a bunch of files.

Can be used by an annotation manager to launch all the annotations on all checked files of a workspace in a single process.

The given list of inputs can then be either: - a list of file names: [file1, file2, ...], or - a list of lists of file names: [(file1a, file1b), (file2_a,)], or - a list of mixed files/list of files: [file1, (file2a, file2b), ...].

Parameters
  • file_names: (list) List of inputs
  • progress: ProcessProgressTerminal() or ProcessProgressDialog()
Returns
  • (list of str) List of created files
View Source
def batch_processing(self, file_names, progress=None):
    """Perform the annotation on a bunch of files.

    Can be used by an annotation manager to launch all the annotations on
    all checked files of a workspace in a single process.

    The given list of inputs can be either:
        - a list of file names: [file1, file2, ...], or
        - a list of lists of file names: [(file1_a, file1_b), (file2_a,)], or
        - a list of mixed files/list of files: [file1, (file2_a, file2_b), ...].

    An entry for which the inputs can't be fixed is logged as critical
    and ignored. For the other ones, the diagnosis of the inputs is
    printed, the annotation is run and its result is reported in the log.

    :param file_names: (list) List of inputs
    :param progress: ProcessProgressTerminal() or ProcessProgressDialog()
    :return: (list of str) List of created files

    """
    if len(self._options) > 0:
        self.print_options()

    nb_entries = len(file_names)
    if nb_entries == 0:
        return list()

    created_files = list()
    if progress:
        progress.update(0, '')

    for index, entry in enumerate(file_names):
        try:
            inputs = self._fix_inputs(entry)
        except Exception as e:
            logging.critical(e)
        else:
            self.print_diagnosis(*inputs)
            if progress:
                progress.set_fraction(round(float(index) / float(nb_entries), 2))
                progress.set_text('{!s:s}'.format(*inputs))

            out_names = self.run_for_batch_processing(inputs)
            nothing_created = out_names is None or (
                isinstance(out_names, (list, tuple)) and len(out_names) == 0)
            if not nothing_created:
                created_files.extend(name for name in out_names if name is not None)
                # Only the first created file is reported in the log
                self.logfile.print_message(out_names[0], indent=1, status=annots.ok)
            else:
                self.logfile.print_message(info(1306, 'annotations'), indent=1, status=annots.info)

        # A newline ends the report of each entry, even of an invalid one
        self.logfile.print_newline()

    if progress:
        progress.update(1, info(9000, 'ui').format(len(created_files), nb_entries))

    return created_files

transfer_metadata

Transfer the metadata from a sppasTranscription to another one.

The identifier is not copied and any already existing metadata is not copied.

Parameters
  • from_trs
  • to_trs
View Source
@staticmethod
def transfer_metadata(from_trs, to_trs):
    """Copy into a sppasTranscription the metadata it misses from another one.

    A metadata of from_trs is set to to_trs only if to_trs returns None
    for its key. The metadata to_trs already has are then never
    overwritten, which is how its identifier (if any) is preserved.

    :param from_trs: (sppasTranscription) The source of the metadata
    :param to_trs: (sppasTranscription) The one to complete

    """
    for meta_key in from_trs.get_meta_keys():
        if to_trs.get_meta(meta_key, default=None) is not None:
            continue
        to_trs.set_meta(meta_key, from_trs.get_meta(meta_key))

print_filename

Print the annotation name applied on a filename in the user log.

Parameters
  • filename: (str) Name of the file to annotate.
View Source
def print_filename(self, filename):
    """Print the annotation name applied on a filename in the user log.

    The message 1056 is formatted with the base name of the file and
    printed in the log. If there is no log, it is formatted with the
    given filename and sent to the logging system at the info level.

    :param filename: (str) Name of the file to annotate.

    """
    if not self.logfile:
        logging.info(info(1056, 'annotations').format(filename))
        return

    base_name = os.path.basename(filename)
    self.logfile.print_message(info(1056, 'annotations').format(base_name), indent=0, status=None)

print_options

Print the list of options in the user log.

View Source
def print_options(self):
    """Print the list of options in the user log.

    The message 1050 is printed first, followed by one line for each
    option with its key and its value, then by a newline.

    """
    header = info(1050, 'annotations') + ': '
    self.logfile.print_message(header, indent=0, status=None)

    for name, value in self._options.items():
        line = ' ... {!s:s}: {!s:s}'.format(name, value)
        self.logfile.print_message(line, indent=0, status=None)

    self.logfile.print_newline()

print_diagnosis

Print the diagnosis of a list of files in the user report.

Parameters
  • filenames: (list) List of files.
View Source
def print_diagnosis(self, *filenames):
    """Print the diagnosis of a list of files in the user report.

    Each given entry is either a filename, a list or tuple of entries
    which is browsed recursively, or None. Nothing is printed for None
    nor for a file which does not exist.

    :param filenames: (list) List of files.

    """
    for entry in filenames:
        if entry is None:
            continue

        if isinstance(entry, (list, tuple)):
            self.print_diagnosis(*entry)
            continue

        if not os.path.exists(entry):
            continue

        base_name = os.path.basename(entry)
        # Only the message of the diagnosis is reported
        _, message = sppasDiagnosis.check_file(entry)
        text = info(1056, 'annotations').format(base_name) + ': {!s:s}'.format(message)
        self.logfile.print_message(text, indent=0, status=None)

Private functions

_fix_inputs

Return a list of input files.

The given input files can be:

  • a single input: file1
  • several inputs: (file1a, file1b)
Parameters
  • input_files: (str, list of str)
Returns
  • a list of files
View Source
def _fix_inputs(self, input_files):
    """Return the given input file(s) as a list of input files.

    The given input files can be:

        - a single input: file1
        - several inputs: (file1_a, file1_b)

    A list or a tuple is returned as it is, and anything else is
    returned into a list.

    :param input_files: (str, list of str)
    :returns: a list of files
    :raises: Exception if the given input is empty

    """
    if len(input_files) == 0:
        raise Exception(' ******* A non-empty list of input files was expected.')

    if isinstance(input_files, (list, tuple)):
        return input_files
    return [input_files]

_get_filename

Return a filename corresponding to one of the extensions.

Parameters
  • filename: input file name
  • extensions: the list of expected extensions
Returns
  • a file name of the first existing file with an expected extension or None
View Source
@staticmethod
def _get_filename(filename, extensions):
    """Return a filename corresponding to one of the extensions.

    The extension of the given filename is replaced by each of the
    expected ones, in the given order, until sppasFileUtils finds a
    name which is the one of an existing file.

    :param filename: input file name
    :param extensions: the list of expected extensions
    :returns: a file name of the first existing file with an expected extension or None

    """
    root = os.path.splitext(filename)[0]

    for extension in extensions:
        candidate = sppasFileUtils(root + extension).exists()
        if candidate is None:
            continue
        if os.path.isfile(candidate):
            return candidate

    return None

Protected functions

__load

Fix members from a configuration file.

Parameters
  • filename: (str) Name of the configuration file (json) The filename must NOT contain the path. This file must be in paths.etc
View Source
def __load(self, filename):
    """Fix members from a configuration file.

    The options, the name and the types of the annotation are set from
    the content of the file. The name defaults to the name of the class
    and the types default to a list with the first of annots.types.

    :param filename: (str) Name of the configuration file (json).
    The filename must NOT contain the path. This file must be in
    paths.etc
    :raises: IOError if the configuration file does not exist

    """
    config = os.path.join(paths.etc, filename)
    if not os.path.exists(config):
        raise IOError('Installation error: the file {:s} to configure the automatic annotations does not exist.'.format(config))

    # JSON is UTF-8 encoded: the encoding is given explicitly, so that
    # reading the file does not depend on the locale of the platform.
    with open(config, encoding='utf-8') as cfg:
        dict_cfg = json.load(cfg)

    for new_option in dict_cfg['options']:
        # The option is in charge of converting the value to its type
        opt = sppasOption(new_option['id'])
        opt.set_type(new_option['type'])
        opt.set_value(str(new_option['value']))
        self._options[opt.get_key()] = opt.get_value()

    self.name = dict_cfg.get('name', self.__class__.__name__)
    self.__types = dict_cfg.get('anntype', [annots.types[0]])