Source code for anndata.aio.anvil

# -*- coding: UTF-8 -*-
"""
:filename: sppas.src.anndata.aio.anvil.py
:author:   Brigitte Bigi
:contact:  develop@sppas.org
:summary:  Input/Output of SPPAS native file formats (.xra, .jra).

.. _This file is part of SPPAS: http://www.sppas.org/
..
    -------------------------------------------------------------------------

     ___   __    __    __    ___
    /     |  \  |  \  |  \  /              the automatic
    \__   |__/  |__/  |___| \__             annotation and
       \  |     |     |   |    \             analysis
    ___/  |     |     |   | ___/              of speech

    Copyright (C) 2011-2021  Brigitte Bigi
    Laboratoire Parole et Langage, Aix-en-Provence, France

    Use of this software is governed by the GNU Public License, version 3.

    SPPAS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    SPPAS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SPPAS. If not, see <http://www.gnu.org/licenses/>.

    This banner notice must not be removed.

    -------------------------------------------------------------------------

ANVIL is a free video annotation tool.

| Kipp, M. (2012)
| Multimedia Annotation, Querying and Analysis in ANVIL.
| In: M. Maybury (ed.) Multimedia Information Extraction,
| Chapter 21, John Wiley & Sons, pp: 351-368.

BE AWARE that the support of anvil files by SPPAS has to be verified,
tested and extended!!!
The last release of ANVIL was in 2017.

"""
import xml.etree.cElementTree as ET

from ..anndataexc import AnnDataTypeError
from ..ann.annlocation import sppasLocation
from ..ann.annlocation import sppasPoint
from ..ann.annlocation import sppasInterval

from .basetrsio import sppasBaseIO
from .aioutils import format_labels

# ---------------------------------------------------------------------------


[docs]class sppasAnvil(sppasBaseIO): """ANVIL (partially) reader. :author: Brigitte Bigi, Jibril Saffi :organization: Laboratoire Parole et Langage, Aix-en-Provence, France :contact: contact@sppas.org :license: GPL, v3 :copyright: Copyright (C) 2011-2018 Brigitte Bigi """
[docs] @staticmethod def detect(filename): """Check whether a file is of ANVIL format or not. :param filename: (str) Name of the file to check. :returns: (bool) """ try: tree = ET.parse(filename) root = tree.getroot() except IOError: return False except UnicodeDecodeError: return False return root.find('body') is not None
# -----------------------------------------------------------------------
[docs] @staticmethod def make_point(midpoint): """The localization is a time value, so always a float.""" try: midpoint = float(midpoint) except ValueError: raise AnnDataTypeError(midpoint, "float") return sppasPoint(midpoint, radius=0.005)
# -----------------------------------------------------------------------
[docs] def __init__(self, name=None): """Initialize a new ANVIL instance. :param name: (str) This transcription name. """ if name is None: name = self.__class__.__name__ super(sppasAnvil, self).__init__(name) self._accept_multi_tiers = True self._accept_no_tiers = True self._accept_metadata = True self._accept_ctrl_vocab = False # to be verified self._accept_media = True # to be verified self._accept_hierarchy = True self._accept_point = False self._accept_interval = True self._accept_disjoint = False self._accept_alt_localization = False self._accept_alt_tag = False self._accept_radius = False self._accept_gaps = True # to be verified self._accept_overlaps = False # to be verified self.default_extension = "anvil" self.software = "Anvil"
# -----------------------------------------------------------------------
[docs] def read(self, filename): """Read an ANVIL file and fill the Transcription. :param filename: (str) """ tree = ET.parse(filename) root = tree.getroot() # FIXME we ought to get the ctrl vocabs in the spec file # there also ought to be a representation of the hiererchy, # but since we have multiple, non aligned tiers, # it's not trivial to implement body_root = root.find('body') self._read_tracks(body_root)
# ----------------------------------------------------------------------- def _read_tracks(self, body_root): for track_root in body_root.findall('track'): if(track_root.attrib['type'] == "primary" or track_root.attrib['type'] == "primarypoint"): self._read_primary_track(track_root) elif track_root.attrib['type'] == "singleton": self._read_singleton_track(track_root, body_root) elif track_root.attrib['type'] == "span": self._read_span_track(track_root, body_root) elif track_root.attrib['type'] == "subdivision": self._read_subdivision_track(track_root, body_root) else: raise Exception('unknown track type') # ----------------------------------------------------------------------- def _read_primary_track(self, track_root): """Read a primary track (primary or primarypoint). :param track_root: """ # Create tiers of the primary track. self.__create_tier_from_attribute(track_root) # Parse elements and create annotations for el_root in track_root.findall('el'): if track_root.attrib['type'] == 'primary': begin = float(el_root.attrib['start']) end = float(el_root.attrib['end']) if begin > end: begin, end = end, begin elif begin == end: continue localization = sppasInterval(sppasAnvil.make_point(begin), sppasAnvil.make_point(end)) elif track_root.attrib['type'] == 'primarypoint': time = float(el_root.attrib['time']) localization = sppasAnvil.make_point(time) else: raise Exception('unknown primary track type') self.__create_annotation_from_el(track_root, el_root, localization) # ----------------------------------------------------------------------- def _read_singleton_track(self, track_root, body_root): # find ref ref_root = body_root.find( "track[@name='%s']" % track_root.attrib['ref']) self.__create_tier_from_attribute(track_root) for el_root in track_root.findall('el'): ref_el = ref_root.find( "el[@index='%s']" % el_root.attrib['ref']) begin = float(ref_el.attrib['start']) end = float(ref_el.attrib['end']) if begin > end: begin, end = end, begin elif begin == end: continue localization = sppasInterval(sppasAnvil.make_point(begin), sppasAnvil.make_point(end)) self.__create_annotation_from_el(track_root, el_root, localization) # ----------------------------------------------------------------------- def _read_span_track(self, track_root, body_root): # find ref ref_root = body_root.find( "track[@name='%s']" % track_root.attrib['ref']) self.__create_tier_from_attribute(track_root) for el_root in track_root.findall('el'): begin_ref = el_root.attrib['start'] end_ref = el_root.attrib['end'] begin_el = ref_root.find( "el[@index='%s']" % begin_ref) end_el = ref_root.find( "el[@index='%s']" % end_ref) begin = float(begin_el.attrib['start']) end = float(end_el.attrib['end']) if begin > end: begin, end = end, begin elif begin == end: continue localization = sppasInterval(sppasAnvil.make_point(begin), sppasAnvil.make_point(end)) self.__create_annotation_from_el(track_root, el_root, localization) # ----------------------------------------------------------------------- def _read_subdivision_track(self, track_root, body_root): # find ref ref_root = body_root.find( "track[@name='%s']" % track_root.attrib['ref']) self.__create_tier_from_attribute(track_root) for el_group_root in track_root.findall('el-group'): ref_el = ref_root.find( "el[@index='%s']" % el_group_root.attrib['ref']) time_slots = list() time_slots.append(float(ref_el.attrib['start'])) for el_root in el_group_root.findall('el'): if 'start' in el_root.attrib: time_slots.append(float(el_root.attrib['start'])) time_slots.append(float(ref_el.attrib['end'])) b = 0 e = 1 for el_root in el_group_root.findall('el'): begin = time_slots[b] b += 1 end = time_slots[e] e += 1 localization = sppasInterval(sppasAnvil.make_point(begin), sppasAnvil.make_point(end)) self.__create_annotation_from_el(track_root, el_root, localization) # ----------------------------------------------------------------------- def __create_tier_from_attribute(self, track_root): """Create a set of tiers from 'attribute' of 'track'. :param track_root: """ for attribute_node in track_root.iter('attribute'): tier_name = sppasAnvil.__fix_tier_name(track_root, attribute_node) if self.find(tier_name) is None: self.create_tier(tier_name) # ----------------------------------------------------------------------- def __create_annotation_from_el(self, track_root, el_root, localization): """Create a set of annotations from 'attribute' of 'el'. :param track_root: :param el_root: :param localization: """ for attribute_node in el_root.findall('attribute'): labels = format_labels(attribute_node.text) tier = self.find(sppasAnvil.__fix_tier_name(track_root, attribute_node)) tier.create_annotation(sppasLocation(localization), labels) # ----------------------------------------------------------------------- @staticmethod def __fix_tier_name(track_root, attribute_node): return track_root.attrib['name'] + \ '.' + \ attribute_node.attrib['name']