Source code for anndata.aio.audacity
# -*- coding: UTF-8 -*-
"""
:filename: sppas.src.anndata.aio.audacity.py
:author: Brigitte Bigi
:contact: develop@sppas.org
:summary: Input/Output of Audacity file formats (.aup).
.. _This file is part of SPPAS: http://www.sppas.org/
..
-------------------------------------------------------------------------
___ __ __ __ ___
/ | \ | \ | \ / the automatic
\__ |__/ |__/ |___| \__ annotation and
\ | | | | \ analysis
___/ | | | | ___/ of speech
Copyright (C) 2011-2021 Brigitte Bigi
Laboratoire Parole et Langage, Aix-en-Provence, France
Use of this software is governed by the GNU Public License, version 3.
SPPAS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SPPAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SPPAS. If not, see <http://www.gnu.org/licenses/>.
This banner notice must not be removed.
-------------------------------------------------------------------------
Audacity is a multi-platform, free, easy-to-use, multi-track audio editor
and recorder. Audacity is free software, developed by a group of
volunteers and distributed under the GNU General Public License (GPL).
See: http://www.audacityteam.org/
"""
import codecs
import xml.etree.cElementTree as ET
from .basetrsio import sppasBaseIO
from ..anndataexc import AnnDataTypeError
from ..ann.annotation import sppasAnnotation
from ..ann.annlocation import sppasLocation
from ..ann.annlocation import sppasPoint
from ..ann.annlocation import sppasInterval
from .aioutils import format_labels
# ---------------------------------------------------------------------------
class sppasAudacity(sppasBaseIO):
    """Audacity projects reader.

    Can work on both Audacity projects and Audacity Label tracks.

    Only the label tracks are turned into tiers; the parsers for the
    project metadata, the tags, the wave tracks and the time tracks are
    placeholders that do nothing.

    """

    @staticmethod
    def detect(filename):
        """Check whether a file is of AUP format or not.

        AUP files are encoded in UTF-8 without BOM. The decision is taken
        from the second line of the file, which is expected to hold the
        DOCTYPE declaration.

        :param filename: (str) Name of the file to check.
        :returns: (bool) True if the second line contains 'audacityproject'.
            False if the file can't be opened or is not valid UTF-8.

        """
        try:
            # The context manager closes the file; no explicit close needed.
            with codecs.open(filename, 'r', "UTF-8") as fp:
                fp.readline()
                doctype_line = fp.readline().strip()
        except (IOError, UnicodeDecodeError):
            return False

        return 'audacityproject' in doctype_line

    # -----------------------------------------------------------------------

    @staticmethod
    def make_point(midpoint):
        """Create a point from a time value.

        The localization is a time value, so a float.

        :param midpoint: (str, float) The time value of the point.
        :returns: (sppasPoint) A point created with a radius of 0.0005.
        :raises: AnnDataTypeError if midpoint can't be converted to float.

        """
        try:
            midpoint = float(midpoint)
        except (ValueError, TypeError):
            # TypeError covers non-string/non-number values such as None.
            raise AnnDataTypeError(midpoint, "float")

        return sppasPoint(midpoint, radius=0.0005)

    # -----------------------------------------------------------------------

    def __init__(self, name=None):
        """Initialize a new sppasAudacity instance.

        :param name: (str) This transcription name. If None, the name of
            the class is used.

        """
        if name is None:
            name = self.__class__.__name__
        super(sppasAudacity, self).__init__(name)

        self.default_extension = "aup"
        self.software = "Audacity"

        # What this file format accepts.
        # NOTE(review): presumably read by sppasBaseIO - confirm.
        self._accept_multi_tiers = True
        self._accept_no_tiers = True
        self._accept_metadata = True
        self._accept_ctrl_vocab = False
        self._accept_media = True
        self._accept_hierarchy = False
        self._accept_point = True
        self._accept_interval = True
        self._accept_disjoint = False
        self._accept_alt_localization = False
        self._accept_alt_tag = False
        self._accept_radius = False
        self._accept_gaps = True
        self._accept_overlaps = True

    # -----------------------------------------------------------------------

    def read(self, filename):
        """Read an AUP file and fill the Transcription.

        <!ELEMENT project (tags, (wavetrack | labeltrack | timetrack)*)>

        All the label tracks are parsed first, then all the wave tracks
        and finally all the time tracks.

        :param filename: (str) Name of the AUP file to parse.

        """
        tree = ET.parse(filename)
        root = tree.getroot()

        # Get metadata for self
        self._parse_metadata(root)

        # Tags
        self._parse_tags(root.find('tags'))

        # The tiers are stored in labeltrack elements, the audio files in
        # wavetrack elements. One full pass over the tree per kind of
        # track keeps the parsing order: labels, then waves, then times.
        # Element.iter() replaces getiterator(), removed in Python 3.9.
        handlers = (
            ("labeltrack", self._parse_labeltrack),
            ("wavetrack", self._parse_wavetrack),
            ("timetrack", self._parse_timetrack),
        )
        for track_name, parse_track in handlers:
            for node in root.iter():
                if sppasAudacity.normalize(node.tag) == track_name:
                    parse_track(node)

    # -----------------------------------------------------------------------

    @staticmethod
    def normalize(name):
        """Remove the namespace of an element name.

        Example of elements with a namespace:
        <Element '{http://audacity.sourceforge.net/xml/}simpleblockfile' at 0x03270230>
        <Element '{http://audacity.sourceforge.net/xml/}envelope' at 0x032702C0>
        <Element '{http://audacity.sourceforge.net/xml/}labeltrack' at 0x03270C50>
        <Element '{http://audacity.sourceforge.net/xml/}label' at 0x032701E8>

        See: http://effbot.org/zone/element-namespaces.htm

        :param name: (str) Tag of an element, like '{uri}tag' or 'tag'.
        :returns: (str) The tag without its '{uri}' prefix. The name is
            returned unchanged if it is empty or has no such prefix.

        """
        if name.startswith("{"):
            # Split on the first closing brace only.
            uri, closing, tag = name[1:].partition("}")
            if closing:
                return tag
        return name

    # -----------------------------------------------------------------------

    def _parse_metadata(self, root):
        """Not implemented.

        <!ATTLIST project projname CDATA #REQUIRED>
        <!ATTLIST project version CDATA #REQUIRED>
        <!ATTLIST project audacityversion CDATA #REQUIRED>
        <!ATTLIST project sel0 CDATA #REQUIRED>
        <!ATTLIST project sel1 CDATA #REQUIRED>
        <!ATTLIST project vpos CDATA #REQUIRED>
        <!ATTLIST project h CDATA #REQUIRED>
        <!ATTLIST project zoom CDATA #REQUIRED>
        <!ATTLIST project rate CDATA #REQUIRED>

        :param root: (ET) Main XML Element tree root of an AUP file.
        :returns: None

        """
        pass

    # -----------------------------------------------------------------------

    def _parse_tags(self, tags_root):
        """Not implemented.

        <!ELEMENT tags EMPTY>
        <!ATTLIST tags title CDATA #REQUIRED>
        <!ATTLIST tags artist CDATA #REQUIRED>
        <!ATTLIST tags album CDATA #REQUIRED>
        <!ATTLIST tags track CDATA #REQUIRED>
        <!ATTLIST tags year CDATA #REQUIRED>
        <!ATTLIST tags genre CDATA #REQUIRED>
        <!ATTLIST tags comments CDATA #REQUIRED>
        <!ATTLIST tags id3v2 (0|1) #REQUIRED>

        but ... the DTD does not match what is observed in files.

        :param tags_root: XML Element tree root for the tags. It is the
            result of root.find('tags'), so it can be None.

        """
        pass

    # -----------------------------------------------------------------------

    def _parse_labeltrack(self, tier_root):
        """Create the tier(s) of a label track.

        The DTD:
        <!ELEMENT labeltrack (label*)>
        <!ATTLIST labeltrack name CDATA #REQUIRED>
        <!ATTLIST labeltrack numlabels CDATA #REQUIRED>

        but an example:
        <labeltrack name="Piste de marqueurs" numlabels="3" height="73" minimized="0" isSelected="0">

        <!ELEMENT label EMPTY>
        <!ATTLIST label t CDATA #REQUIRED>
        <!ATTLIST label t1 CDATA #REQUIRED>
        <!ATTLIST label title CDATA #REQUIRED>

        A label with t equal to t1 is a point, any other one is an
        interval. If the track contains both, two tiers are created:
        '<name>-intervals' and '<name>-points'.

        :param tier_root: XML Element tree root for a label track.
        :raises: KeyError if a 'name', 't', 't1' or 'title' attribute is
            missing.
        :raises: AnnDataTypeError if 't' or 't1' is not a float.

        """
        track_name = tier_root.attrib['name']
        tier = self.create_tier(track_name)

        # Attributes are stored as metadata
        if 'height' in tier_root.attrib:
            tier.set_meta("tier_height", tier_root.attrib["height"])

        # Any value other than "0" is stored as "true".
        if 'minimized' in tier_root.attrib:
            is_closed = tier_root.attrib['minimized'] != "0"
            tier.set_meta("tier_is_closed", "true" if is_closed else "false")

        if 'isSelected' in tier_root.attrib:
            is_selected = tier_root.attrib['isSelected'] != "0"
            tier.set_meta("tier_is_selected",
                          "true" if is_selected else "false")

        # Annotations are labels.
        # Attention: Audacity accepts mixed localizations.
        point_anns = list()
        interval_anns = list()
        for node in tier_root.iter():
            if sppasAudacity.normalize(node.tag) != "label":
                continue

            # get annotation information
            labels = format_labels(node.attrib['title'])
            begin = sppasAudacity.make_point(node.attrib['t'])
            end = sppasAudacity.make_point(node.attrib['t1'])

            # create the annotation
            if begin == end:
                new_a = sppasAnnotation(sppasLocation(begin), labels)
                point_anns.append(new_a)
            else:
                new_a = sppasAnnotation(
                    sppasLocation(sppasInterval(begin, end)), labels)
                interval_anns.append(new_a)

        # Fill the tier(s) with the annotations
        if point_anns and interval_anns:
            # Mixed localizations: the copy is made while the tier is
            # still empty, then each tier gets only one kind.
            point_tier = tier.copy()
            point_tier.set_name(track_name + "-points")
            self.append(point_tier)
            tier.set_name(track_name + "-intervals")
            sppasAudacity.__fill_tier(tier, interval_anns)
            sppasAudacity.__fill_tier(point_tier, point_anns)

        elif point_anns:
            sppasAudacity.__fill_tier(tier, point_anns)

        elif interval_anns:
            sppasAudacity.__fill_tier(tier, interval_anns)

    # -----------------------------------------------------------------------

    def _parse_wavetrack(self, wave_root):
        """Not implemented.

        <!ELEMENT wavetrack (waveclip*)>

        :param wave_root: XML Element tree root for a wave track.

        """
        pass

    # -----------------------------------------------------------------------

    def _parse_timetrack(self, time_root):
        """Not implemented.

        <!ELEMENT timetrack (envelope)>
        <!ATTLIST timetrack name CDATA #REQUIRED>
        <!ATTLIST timetrack channel CDATA #REQUIRED>
        <!ATTLIST timetrack offset CDATA #REQUIRED>

        :param time_root: XML Element tree root for a time track.

        """
        pass

    # -----------------------------------------------------------------------
    # Private
    # -----------------------------------------------------------------------

    @staticmethod
    def __fill_tier(tier, annotations):
        """Add each of the given annotations to the tier, in order.

        :param tier: Tier to fill, as returned by create_tier().
        :param annotations: (list) Annotations to add.

        """
        for ann in annotations:
            tier.add(ann)