# Source code for wkps.wio.wannotationpro
# -*- coding: utf-8 -*-
"""
:filename: sppas.src.wkps.wio.wannotationpro.py
:author: Laurent Vouriot, Brigitte Bigi
:contact: develop@sppas.org
:summary: A reader/writer of antw workspace file format.
.. _This file is part of SPPAS: http://www.sppas.org/
..
-------------------------------------------------------------------------
___ __ __ __ ___
/ | \ | \ | \ / the automatic
\__ |__/ |__/ |___| \__ annotation and
\ | | | | \ analysis
___/ | | | | ___/ of speech
Copyright (C) 2011-2021 Brigitte Bigi
Laboratoire Parole et Langage, Aix-en-Provence, France
Use of this software is governed by the GNU Public License, version 3.
SPPAS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SPPAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SPPAS. If not, see <http://www.gnu.org/licenses/>.
This banner notice must not be removed.
-------------------------------------------------------------------------
An AnnotationPro workspace (antw) is an xml file
Here is an example of a very simple antw file
<?xml version="1.0" standalone="yes"?>
<WorkspaceDataSet xmlns="http://tempuri.org/WorkspaceDataSet.xsd">
<WorkspaceItem>
<Id>b1b36c56-652c-4390-81ce-8eabd4b0260f</Id>
<IdGroup>00000000-0000-0000-0000-000000000000</IdGroup>
<Name>annprows.ant</Name>
<OpenCount>0</OpenCount>
<EditCount>4</EditCount>
<ListenCount>5</ListenCount>
<Accepted>false</Accepted>
</WorkspaceItem>
</WorkspaceDataSet>
Each <WorkspaceItem /> corresponds to a file:
<Name /> is the name of that file;
each file of the workspace has an id <Id />,
a group id <IdGroup />,
and some information about the editing of the file:
how many times it has been opened <OpenCount />,
how many edits were made (adding a segment, a layer, meta...;
each typed character counts as an edit) <EditCount />,
how many times it has been listened to (played) <ListenCount />.
The meaning of <Accepted /> is unknown.
"""
import os
import sppas
import xml.etree.cElementTree as ET
from sppas.src.config import sg
from sppas.src.wkps.wio.basewkpio import sppasBaseWkpIO
from sppas.src.wkps.wkpexc import FileOSError
# ----------------------------------------------------------------------------
class sppasWANT(sppasBaseWkpIO):
    """Reader and writer to import/export a workspace from/to annotationpro.

    An antw file is an XML document: its root element contains one
    ``WorkspaceItem`` element per file, and each item holds the child
    elements listed in ``_ITEM_DEFAULTS``.

    The methods ``add_file``, ``get_object`` and ``get_paths`` used below are
    not defined in this class; they are expected to be provided by the base
    class.

    """

    # Namespace declared on the root element of the files written here.
    _NAMESPACE = "http://tempuri.org/WorkspaceDataSet.xsd"

    # Child tags of a WorkspaceItem, in the order they are written, with the
    # text used when no value was stored for the file. The default of "Name"
    # is None because it is derived from the identifier of the file instead.
    # NOTE(review): the example antw file shows GUID-like values for Id and
    # IdGroup; confirm that AnnotationPro accepts the "0" placeholders.
    _ITEM_DEFAULTS = (
        ("Id", "0"),
        ("IdGroup", "0"),
        ("Name", None),
        ("OpenCount", "0"),
        ("EditCount", "0"),
        ("ListenCount", "0"),
        ("Accepted", "false"),
    )

    def __init__(self, name=None):
        """Initialize a sppasWANT instance.

        :param name: (str) The name of the workspace. If None, the name of
            the class is used.

        """
        if name is None:
            name = self.__class__.__name__
        super(sppasWANT, self).__init__(name)

        self.default_extension = "antw"
        self.software = "Annotation Pro"

    # -------------------------------------------------------------------------

    @staticmethod
    def detect(filename):
        """Check whether a file is of antw format or not.

        Only the second line of the file is examined: the first one is
        skipped and the second one must contain "WorkspaceDataSet".

        :param filename: (str) Name of the file to detect
        :returns: (bool) False if the file can't be read or decoded

        """
        try:
            # The context manager closes the file, even if reading fails.
            with open(filename, 'r') as f:
                f.readline()
                doctype_line = f.readline().strip()
        except (IOError, UnicodeDecodeError):
            return False

        return "WorkspaceDataSet" in doctype_line

    # -----------------------------------------------------------------------

    @staticmethod
    def indent(elem, level=0):
        """Pretty indent of an ElementTree.

        The text and tail of the elements are modified in-place, and only
        when they are empty or made of whitespace.

        http://effbot.org/zone/element-lib.htm#prettyprint

        :param elem: (ElementTree.Element) Element to indent, with its
            descendants
        :param level: (int) Depth of elem; one tabulation is used per level

        """
        i = "\n" + level * "\t"

        if len(elem) > 0:
            if not elem.text or not elem.text.strip():
                elem.text = i + "\t"
            if not elem.tail or not elem.tail.strip():
                # An extra newline follows the elements of the first 2 levels
                if level < 2:
                    elem.tail = "\n" + i
                else:
                    elem.tail = i

            for child in elem:
                sppasWANT.indent(child, level + 1)

            # The closing tag of elem follows its last child: bring the
            # tail of this child back to the indentation of elem.
            last_child = elem[-1]
            if not last_child.tail or not last_child.tail.strip():
                last_child.tail = i

        elif level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i

    # -------------------------------------------------------------------------

    def read(self, filename):
        """Read a antw file and fill the sppasWANT instance.

        Errors raised while parsing the XML content are not caught.

        :param filename: (str)
        :raises: FileOSError if filename is not an existing file

        """
        if os.path.isfile(filename) is False:
            raise FileOSError(filename)

        # TODO RAISE PROPER EXCEPTIONS
        tree = ET.parse(filename)
        root = tree.getroot()

        # uri looks like this "{http://tempuri.org/WorkspaceDataSet.xsd}".
        # It is empty if the root element does not declare a namespace.
        if '}' in root.tag:
            uri = root.tag[:root.tag.index('}') + 1]
        else:
            uri = ""

        # parsing each file contained in the antw
        for workspace_item in tree.iter(tag=uri + "WorkspaceItem"):
            self._parse(workspace_item, uri)

    # -------------------------------------------------------------------------

    def write(self, filename):
        """Write the workspace into an antw file.

        One WorkspaceItem element is written for each file name found in
        the paths of the workspace.

        :param filename: (str) Name of the xml file to write

        """
        root = ET.Element("WorkspaceDataSet")
        root.set("xmlns", sppasWANT._NAMESPACE)
        uri = "{" + sppasWANT._NAMESPACE + "}"

        # serializing the elements saved in subjoined in the FileName instance
        for fp in self.get_paths():
            for fr in fp:
                for fn in fr:
                    workspace_item = ET.SubElement(root, "WorkspaceItem")
                    self._serialize(fn, workspace_item, uri)

        sppasWANT.indent(root)
        tree = ET.ElementTree(root)
        tree.write(filename,
                   encoding=sg.__encoding__,
                   xml_declaration=True,
                   method="xml")

    # -------------------------------------------------------------------------

    @staticmethod
    def _serialize(fn, workspace_item, uri=""):
        """Convert a FileName into a serializable structure.

        A child element is appended to workspace_item for each tag of
        ``_ITEM_DEFAULTS``. Its text is the value stored in the subjoined
        member of fn, either under the tag prefixed by uri or under the
        tag alone. If there's no such value, a default text is written.

        :param fn: (FileName) FileName we want to serialize
        :param workspace_item: (ElementTree.Element) Element in which we are
            going to serialize the data
        :param uri: (str) Namespace prefix of the keys of subjoined, like
            "{http://tempuri.org/WorkspaceDataSet.xsd}"
        :returns: (Element) a tree element that can be serialized

        """
        # If the file was added from sppas, subjoined holds no data coming
        # from AnnotationPro: each entry is then filled with its default.
        sub = fn.subjoined
        if isinstance(sub, dict) is False:
            sub = dict()

        for tag, default in sppasWANT._ITEM_DEFAULTS:
            child = ET.SubElement(workspace_item, tag)
            if (uri + tag) in sub:
                child.text = sub[uri + tag]
            elif tag in sub:
                # the file that was read did not declare a namespace
                child.text = sub[tag]
            elif tag == "Name":
                child.text = os.path.basename(fn.get_id())
            else:
                child.text = default

        return workspace_item

    # -------------------------------------------------------------------------

    def _parse(self, tree, uri=""):
        """Fill the data of a sppasWANT reader with a tree.

        The file is added to the workspace and the text of each child
        element found in the tree is stored in its subjoined member.

        :param tree: (ElementTree.Element) WorkspaceItem element to parse
        :param uri: (str) Namespace prefix of the tags
        :returns: (FileName)
        :raises: ValueError if the item has no name

        """
        # as the antw file contains only the filename + ext and not the path
        # all the files that are going to be parsed must be contained in the
        # workspace folder otherwise we can't locate it on the computer
        # the name contained in the .antw file is the filename + ext
        name = tree.find(uri + "Name")
        if name is None or name.text is None:
            raise ValueError("Invalid antw file: a WorkspaceItem element "
                             "has no Name.")

        filepath = os.path.abspath(os.path.join(sppas.paths.wkps, name.text))
        self.add_file(filepath)

        # getting the filename object that we added
        fn = self.get_object(filepath)

        # adding the information contained in the tree in a dictionary
        # using their tag as the key
        # a tag looks like this : {http://tempuri.org/WorkspaceDataSet.xsd}Id
        sub = dict()
        for tag, _ in sppasWANT._ITEM_DEFAULTS:
            node = tree.find(uri + tag)
            # an element missing in the file is simply not stored
            if node is not None:
                sub[node.tag] = node.text

        # we add the information of the filename in the subjoined member
        fn.subjoined = sub

        return fn