SPPAS XRA reader and writer.
xra files are the native file format of the GPL tool SPPAS.
SPPAS XRA reader and writer.
xra files are the native file format of the GPL tool SPPAS.
Initialize a new XRA instance.
def __init__(self, name=None):
    """Initialize a new XRA instance.

    Every '_accept_*' flag is set to True.

    :param name: (str) This transcription name. The name of the class
    is used if no name is given.

    """
    effective_name = self.__class__.__name__ if name is None else name
    super(sppasXRA, self).__init__(effective_name)

    # Identification of the file format.
    self.default_extension = 'xra'
    self.software = 'SPPAS4'

    # All the capabilities are enabled, in the same order as before.
    for capability in (
            'multi_tiers', 'no_tiers', 'metadata', 'ctrl_vocab', 'media',
            'hierarchy', 'point', 'interval', 'disjoint',
            'alt_localization', 'alt_tag', 'radius', 'gaps', 'overlaps'):
        setattr(self, '_accept_' + capability, True)

    # Assigned with the attribute syntax (not setattr) so that the
    # private name keeps being mangled by the enclosing class.
    self.__format = '1.5'
Check whether a file is of XRA format or not.
@staticmethod
def detect(filename):
    """Check whether a file is of XRA format or not.

    Only the first 10 lines are inspected: the file is reported as XRA
    as soon as one of them contains the '<Document' marker.

    :param filename: (str) Name of the file to check.
    :returns: (bool) False if the marker is not found, or if the file
    can't be opened or can't be decoded as text.

    """
    try:
        # The context manager closes the file, including on early return.
        with open(filename, 'r') as fp:
            for _ in range(10):
                line = fp.readline()
                if not line:
                    # End of file reached before the 10th line.
                    break
                if '<Document' in line:
                    return True
    except (IOError, UnicodeDecodeError):
        # Unreadable file or non-text content: it is not an XRA file.
        return False
    return False
Read an XRA file and fill the Transcription.
def read(self, filename):
    """Read an XRA file and fill the Transcription.

    The attributes of the root element are read first, then the
    'Metadata', 'Tier', 'Media', 'Hierarchy' and 'Vocabulary' children,
    in this order.

    :param filename: (str) Name of the XRA file to parse.
    :raises: sppasReadError if the XML content can't be parsed.

    """
    try:
        root = ET.parse(filename).getroot()
    except ET.ParseError as e:
        raise sppasReadError(filename, msg=str(e))

    # Attributes of the root element.
    attributes = root.attrib
    if 'name' in attributes:
        self.set_name(attributes['name'])
    for attribute, meta_key in (
            ('version', 'file_created_format_version'),
            ('date', 'file_created_date'),
            ('author', 'file_created_author')):
        if attribute in attributes:
            self.set_meta(meta_key, attributes[attribute])

    # Metadata of the transcription itself.
    metadata_root = root.find('Metadata')
    if metadata_root is not None:
        sppasXRA._parse_metadata(self, metadata_root)

    # Tiers are parsed before the elements that refer to them.
    for tier_root in root.findall('Tier'):
        self._parse_tier(tier_root)

    for media_root in root.findall('Media'):
        self._parse_media(media_root)

    hierarchy_root = root.find('Hierarchy')
    if hierarchy_root is not None:
        self._parse_hierarchy(hierarchy_root)

    for vocabulary_root in root.findall('Vocabulary'):
        self._parse_vocabulary(vocabulary_root)
Parse a 'Label' element and return it.
@staticmethod
def parse_label(label_root):
    """Parse a 'Label' element and return it.

    The 'Tag' children are used to fill the label. The 'Text' children
    are read only if no label was created from a 'Tag' child.

    :param label_root: (ET) XML Element tree root.
    :returns: (sppasLabel) or None if no tag was found.

    """
    label = None
    for child_name in ('Tag', 'Text'):
        for tag_root in label_root.findall(child_name):
            tag, score = sppasXRA._parse_tag(tag_root)
            if label is None:
                # The first tag creates the label...
                label = sppasLabel(tag, score)
            else:
                # ... and the next ones are added to it.
                label.append(tag, score)
        if label is not None:
            # 'Text' is only a fallback for 'Tag'.
            break
    return label
Write an XRA file.
def write(self, filename):
    """Write an XRA file.

    The tree is made of a 'Document' root with, in this order: the
    'Metadata' (only if not empty), the 'Tier' elements, the 'Media'
    elements, the 'Hierarchy' and the 'Vocabulary' elements. It is
    indented and written with the sg.__encoding__ encoding and an XML
    declaration.

    :param filename: (str) Name of the file to write.

    """
    root = ET.Element('Document')
    root.set('author', ' '.join(
        (sg.__name__, sg.__version__, '(C)', sg.__author__)))
    root.set('date', sppasTime().now)
    root.set('format', self.__format)
    root.set('name', self.get_name())

    # Metadata of the transcription: dropped if there is no entry.
    metadata_root = ET.SubElement(root, 'Metadata')
    sppasXRA.format_metadata(metadata_root, self)
    if not metadata_root.findall('Entry'):
        root.remove(metadata_root)

    for tier in self:
        sppasXRA.format_tier(ET.SubElement(root, 'Tier'), tier)

    for media in self.get_media_list():
        self._format_media(ET.SubElement(root, 'Media'), media)

    # The 'Hierarchy' element is always written, even without any link.
    self._format_hierarchy(ET.SubElement(root, 'Hierarchy'))

    for vocabulary in self.get_ctrl_vocab_list():
        self._format_vocabulary(ET.SubElement(root, 'Vocabulary'), vocabulary)

    sppasXRA.indent(root)
    ET.ElementTree(root).write(
        filename,
        encoding=sg.__encoding__,
        method='xml',
        xml_declaration=True)
Add 'Metadata' element in the tree from a sppasMetaData().
@staticmethod
def format_metadata(metadata_root, meta_object, exclude=None):
    """Add 'Entry' elements in a 'Metadata' element from a sppasMetaData().

    One 'Entry' child is appended for each key of the given object: the
    key is stored in the 'key' attribute and the value in the text.

    :param metadata_root: (ET) XML Element tree root.
    :param meta_object: (sppasMetadata) Any object with get_meta_keys()
    and get_meta().
    :param exclude: (list) List of keys to exclude. None means that no
    key is excluded.

    """
    # None replaces the former mutable default value (a shared list).
    excluded = () if exclude is None else exclude
    for key in meta_object.get_meta_keys():
        if key in excluded:
            continue
        entry = ET.SubElement(metadata_root, 'Entry')
        entry.set('key', key)
        entry.text = meta_object.get_meta(key)
Add a 'Tier' object in the tree from a sppasTier().
@staticmethod
def format_tier(tier_root, tier):
    """Fill a 'Tier' element in the tree from a sppasTier().

    The element gets the 'id' and 'tiername' attributes, a 'Metadata'
    child (only if there is an entry other than 'id') and one
    'Annotation' child for each annotation of the tier.

    :param tier_root: (ET) XML Element tree root.
    :param tier: (sppasTier)

    """
    tier_root.set('id', tier.get_meta('id'))
    tier_root.set('tiername', tier.get_name())

    # The 'id' is already an attribute: it is not repeated as an entry.
    metadata_root = ET.SubElement(tier_root, 'Metadata')
    sppasXRA.format_metadata(metadata_root, tier, exclude=['id'])
    if not metadata_root.findall('Entry'):
        tier_root.remove(metadata_root)

    for annotation in tier:
        sppasXRA.format_annotation(
            ET.SubElement(tier_root, 'Annotation'), annotation)
Add an 'Annotation' element in the tree from a sppasAnnotation().
@staticmethod
def format_annotation(annotation_root, annotation):
    """Fill an 'Annotation' element from a sppasAnnotation().

    The element gets the 'id' attribute, the 'score' attribute if the
    annotation has a score, then the 'Metadata' (only if not empty),
    'Location' and 'Label' children.

    :param annotation_root: (ET) XML Element tree root.
    :param annotation: (sppasAnnotation)

    """
    annotation_root.set('id', annotation.get_meta('id'))
    ann_score = annotation.get_score()
    if ann_score is not None:
        annotation_root.set('score', str(ann_score))

    # The 'id' is already an attribute: it is not repeated as an entry.
    metadata_root = ET.SubElement(annotation_root, 'Metadata')
    sppasXRA.format_metadata(metadata_root, annotation, exclude=['id'])
    if not metadata_root.findall('Entry'):
        annotation_root.remove(metadata_root)

    sppasXRA.format_location(
        ET.SubElement(annotation_root, 'Location'),
        annotation.get_location())

    for label in annotation.get_labels():
        sppasXRA.format_label(ET.SubElement(annotation_root, 'Label'), label)
Add a 'Location' element in the tree from a sppasLocation().
@staticmethod
def format_location(location_root, location):
    """Fill a 'Location' element in the tree from a sppasLocation().

    One 'Point', 'Interval' or 'Disjoint' child is appended for each
    localization, with a 'score' attribute if its score is not None.

    :param location_root: (ET) XML Element tree root.
    :param location: (sppasLocation) Iterable of (localization, score).

    """
    for localization, score in location:
        if localization.is_point():
            node = ET.SubElement(location_root, 'Point')
            sppasXRA._format_point(node, localization)
        elif localization.is_interval():
            node = ET.SubElement(location_root, 'Interval')
            sppasXRA._format_interval(node, localization)
        # NOTE(review): was IsTimeDisjoint(); is_disjoint() is assumed to
        # be the counterpart of is_point()/is_interval() - confirm against
        # the localization classes.
        elif localization.is_disjoint():
            node = ET.SubElement(location_root, 'Disjoint')
            sppasXRA._format_disjoint(node, localization)
        else:
            # Unknown kind of localization: nothing is written.
            continue

        if score is not None:
            node.set('score', u(str(score)))
Add a 'Label' element in the tree from a sppasLabel().
@staticmethod
def format_label(label_root, label):
    """Fill a 'Label' element in the tree from a sppasLabel().

    The 'key' attribute is set only if the label has a key. One 'Tag'
    child is appended for each (tag, score) of the label.

    :param label_root: (ET) XML Element tree root.
    :param label: (sppasLabel)

    """
    key = label.get_key()
    if key is not None:
        label_root.set('key', key)

    for tag, score in label:
        tag_node = ET.SubElement(label_root, 'Tag')
        if score is not None:
            tag_node.set('score', str(score))
        sppasXRA._format_tag(tag_node, tag)
Pretty indent.
http://effbot.org/zone/element-lib.htm#prettyprint
@staticmethod
def indent(elem, level=0):
    """Pretty indent.

    The 'text' and 'tail' of the elements are modified in place, only
    where they are empty or made of whitespace. Tabulations are used.

    http://effbot.org/zone/element-lib.htm#prettyprint

    :param elem: (ET) XML Element to indent.
    :param level: (int) Depth of the element in the tree.

    """
    def is_blank(text):
        return not text or not text.strip()

    padding = '\n' + level * '\t'

    if len(elem) == 0:
        # No child: only the tail is set, and not at level 0.
        if level and is_blank(elem.tail):
            elem.tail = padding
        return

    if is_blank(elem.text):
        elem.text = padding + '\t'
    if is_blank(elem.tail):
        # One more newline after the elements of levels 0 and 1.
        elem.tail = '\n' + padding if level < 2 else padding

    last_child = None
    for child in elem:
        sppasXRA.indent(child, level + 1)
        last_child = child

    # The tail of the last child comes back to the level of its parent.
    if is_blank(last_child.tail):
        last_child.tail = padding
Read any kind of metadata.
@staticmethod
def _parse_metadata(meta_object, metadata_root):
    """Read any kind of metadata.

    Each 'Entry' child with a text is stored with set_meta(). The key is
    read from the 'key' attribute, or from 'Key' if 'key' is missing.

    :param meta_object: (sppasMetadata) Object to fill.
    :param metadata_root: (ET) XML Element tree root. Nothing is done
    if it is None.

    """
    if metadata_root is None:
        return

    for entry_node in metadata_root.findall('Entry'):
        attributes = entry_node.attrib
        key = attributes['key'] if 'key' in attributes else attributes['Key']
        value = entry_node.text
        if value is not None:
            meta_object.set_meta(key, value)
Parse a 'Tier' element to create a sppasTier().
def _parse_tier(self, tier_root):
    """Parse a 'Tier' element to create a sppasTier().

    The tier is created with the 'tiername' attribute as name, or with
    its identifier if there is no 'tiername'. The identifier is read
    from the 'id' attribute, or from 'ID' if 'id' is missing.

    :param tier_root: (ET) XML Element tree root.

    """
    attributes = tier_root.attrib
    name = attributes.get('tiername')
    tid = attributes['id'] if 'id' in attributes else attributes['ID']

    tier = self.create_tier(tid if name is None else name)

    # The identifier is assigned after the metadata entries were parsed.
    sppasXRA._parse_metadata(tier, tier_root.find('Metadata'))
    tier.set_meta('id', tid)

    for annotation_root in tier_root.findall('Annotation'):
        sppasXRA._parse_annotation(tier, annotation_root)
Parse an 'Annotation' element and create a sppasAnnotation().
@staticmethod
def _parse_annotation(tier, annotation_root):
    """Parse an 'Annotation' element and create a sppasAnnotation().

    The annotation is created in the given tier from the 'Location' and
    'Label' children, then its metadata, 'id' and 'score' are assigned.

    :param tier: (sppasTier) Tier to add the newly created annotation.
    :param annotation_root: (ET) XML Element tree root.

    """
    location = sppasXRA._parse_location(annotation_root.find('Location'))

    labels = list()
    for label_root in annotation_root.findall('Label'):
        label = sppasXRA.parse_label(label_root)
        if label is None:
            # parse_label() returns None for a 'Label' element without
            # any tag: there is nothing to set a key on nor to store.
            continue
        if 'key' in label_root.attrib:
            label.set_key(label_root.attrib['key'])
        labels.append(label)

    ann = tier.create_annotation(location, labels)

    sppasXRA._parse_metadata(ann, annotation_root.find('Metadata'))
    if 'id' in annotation_root.attrib:
        ann.set_meta('id', annotation_root.attrib['id'])
    if 'score' in annotation_root.attrib:
        ann.set_score(float(annotation_root.attrib['score']))
Parse a 'Location' element and create a sppasLocation().
@staticmethod
def _parse_location(location_root):
    """Parse a 'Location' element and create a sppasLocation().

    The children of the element are parsed as localizations. If none of
    them is recognized, the localizations are searched inside the
    'Localization' children, whose 'score' attribute is then used.

    :param location_root: (ET) XML Element tree root.
    :returns: (sppasLocation)

    """
    location = sppasLocation()

    for localization_root in location_root:
        localization, score = sppasXRA._parse_localization(localization_root)
        if localization is not None:
            location.append(localization, score)

    if len(location) == 0:
        # Localizations wrapped into 'Localization' elements.
        for localization_root in location_root.findall('Localization'):
            wrapper_score = localization_root.attrib.get('score')
            for loc_root in localization_root:
                localization, score = sppasXRA._parse_localization(loc_root)
                if localization is None:
                    # Unknown child: ignored, like in the loop above.
                    continue
                if wrapper_score is not None:
                    # The score of the wrapper replaces the parsed one.
                    score = float(wrapper_score)
                location.append(localization, score)

    return location
Parse a 'Localization' element and create a sppasLocalization().
@staticmethod
def _parse_localization(localization_root):
    """Parse a localization element and create a sppasLocalization().

    The parser is chosen from the lowered tag of the element: it is
    tested to contain 'point', then 'interval', then 'disjoint'.

    :param localization_root: (ET) XML Element tree root.
    :returns: tuple(sppasLocalization, score) or (None, None) if the
    tag is not recognized.

    """
    loc_str = localization_root.tag.lower()

    if 'point' in loc_str:
        localization, score = sppasXRA._parse_point(localization_root)
        return (localization, score)
    if 'interval' in loc_str:
        localization, score = sppasXRA._parse_interval(localization_root)
        return (localization, score)
    if 'disjoint' in loc_str:
        localization, score = sppasXRA._parse_disjoint(localization_root)
        return (localization, score)

    return (None, None)
Parse a 'Point' element and create a sppasPoint().
@staticmethod
def _parse_point(point_node):
    """Parse a 'Point' element and create a sppasPoint().

    The 'midpoint' attribute is converted into an int if it is made of
    digits only, into a float otherwise. The 'radius' attribute is
    converted into the same type, and is None if it is missing or if
    it can't be converted. If the midpoint is not a number, both the
    midpoint and the radius are kept as strings.

    :param point_node: (ET) XML Element node.
    :returns: tuple(sppasPoint, score) The score is None if the element
    has no 'score' attribute.
    :raises: KeyError if the 'midpoint' attribute is missing.

    """
    attributes = point_node.attrib
    score = float(attributes['score']) if 'score' in attributes else None
    midpoint_str = attributes['midpoint']
    radius_str = attributes.get('radius')

    if midpoint_str.isdigit():
        number_type = int
        midpoint = int(midpoint_str)
    else:
        number_type = float
        try:
            midpoint = float(midpoint_str)
        except ValueError:
            # Not a number: the values are given as they are.
            return (sppasPoint(midpoint_str, radius_str), score)

    try:
        radius = number_type(radius_str)
    except (TypeError, ValueError):
        # TypeError: no radius (None); ValueError: not of the same type.
        radius = None

    return (sppasPoint(midpoint, radius), score)
Parse an 'Interval' element and create a sppasInterval().
@staticmethod
def _parse_interval(interval_root):
    """Parse an 'Interval' element and create a sppasInterval().

    The interval is made of the points parsed from the 'Begin' and
    'End' children. The scores of these two points are ignored.

    :param interval_root: (ET) XML Element tree root.
    :returns: tuple(sppasInterval, score) The score is None if the
    element has no 'score' attribute.

    """
    score = None
    if 'score' in interval_root.attrib:
        score = float(interval_root.attrib['score'])

    begin_node = interval_root.find('Begin')
    end_node = interval_root.find('End')

    # Only the point of each (point, score) result is of interest.
    begin, _ = sppasXRA._parse_point(begin_node)
    end, _ = sppasXRA._parse_point(end_node)

    return (sppasInterval(begin, end), score)
Parse a 'Disjoint' element and create a sppasDisjoint().
@staticmethod
def _parse_disjoint(disjoint_root):
    """Parse a 'Disjoint' element and create a sppasDisjoint().

    The intervals are parsed from the 'Interval' children. If there is
    none, they are parsed from the 'TimeInterval' and 'FrameInterval'
    children. The scores of the intervals are ignored.

    :param disjoint_root: (ET) XML Element tree root.
    :returns: tuple(sppasDisjoint, score) The score is None if the
    element has no 'score' attribute.

    """
    if 'score' in disjoint_root.attrib:
        score = float(disjoint_root.attrib['score'])
    else:
        score = None

    disjoint = sppasDisjoint()

    for interval_root in disjoint_root.findall('Interval'):
        # _parse_interval() returns (interval, score): only the interval
        # has to be appended, not the tuple.
        interval, _ = sppasXRA._parse_interval(interval_root)
        disjoint.append_interval(interval)

    if len(disjoint) == 0:
        # Other names of the interval elements.
        for interval_name in ('TimeInterval', 'FrameInterval'):
            for interval_root in disjoint_root.findall(interval_name):
                interval, _ = sppasXRA._parse_interval(interval_root)
                disjoint.append_interval(interval)

    return (disjoint, score)
Parse a 'Tag' element and create a sppasTag().
@staticmethod
def _parse_tag(tag_node):
    """Parse a 'Tag' element and create a sppasTag().

    The type of the tag is read from the 'type' attribute and is 'str'
    if there is no such attribute. The content is the text of the
    element, or an empty string if the element has no text.

    :param tag_node: (ET) XML Element node.
    :returns: tuple(sppasTag, score) The score is None if the element
    has no 'score' attribute.

    """
    attributes = tag_node.attrib
    score = float(attributes['score']) if 'score' in attributes else None
    data_type = attributes.get('type', 'str')

    content = tag_node.text
    if content is None:
        content = ''

    return (sppasTag(content, data_type), score)
Parse a 'Media' element and create a sppasMedia().
def _parse_media(self, media_root):
    """Parse a 'Media' element, create a sppasMedia() and add it.

    The media is created from the 'url', 'id' and optional 'mimetype'
    attributes. Its metadata and its content are then assigned, and it
    is set to each tier whose identifier matches a 'Tier' child.

    :param media_root: (ET) XML Element tree root.
    :raises: KeyError if the 'url' or 'id' attribute is missing.

    """
    attributes = media_root.attrib
    media_url = attributes['url']
    media_id = attributes['id']
    media_mime = attributes.get('mimetype')

    media = sppasMedia(media_url, media_id, media_mime)
    self.add_media(media)

    sppasXRA._parse_metadata(media, media_root.find('Metadata'))

    # An Element without children is False in a boolean context: the
    # result of find() must be compared to None explicitly.
    content_root = media_root.find('Content')
    if content_root is not None and content_root.text is not None:
        media.set_content(content_root.text)

    # Link the media to the tiers.
    for tier_node in media_root.findall('Tier'):
        tier_id = tier_node.attrib['id']
        for tier in self._tiers:
            if tier.get_id() == tier_id:
                tier.set_media(media)
Parse a 'Hierarchy' element and set it.
def _parse_hierarchy(self, hierarchy_root):
    """Parse a 'Hierarchy' element and set it.

    Each 'Link' child is described by the 'type', 'from' and 'to'
    attributes, or by 'Type', 'From' and 'To' if one of them is
    missing. A link that can't be added is logged and ignored.

    :param hierarchy_root: (ET) XML Element tree root.
    :raises: KeyError if a link has neither the lowercase nor the
    capitalized attributes.

    """
    for link_node in hierarchy_root.findall('Link'):
        attributes = link_node.attrib
        try:
            hierarchy_type = attributes['type']
            parent_tier_id = attributes['from']
            child_tier_id = attributes['to']
        except KeyError:
            # Capitalized names of the attributes.
            hierarchy_type = attributes['Type']
            parent_tier_id = attributes['From']
            child_tier_id = attributes['To']

        # Search for the tiers of the link. They stay None if not found.
        parent_tier = None
        child_tier = None
        for tier in self:
            tier_id = tier.get_id()
            if tier_id == parent_tier_id:
                parent_tier = tier
            if tier_id == child_tier_id:
                child_tier = tier

        try:
            self.add_hierarchy_link(hierarchy_type, parent_tier, child_tier)
        except Exception as e:
            # Best effort: a wrong link doesn't stop the reading.
            logging.error('Corrupted hierarchy link: {:s}'.format(str(e)))
Parse a 'Vocabulary' element and set it.
def _parse_vocabulary(self, vocabulary_root):
    """Parse a 'Vocabulary' element and set it.

    A sppasCtrlVocab() is created and added, then filled with its
    metadata, its description and its entries. It is set to each tier
    whose 'id' metadata matches a 'Tier' child.

    :param vocabulary_root: (ET) XML Element tree root.

    """
    vocab_attrs = vocabulary_root.attrib
    id_vocab = vocab_attrs['id'] if 'id' in vocab_attrs else vocab_attrs['ID']

    ctrl_vocab = sppasCtrlVocab(id_vocab)
    self.add_ctrl_vocab(ctrl_vocab)

    sppasXRA._parse_metadata(ctrl_vocab, vocabulary_root.find('Metadata'))

    if 'description' in vocab_attrs:
        ctrl_vocab.set_description(vocab_attrs['description'])

    # Entries: a tag of type 'str' and an empty description by default.
    for entry_node in vocabulary_root.findall('Entry'):
        entry_attrs = entry_node.attrib
        entry_text = sppasTag(entry_node.text, entry_attrs.get('type', 'str'))
        ctrl_vocab.add(entry_text, entry_attrs.get('description', ''))

    # Link the vocabulary to the tiers.
    for tier_node in vocabulary_root.findall('Tier'):
        tier_attrs = tier_node.attrib
        tier_id = tier_attrs['id'] if 'id' in tier_attrs else tier_attrs['ID']
        for tier in self:
            if tier.get_meta('id') == tier_id:
                tier.set_ctrl_vocab(ctrl_vocab)
Add a 'Point' element in the tree from a sppasPoint().
@staticmethod
def _format_point(point_node, point):
    """Fill a 'Point' element in the tree from a sppasPoint().

    The 'midpoint' attribute is always set. The 'radius' attribute is
    set only if the radius of the point is not None.

    :param point_node: (ET) XML Element node.
    :param point: (sppasPoint)

    """
    point_node.set('midpoint', u(str(point.get_midpoint())))

    radius = point.get_radius()
    if radius is not None:
        point_node.set('radius', u(str(radius)))
Add an 'Interval' element in the tree from a sppasInterval().
@staticmethod
def _format_interval(interval_root, interval):
    """Fill an 'Interval' element in the tree from a sppasInterval().

    A 'Begin' child then an 'End' child are appended, each one being
    filled like a point.

    :param interval_root: (ET) XML Element node.
    :param interval: (sppasInterval)

    """
    for node_name, get_point in (
            ('Begin', interval.get_begin),
            ('End', interval.get_end)):
        point_node = ET.SubElement(interval_root, node_name)
        sppasXRA._format_point(point_node, get_point())
Add a 'Disjoint' element in the tree from a sppasDisjoint().
@staticmethod
def _format_disjoint(disjoint_root, disjoint):
    """Fill a 'Disjoint' element in the tree from a sppasDisjoint().

    One 'Interval' child is appended for each interval.

    :param disjoint_root: (ET) XML Element node.
    :param disjoint: (sppasDisjoint) Iterable of intervals.

    """
    for interval in disjoint:
        sppasXRA._format_interval(
            ET.SubElement(disjoint_root, 'Interval'), interval)
Add a 'Tag' element in the tree from a sppasTag().
@staticmethod
def _format_tag(tag_node, tag):
    """Fill a 'Tag' element in the tree from a sppasTag().

    The 'type' attribute is set only if the type of the tag is not
    'str'. The content of the tag is the text of the element.

    :param tag_node: (ET) XML Element node.
    :param tag: (sppasTag)

    """
    tag_type = tag.get_type()
    if tag_type != 'str':
        tag_node.set('type', tag_type)
    tag_node.text = tag.get_content()
Add a 'Media' element in the tree from a sppasMedia.
def _format_media(self, media_root, media):
    """Fill a 'Media' element in the tree from a sppasMedia.

    The element gets the 'id', 'url' and 'mimetype' attributes, one
    'Tier' child for each tier with this media, a 'Metadata' child
    (only if not empty) and a 'Content' child (only if not empty).

    :param media_root: (ET) XML Element tree root.
    :param media: (sppasMedia)

    """
    media_root.set('id', media.get_meta('id'))
    media_root.set('url', media.get_filename())
    media_root.set('mimetype', media.get_mime_type())

    # Tiers linked to this media.
    for tier in self:
        tier_media = tier.get_media()
        if tier_media is not None and tier_media == media:
            tier_node = ET.SubElement(media_root, 'Tier')
            tier_node.set('id', tier.get_meta('id'))

    # Metadata are formatted only if there is more than one key.
    metadata_root = ET.SubElement(media_root, 'Metadata')
    if len(media.get_meta_keys()) > 1:
        sppasXRA.format_metadata(metadata_root, media, exclude=['id'])
    if not metadata_root.findall('Entry'):
        media_root.remove(metadata_root)

    content = media.get_content()
    if len(content) > 0:
        ET.SubElement(media_root, 'Content').text = content
Add a 'Hierarchy' element in the tree from a sppasHierarchy().
def _format_hierarchy(self, hierarchy_root):
    """Fill a 'Hierarchy' element in the tree from a sppasHierarchy().

    One 'Link' child is appended for each tier with a parent, with the
    'type', 'from' (parent) and 'to' (child) attributes.

    :param hierarchy_root: (ET) XML Element tree root.

    """
    for child_tier in self:
        parent_tier = self._hierarchy.get_parent(child_tier)
        if parent_tier is None:
            # This tier is not the child of another one.
            continue

        link_type = self._hierarchy.get_hierarchy_type(child_tier)
        link = ET.SubElement(hierarchy_root, 'Link')
        for attribute, value in (
                ('type', link_type),
                ('from', parent_tier.get_meta('id')),
                ('to', child_tier.get_meta('id'))):
            link.set(attribute, value)
Add a 'Vocabulary' element in the tree from a sppasCtrlVocab().
def _format_vocabulary(self, vocabulary_root, vocabulary):
    """Fill a 'Vocabulary' element in the tree from a sppasCtrlVocab().

    The element gets the 'id' attribute, the 'description' attribute
    (only if not empty), one 'Entry' child for each entry, one 'Tier'
    child for each tier with this vocabulary and a 'Metadata' child
    (only if not empty).

    :param vocabulary_root: (ET) XML Element tree root.
    :param vocabulary: (sppasCtrlVocab)

    """
    vocabulary_root.set('id', vocabulary.get_name())
    description = vocabulary.get_description()
    if len(description) > 0:
        vocabulary_root.set('description', description)

    # Entries of the vocabulary.
    for entry in vocabulary:
        entry_node = ET.SubElement(vocabulary_root, 'Entry')
        entry_node.text = entry.get_content()
        entry_type = entry.get_type()
        if entry_type != 'str':
            entry_node.set('type', entry_type)
        entry_description = vocabulary.get_tag_description(entry)
        if len(entry_description) > 0:
            entry_node.set('description', entry_description)

    # Tiers linked to this vocabulary.
    for tier in self:
        if tier.get_ctrl_vocab() == vocabulary:
            tier_node = ET.SubElement(vocabulary_root, 'Tier')
            tier_node.set('id', tier.get_meta('id'))

    # Metadata of the vocabulary: dropped if there is no entry.
    metadata_root = ET.SubElement(vocabulary_root, 'Metadata')
    sppasXRA.format_metadata(metadata_root, vocabulary)
    if not metadata_root.findall('Entry'):
        vocabulary_root.remove(metadata_root)