# Source code for anndata.aio.readwrite
# -*- coding: UTF-8 -*-
"""
:filename: sppas.anndata.aio.readwrite.py
:author: Brigitte Bigi
:contact: develop@sppas.org
:summary: The annotated files main reader/writer.
.. _This file is part of SPPAS: http://www.sppas.org/
..
-------------------------------------------------------------------------
___ __ __ __ ___
/ | \ | \ | \ / the automatic
\__ |__/ |__/ |___| \__ annotation and
\ | | | | \ analysis
___/ | | | | ___/ of speech
Copyright (C) 2011-2021 Brigitte Bigi
Laboratoire Parole et Langage, Aix-en-Provence, France
Use of this software is governed by the GNU Public License, version 3.
SPPAS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
SPPAS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SPPAS. If not, see <http://www.gnu.org/licenses/>.
This banner notice must not be removed.
-------------------------------------------------------------------------
"""
import logging
import os
from collections import OrderedDict
from sppas.src.config import IOExtensionError
from sppas.src.config.makeunicode import u
from sppas.src.utils.datatype import sppasTime
from ..anndataexc import AioEncodingError
from ..anndataexc import AioError
from .text import sppasRawText
from .text import sppasCSV
from .sclite import sppasCTM
from .sclite import sppasSTM
from .xtrans import sppasTDF
from .praat import sppasTextGrid
from .praat import sppasPitchTier
from .praat import sppasIntensityTier
from .phonedit import sppasMRK
from .phonedit import sppasSignaix
from .htk import sppasLab
from .subtitle import sppasSubRip
from .subtitle import sppasSubViewer
from .subtitle import sppasWebVTT
from .table import sppasTRA
from .table import sppasARFF
from .table import sppasXRFF
from .transcriber import sppasTRS
from .audacity import sppasAudacity
from .anvil import sppasAnvil
from .elan import sppasEAF
from .annotationpro import sppasANT
from .annotationpro import sppasANTX
from .xra import sppasXRA
# ---------------------------------------------------------------------------
class sppasTrsRW(object):
    """Main parser of annotated data: Reader and writer of annotated data.

    All the 3 types of annotated files are supported: ANNOT, MEASURE, TABLE.

    The class keeps a registry associating a file extension to the class
    to instantiate in order to read or write a file with that extension.
    An instance only stores a filename; the reader-writer is chosen each
    time read() or write() is invoked.

    """

    # A dictionary to associate a file extension and a class to instantiate.
    # Each class is instantiated once, at class-definition time, only to get
    # its default extension. The insertion order is significant: it is the
    # order of the lists of extensions and the order in which the formats
    # are tried by the heuristic.
    TRANSCRIPTION_TYPES = OrderedDict(
        (io_class().default_extension, io_class)
        for io_class in (
            # ANNOT
            sppasXRA,
            sppasTextGrid,
            sppasAnvil,
            sppasEAF,
            sppasANT,
            sppasANTX,
            sppasTRS,
            sppasMRK,
            sppasSignaix,
            sppasLab,
            sppasSubRip,
            sppasSubViewer,
            sppasWebVTT,
            sppasCTM,
            sppasSTM,
            sppasAudacity,
            sppasTDF,
            sppasCSV,
            sppasRawText,
            # TABLE
            sppasTRA,
            sppasARFF,
            sppasXRFF,
            # MEASURE
            sppasIntensityTier,
            sppasPitchTier,
        )
    )

    # -----------------------------------------------------------------------

    @staticmethod
    def _select_extensions(predicate):
        """Return the registered extensions accepted by a predicate.

        NOTE(review): FileFormatProperty is neither imported nor defined in
        the visible part of this module; it is assumed to be resolvable in
        the module namespace when this method is called - to be confirmed.

        :param predicate: (callable) Takes the FileFormatProperty of an
        extension and returns True if the extension has to be selected.
        :returns: (list of str) Extensions, in the registry order.

        """
        return [
            ext for ext in sppasTrsRW.TRANSCRIPTION_TYPES
            if predicate(FileFormatProperty(extension=ext))
        ]

    # -----------------------------------------------------------------------

    @staticmethod
    def extensions():
        """Return the whole list of supported extensions (case sensitive)."""
        return list(sppasTrsRW.TRANSCRIPTION_TYPES)

    # -----------------------------------------------------------------------

    @staticmethod
    def extensions_in():
        """Return the list of supported extensions if the reader exists."""
        return sppasTrsRW._select_extensions(
            lambda fp: fp.get_reader() is True)

    # -----------------------------------------------------------------------

    @staticmethod
    def extensions_out():
        """Return the list of supported extensions if the writer exists."""
        return sppasTrsRW._select_extensions(
            lambda fp: fp.get_writer() is True)

    # -----------------------------------------------------------------------

    @staticmethod
    def annot_extensions():
        """Return the list of ANNOT extensions (case sensitive)."""
        return sppasTrsRW._select_extensions(
            lambda fp: fp.get_trs_type() == "ANNOT")

    # -----------------------------------------------------------------------

    @staticmethod
    def measure_extensions():
        """Return the list of MEASURE extensions (case sensitive)."""
        return sppasTrsRW._select_extensions(
            lambda fp: fp.get_trs_type() == "MEASURE")

    # -----------------------------------------------------------------------

    @staticmethod
    def table_extensions():
        """Return the list of TABLE extensions (case sensitive)."""
        return sppasTrsRW._select_extensions(
            lambda fp: fp.get_trs_type() == "TABLE")

    # -----------------------------------------------------------------------

    def __init__(self, filename):
        """Create a Transcription reader-writer.

        :param filename: (str) Name of the file to read or to write.

        """
        self.__filename = u(filename)

    # -----------------------------------------------------------------------

    def get_filename(self):
        """Return the filename."""
        return self.__filename

    # -----------------------------------------------------------------------

    def set_filename(self, filename):
        """Set a new filename.

        :param filename: (str) Name of the file to read or to write.

        """
        self.__filename = u(filename)

    # -----------------------------------------------------------------------

    @staticmethod
    def _set_path_meta(trs, filename):
        """Store the name, the path and the extension of a file as metadata.

        :param trs: Reader-writer object on which set_meta() is invoked.
        :param filename: (str) Name of the file to describe.

        """
        trs.set_meta('file_name', os.path.basename(filename))
        trs.set_meta('file_path', os.path.dirname(filename))
        trs.set_meta('file_ext', os.path.splitext(filename)[1])

    # -----------------------------------------------------------------------

    def read(self, heuristic=False):
        """Read a transcription from a file.

        :param heuristic: (bool) if the extension of the file is unknown, use
        an heuristic to detect the format, then to choose the reader-writer.
        :returns: sppasTranscription reader-writer
        :raises: IOExtensionError if the extension is unknown and heuristic
        is not True
        :raises: AioError if the file does not exist
        :raises: AioEncodingError if a UnicodeError occurred while reading

        """
        try:
            trs = sppasTrsRW.create_trs_from_extension(self.__filename)
        except IOExtensionError:
            # Only the exact value True enables the detection from content.
            if heuristic is not True:
                raise
            trs = sppasTrsRW.create_trs_from_heuristic(self.__filename)

        # The existence is tested after the choice of the reader, so an
        # unknown extension is reported before a missing file.
        if not os.path.exists(self.__filename):
            raise AioError(self.__filename)

        try:
            # Add metadata about the file
            trs.set_meta('file_reader', trs.__class__.__name__)
            sppasTrsRW._set_path_meta(trs, self.__filename)
            trs.set_meta('file_read_date', sppasTime().now)

            # Read the file content and store into a Transcription()
            trs.read(self.__filename)
        except UnicodeError as e:
            # Any other exception is propagated unchanged to the caller.
            raise AioEncodingError(filename=self.__filename,
                                   error_msg=str(e))

        return trs

    # -----------------------------------------------------------------------

    @staticmethod
    def create_trs_from_extension(filename):
        """Return a transcription according to a given filename.

        Only the extension of the filename is used. The comparison with the
        registered extensions is case-insensitive.

        :param filename: (str)
        :returns: Transcription()
        :raises: IOExtensionError if no registered extension is matching

        """
        # Extension without its leading dot.
        extension = os.path.splitext(filename)[1][1:]
        logging.debug("Parse an annotated file. Extension: %s", extension)

        wanted = extension.lower()
        for ext, io_class in sppasTrsRW.TRANSCRIPTION_TYPES.items():
            if ext.lower() == wanted:
                return io_class()

        raise IOExtensionError(filename)

    # -----------------------------------------------------------------------

    @staticmethod
    def create_trs_from_heuristic(filename):
        """Return a transcription according to a given filename.

        Each registered class is asked, in the registry order, to detect if
        the file is of its format. The first one answering True is used.
        If none of them does, a raw text reader-writer is returned.

        :param filename: (str)
        :returns: Transcription()

        """
        for file_reader in sppasTrsRW.TRANSCRIPTION_TYPES.values():
            try:
                if file_reader.detect(filename) is True:
                    return file_reader()
            except Exception:
                # A failing detection only means "not this format".
                # KeyboardInterrupt and SystemExit are not caught here.
                continue

        return sppasRawText()

    # -----------------------------------------------------------------------

    def write(self, transcription):
        """Write a transcription into a file.

        The reader-writer is chosen from the extension of the filename.
        The 'file_version' metadata is read as an integer, with "0" as
        default, and it is incremented before writing.

        :param transcription: (sppasTranscription)
        :raises: IOExtensionError if the extension of the file is unknown
        :raises: AioEncodingError if a UnicodeError occurred while writing

        """
        trs_rw = sppasTrsRW.create_trs_from_extension(self.__filename)
        trs_rw.set(transcription)

        # Add metadata about the file
        trs_rw.set_meta('file_writer', trs_rw.__class__.__name__)
        sppasTrsRW._set_path_meta(trs_rw, self.__filename)
        trs_rw.set_meta('file_write_date', "{:s}".format(sppasTime().now))
        file_version = int(trs_rw.get_meta("file_version", "0")) + 1
        trs_rw.set_meta('file_version', str(file_version))

        try:
            trs_rw.write(self.__filename)
        except UnicodeError as e:
            # Any other exception is propagated unchanged to the caller.
            raise AioEncodingError(filename=self.__filename,
                                   error_msg=str(e))
# ---------------------------------------------------------------------------