SPPAS integration of the automatic Cued Speech key-code generation.
Module sppas.src.annotations
Class sppasCuedSpeech
Description
Constructor
Create a new instance.
Log is used for a better communication of the annotation process and its results. If None, logs are redirected to the default logging system.
Parameters
- log: (sppasLog) Human-readable logs.
View Source
def __init__(self, log=None):
    """Instantiate the Cued Speech automatic annotation.

    The log object makes the annotation process and its results easier to
    follow. When it is None, messages are sent to the default logging
    system.

    :param log: (sppasLog) Human-readable logs.

    """
    super(sppasCuedSpeech, self).__init__('cuedspeech.json', log)

    # The language stays undetermined until load_resources() is called.
    self.__lang = 'und'
    # Keys given to every predictor by load_resources().
    self.__cued = CuedSpeechKeys()

    # Predictors used by convert() and run().
    self.__genkey = sppasWhatKeyPredictor()
    handtrans_version = self._options['handtrans']
    self.__genhand = sppasWhenHandTransitionPredictor(predictor_version=handtrans_version)
    self.__gencue = sppasWhereCuePredictor()

    # The video tagger is only created when the 'createvideo' option is on.
    self.__tagger = None
    self.__ann_on_media = sppasAnnsOnFrames(fps=60.0)
Public functions
load_resources
Fix the keys from a configuration file.
Parameters
- config_filename: Name of the configuration file with the keys
- lang: (str) Iso639-3 of the language or "und" if unknown.
View Source
def load_resources(self, config_filename, lang='und', **kwargs):
    """Load the keys from a configuration file and share them.

    The configuration file is only loaded when the language is known. The
    keys are then given to the three predictors, and to the video tagger
    if the 'createvideo' option is enabled. Extra keyword arguments are
    accepted but not used by this method.

    :param config_filename: Name of the configuration file with the keys
    :param lang: (str) Iso639-3 of the language or "und" if unknown.

    """
    self.__lang = lang
    language_is_known = lang != 'und'
    if language_is_known:
        self.__cued.load(config_filename)

    for predictor in (self.__genkey, self.__genhand, self.__gencue):
        predictor.set_cue_rules(self.__cued)

    if self._options['createvideo'] is True:
        self.__set_video_tagger()
fix_options
Fix all options.
Available options are:
- inputpattern1, inputpattern2, inputpattern3, outputpattern,
- createvideo: boolean
- handtrans: version of the hand transition estimator model
- handangle: version of the hand angle estimator model
- handsset: name of the hand pictures set, or empty string to draw badges
- infotext: boolean
- voxwelpos: boolean
Parameters
- options: (sppasOption)
View Source
def fix_options(self, options):
    """Fix all options.

    Available options are:

        - inputpattern1, inputpattern2, inputpattern3, outputpattern,
        - createvideo: boolean
        - handtrans: version of the hand transition estimator model
        - handangle: version of the hand angle estimator model
        - handsset: name of the hand pictures set, or empty string to draw badges
        - infotext: boolean
        - voxwelpos: boolean

    'createvideo', 'handtrans' and 'handangle' are given to their
    dedicated setter. Any key containing 'pattern' is stored as is. Every
    other option is forwarded to the video tagger when there is one, and
    ignored otherwise.

    :param options: (sppasOption)

    """
    # Options with a dedicated setter, matched on the exact key.
    setters = {
        'createvideo': self.set_create_video,
        'handtrans': self.set_when_handtrans_version,
        'handangle': self.set_where_handangle_version,
    }
    for option in options:
        name = option.get_key()
        setter = setters.get(name)
        if setter is not None:
            setter(option.get_value())
        elif 'pattern' in name:
            self._options[name] = option.get_value()
        elif self.__tagger is not None:
            self.__tagger.set_option(name, option.get_value())
set_when_handtrans_version
Fix the version of the hand transition times generator.
Parameters
- version: (int)
View Source
def set_when_handtrans_version(self, version=4):
    """Fix the version of the hand transition times generator.

    If the given version is not one of those of the predictor, an error
    is logged and version 4 is used instead.

    :param version: (int)

    """
    supported = self.__genhand.get_whenpredictor_versions()
    requested = int(version)
    if requested in supported:
        selected = requested
    else:
        msg = "Invalid version number '{}' for transition times. Expected one of {}".format(requested, supported)
        self.logfile.print_message(msg, status=annots.error)
        selected = 4
    # NOTE(review): the setter name is spelled with a double 'r', unlike
    # get_whenpredictor_versions() -- confirm it matches the predictor API.
    self.__genhand.set_whenpredictorr_version(selected)
    self._options['handtrans'] = selected
set_where_handposition_version
Fix the version of the vowels' positions generator.
Parameters
- version: (int)
View Source
def set_where_handposition_version(self, version=1):
    """Fix the version of the vowels' positions generator.

    If the given version is not one of those of the predictor, an error
    is logged and version 1 is used instead.

    :param version: (int)

    """
    supported = self.__gencue.get_wherepositionpredictor_versions()
    requested = int(version)
    if requested in supported:
        selected = requested
    else:
        msg = "Invalid version number '{}' for vowels positions predictor. Expected one of {}".format(requested, supported)
        self.logfile.print_message(msg, status=annots.error)
        selected = 1
    self.__gencue.set_wherepositionpredictor_version(selected)
    self._options['handpos'] = selected
set_where_handangle_version
Fix the version of the hand angle generator.
Parameters
- version: (int)
View Source
def set_where_handangle_version(self, version=1):
    """Fix the version of the hand angle generator.

    If the given version is not one of those of the predictor, an error
    is logged and version 1 is used instead.

    :param version: (int)

    """
    supported = self.__gencue.get_whereanglepredictor_versions()
    requested = int(version)
    if requested in supported:
        selected = requested
    else:
        msg = "Invalid version number '{}' for angle predictor. Expected one of {}".format(requested, supported)
        self.logfile.print_message(msg, status=annots.error)
        selected = 1
    self.__gencue.set_whereanglepredictor_version(selected)
    self._options['handangle'] = selected
set_create_video
Fix the createvideo option.
Parameters
- create: (bool)
View Source
def set_create_video(self, create=True):
    """Fix the createvideo option.

    Enabling the option sets up the video tagger; disabling it drops the
    tagger.

    :param create: (bool)

    """
    enabled = bool(create)
    self._options['createvideo'] = enabled
    if not enabled:
        self.__tagger = None
        return
    self.__set_video_tagger()
convert
Generate the Cued Speech segments, keys, classes, structures, hand shapes and hand positions from a time-aligned phones tier.
Parameters
- phonemes: (sppasTier) time-aligned phonemes tier
- media: (sppasMedia) a media representing the video file
Returns
- (sppasTier*6)
View Source
def convert(self, phonemes, media):
    """Generate the Cued Speech results from a time-aligned phones tier.

    The phonemes are grouped into segments, the segments are turned into
    keys, and the hand transitions are then estimated from both.

    :param phonemes: (sppasTier) time-aligned phonemes tier
    :param media: (sppasMedia) a media representing the video file; it is
        not used by this method
    :returns: (sppasTier*6) in this order: segments, keys, classes,
        structures, hand shapes, hand positions

    """
    key_predictor = self.__genkey
    segments = key_predictor.phons_to_segments(phonemes)
    keys, classes, structs = key_predictor.segments_to_keys(
        segments,
        phonemes.get_first_point(),
        phonemes.get_last_point()
    )
    # when_hands() gives the positions first, but they are returned last.
    positions, shapes = self.__genhand.when_hands(keys, segments)
    return (segments, keys, classes, structs, shapes, positions)
make_video
Create a video with the tagged keys.
Parameters
- video_file: (str) Filename of the given video
- trs: (sppasTranscription) All required tiers to tag the video
- output: (str) Output file name
View Source
def make_video(self, video_file, trs, output):
    """Create a video with the tagged keys.

    The video is only created if the 'video' feature is installed and a
    video tagger was set up; otherwise an error message is logged and
    nothing is created.

    :param video_file: (str) Filename of the given video
    :param trs: (sppasTranscription) All required tiers to tag the video
    :param output: (str) Output file name

    """
    if cfg.feature_installed('video') is True and self.__tagger is not None:
        self.logfile.print_message('Create the tagged video', status=annots.info)
        self.__tagger.load(video_file)
        try:
            self.__tagger.tag_with_keys(trs, output)
        finally:
            # Close the loaded video even if tagging fails.
            self.__tagger.close()
    else:
        self.logfile.print_message('To tag a video, the video support feature must be enabled.', status=annots.error)
get_inputs
Return the media and the annotated filenames.
Parameters
- input_files: (list)
Raises
NoInputError
Returns
- (str, str, str) Names of the 3 expected files: media, time-aligned phonemes, sights. The media and sights names are None if not found.
View Source
def get_inputs(self, input_files):
    """Return the media and the annotated filenames.

    Each given file is assigned to the first role it matches and that is
    still free: media (by extension), time-aligned phonemes (extension
    and 'inputpattern1' suffix), then sights (extension and
    'inputpattern3' suffix). Extensions are compared case-insensitively.

    :param input_files: (list) Filenames; None entries are ignored
    :raise: NoInputError if no time-aligned phonemes file is found
    :return: (str, str, str) Names of the 3 expected files: media,
        phonemes, sights. The media and sights names can be None.

    """
    ext = self.get_input_extensions()
    phons_ext = {e.lower() for e in ext[0]}
    media_ext = {e.lower() for e in ext[1]}
    sights_ext = {e.lower() for e in ext[2]}

    media = None
    annot_phons = None
    annot_sights = None
    pphones = self._options['inputpattern1']
    psights = self._options['inputpattern3']

    for filename in input_files:
        if filename is None:
            continue
        fn, fe = os.path.splitext(filename)
        # Lower-case once so that 'x.MP4' matches like 'x.mp4' for every role.
        fe = fe.lower()
        if media is None and fe in media_ext:
            media = filename
        elif annot_phons is None and fe in phons_ext and fn.endswith(pphones):
            annot_phons = filename
        elif annot_sights is None and fe in sights_ext and fn.endswith(psights):
            annot_sights = filename

    # Only the phonemes file is mandatory.
    if annot_phons is None:
        logging.error('The annotated file with time-aligned phonemes was not found.')
        raise NoInputError
    return (media, annot_phons, annot_sights)
create_media
Create a sppasMedia() instance from a video filename.
Parameters
- video_filename
View Source
def create_media(self, video_filename):
    """Create a sppasMedia() instance from a video filename.

    The 'fps', 'duration' and 'size' metadata are filled in from the video
    on a best-effort basis: if the video can't be read, a warning is
    logged and the media is returned without these metadata.

    :param video_filename: (str) Filename of the video, or None
    :return: (sppasMedia) The media, or None if no filename is given

    """
    if video_filename is None:
        return None

    # os.path.splitext() keeps the leading dot, so the mime type looks
    # like 'video/.mp4'.
    extm = os.path.splitext(video_filename.lower())[1]
    video_media = sppasMedia(os.path.abspath(video_filename), mime_type='video/' + extm)

    try:
        vid = sppasVideoReader()
        vid.open(video_filename)
        try:
            video_media.set_meta('fps', str(vid.get_framerate()))
            video_media.set_meta('duration', str(vid.get_duration()))
            video_media.set_meta('size', str(vid.get_size()))
        finally:
            # Close the reader even if a property can't be read.
            vid.close()
    except Exception as e:
        # Metadata are optional: report the problem instead of hiding it,
        # but let KeyboardInterrupt/SystemExit propagate.
        logging.warning("Video metadata of {} can't be read: {}".format(video_filename, str(e)))

    return video_media
run
Run the automatic annotation process on an input.
Parameters
- input_files: (list of str) time-aligned phonemes, and optionally video, csv files
- output: (str) the output name
Returns
- (sppasTranscription) if no output name is given, otherwise (list of str) the names of the written files
View Source
def run(self, input_files, output=None):
"""Run the automatic annotation process on an input.
Only the time-aligned phonemes file is mandatory. The result of convert()
is stored in a first transcription. A second transcription is filled from
the result of predict_where() and is saved with the '-coords' pattern.
:param input_files: (list of str) time-aligned phonemes, and optionally video, csv files
:param output: (str) the output name
:returns: (sppasTranscription) the first transcription if output is None,
or (list of str) the names of the written files.
:raises: NoInputError (from get_inputs) if the phonemes file is missing
:raises: EmptyOutputError if the first transcription is empty when saving
"""
try:
# Turned to True only when make_video() is actually called.
do_vid = False
file_video, file_phons, file_sights = self.get_inputs(input_files)
# create_media() returns None when there is no video file.
video_media = self.create_media(file_video)
# Read the annotated file and get its time-aligned phonemes tier.
parser = sppasTrsRW(file_phons)
trs_input = parser.read()
tier_phon = sppasFindTier.aligned_phones(trs_input)
# First result: the six tiers returned by convert().
trs_output = sppasTranscription(self.name)
self._set_trs_metadata(trs_output, file_phons)
tier_cs, tier_key, tier_class, tier_struct, tier_shapes_transitions, tier_pos_transitions = self.convert(tier_phon, video_media)
# NOTE: the structure tier is appended before the keys and classes tiers,
# which is not the order in which convert() returns them.
trs_output.append(tier_cs)
trs_output.append(tier_struct)
trs_output.append(tier_key)
trs_output.append(tier_class)
trs_output.append(tier_shapes_transitions)
trs_output.append(tier_pos_transitions)
# Second result: filled from predict_where() when a sights file is given.
trs_coords = sppasTranscription(self.name)
self._set_trs_metadata(trs_coords, file_phons)
if file_sights is not None:
if video_media is not None:
trs_coords.add_media(video_media)
# Work on copies whose boundaries are adjusted with the 'fps' of the media.
adjusted_pos = tier_pos_transitions.copy()
self._set_media_to_tier(adjusted_pos, video_media, adjust=True)
trs_coords.append(adjusted_pos)
adjusted_shapes = tier_shapes_transitions.copy()
self._set_media_to_tier(adjusted_shapes, video_media, adjust=True)
trs_coords.append(adjusted_shapes)
# NOTE(review): trs_coords is rebound to the value returned by
# predict_where(), so the object filled just above is no longer
# referenced -- confirm this is intended.
trs_coords = self.__gencue.predict_where(file_sights, adjusted_pos, adjusted_shapes)
else:
trs_coords = self.__gencue.predict_where(file_sights, tier_pos_transitions, tier_shapes_transitions)
trs_coords.append(tier_phon)
for tier in trs_coords:
self._set_media_to_tier(tier, video_media, adjust=False)
if self._options['createvideo']:
if len(input_files) > 2:
if file_video is not None and file_sights is not None:
do_vid = True
self.make_video(file_video, trs_coords, output)
if do_vid is False:
# The status is given as a raw -1 here, whereas the other messages of
# this class use annots.error or annots.info.
self.logfile.print_message('The option to tag the video was enabled but no video/csv corresponding to the annotated file {:s} was found.'.format(input_files[0]), status=-1)
else:
logging.info('No sights available.')
# Without an output name, nothing is written.
if output is None:
return trs_output
outputs = list()
output_file = self.fix_out_file_ext(output)
if len(trs_output) > 0:
parser = sppasTrsRW(output_file)
parser.write(trs_output)
outputs.append(output_file)
else:
raise EmptyOutputError
# NOTE(review): this test appears to be always true here because of the
# earlier 'return trs_output' -- confirm.
if output is not None:
# Name of the second file: the output pattern is replaced by '-coords'.
coords_output = output_file.replace(self.get_output_pattern(), '-coords')
parser = sppasTrsRW(coords_output)
parser.write(trs_coords)
outputs.append(coords_output)
except Exception as e:
# Print the traceback, then let the exception propagate to the caller.
import traceback
# NOTE: giving the exception as the only argument requires Python 3.10+.
traceback.print_exception(e)
raise
return outputs
get_output_pattern
Pattern this annotation uses in an output filename.
View Source
def get_output_pattern(self):
    """Return the 'outputpattern' option, or '-cuedspeech' if it is not set."""
    default_pattern = '-cuedspeech'
    return self._options.get('outputpattern', default_pattern)
get_input_patterns
Pattern this annotation expects for its input filename.
View Source
def get_input_patterns(self):
    """Return the patterns this annotation expects for its input filenames.

    :return: (list of str) The 'inputpattern1', 'inputpattern2' and
        'inputpattern3' options; a default is used for each unset option.

    """
    defaults = (
        ('inputpattern1', '-palign'),
        ('inputpattern2', ''),
        ('inputpattern3', '-sights'),
    )
    return [self._options.get(name, fallback) for name, fallback in defaults]
get_input_extensions
Extensions that the annotation expects for its input filename.
Priority is given to video files, then image files.
View Source
@staticmethod
def get_input_extensions():
    """Return the extensions this annotation expects for its input filenames.

    Video extensions are listed before image ones in the media list.

    :return: (list) Three lists of extensions: annotated files, media
        files (video then image), and sights files ('.xra', '.csv').

    """
    video_and_image = list(sppasFiles.get_informat_extensions('VIDEO'))
    video_and_image.extend(sppasFiles.get_informat_extensions('IMAGE'))
    annotated = sppasFiles.get_informat_extensions('ANNOT_ANNOT')
    return [annotated, video_and_image, ['.xra', '.csv']]
get_hands_sets
List of all hands sets in the resources.cuedspeech folder.
Returns
- (list) A list of all hands sets available.
View Source
@staticmethod
def get_hands_sets() -> list:
    """Return the identifiers of the hands sets found by automatic loading.

    :return: (list) A list of all hands sets available.

    """
    resources = sppasHandResource()
    resources.automatic_loading()
    identifiers = resources.get_hand_sets_identifiers()
    return identifiers
get_hands_filters
List of all available hands filters.
Returns
- (list) A list of all hands filters.
View Source
@staticmethod
def get_hands_filters() -> list:
    """Return the hands filters, as given by the video tagger class.

    :return: (list) A list of all hands filters.

    """
    filters = CuedSpeechVideoTagger.get_hands_filters()
    return filters
Private functions
_set_media_to_tier
Set the media to the tier and adjust annotation boundaries.
Parameters
- tier
- media
- adjust
View Source
def _set_media_to_tier(self, tier, media, adjust=False):
    """Set the media to the tier and optionally adjust its boundaries.

    Nothing is done if the media is None. Boundaries are only adjusted if
    asked for and if the media has an 'fps' metadata.

    :param tier: The tier to which the media is assigned
    :param media: The media to assign, or None
    :param adjust: (bool) Adjust the annotation boundaries of the tier

    """
    if media is not None:
        tier.set_media(media)
        frame_rate = media.get_meta('fps', None)
        if adjust is True and frame_rate is not None:
            # The metadata value is given as is to the frame-based adjuster.
            self.__ann_on_media.fps = frame_rate
            self.__ann_on_media.adjust_boundaries(tier)
_set_trs_metadata
View Source
def _set_trs_metadata(self, trs, filename):
    """Set the source file and the language metadata of a transcription.

    The language code is the one of this annotation only if it has three
    characters; 'und' is used otherwise.

    :param trs: The transcription to which metadata are added
    :param filename: Value of the 'annotation_result_of' metadata

    """
    trs.set_meta('annotation_result_of', filename)
    trs.set_meta('language_iso', 'iso639-3')
    trs.set_meta('language_name_0', 'Undetermined')

    code = self.__lang if len(self.__lang) == 3 else 'und'
    trs.set_meta('language_code_0', code)
    trs.set_meta('language_url_0', 'https://iso639-3.sil.org/code/' + code)
Protected functions
__set_video_tagger
View Source
def __set_video_tagger(self):
    """Create the video tagger, or give the current keys to the existing one.

    If the tagger can't be enabled, it is set to None and a warning is
    logged.

    """
    try:
        if self.__tagger is not None:
            self.__tagger.set_cue_rules(self.__cued)
        else:
            self.__tagger = CuedSpeechVideoTagger(self.__cued)
    except sppasEnableFeatureError as e:
        self.__tagger = None
        logging.warning("Cued Speech Video Tagger can't be enabled: {:s}".format(str(e)))