def __build_timeslots(self):
    """Number every annotation boundary of every tier in time order.

    Each tier is first passed through ``point2interval`` (only when
    ``tier.IsPoint()`` is true, using ``ELAN_RADIUS``) and then through
    ``merge_overlapping_annotations``.  For each annotation of the
    resulting tier, the begin and end midpoints are rounded to 4 decimals
    and recorded as ``(time, annotation)`` pairs.

    The pairs are ordered by their time value and stored as the keys of
    ``self.timeSlotIds`` (an ``OrderedDict``); the associated values are
    the identifiers ``'ts1'``, ``'ts2'``, ... in that order.
    """
    from operator import itemgetter

    # Reset the mapping up front so it exists even if a tier fails below.
    self.timeSlotIds = OrderedDict()

    boundaries = []
    for tier in self:
        if tier.IsPoint():
            tier = point2interval(tier, ELAN_RADIUS)
        tier = merge_overlapping_annotations(tier)

        for annotation in tier:
            where = annotation.GetLocation()
            begin = round(where.GetBeginMidpoint(), 4)
            end = round(where.GetEndMidpoint(), 4)
            boundaries.extend([(begin, annotation), (end, annotation)])

    # Order on the time value only (stable sort), then hand out the
    # identifiers starting from 1.
    boundaries.sort(key=itemgetter(0))
    for rank, slot in enumerate(boundaries, 1):
        self.timeSlotIds[slot] = 'ts%s' % rank
def __format_tier(self, tierRoot, tier):
    """Fill the XML element of a tier with its attributes and annotations.

    :param tierRoot: XML element receiving the tier attributes and one
        ``ANNOTATION`` sub-element per annotation
    :param tier: the tier to serialise

    Attributes set on ``tierRoot``: ``LINGUISTIC_TYPE_REF``, ``TIER_ID``,
    the ``DEFAULT_LOCALE`` / ``PARTICIPANT`` metadata entries when present,
    and ``PARENT_REF`` when the hierarchy reports a parent tier.
    """
    tierRoot.set('LINGUISTIC_TYPE_REF', linguistic_type_from_tier(tier))
    tierRoot.set('TIER_ID', tier.GetName())

    # Copy over the optional metadata entries the tier actually carries.
    for attribute in ('DEFAULT_LOCALE', 'PARTICIPANT'):
        if attribute in tier.metadata.keys():
            tierRoot.set(attribute, tier.metadata[attribute])

    if tier.IsPoint():
        tier = point2interval(tier, ELAN_RADIUS)
    tier = merge_overlapping_annotations(tier)

    # NOTE(review): the lookup below receives the tier returned by
    # merge_overlapping_annotations, not the one passed in -- confirm that
    # get_parent() still resolves it.
    parentTier = self._hierarchy.get_parent(tier)

    if parentTier is None:
        # No parent: annotations are written as alignable annotations.
        for annotation in tier:
            node = ET.SubElement(tierRoot, 'ANNOTATION')
            outcome = self.__format_alignable_annotation(node, annotation)
            # Drop the freshly added element when nothing was created.
            if outcome is False:
                tierRoot.remove(node)
    else:
        tierRoot.set('PARENT_REF', parentTier.GetName())
        # previousRefId only lives for the duration of this loop.
        self.previousRefId = None
        for annotation in tier:
            node = ET.SubElement(tierRoot, 'ANNOTATION')
            self.__format_ref_annotation(node, annotation, parentTier)
        del self.previousRefId
def __write_tier(self, tier, filefp):
    """Write one line per annotation of a tier to an open file.

    Each line holds six space-separated fields:
    ``<wavname> <channel> <speaker> <begin> <end> <word>``.

    * wavname: ``tier.GetMedia().url`` or, when the tier has no media,
      ``self.GetName()``
    * channel: the tier name
    * speaker: ``tier.metadata['speaker']``, or ``'none'`` when absent
    * begin / end: midpoints of the annotation location
    * word: value of the annotation label

    :param tier: the tier to write; a point tier is first passed through
        ``point2interval`` with ``SCLITE_RADIUS``
    :param filefp: object with a ``write`` method receiving the lines
    """
    if tier.IsPoint():
        tier = point2interval(tier, SCLITE_RADIUS)

    # These values do not depend on the annotation: compute them once
    # instead of on every iteration.
    media = tier.GetMedia()
    wavname = media.url if media is not None else self.GetName()
    speaker = tier.metadata['speaker'] if 'speaker' in tier.metadata else 'none'
    channel = tier.GetName()

    for annotation in tier:
        location = annotation.GetLocation()
        begin = location.GetBeginMidpoint()
        end = location.GetEndMidpoint()
        word = annotation.GetLabel().GetValue()
        filefp.write('%s %s %s %s %s %s\n' % (
            wavname, channel, speaker, begin, end, word))
def write(self, filename, encoding='UTF-8'):
    """Write an Antx file.

    The XML tree is built under an ``AnnotationSystemDataSet`` root in
    this order: layers (one per tier), segments (one per annotation),
    media, then configurations.  Any exception raised while building or
    writing the tree propagates to the caller unchanged.

    :param filename: destination handed to ``ElementTree.write``
    :param encoding: encoding of the written XML (default ``'UTF-8'``)
    """
    root = ET.Element('AnnotationSystemDataSet')
    root.set('xmlns', 'http://tempuri.org/AnnotationSystemDataSet.xsd')

    # Write layers
    for tier in self:
        Antx.__format_tier(root, tier)

    # Write segments
    for tier in self:
        if tier.IsPoint():
            tier = point2interval(tier, ANTX_RADIUS)
        tier = merge_overlapping_annotations(tier)
        for ann in tier:
            self.__format_segment(root, tier, ann)

    # Write media (falsy entries are skipped)
    for media in self.GetMedia():
        if media:
            Antx.__format_media(root, media)

    # Write configurations: the required ones first, taking the value
    # from the metadata when available and the default otherwise...
    for key, value in ELT_REQUIRED_Configuration.items():
        Antx.__format_configuration(root, key, self.metadata.get(key, value))

    # ...then every remaining metadata entry.
    for key, value in self.metadata.items():
        if key not in ELT_REQUIRED_Configuration:
            Antx.__format_configuration(root, key, value)

    indent(root)

    tree = ET.ElementTree(root)
    # TODO: add standalone="yes" in the declaration
    # (but not available with ElementTree)
    tree.write(filename, encoding=encoding, xml_declaration=True, method="xml")
def __write_tier(self, tier, filefp, channel):
    """Write one line per annotation of a tier to an open file.

    Each line holds six space-separated fields:
    ``<wavname> <channel> <begin> <duration> <word> <score>``.

    * wavname: ``tier.GetMedia().url`` or, when the tier has no media,
      ``self.GetName()``
    * begin: begin midpoint of the annotation location
    * duration: value of the location duration
    * word: value of the annotation label
    * score: ``annotation.GetLabel().GetLabel().GetScore()``

    :param tier: the tier to write; a point tier is first passed through
        ``point2interval`` with ``SCLITE_RADIUS``
    :param filefp: object with a ``write`` method receiving the lines
    :param channel: value written as the second field of every line
    """
    if tier.IsPoint():
        tier = point2interval(tier, SCLITE_RADIUS)

    # The media name is the same for every annotation: resolve it once
    # instead of on every iteration.
    media = tier.GetMedia()
    wavname = media.url if media is not None else self.GetName()

    for annotation in tier:
        location = annotation.GetLocation()
        label = annotation.GetLabel()
        begin = location.GetBeginMidpoint()
        duration = location.GetDuration().GetValue()
        word = label.GetValue()
        score = label.GetLabel().GetScore()
        filefp.write('%s %s %s %s %s %s\n' % (
            wavname, channel, begin, duration, word, score))
def __build_timeslots(self):
    """Map every distinct annotation boundary time to a time-slot id.

    Each tier is first passed through ``point2interval`` (only when
    ``tier.IsPoint()`` is true, using ``ELAN_RADIUS``) and then through
    ``merge_overlapping_annotations``.  The begin and end midpoints of
    every annotation are rounded to 4 decimals; each distinct value is
    kept once, in order of first appearance.

    The result is stored in ``self.timeSlotIds`` as a dict mapping the
    time value to ``'t0'``, ``'t1'``, ... following that order.
    """
    timevalues = []
    # Companion set: constant-time membership test instead of scanning
    # the list for every boundary, while the list keeps the order.
    seen = set()

    for tier in self:
        if tier.IsPoint():
            tier = point2interval(tier, ELAN_RADIUS)
        tier = merge_overlapping_annotations(tier)

        for annotation in tier:
            location = annotation.GetLocation()
            # TODO (carried over from the original code): check that point
            # tiers need nothing beyond the conversion done above.
            begin = round(location.GetBeginMidpoint(), 4)
            end = round(location.GetEndMidpoint(), 4)
            for value in (begin, end):
                if value not in seen:
                    seen.add(value)
                    timevalues.append(value)

    self.timeSlotIds = {}
    for i, v in enumerate(timevalues):
        self.timeSlotIds[v] = 't%s' % i