def test_to_file(self):
    """Write a TextGrid to a temporary file and read it back per codec.

    For each codec a grid with two interval tiers (both named 'tier') and
    one 'TextTier' point tier is built, including labels that contain a
    non-ASCII character. The grid is then written in normal, short and
    binary mode, and after each write the file is loaded again by
    constructing a ``TextGrid`` from its path.
    """
    for codec in ['utf-8', 'latin_1', 'mac_roman']:
        self.tg = TextGrid(xmax=20)
        tier1 = self.tg.add_tier('tier')
        tier1.add_interval(1, 2, 'i1')
        tier1.add_interval(2, 3, 'i2')
        tier1.add_interval(4, 5, 'i3')

        tier4 = self.tg.add_tier('tier')
        tier4.add_interval(1, 2, u'i1ü')
        tier4.add_interval(2.0, 3, 'i2')
        tier4.add_interval(4, 5.0, 'i3')

        tier2 = self.tg.add_tier('tier2', tier_type='TextTier')
        tier2.add_point(1, u'p1ü')
        tier2.add_point(2, 'p1')
        tier2.add_point(3, 'p1')

        # mkstemp() hands back an already-open OS-level descriptor along
        # with the path; close it right away so it is not leaked on every
        # loop iteration.
        fd, tempf = tempfile.mkstemp()
        os.close(fd)
        try:
            # Normal mode
            self.tg.to_file(tempf, codec=codec)
            TextGrid(tempf, codec=codec)
            # Short mode
            self.tg.to_file(tempf, codec=codec, mode='s')
            TextGrid(tempf, codec=codec)
            # Binary mode
            self.tg.to_file(tempf, mode='b')
            TextGrid(tempf)
        finally:
            # Remove the temp file even when a write or read step fails.
            os.remove(tempf)
def test_to_file(codec, tmp_path):
    """Write a TextGrid under ``tmp_path`` in each mode and load it again.

    The grid holds two interval tiers that share the name 'tier' and one
    'TextTier' point tier; some labels contain a non-ASCII character. The
    first write passes the target as a ``pathlib.Path``, the later ones as
    a plain string.
    """
    grid = TextGrid(xmax=20)

    plain = grid.add_tier('tier')
    for start, stop, label in ((1, 2, 'i1'), (2, 3, 'i2'), (4, 5, 'i3')):
        plain.add_interval(start, stop, label)

    # Same tier name again, this time mixing int/float bounds and a
    # non-ASCII label.
    accented = grid.add_tier('tier')
    for start, stop, label in ((1, 2, u'i1ü'), (2.0, 3, 'i2'), (4, 5.0, 'i3')):
        accented.add_interval(start, stop, label)

    points = grid.add_tier('tier2', tier_type='TextTier')
    for time, label in ((1, u'p1ü'), (2, 'p1'), (3, 'p1')):
        points.add_point(time, label)

    target = str(tmp_path / 'test')

    # Normal mode (target given as a Path object)
    grid.to_file(pathlib.Path(target), codec=codec)
    TextGrid(target, codec=codec)

    # Short mode
    grid.to_file(target, codec=codec, mode='s')
    TextGrid(target, codec=codec)

    # Binary mode
    grid.to_file(target, mode='b')
    TextGrid(target)
def write_to_text_grid(words: List[dict], sentences: List[List[dict]], duration: float):
    """Write results to Praat TextGrid.

    Because we are using pympi, we can also export to Elan EAF.

    Args:
        words (List[dict]): Word alignments; each dict is read for its
            "start", "end" and "text" keys.
        sentences (List[List[dict]]): One list of word dicts per sentence.
            The sentence interval runs from the first word's "start" to the
            last word's "end", and its label is the words' "text" values
            joined with single spaces. Empty sentences are skipped.
        duration (float): Duration of the entire audio, passed to the
            TextGrid as ``xmax``.

    Returns:
        TextGrid: Praat TextGrid with a "Sentence" tier and a "Word" tier.
    """
    text_grid = TextGrid(xmax=duration)
    sentence_tier = text_grid.add_tier(name="Sentence")
    word_tier = text_grid.add_tier(name="Word")

    for sentence in sentences:
        if not sentence:
            # A sentence without words has no start/end to index; skipping
            # it avoids an IndexError on sentence[0].
            continue
        sentence_tier.add_interval(
            begin=sentence[0]["start"],
            end=sentence[-1]["end"],
            value=" ".join(w["text"] for w in sentence),
        )

    for word in words:
        word_tier.add_interval(
            begin=word["start"], end=word["end"], value=word["text"]
        )

    return text_grid
def toTextGrid(self, filePath, excludedTiers=None, includedTiers=None):
    """Convert the elan file to praat's TextGrid.

    filePath      -- The output file path, - for stdout
    excludedTiers -- Tiers to exclude (default: none)
    includedTiers -- Tiers to include, if empty all tiers are included

    Returns 0 if successful, or 1 (after emitting a warning) when
    pympi.Praat cannot be imported.
    """
    try:
        from pympi.Praat import TextGrid
    except ImportError:
        warnings.warn(
            'Please install the pympi.Praat module from the py'
            'mpi module found at https://github.com/dopefishh/pympi')
        return 1

    # None stands in for "no filter" so the signature carries no shared
    # mutable default objects.
    excluded = excludedTiers or ()
    included = includedTiers or ()

    tgout = TextGrid()
    for tier in self.tiers:
        if tier in excluded or (included and tier not in included):
            continue
        currentTier = tgout.addTier(tier)
        for interval in self.getAnnotationDataForTier(tier):
            # Skip annotations whose begin and end coincide.
            if interval[0] == interval[1]:
                continue
            # Begin/end are divided by 1000 before being handed to Praat.
            currentTier.addInterval(
                interval[0] / 1000.0, interval[1] / 1000.0, interval[2])
    tgout.tofile(filePath)
    return 0
def setUp(self):
    """Create a fresh TextGrid with xmax=20 before each test."""
    self.tg = TextGrid(xmax=20)
    # unittest spells this attribute ``maxDiff``; the previous lowercase
    # ``maxdiff`` was an unrelated attribute and left diff truncation on.
    self.maxDiff = None
def test_to_file(self):
    """Check that stream output is stable across a write/read/write cycle.

    For every codec a grid is built, written to an in-memory stream,
    parsed back from that stream, and written again; both outputs must be
    equal. This is done for normal, short and binary mode in that order,
    each mode starting from the grid re-parsed in the previous one.
    """
    # (stream factory, extra keyword arguments for to_stream) per mode
    variants = (
        (io.StringIO, {}),             # Normal mode
        (io.StringIO, {'mode': 's'}),  # Short mode
        (io.BytesIO, {'mode': 'b'}),   # Binary mode
    )
    interval_sets = (
        ((1, 2, 'i1'), (2, 3, 'i2'), (4, 5, 'i3')),
        ((1, 2, u'i1ü'), (2.0, 3, 'i2'), (4, 5.0, 'i3')),
    )

    for codec in ['utf-8', 'utf-16', 'latin_1', 'mac_roman']:
        self.tg = TextGrid(xmax=20)
        for intervals in interval_sets:
            interval_tier = self.tg.add_tier('tier')
            for begin, end, label in intervals:
                interval_tier.add_interval(begin, end, label)

        point_tier = self.tg.add_tier('tier2', tier_type='TextTier')
        for time, label in ((1, u'p1ü'), (2, 'p1'), (3, 'p1')):
            point_tier.add_point(time, label)

        for make_stream, extra in variants:
            first = make_stream()
            self.tg.to_stream(first, codec=codec, **extra)
            first.seek(0)
            expected = first.read()
            # Rewind so the parser sees the stream from the start.
            first.seek(0)
            self.tg = TextGrid(first, codec=codec, stream=True)

            second = make_stream()
            self.tg.to_stream(second, codec=codec, **extra)
            second.seek(0)
            actual = second.read()
            self.assertEqual(actual, expected)