if not args.quiet:
    setup_logging(1, None)

# ----------------------------------------------------------------------------
# Automatic Phonetization is here:
# ----------------------------------------------------------------------------

# The flag is True unless the "nounk" argument is set; it is forwarded to
# set_unk() in file mode and to phonetize() in stream mode.
unkopt = not args.nounk

# A missing or empty "map" argument means that no mapping file is given.
mapfile = args.map or None

if args.i:
    # File mode: an input was given, so the full annotation is run on it.
    p = sppasPhon(args.dict, mapfile)
    p.set_unk(unkopt)
    p.set_usestdtokens(False)
    p.run(args.i, args.o)
else:
    # Stream mode: each line read from the standard input is phonetized
    # and the result is printed on the standard output.
    pdict = DictPron(args.dict, unkstamp=UNKSTAMP, nodump=False)
    maptable = Mapping()
    if mapfile is not None:
        maptable = Mapping(mapfile)
    phonetizer = DictPhon(pdict, maptable)
    for line in sys.stdin:
        # Single parenthesised argument: same output as the print statement.
        print(phonetizer.phonetize(line, unkopt))

# ----------------------------------------------------------------------------
def run_phonetization(self, stepidx):
    """
    Execute the SPPAS-Phonetization program.

    For each entry of the ".wav" file list, the matching "-token" file is
    searched for and, if found, given to the phonetizer together with an
    output name made of the audio base name, "-phon" and the output format.

    @param stepidx: index used to get the step and its name from the parameters
    @return number of files processed successfully (0 if the file list is
    empty or if the phonetizer cannot be created)

    """
    # Initializations
    step = self.parameters.get_step(stepidx)
    stepname = self.parameters.get_step_name(stepidx)
    progress = self._progress
    progress.set_header(stepname)
    progress.update(0, "")
    succeeded = 0

    # Input file names are the ones with the ".wav" extension
    wavfiles = self.set_filelist(".wav")
    total = len(wavfiles)
    if total == 0:
        return 0

    # Create the annotation instance; give up on any failure
    logfile = self._logfile
    try:
        progress.set_text("Loading resources...")
        phonetizer = sppasPhon(step.get_langresource(), logfile=logfile)
    except Exception as err:
        if logfile is not None:
            logfile.print_message("%s\n" % err, indent=1, status=4)
        return 0

    # Execute the annotation for each file of the list
    for position, wavname in enumerate(wavfiles, 1):

        # The options are fixed again before each file
        phonetizer.fix_options(step.get_options())

        # Indicate the file to be processed
        progress.set_text(
            os.path.basename(wavname) + " (" + str(position) + "/" + str(total) + ")"
        )
        if logfile is not None:
            logfile.print_message(stepname + " of file " + wavname, indent=1)

        # Candidate input suffixes: the configured output format first,
        # then every multi-tiers extension
        suffixes = ['-token' + self.parameters.get_output_format()]
        suffixes.extend(
            '-token' + extension
            for extension in annotationdata.io.extensions_out_multitiers
        )
        tokname = self._get_filename(wavname, suffixes)

        if tokname is None:
            # No input file: report it and go on with the next one
            if logfile is not None:
                logfile.print_message(
                    "Failed to find a file with toketization. \n"
                    "Read the documentation for details.",
                    indent=2,
                    status=2,
                )
        else:
            # Fix the output file name
            outname = (
                os.path.splitext(wavname)[0]
                + '-phon'
                + self.parameters.get_output_format()
            )

            # Execute the annotation; a failure must not stop the batch
            try:
                phonetizer.run(tokname, outname)
            except Exception as err:
                import traceback
                print(traceback.format_exc())
                if logfile is not None:
                    logfile.print_message(
                        "%s for file %s\n" % (str(err), outname),
                        indent=2,
                        status=-1,
                    )
            else:
                succeeded += 1
                if logfile is not None:
                    logfile.print_message(outname, indent=2, status=0)

        # Indicate progress
        progress.set_fraction(float(position) / float(total))
        if logfile is not None:
            logfile.print_newline()

    # Indicate completed!
    progress.update(
        1,
        "Completed (%d files successfully over %d files).\n" % (succeeded, total),
    )
    progress.set_header("")

    return succeeded