Exemplo n.º 1
0
def generate_data(sub_names, mov_names):
    """Collect speech/noise features from paired subtitle and movie files.

    For every ``(subtitle, movie)`` pair, the ``Dialogue:`` lines found after
    the ``[Events]`` header of the subtitle file are turned into
    ``(start, end, flag)`` tuples, sorted by start time, and overlapping
    tuples are merged.  The movie is then pushed through
    ``ffmpeg_decoder`` -> ``spectrum`` -> ``feature_extractor``; the extractor
    is given the intervals plus the shared ``speech`` and ``noise`` lists.

    Args:
        sub_names: Iterable of subtitle file paths.
        mov_names: Iterable of movie file paths, paired with ``sub_names`` by
            position (``zip`` drops unpaired trailing entries).

    Returns:
        The tuple ``(speech, noise)`` -- the two lists handed to every
        ``feature_extractor`` (presumably filled in by it; confirm against
        its implementation).

    Raises:
        ValueError: If a subtitle file contains no ``[Events]`` header.
    """
    speech = []
    noise = []

    for sub_name, mov_name in zip(sub_names, mov_names):
        intervels = []
        in_events = False

        # ``with`` closes the subtitle file even if parsing raises.
        with open(sub_name) as fsub:
            for line in fsub:
                if not in_events:
                    # Everything before the [Events] header is ignored.
                    in_events = line.strip() == '[Events]'
                    continue
                if not line.startswith('Dialogue:'):
                    continue

                # Only fields 1, 2, 3 and 9 are used; limiting the split keeps
                # commas inside the last field from producing extra pieces.
                fields = line.strip().split(',', 9)

                # The flag is True only when field 3 is 'Default' and field 9
                # does not open with '{' (presumably: plain spoken dialogue
                # without an override tag -- confirm against the ASS spec).
                flag = (fields[3] == 'Default'
                        and not fields[9].startswith('{'))
                intervels.append(
                    (parse_time(fields[1]), parse_time(fields[2]), flag))

        if not in_events:
            # The previous readline() loop never terminated in this case.
            raise ValueError(
                'no [Events] section found in subtitle file: %r' % (sub_name,))

        # ``key=`` is accepted by both Python 2 and 3 (``cmp=`` is 2-only);
        # the sort stays stable on the start time.
        intervels.sort(key=lambda item: item[0])

        # Single pass merge: an interval that starts strictly before the
        # previous one ends is folded into it.  The merged end is the later
        # of the two ends, so a fully contained interval cannot shorten the
        # span, and the flag stays True only if both flags were True.
        merged = []
        for start, end, flag in intervels:
            if merged and merged[-1][1] > start:
                prev_start, prev_end, prev_flag = merged[-1]
                merged[-1] = (prev_start, max(prev_end, end),
                              prev_flag and flag)
            else:
                merged.append((start, end, flag))

        dec = ffmpeg_decoder(mov_name, SAMPLE_RATE)
        spec = spectrum(dec.ostream.get_handle(), squared=False)
        feat = feature_extractor(spec.ostream.get_handle(), merged, speech,
                                 noise)
        dec.start()
        spec.start()
        feat.start()

        # Wait for the last stage before moving on to the next pair.
        feat.join()

    return speech, noise
Exemplo n.º 2
0
def _read_dialogue_intervals(sub_name):
    """Return the unsorted ``(start, end, flag)`` tuples of one subtitle file.

    Only ``Dialogue:`` lines that come after the ``[Events]`` header are
    used.  ``flag`` is True when field 3 equals ``'Default'`` and field 9
    does not start with ``'{'``.

    Raises:
        ValueError: If the file has no ``[Events]`` header.
    """
    found_events = False
    result = []
    # The context manager closes the file on every exit path.
    with open(sub_name) as fsub:
        for raw in fsub:
            if not found_events:
                found_events = raw.strip() == '[Events]'
                continue
            if not raw.startswith('Dialogue:'):
                continue
            # maxsplit=9: commas inside the final field must not split it.
            parts = raw.strip().split(',', 9)
            flag = parts[3] == 'Default' and not parts[9].startswith('{')
            result.append((parse_time(parts[1]), parse_time(parts[2]), flag))
    if not found_events:
        # Scanning for the header used to loop forever at end of file.
        raise ValueError(
            'no [Events] section found in subtitle file: %r' % (sub_name,))
    return result


def _merge_overlapping(intervels):
    """Merge overlapping intervals of a list already sorted by start time.

    Two neighbours are merged when the earlier one ends strictly after the
    later one starts.  The merged interval keeps the earlier start, the later
    of the two ends, and a flag that is True only if both flags were True.
    """
    merged = []
    for start, end, flag in intervels:
        if merged and merged[-1][1] > start:
            last_start, last_end, last_flag = merged[-1]
            # max(): a contained interval must not pull the end backwards.
            merged[-1] = (last_start, max(last_end, end), last_flag and flag)
        else:
            merged.append((start, end, flag))
    return merged


def generate_data(sub_names, mov_names):
    """Build the shared ``speech`` and ``noise`` lists from media pairs.

    Each subtitle file is parsed into flagged time intervals, which are
    sorted by start time and merged where they overlap.  The matching movie
    is then run through ``ffmpeg_decoder`` -> ``spectrum`` ->
    ``feature_extractor``, with the extractor receiving the intervals and the
    two result lists.

    Args:
        sub_names: Iterable of subtitle file paths.
        mov_names: Iterable of movie file paths, matched to ``sub_names`` by
            position.

    Returns:
        ``(speech, noise)`` -- the lists passed to every
        ``feature_extractor`` (presumably populated by it; verify there).

    Raises:
        ValueError: If a subtitle file has no ``[Events]`` header.
    """
    speech = []
    noise = []

    for sub_name, mov_name in zip(sub_names, mov_names):
        intervels = _read_dialogue_intervals(sub_name)
        # ``key=`` replaces the Python-2-only ``cmp=`` argument.
        intervels.sort(key=lambda item: item[0])
        intervels = _merge_overlapping(intervels)

        dec = ffmpeg_decoder(mov_name, SAMPLE_RATE)
        spec = spectrum(dec.ostream.get_handle(), squared=False)
        feat = feature_extractor(spec.ostream.get_handle(), intervels, speech,
                                 noise)
        dec.start()
        spec.start()
        feat.start()

        # Block until the final stage has finished this pair.
        feat.join()

    return speech, noise
Exemplo n.º 3
0
import core.audio_decoder as ad
import core.ffmpeg_decoder as fd
import core.sub_generator as sg
import core.spectrum as spec
from core.naive_vad2 import *

# Switch the debug flag on (``constants`` is not imported by name here;
# presumably it arrives through the star import above -- TODO confirm).
constants.DEBUG = True

# The demo video, and the .srt path handed to the subtitle generator.
media_path = '../data/demo.mp4'
subtitle_path = '../data/demo.srt'

# Chain the stages: each one reads from the previous stage's output handle.
dec = fd.ffmpeg_decoder(media_path)
vad = naive_vad(dec.ostream.get_handle())
sub = sg.sub_generator(vad.ostream.get_handle(), media_path, subtitle_path)

# Start the stages in pipeline order.
for stage in (dec, vad, sub):
    stage.start()

# Wait for the last stage to finish.
sub.join()
Exemplo n.º 4
0
    def OnOpen(self, evt):
        """Let the user choose a media file, play it and start subtitling.

        Calls ``playerpanel.OnStop``, shows a file dialog, loads the chosen
        file into the player, shows the ``SelectDialog`` for the recognition
        and translation choices, starts playback, and finally launches the
        decoder -> VAD -> subtitle generator pipeline plus a spectrum stage
        whose output handle is given to ``self.spec``.

        If the file dialog is cancelled the method returns right after the
        stop call; nothing else is touched.

        Args:
            evt: The triggering event (unused).
        """
        panel = self.playerpanel
        panel.OnStop(None)

        # Create a file dialog opened in the current home directory, where
        # you can display all kind of files, having as title "Choose a file".
        dlg = wx.FileDialog(self, "Choose a file", user.home, "", "*.*",
                            wx.OPEN)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                # Cancelled: previously execution fell through and started a
                # pipeline from unset (or stale) title/mediapath/select_dialog.
                return
            dirname = dlg.GetDirectory()
            filename = dlg.GetFilename()
        finally:
            # Destroy the dialog exactly once, on every path.
            dlg.Destroy()

        # Load the chosen file into the player.
        panel.mediapath = unicode(os.path.join(dirname, filename))
        panel.Media = panel.Instance.media_new(panel.mediapath)
        panel.player.set_media(panel.Media)

        # If an error was encountered while retrieving the title, use the
        # file name instead.
        title = panel.player.get_title()
        if title == -1:
            title = filename

        # Set the window id where to render VLC's video output.
        # FIXME: this should be made cross-platform.
        panel.player.set_hwnd(panel.videopanel.GetHandle())
        # Set the volume slider to the current volume.
        panel.volslider.SetValue(panel.player.audio_get_volume() / 2)
        panel.title = title

        # Ask which recognition / translation languages to use, then play.
        panel.select_dialog = SelectDialog(None, "Choice")
        panel.select_dialog.ShowModal()
        self.OnPlay(None)

        # NOTE(review): the pause presumably gives the player time to start
        # so that get_length() returns a usable value -- confirm.
        time.sleep(0.5)
        leng = panel.player.get_length()
        self.spec.GetLength(self.spec, length=leng)
        panel.timer.Start(100)

        self.SetTitle("%s - AutoSub" % panel.title)

        # Map the dialog's language names to language codes; a code stays
        # None when the corresponding feature is switched off.
        lan = {"English": "en", "Chinese": "zh-cn", "Japanese": "ja"}
        choice = panel.select_dialog
        lang_from = lan[choice.sorcelan] if choice.isrecognize else None
        lang_to = lan[choice.targetlan] if choice.istranslate else None

        source = panel.mediapath
        # The subtitle file sits next to the media file.  splitext() only
        # strips a real extension of the file name, so a path without one
        # (or with a dot in a directory name) is no longer truncated.
        target = os.path.splitext(source)[0] + '.srt'
        self.subtitle = target

        self.currentfile = None

        self.dec = fd.ffmpeg_decoder(source, output_rate=8000)
        self.vad = naive_vad(self.dec.ostream.get_handle())
        self.sub = sg.sub_generator(self.vad.ostream.get_handle(), source,
                                    target, lang_from=lang_from,
                                    lang_to=lang_to)

        self.ohandle = self.sub.ostream.get_handle()
        # A second consumer of the decoder output feeds the spectrum display.
        self.specd = sp.spectrum(self.dec.ostream.get_handle(),
                                 window_size=1024)
        handle = self.specd.ostream.get_handle()

        self.dec.start()
        self.vad.start()
        self.sub.start()
        self.specd.start()
        self.spec.OpenData(self.spec, handle)
Exemplo n.º 5
0
    def OnOpen(self, evt):
        """Open a media file chosen by the user and start subtitling it.

        Steps: call ``playerpanel.OnStop``, ask for a file, load it into the
        player, show the ``SelectDialog`` for the recognition / translation
        choices, start playback, then launch the processing pipeline.

        When the file dialog is cancelled, the method returns after the stop
        call without changing anything else.

        Args:
            evt: The triggering event (unused).
        """
        self.playerpanel.OnStop(None)

        chosen = self._choose_media_file()
        if chosen is None:
            # Cancelled: the old code carried on and built a pipeline from
            # unset (or stale) title/mediapath/select_dialog attributes.
            return
        dirname, filename = chosen

        self.playerpanel.mediapath = unicode(os.path.join(dirname, filename))
        self.playerpanel.Media = self.playerpanel.Instance.media_new(
            self.playerpanel.mediapath)
        self.playerpanel.player.set_media(self.playerpanel.Media)

        # If an error was encountered while retrieving the title, fall back
        # to the file name.
        title = self.playerpanel.player.get_title()
        if title == -1:
            title = filename

        # Set the window id where to render VLC's video output.
        # FIXME: this should be made cross-platform.
        self.playerpanel.player.set_hwnd(
            self.playerpanel.videopanel.GetHandle())
        # Set the volume slider to the current volume.
        self.playerpanel.volslider.SetValue(
            self.playerpanel.player.audio_get_volume() / 2)
        self.playerpanel.title = title

        # Let the user choose recognition and translation, then play.
        self.playerpanel.select_dialog = SelectDialog(None, "Choice")
        self.playerpanel.select_dialog.ShowModal()
        self.OnPlay(None)

        # NOTE(review): presumably waits for the player to start so that
        # get_length() is meaningful -- confirm.
        time.sleep(0.5)
        leng = self.playerpanel.player.get_length()
        self.spec.GetLength(self.spec, length=leng)
        self.playerpanel.timer.Start(100)

        self.SetTitle("%s - AutoSub" % self.playerpanel.title)

        # Language names shown by the dialog -> language codes.  A code is
        # left as None when the matching feature is disabled.
        lan = {"English": "en", "Chinese": "zh-cn", "Japanese": "ja"}
        lang_from = None
        lang_to = None
        if self.playerpanel.select_dialog.isrecognize:
            lang_from = lan[self.playerpanel.select_dialog.sorcelan]
        if self.playerpanel.select_dialog.istranslate:
            lang_to = lan[self.playerpanel.select_dialog.targetlan]

        source = self.playerpanel.mediapath
        # splitext() removes only a genuine file extension; slicing at
        # rfind('.') cut off the last character when there was no dot and
        # truncated the path at a dot inside a directory name.
        target = os.path.splitext(source)[0] + '.srt'
        self.subtitle = target

        self.currentfile = None

        self._start_subtitle_pipeline(source, target, lang_from, lang_to)

    def _choose_media_file(self):
        """Show the file dialog; return ``(dirname, filename)`` or None."""
        # Create a file dialog opened in the current home directory, where
        # you can display all kind of files, having as title "Choose a file".
        dlg = wx.FileDialog(self, "Choose a file", user.home, "", "*.*",
                            wx.OPEN)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return None
            return dlg.GetDirectory(), dlg.GetFilename()
        finally:
            # The dialog is destroyed exactly once on every path.
            dlg.Destroy()

    def _start_subtitle_pipeline(self, source, target, lang_from, lang_to):
        """Create and start the decoder, VAD, subtitle and spectrum stages."""
        self.dec = fd.ffmpeg_decoder(source, output_rate=8000)
        self.vad = naive_vad(self.dec.ostream.get_handle())
        self.sub = sg.sub_generator(self.vad.ostream.get_handle(),
                                    source,
                                    target,
                                    lang_from=lang_from,
                                    lang_to=lang_to)

        self.ohandle = self.sub.ostream.get_handle()
        # A second consumer of the decoder output feeds the spectrum display.
        self.specd = sp.spectrum(self.dec.ostream.get_handle(),
                                 window_size=1024)
        handle = self.specd.ostream.get_handle()

        self.dec.start()
        self.vad.start()
        self.sub.start()
        self.specd.start()
        self.spec.OpenData(self.spec, handle)
Exemplo n.º 6
0
            else:
                print sys.argv[i]
                source = sys.argv[i]
                i = i + 1
                
        if not source:
            raise ValueError()
        if not target:
            target = source[:source.rfind('.')] + '.srt'
    except:
        print 'Usage: autosub [options...] <input video>'
        print 'Example: autosub -r ja -t zh-cn demo.mp4'
        print 'Options:'
        print ' -r <language code> enable speech recognition and set source language to <language code>'
        print ' -t <language code> enable translation and set target language to <language code>'
        print ' -o <output>.srt specify output subtitle file name (default: same as input)'
        print 'Language codes:'
        print ' Chinese     zh-cn'
        print ' English     en'
        print ' Japanese    ja'
        exit()
    dec = fd.ffmpeg_decoder(source)
    vad = naive_vad(dec.ostream.get_handle())
    sub = sg.sub_generator(vad.ostream.get_handle(), source, target, lang_from = lang_from, lang_to = lang_to)
    dec.start()
    vad.start()
    sub.start()
    
    sub.join()

Exemplo n.º 7
0
            
            if line[3] != 'Default' or line[9].startswith('{\\an8'):
                intervels.append( (parse_time(line[1]), parse_time(line[2]), False) )
            else:
                intervels.append( (parse_time(line[1]), parse_time(line[2]), True) )

    intervels.sort(cmp=lambda x,y: cmp(x[0], y[0]))
    
    i = 0
    while i < len(intervels)-1:
        if intervels[i][1] > intervels[i+1][0]:
            intervels[i] = (intervels[i][0], intervels[i+1][1], intervels[i][2] and intervels[i+1][2])
            del intervels[i+1]
        else:
            i = i + 1
    
    dec = ffmpeg_decoder(mov_name, SAMPLE_RATE)
    spec = spectrum(dec.ostream.get_handle(), squared = False)
    feat = feature_extractor(spec.ostream.get_handle(), intervels, speech, noise)
    dec.start()
    spec.start()
    feat.start()
    
    feat.join()
    
# Shuffle both collections in place: speech first, then noise.
for samples in (speech, noise):
    random.shuffle(samples)

# Show every speech sample as a 5 x 128 image of its log squared magnitude.
for f in speech:
    power = abs(f) ** 2
    plt.imshow(np.log(power).reshape((5, 128)))
    plt.show()
    
Exemplo n.º 8
0
import core.audio_decoder as ad
import core.ffmpeg_decoder as fd
import core.sub_generator as sg
import core.spectrum as spec
from core.naive_vad2 import *

# Enable the debug flag (``constants`` is not imported by name in this
# snippet; presumably it is exposed by the star import -- TODO confirm).
constants.DEBUG = True

# Input video and the .srt path given to the subtitle generator.
video_file = '../data/demo.mp4'
srt_file = '../data/demo.srt'

# Build the chain; every stage consumes the previous stage's output handle.
dec = fd.ffmpeg_decoder(video_file)
vad = naive_vad(dec.ostream.get_handle())
sub = sg.sub_generator(vad.ostream.get_handle(), video_file, srt_file)

# Launch the stages from first to last.
for worker in (dec, vad, sub):
    worker.start()

# Block until the final stage is done.
sub.join()