예제 #1
0
 def process_corpus(self, cutMedia=True):
     """
     Convert the corpus and, unless disabled, cut its media files.

     The conversion itself is delegated to Txt2JSON.process_corpus.
     When cutMedia is true, the media directory (the 'media_dir'
     corpus setting if present, otherwise <corpus_dir>/<srcExt>) is
     walked and every file whose lowercased extension is listed in
     self.mediaExtensions is passed to self.mc.cut_media, together
     with self.usedMediaFiles and the privacy segments registered
     under that bare file name (an empty list if there are none).
     """
     Txt2JSON.process_corpus(self)
     if not cutMedia:
         return

     def media_rank(name):
         # 0: .avi/.mts/.mov, 1: .mp4, 2: everything else
         lowered = name.lower()
         if lowered.endswith(('.avi', '.mts', '.mov')):
             return 0
         if lowered.endswith('.mp4'):
             return 1
         return 2

     settings = self.corpusSettings
     mediaDir = os.path.join(settings['corpus_dir'], self.srcExt)
     if 'media_dir' in settings:
         mediaDir = settings['media_dir']

     for curDir, _subdirs, fnames in os.walk(mediaDir):
         # sorted() is stable, so files keep their listing order
         # inside each rank: video containers first, then .mp4,
         # then the rest.
         for baseName in sorted(fnames, key=media_rank):
             ext = os.path.splitext(baseName.lower())[1]
             if ext not in self.mediaExtensions:
                 continue
             # Privacy segments are keyed by the bare file name,
             # so look them up before building the absolute path.
             if baseName in self.privacySegments:
                 segments = self.privacySegments[baseName]
             else:
                 segments = []
             fullPath = os.path.abspath(os.path.join(curDir, baseName))
             print('Cutting media file', fullPath)
             self.mc.cut_media(fullPath,
                               usedFilenames=self.usedMediaFiles,
                               privacySegments=segments)
예제 #2
0
 def process_corpus(self, cutMedia=False):
     """
     Convert the corpus and optionally cut its media files.

     The conversion itself is delegated to Txt2JSON.process_corpus.
     If cutMedia is true, the ../<srcExt> directory (a relative path,
     so it is resolved against the current working directory) is
     walked and every file whose lowercased extension is listed in
     self.mediaExtensions is passed to self.mc.cut_media as an
     absolute path.
     """
     Txt2JSON.process_corpus(self)
     if not cutMedia:
         return
     srcDir = os.path.join('..', self.srcExt)
     for curDir, _subdirs, fnames in os.walk(srcDir):
         for baseName in fnames:
             _stem, ext = os.path.splitext(baseName.lower())
             if ext not in self.mediaExtensions:
                 continue
             fullPath = os.path.abspath(os.path.join(curDir, baseName))
             print('Cutting media file', fullPath)
             self.mc.cut_media(fullPath)
예제 #3
0
 def process_corpus(self):
     """
     Convert the corpus, then cut all of its media files.

     The conversion itself is delegated to Txt2JSON.process_corpus.
     Afterwards the <corpus_dir>/<srcExt> directory tree is walked
     and every file whose lowercased extension is listed in
     self.mediaExtensions is passed to self.mc.cut_media as an
     absolute path.
     This is the main function of the class.
     """
     Txt2JSON.process_corpus(self)
     srcDir = os.path.join(self.corpusSettings['corpus_dir'], self.srcExt)
     # Lazily yield absolute paths of media files in walk order.
     mediaPaths = (
         os.path.abspath(os.path.join(curDir, baseName))
         for curDir, _subdirs, fnames in os.walk(srcDir)
         for baseName in fnames
         if os.path.splitext(baseName.lower())[1] in self.mediaExtensions
     )
     for fullPath in mediaPaths:
         print('Cutting media file', fullPath)
         self.mc.cut_media(fullPath)
예제 #4
0
    def process_corpus(self, cutMedia=True, filenames=None, isToProcessCorpus=True):
        """
        Convert the corpus and/or cut its media files.

        cutMedia: if false, return right after the (optional)
            conversion without looking at any media files.
        filenames: optional collection of bare file names. When it is
            non-empty, only files whose name is contained in it are
            considered for cutting; otherwise all files are considered.
        isToProcessCorpus: if true, run Txt2JSON.process_corpus first;
            if false, skip the conversion and only cut media.

        Media files are looked up in the <corpus_dir>/<srcExt>
        directory tree. A file counts as media if its lowercased
        extension is listed in self.mediaExtensions; each such file is
        passed to self.mc.cut_media as an absolute path.
        """
        if isToProcessCorpus:
            Txt2JSON.process_corpus(self)
        if not cutMedia:
            return
        srcDir = os.path.join(self.corpusSettings['corpus_dir'], self.srcExt)
        for path, _dirs, files in os.walk(srcDir):
            for fname in files:
                # An empty or missing filter means "no restriction".
                if filenames and fname not in filenames:
                    continue
                fileExt = os.path.splitext(fname.lower())[1]
                if fileExt not in self.mediaExtensions:
                    continue
                fullPath = os.path.abspath(os.path.join(path, fname))
                print('Cutting media file', fullPath)
                self.mc.cut_media(fullPath)