Пример #1
0
 def resample(self, file):
     """Convert a wav file with Sox to 16 kHz, mono, 16 bit.

     The converted audio is written next to the input under the name
     ``<file>_sampled``; both locations are resolved with
     ``common.file_path``.

     :param file: name of the wav file to convert
     :return: name (not path) of the resampled file
     """
     transformer = sox.Transformer()
     transformer.convert(samplerate=16000, n_channels=1, bitdepth=16)

     resampled_file = '{0}_sampled'.format(file)
     target_path = common.file_path(resampled_file)
     source_path = common.file_path(file)
     transformer.build(source_path, target_path)

     # The result of the check is not used here; presumably
     # common.file_exists reports a missing file itself - TODO confirm.
     common.file_exists(target_path)
     return resampled_file
Пример #2
0
def __rename_extensions(list_files, list_extensions, list_renamed,
                        list_skipped, conflict_mode, extension_target):
    """
        Core method to rename the file extensions.

        Only the rename plan is built here. Every planned rename is
        appended to 'list_renamed' as a list of three paths (source,
        temporary path with a ".__temp__" suffix, target); every file that
        is left alone is appended to 'list_skipped'.

        list_files       -- paths of the files to process
        list_extensions  -- not used by this function
        list_renamed     -- rename plan, extended in place
        list_skipped     -- skipped paths, extended in place
        conflict_mode    -- "rename" appends "_<n>" to the base name until
                            the target is free, "skip" skips the file when
                            the target is taken; any other value plans the
                            rename without a conflict check
        extension_target -- new extension, without the leading separator

        Returns the tuple (list_renamed, list_skipped).
    """
    if not list_files:
        # Nothing to plan, and no first entry to probe the file system with.
        return list_renamed, list_skipped

    fs_case = common.get_fs_case_sensitivity(os.path.dirname(list_files[0]))
    ext_new = os.path.extsep + extension_target

    for file_path in list_files:
        num = 1
        file_name = file_path.split(os.path.sep)[-1]
        file_ext = os.path.splitext(file_name)[1]

        if file_ext == "":
            list_skipped.append(file_path)
            continue

        # The extension is always the tail of the path. Cut it off there
        # instead of using str.replace(), which would also rewrite the
        # same text inside directory names or earlier in the file name.
        file_stem = file_path[:-len(file_ext)]
        file_newpath = file_stem + ext_new
        if file_path == file_newpath:
            list_skipped.append(file_path)
            continue

        if conflict_mode == "rename":
            while common.file_exists(file_newpath, list_renamed, fs_case):
                # On a case-insensitive file system a pure case change of
                # the extension "collides" with the file itself; that is
                # not a real conflict.
                if not fs_case and \
                   file_path.lower() == file_newpath.lower():
                    break
                file_newpath = file_stem + "_" + str(num) + ext_new
                num += 1
        elif conflict_mode == "skip":
            if common.file_exists(file_newpath, list_renamed, fs_case):
                if fs_case or file_path.lower() != file_newpath.lower():
                    list_skipped.append(file_path)
                    continue

        if os.path.exists(file_path):
            list_renamed.append(
                [file_path, file_newpath + ".__temp__", file_newpath])

    return list_renamed, list_skipped
Пример #3
0
 def export_audio_chunks(self, file, chunks):
     """
     Export every chunk of audio to its own wav file.

     Chunk number ``i`` is written to the location that
     ``common.file_path`` resolves for ``<file>_<i>.wav``.

     :param file: file name with extension
     :param chunks: small chunks of wav
     :return: new exported file names
     """
     chunk_names = []
     for i, chunk in enumerate(chunks):
         chunk_name = "{0}_{1}.wav".format(file, i)
         # Export to the chunk's own path. Resolving the path from `file`
         # made every chunk overwrite the same output file, and the
         # returned names pointed at files that were never written.
         chunk_path = common.file_path(chunk_name)
         chunk.export(chunk_path, format="wav")
         common.file_exists(chunk_path)
         chunk_names.append(chunk_name)
     return chunk_names
 def spk_train_init(file):
     """
     Initialise a speaker model from the UBM with LIUM's MTrainInit.

     TODO: Generate train data
     This improves the Speech Recognition so that the system can recognize
     speakers recorded from past data.
     Train the speaker model using the Gaussian mixture model (GMM) model.

     Reads ``<seg>.seg`` and ``<seg>.ubm.gmm`` and writes
     ``<seg>.init.gmm``, where ``<seg>`` is ``common.seg_path`` of the file
     name without its extension.

     :param file: file name with extension
     """
     name, _ = common.split_file_ext(file)
     seg_base = common.seg_path(name)
     init_gmm_path = '{}.init.gmm'.format(seg_base)
     args = [
         common.JAVA_EXE, '-Xmx256m', '-cp', common.LIUM_PATH,
         'fr.lium.spkDiarization.programs.MTrainInit',
         '--sInputMask={}.seg'.format(seg_base),
         '--fInputMask={}'.format(common.file_path(file)),
         # Models go through the "t" masks. A second --sInputMask would
         # replace the segmentation input given above.
         '--tInputMask={}.ubm.gmm'.format(seg_base),
         '--emInitMethod=copy',
         '--tOutputMask={}'.format(init_gmm_path), name
     ]
     common.call_subproc(args)
     # Check the path the model was actually written to.
     common.file_exists(init_gmm_path)
 def spk_train_map(file):
     """
     Adapt the initial speaker model with LIUM's MTrainMAP.

     TODO: Generate train data
     This improves the Speech Recognition so that the system can recognize
     speakers recorded from past data.
     Train the speaker model using the Maximum a posteriori (MAP)
     adaptation of GMM.

     Reads ``<seg>.ident.seg`` and ``<seg>.init.gmm`` and writes
     ``<seg>.gmm``, where ``<seg>`` is ``common.seg_path`` of the file name
     without its extension.

     :param file: file name with extension
     """
     name, _ = common.split_file_ext(file)
     seg_base = common.seg_path(name)
     gmm_path = '{}.gmm'.format(seg_base)
     args = [
         common.JAVA_EXE, '-Xmx256m', '-cp', common.LIUM_PATH,
         'fr.lium.spkDiarization.programs.MTrainMAP',
         '--sInputMask={}.ident.seg'.format(seg_base),
         '--fInputMask={}'.format(common.file_path(file)),
         # Models go through the "t" masks. A second --sInputMask would
         # replace the segmentation input given above.
         '--tInputMask={}.init.gmm'.format(seg_base),
         '--emCtrl=1,5,0.01', '--varCtrl=0.01,10.0',
         '--tOutputMask={}'.format(gmm_path), name
     ]
     common.call_subproc(args)
     # Check the path the model was actually written to.
     common.file_exists(gmm_path)
Пример #6
0
    def audio_segmentation(self,
                           file,
                           start_list,
                           end_list,
                           concat=False,
                           file_name=None):
        """
        Breaks the file into small parts based on time slices and puts it
        back together if the concat option is True.

        Slices whose start or end lies outside the audio are dropped.

        :param file: filename with extension
        :param start_list: list of ints representing start time ms
        :param end_list: list of ints representing end time ms
        :param concat: option to merge the slices into one file
        :param file_name: suffix for the exported name ('seg' when omitted)
        :return: list of new file names; empty when no slice was usable
        """
        file_names = []
        baseName, ext = common.split_file_ext(file)
        seg_name = '{0}_{1}.{2}'.format(baseName,
                                        file_name if file_name else 'seg', ext)
        audio = AudioSegment.from_file(common.file_path(file), "wav")
        # len() of a pydub AudioSegment is already the duration in
        # milliseconds. Multiplying it by 1000 let slices far beyond the
        # end of the audio pass the range check below.
        duration_in_ms = len(audio)
        audio_segs = [
            audio[start:end] for start, end in izip(start_list, end_list)
            if (duration_in_ms >= start >= 0) and (duration_in_ms >= end > 0)
        ]
        if not audio_segs:
            return file_names

        if not concat:
            return self.export_audio_chunks(seg_name, audio_segs)

        seg_path = common.seg_path(seg_name)
        audio_concat = reduce(lambda x, y: x + y, audio_segs)
        audio_concat.export(seg_path, format="wav")
        file_names.append(seg_name)
        common.file_exists(seg_path)
        return file_names
 def diarization(self, file):
     """Take a wav file in the right format and build a segmentation file.

     The seg file stores the speaker, start time, duration, gender and also
     additional info for speech recognition. After the LIUM run, the parts
     of every speaker are concatenated into one audio file per speaker and
     handed to ``build_speakers_transcript``.

     :param file: file name with extension
     :return: result of ``self.build_speakers_transcript`` for the
              mapping speaker id -> concatenated audio file name
     """
     name, _ = common.split_file_ext(file)
     seg_file = '{}.seg'.format(name)
     seg_path = common.seg_path(seg_file)
     args = [
         common.JAVA_EXE,
         '-Xmx{}m'.format(common.JAVA_MEM),
         '-jar',
         common.LIUM_PATH,
         '--fInputMask={}'.format(common.file_path(file)),  # Input file
         '--sOutputMask={}'.format(seg_path),  # Output file
         '--doCEClustering',  # Add cluster for each speaker
         name
     ]
     log.info('Processing diariazation for {}'.format(file))
     common.call_subproc(args)
     common.file_exists(seg_path)
     log.info('File {} successfully diarized!'.format(file))
     data = self.build_speakers_segments(seg_file, name)
     # Put together audio files for each speaker's part
     sp_file_names = {}
     for speaker in data:
         speaker_id_file = speaker['speaker_id']
         file_names = self.speechClassifier.audioProcessor.audio_segmentation(
             file,
             speaker['start'],
             speaker['end'],
             concat=True,
             file_name=speaker_id_file)
         if not file_names:
             log.warn('Waring! Failed to perform audio segmentation for {}'.
                      format(speaker_id_file))
             # No audio was produced for this speaker. Indexing the empty
             # result would raise IndexError right after the warning.
             continue
         sp_file_names[speaker_id_file] = file_names[0]
     return self.build_speakers_transcript(sp_file_names)
Пример #8
0
def __fill_num_gaps(list_files, separator, padding, list_renamed,
                    list_skipped, fs_case, step):
    """
        Core method to fill numeration gaps.

        The gaps reported by '__get_num_gaps' are handed out, highest
        number first, to the entries of 'list_skipped' (sorted in reverse
        order). A file whose gap name is free is planned in 'list_renamed'
        as [source, None, target]; a file whose gap name is already taken
        is put back at the end of 'list_skipped'. The loop ends when the
        gaps or the skipped files run out.

        Both lists are modified in place and returned as the tuple
        (list_renamed, list_skipped).
    """
    list_gaps = __get_num_gaps(list_files, separator, padding, step)

    if len(list_gaps) > 0:
        list_gaps.sort(reverse=True)
        list_skipped.sort(reverse=True)

        while len(list_gaps) > 0 and len(list_skipped) > 0:
            file_path = list_skipped.pop(0)
            list_path = file_path.split(os.path.sep)
            file_dir = list_path[-2]
            file_name = list_path[-1]
            file_ext = os.path.splitext(file_name)[1]

            num = list_gaps.pop(0)
            file_num = str(num).rjust(int(padding), "0")
            file_newname = file_dir + separator + file_num + file_ext

            # Swap only the trailing file name. str.replace() would also
            # rewrite directory components that contain the same text.
            file_newpath = \
                file_path[:len(file_path) - len(file_name)] + file_newname

            if common.file_exists(file_newpath, list_renamed, fs_case):
                list_skipped.append(file_path)
            else:
                list_renamed.append([file_path, None, file_newpath])

    return list_renamed, list_skipped
Пример #9
0
def compare(file1, file2, output=sys.stdout):
    """
    Compare two files line by line and write a report of the differences.

    Identical lines are skipped unless they start with "case". Every other
    line is split at the first "##": the text in front of it selects a
    comparison function from the module-level ``rules`` mapping, and the
    text behind it (separator plus one further character skipped) is passed
    to that function for both files.

    :param file1: path of the first file (reported as "cp")
    :param file2: path of the second file (reported as "ip")
    :param output: writable stream for the report; it is closed after all
                   lines were compared, unless it is ``sys.stdout``
    :return: tuple ``(common.result_pass | common.result_fail, message)``
    """

    def dump_lines(special_word):
        # Reads i, l1, l2 and sep_line from the enclosing call.
        # NOTE(review): saved_line is not assigned anywhere in this
        # function; presumably a module-level name - confirm.
        output.write(sep_line)
        output.write("line: " + str(i) + " " + special_word + "\n")
        output.write("case: " + saved_line + "\n")
        output.write("cp: " + l1 + "\n")
        output.write("ip: " + l2 + "\n")

    # "== False" is kept on purpose: a helper returning None must not be
    # treated as a failure.
    if common.file_exists(file1) == False:
        return common.result_fail, "%s not found" % file1
    if common.file_exists(file2) == False:
        return common.result_fail, "%s not found" % file2

    # Read both files up front and close them right away, so that neither
    # the early return below nor an exception in a rule leaks a handle.
    with open(file1) as f1:
        with open(file2) as f2:
            ls1 = f1.readlines()
            ls2 = f2.readlines()

    len1 = len(ls1)
    len2 = len(ls2)

    if len1 != len2:
        return common.result_fail, "different file length: %d %d" % (len1, len2)

    fail_cnt = 0
    sep_char = "##"
    sep_line = "-" * 80 + "\n"
    lsep = len(sep_char)

    for i, (raw1, raw2) in enumerate(zip(ls1, ls2)):
        l1 = remove_newline(raw1)
        l2 = remove_newline(raw2)

        if l1 == l2 and not l1.startswith("case"):
            continue

        pos1 = l1.find(sep_char)
        pos2 = l2.find(sep_char)
        r1 = l1[:pos1]
        r2 = l2[:pos2]

        if r1 != r2:
            dump_lines("different rule")
            fail_cnt += 1
            continue

        c1 = l1[pos1 + lsep + 1:]
        c2 = l2[pos2 + lsep + 1:]

        if rules[r1](c1, c2) == False:
            dump_lines("different output")
            fail_cnt += 1

    # Closing the default stream would break every later print of the
    # process, so only caller-supplied streams are closed.
    if output is not sys.stdout:
        output.close()

    if fail_cnt == 0:
        return common.result_pass, "pass"
    return common.result_fail, "diff count: %d" % fail_cnt
Пример #10
0
 def test_file_exists(self):
     """Call ``common.file_exists`` on ``out``.

     ``out`` is defined outside this method. The return value is not
     asserted here; presumably ``common.file_exists`` signals a missing
     file itself - TODO confirm.
     """
     common.file_exists(out)
Пример #11
0
def compare(file1, file2, output=sys.stdout):
    """
    Compare two files line by line and write a report of the differences.

    Identical lines are skipped unless they start with "case". Every other
    line is split at the first "##": the text in front of it selects a
    comparison function from the module-level ``rules`` mapping, and the
    text behind it (separator plus one further character skipped) is passed
    to that function for both files.

    :param file1: path of the first file (reported as "cp")
    :param file2: path of the second file (reported as "ip")
    :param output: writable stream for the report; it is closed after all
                   lines were compared, unless it is ``sys.stdout``
    :return: tuple ``(common.result_pass | common.result_fail, message)``
    """

    def dump_lines(special_word):
        # Reads i, l1, l2 and sep_line from the enclosing call.
        # NOTE(review): saved_line is not assigned anywhere in this
        # function; presumably a module-level name - confirm.
        output.write(sep_line)
        output.write("line: " + str(i) + " " + special_word + "\n")
        output.write("case: " + saved_line + "\n")
        output.write("cp: " + l1 + "\n")
        output.write("ip: " + l2 + "\n")

    # "== False" is kept on purpose: a helper returning None must not be
    # treated as a failure.
    if common.file_exists(file1) == False:
        return common.result_fail, "%s not found" % file1
    if common.file_exists(file2) == False:
        return common.result_fail, "%s not found" % file2

    # Read both files up front and close them right away, so that neither
    # the early return below nor an exception in a rule leaks a handle.
    with open(file1) as f1:
        with open(file2) as f2:
            ls1 = f1.readlines()
            ls2 = f2.readlines()

    len1 = len(ls1)
    len2 = len(ls2)

    if len1 != len2:
        return common.result_fail, "different file length: %d %d" % (len1, len2)

    fail_cnt = 0
    sep_char = "##"
    sep_line = "-" * 80 + "\n"
    lsep = len(sep_char)

    for i, (raw1, raw2) in enumerate(zip(ls1, ls2)):
        l1 = remove_newline(raw1)
        l2 = remove_newline(raw2)

        if l1 == l2 and not l1.startswith("case"):
            continue

        pos1 = l1.find(sep_char)
        pos2 = l2.find(sep_char)
        r1 = l1[:pos1]
        r2 = l2[:pos2]

        if r1 != r2:
            dump_lines("different rule")
            fail_cnt += 1
            continue

        c1 = l1[pos1 + lsep + 1:]
        c2 = l2[pos2 + lsep + 1:]

        if rules[r1](c1, c2) == False:
            dump_lines("different output")
            fail_cnt += 1

    # Closing the default stream would break every later print of the
    # process, so only caller-supplied streams are closed.
    if output is not sys.stdout:
        output.close()

    if fail_cnt == 0:
        return common.result_pass, "pass"
    return common.result_fail, "diff count: %d" % fail_cnt
Пример #12
0
def __rename_files_keep_order(list_files, list_renamed, list_skipped,
                              separator, padding, ignore_file_ext=False,
                              custom_name=None, step=1, order_by=None):
    """
        Core method to rename the base name of files based on the name of the
        directory where they are stored in using "keep-order" rename mode.

        Files whose name already starts with "<directory><separator>" are
        processed first, all other files afterwards. Each file gets the
        name "<directory or custom_name><separator><number><extension>";
        the number is raised by 'step' until the temporary path
        ("<target>.__temp__") is free according to 'common.file_exists'.
        Unless 'ignore_file_ext' is set, numbering restarts whenever the
        extension differs from the one of the previously processed file.

        Planned renames are appended to 'list_renamed' as
        [source, temporary path, target]; files that already carry their
        target name are appended to 'list_skipped'. A 'padding' of 0 means
        "as many digits as the number of files has". 'order_by' is not
        used by this function.

        Returns the tuple (list_renamed, list_skipped).
    """
    if not list_files:
        # Nothing to plan, and no first entry to probe the file system with.
        return list_renamed, list_skipped

    file_newpath = ""
    temp_file_ext = ""
    list_new = []
    list_ren = []
    num = 0

    fs_case = common.get_fs_case_sensitivity(os.path.dirname(list_files[0]))

    if padding == 0:
        padding = len(str(len(list_files)))

    # Already numbered files go first so that they keep their order.
    for file_path in list_files:
        list_path = file_path.split(os.path.sep)
        if list_path[-1].startswith(list_path[-2] + separator):
            list_ren.append(file_path)
        else:
            list_new.append(file_path)

    for file_path in list_ren + list_new:
        list_path = file_path.split(os.path.sep)
        file_name = list_path[-1]
        # Everything in front of the file name, trailing separator included.
        file_base = file_path[:len(file_path) - len(file_name)]

        if custom_name is None:
            file_dir = list_path[-2]
        else:
            file_dir = custom_name

        file_ext = os.path.splitext(file_name)[1]

        if not ignore_file_ext and file_ext != temp_file_ext:
            num = 0

        file_temppath = file_path
        temp_file_ext = file_ext
        # NOTE(review): file_newpath keeps its value from the previous
        # file when this loop body never runs - confirm that
        # common.file_exists is always true for an existing source file.
        while common.file_exists(file_temppath, list_renamed, fs_case):
            num += step
            file_num = str(num).rjust(int(padding), "0")
            file_newname = file_dir + separator + file_num + file_ext
            # Swap only the trailing file name. str.replace() would also
            # rewrite directory components that contain the same text.
            file_newpath = file_base + file_newname
            if file_newpath not in list_skipped:
                file_temppath = file_newpath + ".__temp__"

        if os.path.exists(file_path):
            if file_path == file_newpath:
                list_skipped.append(file_path)
            else:
                list_renamed.append([file_path, file_temppath, file_newpath])

    return list_renamed, list_skipped
Пример #13
0
def __rename_files_fill(list_files, list_renamed, list_skipped, separator,
                        padding, fill_gaps=False, ignore_file_ext=False,
                        custom_name=None, step=1):
    """
        Core method to rename the base name of files based on the name of the
        directory where they are stored in using one of the "fill" rename
        modes (such as "fill-gaps" and "rename-new").

        Files that already follow the
        "<directory or custom_name><separator><number>" scheme are skipped:
        with a 'step' above 1 when the number is a multiple of 'step',
        otherwise when the number has exactly 'padding' digits. Every other
        file gets the first number (raised by 'step') whose name is free
        according to 'common.file_exists'. Unless 'ignore_file_ext' is
        set, numbering restarts at every file.

        With 'fill_gaps' the plan is built in temporary lists, passed
        through '__fill_num_gaps' and then appended to the given lists;
        otherwise the given lists are extended directly. Planned renames
        have the form [source, None, target]. A 'padding' of 0 means "as
        many digits as the number of files has".

        Returns the tuple (list_renamed, list_skipped).
    """
    if not list_files:
        # Nothing to plan, and no first entry to probe the file system with.
        return list_renamed, list_skipped

    num = 0

    fs_case = common.get_fs_case_sensitivity(os.path.dirname(list_files[0]))

    if fill_gaps:
        obj_ren = []
        obj_skip = []
    else:
        obj_ren = list_renamed
        obj_skip = list_skipped

    if padding == 0:
        padding = len(str(len(list_files)))

    for file_path in list_files:
        list_path = file_path.split(os.path.sep)
        file_name = list_path[-1]
        # Everything in front of the file name, trailing separator included.
        file_base = file_path[:len(file_path) - len(file_name)]

        if custom_name is None:
            file_dir = list_path[-2]
        else:
            file_dir = custom_name

        file_ext = os.path.splitext(file_name)[1]

        file_prefix = file_dir + separator
        if file_name.startswith(file_prefix):
            # Strip the leading prefix only; str.replace() would also drop
            # later occurrences of the same text inside the name.
            file_number = file_name[len(file_prefix):].split(".")[0]
            try:
                if step > 1:
                    if int(file_number) % step == 0:
                        obj_skip.append(file_path)
                        continue
                else:
                    if int(padding) == len(file_number):
                        obj_skip.append(file_path)
                        continue
            except (ValueError, TypeError):
                # Not a usable number: treat the file as not numbered yet.
                pass

        if not ignore_file_ext:
            num = 0

        file_newpath = file_path
        while common.file_exists(file_newpath, obj_ren, fs_case) or \
              common.file_exists(file_newpath, obj_skip, fs_case):
            num += step
            file_num = str(num).rjust(int(padding), "0")
            file_newname = file_dir + separator + file_num + file_ext
            # Swap only the trailing file name. str.replace() would also
            # rewrite directory components that contain the same text.
            file_newpath = file_base + file_newname

        if os.path.exists(file_path):
            if file_path == file_newpath:
                obj_skip.append(file_path)
            else:
                obj_ren.append([file_path, None, file_newpath])

    if fill_gaps:
        obj_ren, obj_skip = \
            __fill_num_gaps(list_files, separator, padding,
                            obj_ren, obj_skip, fs_case, step)
        list_renamed.extend(obj_ren)
        list_skipped.extend(obj_skip)

    return list_renamed, list_skipped
Пример #14
0
        if static_case:
            base_name_target = __static_case(base_name_target, case,
                                             list_lower, list_mixed,
                                             list_title,
                                             list_upper).rstrip()

        file_newpath = file_path.replace(base_name + file_ext,
                                         base_name_target + file_ext)
        if file_path == file_newpath:
            list_skipped.append(file_path)
            continue

        if conflict_mode == "rename":
            while True:
                if common.file_exists(file_newpath, list_renamed, fs_case):
                    if not fs_case:
                        if file_path.lower() == file_newpath.lower():
                            break
                    file_newpath = \
                        file_path.replace(base_name,
                                          base_name_target + "_" + str(num))
                    num += 1
                else:
                    break
        elif conflict_mode == "skip":
            if common.file_exists(file_newpath, list_renamed, fs_case):
                if not fs_case:
                    if not file_path.lower() == file_newpath.lower():
                        list_skipped.append(file_path)
                        continue
Пример #15
0
def get_status(task_id, delay=0):
    """
        Get the status of the Erfr process with the given task ID.
    """
    task_file = common.get_task_file(task_id)
    pv.intrange(task_id, "task ID", 1, common.get_max_tasks(), False)
    pv.intvalue(delay, "delay", True, True, False)

    delay = int(delay)
    task_id = int(task_id)
    progress_key = True
    process_type = ""
    process_type_list = ["encryption", "decryption", "key generation"]
    file_input_path = ""
    file_input_size = 0
    file_key_path = ""
    file_key_size = 0
    file_output_path = ""
    file_output_size = 0
    valid_type = False

    if not common.file_exists(task_file):
        common.exception("No process is running with the given task ID.")

    dict_contents = __read_content(task_file)
    process_type = dict_contents["process_type"]
    if process_type == "":
        common.exception("The process type cannot be empty.")

    for item in process_type_list:
        if process_type == item:
            valid_type = True

    if not valid_type:
        common.exception("The process type '%s' is not supported." \
                         % process_type)

    file_input_path = dict_contents["file_input_path"]
    file_input_size = dict_contents["file_input_size"]

    if "crypt" in process_type:
        file_key_path = dict_contents["file_key_path"]
        file_key_size = dict_contents["file_key_size"]
        file_output_path = dict_contents["file_output_path"]
        file_output_size = dict_contents["file_output_size"]
        if process_type == "decryption":
            progress_key = False

    print
    print "Monitoring Erfr %s process with task ID %s." % \
              (process_type, task_id)
    if delay > 0:
        if delay == 1:
            print "Refreshing the process status every second."
        else:
            print "Refreshing the process status every %s seconds." % \
                  str(delay)

    print
    print "-" * 78
    if file_key_path == "" and file_output_path == "":
        __monitor_file(task_file, file_input_path, file_input_size,
                       "File name", delay, True)
    else:
        __monitor_file(task_file, file_input_path, file_input_size,
                       "Input file", delay, False)
        print
        __monitor_file(task_file, file_key_path, file_key_size, "Key file",
                       delay, progress_key)
        print
        __monitor_file(task_file, file_output_path, file_output_size,
                       "Output file", delay, True)
    print "-" * 78
    print

    if delay > 0:
        print "Process finished."
Пример #16
0
def __monitor_file(task_file, file_path, file_size, description, delay,
                   progress):
    """
        Monitor the file size of the given file.

        Prints a short header for the file (name and directory, or only
        the description, depending on the "display_file_info" setting)
        followed by its expected size. When 'progress' is set, the current
        size of the file is polled and reported through '__progress' until
        it reaches the expected size.

        task_file   -- task file of the process that writes the file
        file_path   -- path of the file to monitor
        file_size   -- expected final size in bytes (converted with int())
        description -- label printed in front of the file name
        delay       -- 0 reports the progress once and returns; a value
                       above 0 keeps polling and animating the indicator
        progress    -- when false, only the header is printed
    """
    file_name = os.path.basename(file_path)
    # Directory part of the path. NOTE: str.rstrip() removes a set of
    # trailing characters, not a suffix; it stops at the separator in
    # front of the file name because that character is not in the name.
    file_dir = __remove_duplicate_chars( \
        file_path.rstrip(file_name).rstrip(os.path.sep), os.path.sep)
    file_size = int(file_size)

    file_size_init = 0
    file_size_current = 0
    file_size_perc = 0

    # Indicator characters for the three states of the monitored process.
    chars_running = ["-", "\\", "|", "/"]
    chars_stalled = ["?", " "]
    chars_missing = ["X", " "]
    # Seconds to sleep per pass while running / while stalled.
    delay_running = 0.1
    delay_stalled = 0.6

    progress_chars = chars_running
    progress_count = 0
    stalled = False
    wait = delay_running

    # The configuration value is read as a string ("1" by default) and
    # converted to a boolean.
    display_file_info = \
        bool(int(common.global_config(["KeyGenerator", "Monitor"],
                                      ["display_file_info"], "1")))

    if display_file_info:
        print("%s:" % description).ljust(16, " ") + file_name
        print("File path:").ljust(16, " ") + file_dir
    else:
        print "%s" % description

    # Below 1000 bytes only the plain byte count is printed.
    if file_size < 1000:
        print("File size:").ljust(16, " ") + ("%s bytes total" % file_size)
    else:
        size_round = __format_size(file_size)
        print ("File size:").ljust(16, " ") + \
              ("%s (%s bytes total)" % (size_round, file_size))

    if not progress:
        return

    # Best effort: any error here (e.g. a division by a size of zero)
    # leaves the initial values of zero in place.
    try:
        file_size_init = file_size
        file_size_current = common.get_file_size(file_path)
        file_size_perc = int((file_size_current * 100) / file_size)
    except:
        pass

    # Time waited since the last refresh of the percentage, counted in
    # steps of 0.1 per pass.
    count = 0
    while file_size_current < file_size:
        try:
            file_size_current = common.get_file_size(file_path)
        except:
            pass

        if file_size_current == file_size:
            break

        file_exists_task = common.file_exists(task_file)
        file_exists_input = common.file_exists(file_path)

        if not file_exists_task or not file_exists_input:
            # The task file or the monitored file is gone: show the
            # "missing" or "stalled" indicator and poll more slowly.
            if not file_exists_input:
                progress_chars = chars_missing
            else:
                progress_chars = chars_stalled
            stalled = True
            wait = delay_stalled
        else:
            progress_chars = chars_running
            wait = delay_running
            if stalled:
                # Both files are back after a stall: make sure the task
                # file still records the input size this monitor was
                # started with.
                dict_contents = __read_content(task_file)
                if not int(dict_contents["file_input_size"]) == \
                       file_size_init:
                    print "-" * 78
                    common.exception("Task mismatch. Process cancelled.")
                stalled = False

        # Advance the indicator, wrapping around at the end of the
        # current character list.
        progress_count += 1
        if progress_count >= len(progress_chars):
            progress_count = 0

        # Without a delay the progress is reported once and not followed.
        if delay == 0:
            __progress(file_size_perc, None, True)
            return

        if delay > 0:
            if file_size_perc < 100:
                __progress( \
                    file_size_perc, progress_chars[progress_count], False)

                time.sleep(wait)
                # Keep animating until 'delay' has been counted up, then
                # fall through to refresh the percentage below.
                if count < delay:
                    count += 0.1
                    continue
                else:
                    count = 0

        # Refresh the percentage; while stalled the last value is kept.
        try:
            file_size_current = common.get_file_size(file_path)
            if not stalled:
                file_size_perc = int((file_size_current * 100) / file_size)
        except:
            pass

    __progress(100, " ", True)