Example #1
def alignment_score(ref, hyp):
    if len(hyp) == 0: return 0
    n, m = float(len(ref)), float(len(hyp))
    alignment = align.alignment(ref, hyp)
    n_matches = len(alignment)
    if n_matches == 0: return 0
    return sum(abs(i/n-j/m) for i, j, _, _, _ in alignment)/float(n_matches)
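
For context, a minimal runnable sketch of how alignment_score can be exercised. The real align module is not shown in these examples; the stub below is hypothetical, and the 5-tuple shape (ref_index, hyp_index, ref_word, hyp_word, score) is inferred from the unpacking above.

class align:
    # Hypothetical stand-in for the align module used throughout these examples.
    @staticmethod
    def alignment(ref, hyp):
        # Toy rule: match identical tokens with score 1.0. Each entry mirrors
        # the 5-way unpacking done in alignment_score.
        return [(i, j, r, h, 1.0)
                for i, r in enumerate(ref)
                for j, h in enumerate(hyp) if r == h]

ref = "the cat sat on the mat".split()
hyp = "the cat sat on a mat".split()
# Mean normalized positional drift of the matched tokens (0 = perfectly aligned)
print(alignment_score(ref, hyp))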
Example #2
def blaster(input, pident, qcovs):
    '''
    Iterating over a FASTA file, runs a blastp of each query against a
    multi-FASTA file that acts as the subject. It also builds a phylogenetic
    tree (.nw and .png formats) and searches for protein domains in each of
    the sequences returned by the BLAST (both query and subject).
    '''
    dir_results = fl.main_folder()
    for record in SeqIO.parse(input, "fasta"):
        dir_query = fl.query_folder(dir_results, record.id)

        # BLAST and filtering
        bls.blast(record, qcovs, pident, dir_query, "multifasta.txt")
        blast_aligned = record.id + "aligned.fasta"
        blast_fasta = record.id + "blast_fasta.fa"

        # Alignment and tree building, if the alignment contains more than one sequence
        try:
            al.alignment(blast_fasta, blast_aligned)
            nw_tree = dir_query + record.id + ".nw"
            al.tree(blast_aligned, nw_tree)
            tree_img = dir_query + record.id + ".png"
            if save_pngtree.get() == 1:
                al.tree_drawer(nw_tree, tree_img)
        except Exception:
            print("Insufficient number of sequences to build a tree")

        # Protein domain search, if a valid database is available
        try:
            domain_file = dir_query + record.id + "_domains.txt"
            pst.domain_scanner(blast_fasta, domain_file, prosite_db)
            print("Base de datos de PROSITE encontrada")
        except:
            print(
                'No se ha introducido ninguna base de datos Prosite, no se buscarán dominios.\n'
            )

        # Remove temporary files
        os.remove(blast_aligned)
        os.remove(blast_fasta)
    return
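
blaster relies on several module-level names that are not defined in the snippet (fl, bls, al, pst, SeqIO, os, save_pngtree, prosite_db). A minimal, hypothetical sketch of the two globals it reads directly, plus an invocation with placeholder arguments:

import tkinter as tk

root = tk.Tk()
save_pngtree = tk.IntVar(master=root, value=1)  # 1 -> also render .png trees
prosite_db = "prosite.dat"                      # hypothetical PROSITE database path

# Placeholder FASTA input; pident and qcovs are the blastp identity and
# query-coverage thresholds forwarded to bls.blast.
blaster("queries.fasta", pident=30.0, qcovs=50.0)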
Example #3
def features_student(wav_t, wav_s, midi_t, annotation_txt_s, plot_align=False):
    # load wav
    y_t, sr_t = librosa.load(wav_t, sr=None)
    y_s, sr_s = librosa.load(wav_s, sr=None)

    # parse the teacher's MIDI score and the student's Sonic Visualiser annotation
    score_t = mid_note_parser(midi_t)
    score_s = sv_score_parser(annotation_txt_s)

    # align teacher and student scores, output format:
    # [ [[teacher_onset_0, teacher_pitch_0, teacher_duration_0, teacher_number_0], [student_onset_0, student_pitch_0, student_duration_0, student_number_0]],
    # [[teacher_onset_1, teacher_pitch_1, teacher_duration_1, teacher_number_1], [student_onset_1, student_pitch_1, student_duration_1, student_number_1]], ... ]
    list_score_aligned = alignment(y_t,
                                   y_s,
                                   sr_t,
                                   sr_s,
                                   score_t,
                                   score_s,
                                   plot=plot_align)

    # segment the student score according to the rules
    score_s_segmented = notes_segmenation(score_s, list_score_aligned)

    # find the indices of list_score_aligned corresponding to the student score segments
    segment_start_end = indices_segment_start_end(list_score_aligned,
                                                  score_s_segmented)

    # stretch the student score notes in each segment so that the tempo matches the teacher's.
    # segment the aligned score into sub lists (segments)
    list_score_aligned_seg, _, list_tempo_s = streching_student_notes(
        list_score_aligned, segment_start_end)

    # calculate features for each segment
    list_features = []
    for ii in range(len(list_score_aligned_seg)):
        list_features_seg = all_features(list_score_aligned_seg[ii],
                                         list_tempo_s[ii])
        list_features += list_features_seg

    # list_features_student ignores missing notes
    list_features_student = []
    for ii in range(len(list_score_aligned)):
        if list_score_aligned[ii][1]:
            list_features_student.append(list_features[ii])
    return list_features_student
Example #4
# fragment: tail of streching_student_notes
        else:
            list_tempo_t.append(None)
            list_tempo_s.append(None)

    return list_score_aligned_seg, list_tempo_t, list_tempo_s


if __name__ == "__main__":
    y_t, sr_t = librosa.load("./test/seconds(t).wav", sr=None)
    y_s, sr_s = librosa.load("./test/seconds1(s).wav", sr=None)

    # parse the teacher's MIDI score and the student's Sonic Visualiser annotation
    score_t = mid_note_parser("./test/seconds(t).mid")
    score_s = sv_score_parser("./test/seconds1(s).txt")

    list_score_aligned = alignment(y_t, y_s, sr_t, sr_s, score_t, score_s)

    score_s_segmented = notes_segmenation(score_s, list_score_aligned)

    segment_start_end = indices_segment_start_end(list_score_aligned,
                                                  score_s_segmented)

    list_score_aligned_seg, list_tempo_t, list_tempo_s = streching_student_notes(
        list_score_aligned, segment_start_end)

    for seg in list_score_aligned_seg:
        print(seg)

    print(list_tempo_t)
    print(list_tempo_s)
Example #5
# 3. render the MIDI to wav
save_midi_2_audio(filename_xml_midi, filename_xml_wav)

# video processing in pipeline.sh

# 8. alignment
score_t = sv_score_parser(filename_xml_txt)
score_s = mid_note_parser(filename_video_midi)

y_t, sr_t = librosa.load(filename_xml_wav, sr=None)
y_s, sr_s = librosa.load(filename_video_wav, sr=None)

list_score_aligned = alignment(y_t,
                               y_s,
                               sr_t,
                               sr_s,
                               score_t,
                               score_s,
                               plot=False)

with open(filename_alignment, "w") as f:
    for note_t_s in list_score_aligned:
        if not note_t_s[1]:
            note_t_s[1] = [None, None, None, None]
        if not note_t_s[0]:
            note_t_s[0] = [None, None, None, None]
        f.write("\t".join(str(v) for v in note_t_s[0] + note_t_s[1]) + "\n")
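
A counterpart for consumers of this file: the sketch below parses the eight tab-separated columns back into the [[teacher_note, student_note], ...] shape, mapping the literal string "None" back to None. Field values are kept as strings, since the note field types are not shown in these examples.

def read_alignment(path):
    # Inverse of the writer above: one aligned note pair per line,
    # 4 teacher fields followed by 4 student fields, tab-separated.
    list_score_aligned = []
    with open(path) as f:
        for line in f:
            fields = [None if v == "None" else v
                      for v in line.rstrip("\n").split("\t")]
            list_score_aligned.append([fields[:4], fields[4:]])
    return list_score_aligned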
Example #6
def word_similarity(ref, hyp):
    alignment = align.alignment(ref, hyp)
    n_matches = len(alignment)
    if n_matches == 0: return 0
    return sum(score for _, _, _, _, score in alignment)/float(n_matches)
Example #7
def max_chunk(ref, hyp):
    n_matches = len(align.alignment(ref, hyp))
    if n_matches == 0: return 0
    return max(len(chunk) for chunk in align.chunks(ref, hyp))/float(n_matches)
Example #8
def fragmentation(ref, hyp):
    n_matches = len(align.alignment(ref, hyp))
    if n_matches == 0: return 0
    n_chunks = sum(1 for c in align.chunks(ref, hyp))
    return n_chunks/float(n_matches)
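
For intuition, assuming METEOR-style chunking by align.chunks (not shown in these examples): if ref = "a b c d" and hyp = "a b d c" yield four matches grouped into three contiguous chunks ("a b", "d", "c"), then fragmentation returns 3/4 = 0.75 and max_chunk above returns 2/4 = 0.5.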
Example #9
def n_matches(ref, hyp):
    return len(align.alignment(ref, hyp))
Example #10
def align_recall(ref, hyp):
    if len(ref) == 0: return 0
    return len(align.alignment(ref, hyp))/float(len(ref))
Example #11
def align_precision(ref, hyp):
    if len(hyp) == 0: return 0
    return len(align.alignment(ref, hyp))/float(len(hyp))
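
The last two metrics pair naturally into an F-measure. The helper below is not one of the original examples, just a sketch built on align_precision and align_recall as defined above.

def align_f1(ref, hyp):
    # Harmonic mean of align_precision and align_recall; 0 when either is 0.
    p, r = align_precision(ref, hyp), align_recall(ref, hyp)
    return 2*p*r/(p + r) if p + r > 0 else 0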