def do_IS_speakers():
    """Compare 'IS' speakers' recordings to the model talker, per word.

    For every word in ``Words`` the model recording
    ``modeltalker_<word>.wav`` (under ``MODEL_DIR``) is turned into an
    envelope with ``calc_envelope``.  For every speaker in ``Speakers``
    whose name contains ``'IS'``, the baseline file ``<sp>_<word>1is.wav``
    and the production files ``<sp>_<word>2is.wav`` / ``<sp>_<word>3is.wav``
    (under ``JAM_DIR/<speaker>``) are compared to the model envelope with
    ``envelope_match``.  Combinations with a missing file are skipped.

    The rows ``[speaker_prefix, production, word, base_sim, shad_sim]`` are
    written, tab-delimited and preceded by a header row, to
    ``jam_output8IS.txt`` in ``BASE_DIR``.

    Relies on module-level names: ``Words``, ``Speakers``, ``MODEL_DIR``,
    ``JAM_DIR``, ``BASE_DIR``, ``opt_dict``, ``calc_envelope`` and
    ``envelope_match``.
    """
    productions = ['2', '3']
    output = []
    for w in Words:
        mod_path = os.path.join(MODEL_DIR, 'modeltalker_%s.wav' % w)
        mod_env = calc_envelope(mod_path, **opt_dict)
        print(w)
        for s in Speakers:
            # Only 'IS' speakers are processed; this filter also excludes
            # the 'modeltalker' entry, so no separate check is needed.
            if 'IS' not in s:
                continue
            sp = s.split('_')[0]
            s_dir = os.path.join(JAM_DIR, s)

            # The baseline recording does not depend on the production, so
            # its existence check is done once per speaker/word.
            base_path = os.path.join(s_dir, '%s_%s1is.wav' % (sp, w))
            if not os.path.isfile(base_path):
                continue

            # Baseline similarity is computed lazily (only once a shadowed
            # file is actually found) and then reused for each production.
            base_done = False
            b_to_m_sim = None
            for p in productions:
                shad_path = os.path.join(s_dir, '%s_%s%sis.wav' % (sp, w, p))
                if not os.path.isfile(shad_path):
                    continue
                if not base_done:
                    base_env = calc_envelope(base_path, **opt_dict)
                    b_to_m_sim = envelope_match(mod_env, base_env)
                    base_done = True
                shad_env = calc_envelope(shad_path, **opt_dict)
                s_to_m_sim = envelope_match(mod_env, shad_env)
                output.append([sp, p, w, b_to_m_sim, s_to_m_sim])

    # newline='' is required by the csv module so that the writer controls
    # line endings itself (otherwise rows end in '\r\r\n' on Windows).
    out_path = os.path.join(BASE_DIR, 'jam_output8IS.txt')
    with open(out_path, 'w', newline='') as f:
        csvw = csv.writer(f, delimiter='\t')
        csvw.writerow(['Shadower_number', 'Block',
                       'Word', 'base_to_mod_env_sim', 'shad_to_mod_env_sim',
                       #'mfcc_shad_to_mod','mfcc_mod_to_shad',
                       #'spec_shad_to_mod','spec_mod_to_shad',
                       #'mfcc_base_to_mod','mfcc_mod_to_base',
                       #'spec_base_to_mod','spec_mod_to_base'
                       ])
        csvw.writerows(output)
# Example #2
# 0
def convert_tokens_to_graph(s):
    """Build and pickle a pairwise envelope-similarity graph for speaker *s*.

    The graph is written to ``<Graphs resource dir>/<str(s)>.txt``.  If that
    file already exists, or if the speaker has no word tokens whose word
    type label is in ``GOOD_WORDS``, nothing is done.

    Nodes are keyed by token primary key and carry the word, speaker and
    context attributes listed below; only tokens for which
    ``is_acceptable()`` is true get an attributed node.  Every pair of
    tokens is connected by an edge whose ``weight`` is the result of
    ``envelope_match`` on the two tokens' envelopes.

    Args:
        s: Speaker object.  It is used as the ``Dialog__Speaker`` filter
            value and must provide ``Gender``, ``Age`` and
            ``get_avg_speaking_rate()``; ``str(s)`` names the output file.

    Returns:
        None in all cases; the result is the pickled graph on disk.
    """
    print('%s' % (s))
    filepath = os.path.join(fetch_buckeye_resource('Graphs'),
                            '%s.txt' % (str(s),))
    if os.path.isfile(filepath):
        # A graph for this speaker was already produced; do not redo it.
        return None

    tokens = WordToken.objects.select_related(
        'WordType', 'Dialog__Speaker', 'Category').filter(Dialog__Speaker=s)
    #tokens = tokens.exclude(Category__CategoryType__in = ['Pause','Disfluency','Other'])
    tokens = tokens.filter(WordType__Label__in=GOOD_WORDS)
    # Evaluate the query once so the same token objects are reused below.
    tokens = list(tokens)
    print(len(tokens))
    if not tokens:
        return None

    g = nx.Graph()
    g.add_nodes_from([(x.pk, {'Word': x.WordType.Label,
                              'Frequency': x.WordType.get_frequency(),
                              'ND': x.WordType.get_ND(),
                              'DialogPlace': x.get_dialog_place(),
                              'Duration': x.get_duration(),
                              'Speaker': str(s),
                              'SpeakerGender': s.Gender,
                              'SpeakerAge': s.Age,
                              'PrevSpeakRate': x.get_previous_speaking_rate(),
                              'FollSpeakRate': x.get_following_speaking_rate(),
                              'AvgSpeakRate': s.get_avg_speaking_rate(),
                              'PrevCondProb': x.get_previous_cond_prob(),
                              'FollCondProb': x.get_following_cond_prob(),
                              'Repetitions': x.get_repetitions(),
                              'wasRepeatedRecently': x.get_recent_repetition(),
                              'OrthoLength': len(x.WordType.Label),
                              'PhonoLength': x.WordType.UR.count(),
                              }) for x in tokens if x.is_acceptable()])

    # NOTE(review): edges are added for *all* token pairs, including tokens
    # that failed is_acceptable(); networkx creates missing endpoints on
    # add_edge, so such tokens end up as attribute-less nodes.  Confirm
    # whether that is intended.
    for i, first in enumerate(tokens):
        envone = first.get_envelope()
        for second in tokens[i + 1:]:
            envtwo = second.get_envelope()
            sim = envelope_match(envone, envtwo)
            g.add_edge(first.pk, second.pk, weight=sim)
        # Clean up this token's temporary wav file (presumably created by
        # get_envelope() -- confirm); a missing file is not an error.
        path = fetch_temp_resource('buckeye-wt-%d.wav' % first.pk)
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    # Use a context manager so the output file is flushed and closed
    # deterministically instead of leaking the handle.
    with open(filepath, 'wb') as out:
        pickle.dump(g, out)