Пример #1
0
                         normjoin('C:\Experiments\JK302'))
# NOTE: pickling `sc` is currently disabled. If it is re-enabled under
# Python 3, open the file in binary mode ('wb') inside a `with` block so the
# handle is closed once the dump finishes.
#filehandler = open(dataDir + '\\sc.pickle', 'w')
#pickle.dump(sc, filehandler)

# Write the phoneme table of the sentence comparison to phonTable.csv inside
# dataDir. The path is built with normjoin, as for the other paths in this
# script, instead of concatenating a hard-coded '\\' separator.
sc.phonTable.to_csv(normjoin(dataDir, 'phonTable.csv'))

# Sentence-level table built from lktable.
sentT = pd.DataFrame(lktable)

# Join the ngram and IPHoD info and "correctness" at the word level: the
# columns of sc.tngram and sc.tphod are placed side by side and the rows are
# aligned to sc.tngram's index.
# concat's `join_axes` keyword was deprecated in pandas 0.25 and removed in
# 1.0; reindexing the concatenated result is the documented replacement.
wordT = pd.concat([sc.tngram, sc.tphod], axis=1).reindex(sc.tngram.index)
#
# Table with phoneme-level info.
phonT = sc.phonTable

# Put the sentence-level and word-level tables next to the phoneme table,
# keeping exactly the rows of phonT's index.
# NOTE(review): landkit.IndexFill presumably repeats each sentence/word row
# for every phoneme that belongs to it -- confirm against landkit.
# `join_axes` was removed from pd.concat in pandas 1.0, so the result is
# reindexed to phonT.index instead.
bigPhonT = pd.concat(
    [
        landkit.IndexFill(sentT, phonT['SentenceCount'], phonT['PhonemeCount']),
        landkit.IndexFill(wordT, phonT['WordCount'], phonT['PhonemeCount']),
        phonT,
    ],
    axis=1,
).reindex(phonT.index)

# Load the audio table. DataFrame.from_csv was deprecated in pandas 0.21 and
# removed in 1.0; read_csv with index_col=0 and parse_dates=True reproduces
# the defaults from_csv used.
audioTableTM = pd.read_csv(normjoin(tablePath, 'audioTableTM.csv'),
                           index_col=0,
                           parse_dates=True)

# PhonemeIndex is a merge key below, so force it to integers in one
# vectorized cast.
audioTableTM['PhonemeIndex'] = audioTableTM['PhonemeIndex'].astype('int64')
#atm = audioTableTM.groupby(['Talker','SentenceID']).first().reset_index()

# Left merge: every row of bigPhonT is kept, and the audio columns are added
# where Talker, SentenceID and PhonemeIndex all match.
bigPhonT = pd.merge(bigPhonT,
                    audioTableTM,
                    how='left',
                    on=['Talker', 'SentenceID', 'PhonemeIndex'])

# The merge leaves TargetPhoneme_x / TargetPhoneme_y (pandas' default
# suffixes for a non-key column present on both sides). Keep only the
# left-hand copy under its plain name.
del bigPhonT['TargetPhoneme_y']
bigPhonT = bigPhonT.rename(columns={'TargetPhoneme_x': 'TargetPhoneme'})
Пример #2
0
# Replace missing source sentences with a single space before the column is
# handed to landkit.SentCompare.
lktable['SourceSentence'] = lktable['SourceSentence'].fillna(" ")

# The experiment path is a raw string: '\E' and '\J' are not valid escape
# sequences, which newer Python versions warn about in a normal string
# literal. The runtime value of the path is unchanged.
sc = landkit.SentCompare(
    list(lktable['FullSentence']),
    list(lktable['SourceSentence']),
    True,
    normjoin(r'C:\Experiments\JK310'),
)
# NOTE: pickling `sc` is currently disabled. If it is re-enabled under
# Python 3, open the file in binary mode ('wb') inside a `with` block so the
# handle is closed once the dump finishes.
#filehandler = open(dataDir + '\\sc.pickle', 'w')
#pickle.dump(sc, filehandler)

# Write the phoneme table of the sentence comparison to phonTable.csv inside
# dataDir. The path is built with normjoin, as for the other paths in this
# script, instead of concatenating a hard-coded '\\' separator.
sc.phonTable.to_csv(normjoin(dataDir, 'phonTable.csv'))

# Sentence-level table built from lktable.
sentT = pd.DataFrame(lktable)

# Join the ngram and IPHoD info and "correctness" at the word level: the
# columns of sc.tngram and sc.tphod are placed side by side and the rows are
# aligned to sc.tngram's index.
# concat's `join_axes` keyword was deprecated in pandas 0.25 and removed in
# 1.0; reindexing the concatenated result is the documented replacement.
wordT = pd.concat([sc.tngram, sc.tphod], axis=1).reindex(sc.tngram.index)
#
# Table with phoneme-level info.
phonT = sc.phonTable

# Put the sentence-level and word-level tables next to the phoneme table,
# keeping exactly the rows of phonT's index.
# NOTE(review): landkit.IndexFill presumably repeats each sentence/word row
# for every phoneme that belongs to it -- confirm against landkit.
# `join_axes` was removed from pd.concat in pandas 1.0, so the result is
# reindexed to phonT.index instead.
bigPhonT = pd.concat(
    [
        landkit.IndexFill(sentT, phonT['SentenceCount'], phonT['PhonemeCount']),
        landkit.IndexFill(wordT, phonT['WordCount'], phonT['PhonemeCount']),
        phonT,
    ],
    axis=1,
).reindex(phonT.index)

# Load the audio table. DataFrame.from_csv was deprecated in pandas 0.21 and
# removed in 1.0; read_csv with index_col=0 and parse_dates=True reproduces
# the defaults from_csv used.
audioTableTM = pd.read_csv(normjoin(tablePath, 'audioTableTM.csv'),
                           index_col=0,
                           parse_dates=True)

# PhonemeIndex is a merge key below, so force it to integers in one
# vectorized cast.
audioTableTM['PhonemeIndex'] = audioTableTM['PhonemeIndex'].astype('int64')
#atm = audioTableTM.groupby(['Talker','SentenceID']).first().reset_index()

# Left merge: every row of bigPhonT is kept, and the audio columns are added
# where Talker, SentenceID and PhonemeIndex all match.
bigPhonT = pd.merge(bigPhonT,
                    audioTableTM,
                    how='left',
                    on=['Talker', 'SentenceID', 'PhonemeIndex'])

# The merge leaves TargetPhoneme_x / TargetPhoneme_y (pandas' default
# suffixes for a non-key column present on both sides). Keep only the
# left-hand copy under its plain name.
del bigPhonT['TargetPhoneme_y']
bigPhonT = bigPhonT.rename(columns={'TargetPhoneme_x': 'TargetPhoneme'})

#subjectTable = pd.DataFrame.from_csv(normjoin(dataDir,'SubjectInfoJK302.csv')).reset_index()
#bigPhonT = pd.merge(bigPhonT,subjectTable,how='left',on=['Subject'])

# Load the part-of-speech tag table. DataFrame.from_csv was removed in
# pandas 1.0; read_csv with index_col=0 and parse_dates=True reproduces its
# defaults, and reset_index() then turns that first CSV column back into an
# ordinary column.
POSTable = pd.read_csv(normjoin(tablePath, 'posTags_r1.csv'),
                       index_col=0,
                       parse_dates=True).reset_index()

# Rename the columns to 'Talker' and 'SentenceID', the names bigPhonT is
# merged on in this script.
POSTable = POSTable.rename(columns={'Subject': 'Talker', 'File': 'SentenceID'})

# Prefix every talker id with 's'. The values must already be strings for
# the concatenation to work.
POSTable['Talker'] = POSTable['Talker'].apply(lambda x: 's' + x)