def extractSingleDailyActDiet(subjectID):
    '''
    Split one subject's Excel diary into per-day activity and diet txt files.

    Reads the sheet at index 3 of 'subject_template_<subjectID>.xlsx'.
    Scanning from row index 8 downwards, a non-empty cell in column 0
    advances the day counter; a non-empty cell in column 3 is appended as one
    line to that day's activity file, and a non-empty cell in column 4 to
    that day's diet file.

    subjectID -- string identifying the subject; it is concatenated into
                 every path this function touches.
    '''
    act_prefix = 'activity/activityFromExcel/activity_' + subjectID + '_'
    diet_prefix = 'diet/dietFromExcel/diet_' + subjectID + '_'

    workbook = xlrd.open_workbook('subject_template_' + subjectID + '.xlsx')
    sheet = workbook.sheet_by_index(3)
    duration = dietActInfoRetrv.getDuration(subjectID)

    # Create (or truncate) both files for every day up front: the rows below
    # are written in append mode, so stale content must be cleared first.
    for n in range(1, duration + 1):
        for prefix in (act_prefix, diet_prefix):
            with open(prefix + str(n) + '.txt', 'w'):
                pass

    # NOTE(review): if an activity/diet cell appears before the first
    # non-empty column-0 cell, count is still 0 and a '_0.txt' file is
    # written -- confirm the sheet layout rules this out.
    count = 0
    for row in range(8, sheet.nrows):
        if sheet.cell_value(row, 0):
            count += 1

        activity = sheet.cell_value(row, 3)
        if activity:
            text = str(activity.encode('utf-8'))
            with open(act_prefix + str(count) + '.txt', 'a') as f_act:
                f_act.write(text)
                f_act.write('\n')

        diet = sheet.cell_value(row, 4)
        if diet:
            # NOTE(review): unlike the activity cell, the diet cell is not
            # UTF-8 encoded before str(); under Python 2 a non-ASCII unicode
            # value would presumably raise here -- confirm.
            text = str(diet)
            with open(diet_prefix + str(count) + '.txt', 'a') as f_diet:
                f_diet.write(text)
                f_diet.write('\n')
def buildDailyItemFreqTXTFile():
    '''
    write daily diet and activity index of each subject into txt files

    For every subject in available_list and every day 1..duration, writes
    one line per index entry, formatted as "%-25s%-10s" (key, value), to
    'activity/activityItemFreq/activity_frequency_<subject>_<day>.txt' and
    'diet/dietItemFreq/diet_frequency_<subject>_<day>.txt'.
    '''
    print('in buildDailyItemFreqTXTFile()')
    for subjectID in available_list:
        duration = dietActInfoRetrv.getDuration(subjectID)
        for n in range(1, duration + 1):
            suffix = subjectID + '_' + str(n) + '.txt'
            act_path = 'activity/activityItemFreq/activity_frequency_' + suffix
            diet_path = 'diet/dietItemFreq/diet_frequency_' + suffix

            # Both files are opened before the indexes are built (same order
            # as before); 'with' guarantees they are closed even when an
            # index builder or a write raises.
            with open(act_path, 'w') as f_act:
                with open(diet_path, 'w') as f_diet:
                    index_act = buildItemIndex.build_daily_single_activity_index(subjectID, n)
                    index_diet = buildItemIndex.build_daily_single_diet_index(subjectID, n)
                    for key in index_act:
                        f_act.write("%-25s%-10s" % (key, index_act[key]))
                        f_act.write('\n')
                    for key in index_diet:
                        f_diet.write("%-25s%-10s" % (key, index_diet[key]))
                        f_diet.write('\n')
def buildDailySingleActTypeFreqFile():
    '''
    Write the daily activity-type index of each subject into txt files.

    For every subject in available_list and every day 1..duration, writes
    one line per index entry, formatted as "%-25s%-10s" (key, value), to
    'activity/activityTypeFreq/activityType_frequency_<subject>_<day>.txt'.
    '''
    for subjectID in available_list:
        duration = dietActInfoRetrv.getDuration(subjectID)
        for n in range(1, duration + 1):
            path = ('activity/activityTypeFreq/activityType_frequency_'
                    + subjectID + '_' + str(n) + '.txt')
            # The file is opened before the index is built (same order as
            # before); 'with' closes it even if the builder or a write raises.
            with open(path, 'w') as f_act:
                singleActType_dict = buildTypeIndex.build_daily_single_activity_index(subjectID, n)
                for key in singleActType_dict:
                    f_act.write("%-25s%-10s" % (key, singleActType_dict[key]))
                    f_act.write('\n')
def genDailyActTypeDataSet():
    '''
    Gather the daily activity-type index of every available subject.

    Returns a tuple with one inner tuple per (subject, day), ordered by
    available_list and then by day 1..duration. Each inner tuple is
    tuple(index) -- presumably the index is a dict, in which case only its
    keys are kept. The number of collected days is printed before returning.
    '''
    per_day = []
    for sid in available_list:
        n_days = dietActInfoRetrv.getDuration(sid)
        for day in range(1, n_days + 1):
            act_index = buildTypeIndex.build_daily_single_activity_index(sid, day)
            per_day.append(tuple(act_index))

    result = tuple(per_day)
    print(len(result))
    return result
def genDailyDietTypeDataSet():
    '''
    Gather the daily diet-type index of every available subject.

    For each (subject, day) the 'compositeP' entry is removed from the index
    returned by the builder (in place), and tuple(index) is recorded --
    presumably the index is a dict, so this keeps its remaining keys.

    Returns a tuple of those per-day tuples, ordered by available_list and
    then by day 1..duration; its length is printed before returning.
    '''
    per_day = []
    for sid in available_list:
        n_days = dietActInfoRetrv.getDuration(sid)
        for day in range(1, n_days + 1):
            diet_index = buildTypeIndex.build_daily_single_diet_index(sid, day)
            # Drop the 'compositeP' entry when present.
            diet_index.pop('compositeP', None)
            per_day.append(tuple(diet_index))

    result = tuple(per_day)
    print(len(result))
    return result
def preprocessDailyDiaryWithTime():
    '''
    Run preprocessingWithTime over every subject's daily '_with_time' files.

    For each subject in available_list (its ID is printed) and each day
    1..duration, preprocessingWithTime is called first for the activity file
    and then for the diet file. The two arguments are presumably
    (source path, destination path): '...FromExcel/..._with_time.txt' and
    '...Processed/processed_..._with_time.txt'.
    '''
    # (first-argument prefix, second-argument prefix), activity before diet.
    path_prefixes = (
        ('activity/activityFromExcel/activity_',
         'activity/activityProcessed/processed_activity_'),
        ('diet/dietFromExcel/diet_',
         'diet/dietProcessed/processed_diet_'),
    )

    print('in preprocessDailyDiaryWithTime()')
    for subjectID in available_list:
        print(subjectID)
        duration = dietActInfoRetrv.getDuration(subjectID)
        for n in range(1, duration + 1):
            tail = subjectID + '_' + str(n) + '_with_time.txt'
            for first_prefix, second_prefix in path_prefixes:
                preprocessingWithTime(first_prefix + tail, second_prefix + tail)
def genDailySingleActTypeTFArray(subjectID):
    '''
    Build the per-day activity-type term-frequency array of one subject.

    genActTypeDict() supplies a mapping whose keys are used as column
    indices and whose values are looked up in each day's activity-type
    index. Row r holds day r + 1; a cell stays 0 when the day's index has no
    entry for that column's type.

    Returns a numpy array of shape (duration, len(type dict)).
    '''
    type_by_col = genActTypeDict()
    n_days = dietActInfoRetrv.getDuration(subjectID)
    tf = np.zeros((n_days, len(type_by_col)))

    for day in range(1, n_days + 1):
        day_index = buildTypeIndex.build_daily_single_activity_index(
            subjectID, day)
        for col in type_by_col:
            label = type_by_col[col]
            if label in day_index:
                tf[day - 1, col] = day_index[label]
    return tf
def getDietTypeTFArray4DC():
    '''
    Build one diet-type term-frequency row per available subject.

    dataGen4DietAct.genDietTypeDict() supplies a mapping whose keys are used
    as column indices and whose values are looked up in the per-time-slot
    dictionaries returned by build_daily_single_diet_index_with_time4DC.
    Each subject's row accumulates the matching values over all of that
    subject's days and all time slots.

    Returns a numpy array of shape (len(available_list), len(type dict)).
    '''
    type_by_col = dataGen4DietAct.genDietTypeDict()
    totals = np.zeros((len(available_list), len(type_by_col)))

    for row, subjectID in enumerate(available_list):
        n_days = dietActInfoRetrv.getDuration(subjectID)
        for day in range(1, n_days + 1):
            by_time = buildTypeIndex.build_daily_single_diet_index_with_time4DC(subjectID, day)
            for slot in by_time:
                for col in type_by_col:
                    label = type_by_col[col]
                    if label in by_time[slot]:
                        totals[row, col] += by_time[slot][label]
    return totals
def genDailyActDietTypeDataSet():
    '''
    Gather the combined daily activity-type and diet-type data set.

    For each (subject, day): the 'others' entry is removed from the activity
    index and the 'compositeP' entry from the diet index (both in place),
    then tuple(activity index) + tuple(diet index) is recorded -- presumably
    both indexes are dicts, so this concatenates their remaining keys.

    Returns a tuple of those per-day tuples, ordered by available_list and
    then by day 1..duration; its length is printed before returning.
    '''
    per_day = []
    for sid in available_list:
        n_days = dietActInfoRetrv.getDuration(sid)
        for day in range(1, n_days + 1):
            act_index = buildTypeIndex.build_daily_single_activity_index(sid, day)
            act_index.pop('others', None)
            act_part = tuple(act_index)

            diet_index = buildTypeIndex.build_daily_single_diet_index(sid, day)
            diet_index.pop('compositeP', None)
            diet_part = tuple(diet_index)

            per_day.append(act_part + diet_part)

    result = tuple(per_day)
    print(len(result))
    return result
def singleSubjectDailyArray(domain, subjectID):
    '''
    build daily item TFIDF normalization array

    domain    -- 'ActItem' or 'DietItem'; selects both the item dictionary
                 and the daily index builder.
    subjectID -- subject identifier handed to the duration and index helpers.

    A (duration x number-of-items) count array is filled from the daily
    item indexes, transformed with TfidfTransformer(norm=None) and passed
    through utilise.normArray. The result's shape is printed and the result
    returned.

    Raises ValueError for any other domain (previously this surfaced later
    as an UnboundLocalError on item_dict).
    '''
    if domain == 'ActItem':
        item_dict = dataGen4DietAct.genActItemDict()
        build_index = buildItemIndex.build_daily_single_activity_index
    elif domain == 'DietItem':
        item_dict = dataGen4DietAct.genDietItemDict()
        build_index = buildItemIndex.build_daily_single_diet_index
    else:
        raise ValueError(
            "unknown domain %r; expected 'ActItem' or 'DietItem'" % (domain,))

    duration = dietActInfoRetrv.getDuration(subjectID)
    array = np.zeros((duration, len(item_dict)))

    for i in range(duration):
        ItemIndex = build_index(subjectID, i + 1)
        for key in item_dict:
            # The daily index is looked up by the item name wrapped in
            # single quotes.
            quoted = "'" + item_dict[key] + "'"
            if quoted in ItemIndex:
                array[i, key] = ItemIndex[quoted]

    transformer = TfidfTransformer(norm=None)
    tfidf = transformer.fit_transform(array)
    tfidfNorm = utilise.normArray(tfidf.toarray())
    print(tfidfNorm.shape)
    return tfidfNorm
def singleSubjectDailyArray(domain, subjectID):
    '''
    build daily item TF array

    domain    -- one of 'DietType', 'ActType', 'DietItem', 'ActItem';
                 selects both the dictionary of columns and the daily index
                 builder.
    subjectID -- subject identifier handed to the duration and index helpers.

    Returns a numpy array of shape (duration, len(item dict)). Row r holds
    day r + 1; a cell stays 0.0 when that day's index has no entry for the
    column's name.

    Raises ValueError for any other domain.
    '''
    if domain == 'DietType':
        item_dict = dataGen4DietAct.genDietTypeDict()
        build_index = buildTypeIndex.build_daily_single_diet_index
    elif domain == 'ActType':
        item_dict = dataGen4DietAct.genActTypeDict()
        build_index = buildTypeIndex.build_daily_single_activity_index
    # The two item domains previously had fill loops but no item_dict, so
    # they always failed with UnboundLocalError before reaching them.
    # NOTE(review): assumes dataGen4DietAct provides genDietItemDict /
    # genActItemDict and that buildItemIndex keys are the raw item names
    # (as the original item branches looked them up) -- confirm.
    elif domain == 'DietItem':
        item_dict = dataGen4DietAct.genDietItemDict()
        build_index = buildItemIndex.build_daily_single_diet_index
    elif domain == 'ActItem':
        item_dict = dataGen4DietAct.genActItemDict()
        build_index = buildItemIndex.build_daily_single_activity_index
    else:
        raise ValueError(
            "unknown domain %r; expected 'DietType', 'ActType', "
            "'DietItem' or 'ActItem'" % (domain,))

    duration = dietActInfoRetrv.getDuration(subjectID)
    array = np.zeros((duration, len(item_dict)))

    for i in range(duration):
        ItemIndex = build_index(subjectID, i + 1)
        for key in item_dict:
            name = item_dict[key]
            # Cells without a match keep the 0.0 that np.zeros put there.
            if name in ItemIndex:
                array[i, key] = ItemIndex[name]

    # The TF array is deliberately not converted to TF-IDF here: the DF
    # computed from this array is not equal to the one used for the mean
    # vector.
    return array