def import_data():
    """Build the full set of utterance data_instance objects.

    Loads the set from the pickle cache file when it exists; otherwise
    rebuilds it from the database (stressed and non-stressed utterances)
    and writes the cache for the next run.

    Returns:
        list of data_instance -- one per utterance that has features stored.
    """
    cache_path = "save_obj.txt"  # renamed: `file` shadowed the builtin
    if os.path.isfile(cache_path):
        # Cached copy exists: load it instead of querying the database.
        print("importing data from file...")
        with open(cache_path, "rb") as cache_file:
            # NOTE(review): pickle.load is unsafe on untrusted input; the
            # cache is assumed to be produced locally by this same function.
            total_set = pickle.load(cache_file)
    else:
        print("creating load file data... (need to test this approach...)")
        total_set = []
        for speaker_id in myDB.getIds():  # renamed: `id` shadowed the builtin
            if speaker_id not in ids_new_tag:  # keep only the re-tagged ids
                continue
            print(speaker_id)
            # The stressed (1) and non-stressed (0) extractions were two
            # duplicated loops; the label is the only difference.
            for stress_label in (1, 0):
                # getStressedUts returns (event, starttime) per utterance.
                info = myDB.getStressedUts(speaker_id, stress_label,
                                           "Stress_based_on_hf_10sec_window")
                for row in info:
                    event, start_time = row[0], int(row[1])
                    # Returns the 6125 feature values for that utterance.
                    ut = myDB.getUtterancesFeatures(event, speaker_id,
                                                    start_time)
                    if not ut:  # no features stored -> skip this utterance
                        continue
                    # Duration is 0: it is not read from the database here.
                    total_set.append(data_instance(speaker_id, event,
                                                   start_time, 0, ut,
                                                   stress_label))
        with open(cache_path, "wb") as cache_file:
            pickle.dump(total_set, cache_file)
    return total_set
def import_data_csv(data_file, info_file):
    """Load data_instance objects from a ", "-separated CSV file.

    Each data row holds feature values with the class in the last column.
    When info_file is non-empty, per-row metadata (id, event, start time,
    duration) is read from it; the info file has a header row, so row i of
    the data maps to row i+1 of the info file.

    NOTE(review): this name is re-defined later in the file by a more
    tolerant version; that later definition is the one callers get.

    Returns:
        list of data_instance.
    """
    total_set = []
    # `with` guarantees the handles are closed; the original leaked both.
    with open(data_file, "r") as f_data:
        data = f_data.readlines()
    info = []
    if info_file != "":  # open the info file only when one was given
        with open(info_file, "r") as f_info:
            info = f_info.readlines()
    for i in range(0, len(data)):
        line = data[i].split(", ")
        # All columns but the last are feature values; the last is the class.
        ft_values = [float(value) for value in line[:-1]]
        line_class = int(line[-1])
        instance_id = 0  # renamed: `id` shadowed the builtin
        event = ""
        start_time = 0
        duration = 0
        if info_file != "":
            # Info file starts with a header row -> offset by +1.
            line_info = info[i + 1].split(", ")
            instance_id = int(line_info[0])
            event = line_info[1]
            start_time = int(line_info[2])
            duration = int(line_info[3])
        total_set.append(data_instance(instance_id, event, start_time,
                                       duration, ft_values, line_class))
    return total_set
def import_data_csv(data_file, info_file):
    """Load data_instance objects from a CSV file, ", " or "," separated.

    Each data row holds feature values with the class in the last column.
    If splitting on ", " yields fewer than 10 columns, the row is re-split
    on a bare comma.  When info_file is non-empty, per-row metadata (id,
    event, start time, duration) is read from it; the info file has a
    header row, so data row i maps to info row i+1.

    Returns:
        list of data_instance.
    """
    total_set = []
    # `with` guarantees the handles are closed; the original leaked both.
    with open(data_file, "r") as f_data:
        data = f_data.readlines()
    info = []
    if info_file != "":
        with open(info_file, "r") as f_info:
            info = f_info.readlines()
    for i in range(0, len(data)):
        line = data[i].split(", ")
        if len(line) < 10:  # delimiter fallback: plain comma, no space
            line = data[i].split(",")
        # All columns but the last are feature values; the last is the class.
        ft_values = [float(value) for value in line[:-1]]
        # Strip trailing "\r"/"\n" before parsing.  The original computed a
        # "\r"-stripped value and then discarded it by re-parsing the raw
        # token (which only worked because float() ignores whitespace).
        line_class = int(float(line[-1].strip()))
        instance_id = 0  # renamed: `id` shadowed the builtin
        event = ""
        start_time = 0
        duration = 0
        if info_file != "":
            # Info file starts with a header row -> offset by +1.
            line_info = info[i + 1].split(", ")
            if len(line_info) < 10:  # same delimiter fallback as the data
                line_info = info[i + 1].split(",")
            instance_id = int(line_info[0])
            event = line_info[1]
            start_time = int(line_info[2])
            duration = int(line_info[3])
        total_set.append(data_instance(instance_id, event, start_time,
                                       duration, ft_values, line_class))
    return total_set
def import_nips_sparse_binary(data_file, labels_file, n_fts):
    """Read a NIPS sparse-binary feature file into data_instance objects.

    Each line of data_file lists the 1-based indices of the features that
    are active; every other feature is 0.0.  When labels_file is non-empty,
    the i-th label is parsed from its i-th line; otherwise the placeholder
    label "?" is used.

    Returns:
        list of data_instance.
    """
    with open(data_file, "r") as handle:
        rows = handle.readlines()
    labels = None
    if labels_file != "":
        with open(labels_file, "r") as handle:
            labels = handle.readlines()
    total_set = []
    for row_index, row in enumerate(rows):
        # Floats are immutable, so replicating with * is safe here (the
        # aliasing concern in the original comment applies only to
        # mutable elements).
        features = [0.0] * n_fts
        for token in row.split(" "):
            if token == "\n":  # trailing newline token, not a feature index
                continue
            features[int(token) - 1] = 1.0  # file indices are 1-based
        label = float(labels[row_index]) if labels is not None else "?"
        total_set.append(data_instance("", features, label))
    return total_set
def import_nips_dense(data_file, labels_file):
    """Read a NIPS dense feature file into data_instance objects.

    Each line of data_file is a space-separated list of feature values.
    When labels_file is non-empty, the i-th label is parsed from its i-th
    line; otherwise the placeholder label "?" is used.

    Returns:
        list of data_instance.
    """
    with open(data_file, "r") as handle:
        rows = handle.readlines()
    labels = None
    if labels_file != "":
        with open(labels_file, "r") as handle:
            labels = handle.readlines()
    total_set = []
    for row_index, row in enumerate(rows):
        # Skip the trailing newline token; every other token is a value.
        features = [float(token) for token in row.split(" ") if token != "\n"]
        label = float(labels[row_index]) if labels is not None else "?"
        total_set.append(data_instance("", features, label))
    return total_set
def import_sonar():
    """Load the sonar data set and split it per the dataset owners' division.

    Reads "sonar.all-data", labelling rock rows ("R") 1 and mine rows ("M")
    0, then assigns each instance to train or test according to the index
    files "sonar.rocks" / "sonar.mines" (read via import_sonar_division).

    Returns:
        (train_set, test_set) -- two lists of data_instance.
    """
    with open("sonar.all-data", "r") as handle:
        lines = handle.readlines()
    # Read instances separated by class, preserving file order.
    class_rocks = []
    class_mines = []
    for line in lines:
        cols = line.split(",")
        vals = [float(value) for value in cols[:-1]]
        if "R" in cols[-1]:  # class "R" (rock) -> label 1
            class_rocks.append(data_instance("", vals, 1))
        else:  # class "M" (mine) -> label 0
            class_mines.append(data_instance("", vals, 0))
    # Division into train/test as specified by the dataset owners.
    # Convert the division index lists to sets once: the original did an
    # O(m) list-membership test per instance.
    train_set = []
    test_set = []
    rocks_test_div = set(import_sonar_division("sonar.rocks"))
    for i, instance in enumerate(class_rocks):
        if i in rocks_test_div:
            test_set.append(instance)
        else:
            train_set.append(instance)
    mines_test_div = set(import_sonar_division("sonar.mines"))
    for i, instance in enumerate(class_mines):
        if i in mines_test_div:
            test_set.append(instance)
        else:
            train_set.append(instance)
    return train_set, test_set
def import_data():
    # NOTE(review): this is a byte-identical duplicate of the import_data
    # defined earlier in this file; being later, THIS definition is the one
    # callers get, and the earlier one is dead code. One copy should be
    # removed.
    # Loads the utterance set from the pickle cache when it exists,
    # otherwise rebuilds it from the database and writes the cache.
    file = "save_obj.txt"
    if os.path.isfile(file):  ##if the file exists import from file
        print "importing data from file..."
        with open(file, "rb") as myFile:
            # NOTE(review): pickle.load is unsafe on untrusted input; the
            # cache is assumed to be produced locally by this function.
            total_set = pickle.load(myFile)
    else:  ##we have to create the file again
        print "creating load file data... (need to test this approach...)"
        total_set = []
        ids = myDB.getIds()
        for id in ids:
            if id not in ids_new_tag:
                continue
            print id
            # Stressed utterances (label 1).
            info = myDB.getStressedUts(id, 1, "Stress_based_on_hf_10sec_window")  ##returns event, starttime for each utterance
            for i in info:
                ut = myDB.getUtterancesFeatures(i[0], id, int(i[1]))  ##receives event, id, starttime and returns 6125 values for that utterance
                if ut == []:  ##if there are no utterance features proceed to the next one
                    continue
                f = data_instance(id, i[0], int(i[1]), 0, ut, 1)  ##0 is for the duration, it is not read from the db
                total_set.append(f)
            # Non-stressed utterances (label 0) -- same extraction as above.
            info = myDB.getStressedUts(id, 0, "Stress_based_on_hf_10sec_window")  ##returns event, starttime for each utterance
            for i in info:
                ut = myDB.getUtterancesFeatures(i[0], id, int(i[1]))  ##receives event, id, starttime and returns 6125 values for that utterance
                if ut == []:  ##if there are no utterance features proceed to the next one
                    continue
                f = data_instance(id, i[0], int(i[1]), 0, ut, 0)
                total_set.append(f)
        # Write the cache so the next call takes the fast path above.
        with open(file, "wb") as myFile:
            pickle.dump(total_set, myFile)
    return total_set