def read_data_sets(fake_data=False):
    global TRAIN_FILE_READER, VALID_FILE_READER, TEST_FILE_READER

    class DataSets(object):
        pass

    data_sets = DataSets()
    data_sets.train = []
    data_sets.validation = []
    data_sets.test = []
    data_sets.trainpart = []
    data_sets.testpart = []

    get_train_valid_test_sets(DATA_SWITCH)

    TRAIN_FILE_READER = fr.FileReader()
    TRAIN_FILE_READER.initialize(os.path.join(data_dir, TRAINSET),
                                 TRAINSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    VALID_FILE_READER = fr.FileReader()
    VALID_FILE_READER.initialize(os.path.join(data_dir, VALIDSET),
                                 VALIDSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    TEST_FILE_READER = fr.FileReader()
    TEST_FILE_READER.initialize(os.path.join(data_dir, TESTSET),
                                TESTSET_MAXSIZE * NUM_SAMPLE_LENGTH)

    change_train_data_set(data_sets)
    change_valid_data_set(data_sets)
    change_test_data_set(data_sets)
    return data_sets

def reading_file(list_file):
    to_research = []
    for file in list_file:
        with file_reader.FileReader(file) as record_reader:
            for record in record_reader:
                to_research.append(record)
    return to_research

def test_FileReader():
    reader = file_reader.FileReader(
        "/code/cohort3/src/python/comp-220/dummy-syntax.js")
    assert reader.line_count() == 14
    assert reader.word_count('else') == 3
    assert reader.char_count() == 307
    assert reader.summary("else") == (
        "File /code/cohort3/src/python/comp-220/dummy-syntax.js Summary: \n"
        "- 14 lines \n"
        "- 307 characters (incl. spaces) \n"
        "- 3 instances of the word 'else'")

    reader = file_reader.FileReader("/code/cohort3/src/javascript/syntax.js")
    assert reader.line_count() == 247
    assert reader.word_count('else') == 10
    assert reader.char_count() == 7626
    assert reader.summary("else") == (
        "File /code/cohort3/src/javascript/syntax.js Summary: \n"
        "- 247 lines \n"
        "- 7626 characters (incl. spaces) \n"
        "- 10 instances of the word 'else'")

def build_query_vec(self):
    query_fr = file_reader.FileReader(self.combine_file,
                                      words_filter=dataset.stop_words,
                                      vector_type=self.query_method)
    query_set = query_fr.build_set(self.combine_file)
    # the vector of the query (last document), without the trailing label
    return query_set['doc' + str(self.combine_file_last_line)][0:-1]

def createGraph(self):
    fileReader = file_reader.FileReader()
    self.nodesDict = fileReader.readNodes()
    arcsList = fileReader.readArcs()
    for arc in arcsList:
        # node1Id, node2Id, distance = arc
        node1ID = arc["nodes"][0]
        node2ID = arc["nodes"][1]
        distance = arc["distance"]
        self.nodesDict[node1ID].addNeighbor(node2ID, distance)
        self.nodesDict[node2ID].addNeighbor(node1ID, distance)

def test():
    MHz = 1e6
    fs = 4 * MHz
    block_size = 4 * int(fs / 1000)  # 4 millisecond blocks
    fr = file_reader.FileReader()
    ca = ca_search.CASearch(fs)
    for b in range(10):
        bb_IQ = fr.read(block_size)
        ca.processBlock(bb_IQ)

def read_data_sets(train_skip_samples=0, valid_skip_samples=0, test_skip_samples=0):
    global TRAIN_FILE_READER, VALID_FILE_READER, TEST_FILE_READER

    class DataSets(object):
        def __init__(self):
            self.train = []
            self.validation = []
            self.test = []

    data_sets = DataSets()
    get_train_valid_test_sets(DATA_SWITCH)

    TRAIN_FILE_READER = fr.FileReader()
    TRAIN_FILE_READER.initialize(os.path.join(DATA_PATH, TRAINSET),
                                 TRAINSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    TRAIN_FILE_READER.read_data_skip(train_skip_samples * NUM_SAMPLE_LENGTH, False, np.uint8)

    VALID_FILE_READER = fr.FileReader()
    VALID_FILE_READER.initialize(os.path.join(DATA_PATH, VALIDSET),
                                 VALIDSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    VALID_FILE_READER.read_data_skip(valid_skip_samples * NUM_SAMPLE_LENGTH, False, np.uint8)

    TEST_FILE_READER = fr.FileReader()
    TEST_FILE_READER.initialize(os.path.join(DATA_PATH, TESTSET),
                                TESTSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    TEST_FILE_READER.read_data_skip(test_skip_samples * NUM_SAMPLE_LENGTH, False, np.uint8)

    change_train_data_set(data_sets)
    change_valid_data_set(data_sets)
    change_test_data_set(data_sets)
    return data_sets

def read_data_sets(fake_data=False, dtype=tf.float32):
    global train_file_reader, valid_file_reader, test_file_reader

    class DataSets(object):
        pass

    data_sets = DataSets()
    if fake_data:
        def fake():
            return DataSet([], [], fake_data=True, dtype=dtype)

        data_sets.train = fake()
        data_sets.validation = fake()
        data_sets.test = fake()
        return data_sets

    get_train_valid_test_sets(DATA_SWITCH)
    train_file_reader = fr.FileReader()
    valid_file_reader = fr.FileReader()
    test_file_reader = fr.FileReader()
    train_file_reader.initialize(os.path.join(DATA_PATH, TRAINSET),
                                 TRAINSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    valid_file_reader.initialize(os.path.join(DATA_PATH, VALIDSET),
                                 VALIDSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    test_file_reader.initialize(os.path.join(DATA_PATH, TESTSET),
                                TESTSET_MAXSIZE * NUM_SAMPLE_LENGTH)
    change_train_data_set(data_sets)
    change_valid_data_set(data_sets)
    change_test_data_set(data_sets)
    return data_sets

def read_header(self):
    present_time = file_reader.FileReader(self.file)
    present_time2 = present_time.read_file_time()[0]
    time_split = present_time2.split(':')
    inc_time = dateTime.dateTime(time_split[0], time_split[1])
    inc_time = inc_time.add_minutes(present_time2, 5)
    try:
        with open(self.file, 'r') as inFile:
            reader = inFile.readlines()[:6]
            lines = list(self.group(reader))
            final_list = ('Day: \n' + lines[0][1] + 'Time: \n' + inc_time
                          + '\n' + 'Company: \n' + 'NSHF')
    except IndexError:
        print("Error - Please specify an input file.")
        sys.exit(2)
    return final_list

def __init__(self, k, query, method):
    if method not in eval_types:
        raise ValueError("method is not supported")
    self.k = k
    self.query_method = eval_types[method]
    self.combine_file = self.build_combine_file(query)
    with open(self.combine_file) as f:
        self.combine_file_last_line = sum(1 for _ in f) - 1
    full_fr = file_reader.FileReader(self.combine_file,
                                     words_filter=dataset.stop_words,
                                     vector_type=TFIDF)
    combine_tfidf_set = full_fr.build_set(self.combine_file)
    del combine_tfidf_set['doc' + str(self.combine_file_last_line)]
    self.full_set = combine_tfidf_set
    self.query_vec = self.build_query_vec()

def __init__(self, parent=None, width=8, height=6, dpi=150):
    """ Initialization """
    # Initialize figure and axis
    fig = Figure(figsize=(width, height), dpi=dpi)
    self.ax = fig.add_subplot(1, 1, 1, projection=cartopy.crs.PlateCarree())
    self.ax.stock_img()
    self.ax.add_feature(cartopy.feature.LAND, zorder=1)
    self.ax.add_feature(cartopy.feature.BORDERS, zorder=2)
    self.ax.add_feature(cartopy.feature.COASTLINE, zorder=2)
    fig.tight_layout()

    # Initialize FigureCanvas
    FigureCanvas.__init__(self, fig)
    self.setParent(parent)
    FigureCanvas.setSizePolicy(self,
                               QtWidgets.QSizePolicy.Expanding,
                               QtWidgets.QSizePolicy.Expanding)
    FigureCanvas.updateGeometry(self)

    # Initialize some variables we use
    self.countries = shpreader.Reader(
        shpreader.natural_earth(resolution='110m',
                                category='cultural',
                                name='admin_0_countries')).records()
    tmp, self.countries = itertools.tee(self.countries)
    self.land = cartopy.feature.ShapelyFeature(
        (c.geometry for c in tmp),
        cartopy.crs.PlateCarree(),
        facecolor=cartopy.feature.COLORS['land'])

    # Initialize filereader
    self.fr = fr.FileReader('data.txt')

    # Read file
    self.sel_countries = self.fr.read_countries()

    # Fill in those in file
    self.fill_country(self.find_country_a3(self.sel_countries), 1)

    SAVER_BASENAME = "two-layer-rnn-model-anna-simplified"
    SAVE_FREQUENCY = 0.10
    STEP_SIZE = 100
    TEST_MODE = False
    TOP_N = 5
    USE_MY_LSTM_CELL = False
    VALIDATION_FREQUENCY = 0.10
elif CONFIG_NAME == "3_LAYER_MODE":
    # These settings max out at ~60% accuracy, too.
    BATCH_SIZE = 10
    BURN_IN_LETTERS = 128
    CHARS_TO_GENERATE = 2048
    CLIP_GRADIENT = 5
    KEEP_PROB = 0.50
    LEARNING_RATE = .01
    NUM_EPOCHS = 20
    NUM_LAYERS = 3
    NUM_LSTM_UNITS = 256
    RESTORE_FILENAME = "./two-layer-rnn-model-anna-simplified-19-0439.ckpt"
    SAVER_BASENAME = "three-layer-rnn-model-anna-simplified"
    SAVE_FREQUENCY = 0.10
    STEP_SIZE = 100
    TEST_MODE = False
    TOP_N = 5
    VALIDATION_FREQUENCY = 0.10
else:
    raise Exception("unknown configuration name")

import file_reader as fr

file_reader = fr.FileReader('./anna-simplified.txt')

Saves the corresponding signal arrays for beaconed, non-beaconed and probe
trials. In our virtual reality task, every tenth trial is a probe trial, and
every fifth trial that is not a probe trial is a non-beaconed trial. The rest
of the trials are beaconed. In these functions, the indices for the different
trial types are separated and saved into beaconed, nbeaconed and probe arrays.
If the arrays already exist, then the data is loaded from the file (the
location of this file is specified in init_params in main).
'''
import numpy as np
import os
import matplotlib.pylab as plt
import vr_process_movement
import file_reader
import parameters
import signal_for_indices

fr = file_reader.FileReader()

beaconed = None
nbeaconed = None
probe = None
trial_num = None


def keep_first_from_close_series(array, threshold):
    num_delete = 1
    while num_delete > 0:
        diff = np.ediff1d(array, to_begin=threshold + 1)
        to_delete = np.where(diff <= threshold)
        num_delete = len(to_delete[0])
        if num_delete > 0:
            # assumed completion of the truncated loop body: drop every element
            # that follows another within `threshold`, then re-check
            array = np.delete(array, to_delete[0])
    return array

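The trial-type split described in the docstring above can be sketched as follows. This is a rough, assumed reading of that description (1-based trial numbers interpreted modularly); the helper name split_trial_indices is illustrative only and not part of the module, and it reuses the module's numpy import.

# Rough sketch, not part of the original module: one plausible reading of the
# docstring, with 1-based trial numbers where every 10th trial is a probe
# trial and every 5th trial that is not a probe trial is non-beaconed.
def split_trial_indices(trial_numbers):
    beaconed_idx, nbeaconed_idx, probe_idx = [], [], []
    for i, trial in enumerate(trial_numbers):
        if trial % 10 == 0:
            probe_idx.append(i)
        elif trial % 5 == 0:
            nbeaconed_idx.append(i)
        else:
            beaconed_idx.append(i)
    return np.array(beaconed_idx), np.array(nbeaconed_idx), np.array(probe_idx)

# Example: for trials 1..20 this puts trials 10 and 20 into probe,
# trials 5 and 15 into non-beaconed, and everything else into beaconed.
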
def get_one_data_file(input_file, output_file_root):
    # One read range covering the whole file, measured in samples.
    read_index_list = [[0, get_file_size(input_file) // NUM_SAMPLE_LENGTH]]
    read_index_list_arr = np.array(read_index_list)
    num_samples_total = np.sum(read_index_list_arr[:, 1] - read_index_list_arr[:, 0])
    print(num_samples_total)

    file_reader = fr.FileReader()
    file_reader.initialize(input_file, get_file_size(input_file))

    output_file_temp = output_file_root + '.dat_shuffled'
    f_out = open(output_file_temp, 'wb')

    num_samples_saved = 0
    num_samples_saved_valid = 0
    for i_qp in range(NUM_QPS):
        for i in range(len(read_index_list)):
            if i == 0:
                index_last = 0
            else:
                index_last = read_index_list[i - 1][1]
            index_start = read_index_list[i][0]
            index_end = read_index_list[i][1]
            assert index_end > index_start
            assert index_start >= index_last
            num_samples = index_end - index_start

            # Skip over samples between the previous range and this one.
            if index_start > index_last:
                file_reader.read_data((index_start - index_last) * NUM_SAMPLE_LENGTH,
                                      isloop=False, dtype=np.uint8)

            index_start_in_batch = 0
            index_end_in_batch = 0
            vectors_lstm = []
            is_init = True
            while index_end_in_batch < num_samples:
                if is_init == True:
                    index_start_in_batch = 0
                    index_new_start_in_batch = 0
                    index_end_in_batch = BATCH_SAMPLES
                else:
                    index_start_in_batch += BATCH_SAMPLES_SWAP
                    index_new_start_in_batch = index_end_in_batch
                    index_end_in_batch += BATCH_SAMPLES_SWAP
                if index_end_in_batch > num_samples:
                    index_end_in_batch = num_samples
                num_samples_new = index_end_in_batch - index_new_start_in_batch

                data_new = file_reader.read_data(num_samples_new * NUM_SAMPLE_LENGTH,
                                                 isloop=False, dtype=np.uint8)
                data_new = data_new.reshape(num_samples_new, NUM_SAMPLE_LENGTH)
                # First 64 bytes of each sample hold the per-sample info header;
                # the remainder is split per QP.
                data_info_new = np.copy(data_new[:, 0:64])
                data_new = np.copy(
                    data_new[:, 64 + NUM_SAMPLE_LENGTH_PER_QP * i_qp:
                             64 + NUM_SAMPLE_LENGTH_PER_QP * (i_qp + 1)])
                images_new = np.copy(data_new[:, 1 + NUM_LABEL_BYTES:])
                images_new = np.reshape(
                    images_new,
                    [num_samples_new, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS])

                vectors_lstm_new = 255 * np.ones(
                    (num_samples_new, VECTOR_LENGTH * LSTM_MAX_LENGTH)).astype(np.float32)
                vectors_lstm_new[:, 0:VECTOR_LENGTH] = get_vectors(images_new)
                labels_qps_new = 255 * np.ones(
                    (num_samples_new, (1 + NUM_LABEL_BYTES) * LSTM_MAX_LENGTH)).astype(np.uint8)
                labels_qps_new[:, 0:1 + NUM_LABEL_BYTES] = data_new[:, 0:1 + NUM_LABEL_BYTES]

                # Sliding window over batches: keep the most recent samples as
                # reference history for the LSTM sequences.
                if is_init == True:
                    vectors_lstm = vectors_lstm_new
                    labels_qps = labels_qps_new
                else:
                    vectors_lstm = np.concatenate([
                        vectors_lstm[BATCH_SAMPLES_SWAP:vectors_lstm.shape[0]],
                        vectors_lstm_new
                    ], axis=0)
                    labels_qps = np.concatenate([
                        labels_qps[BATCH_SAMPLES_SWAP:labels_qps.shape[0]],
                        labels_qps_new
                    ], axis=0)

                # Decode little-endian width, height and frame index from the header bytes.
                widths_new = data_info_new[:, 2] + 256 * data_info_new[:, 3]
                heights_new = data_info_new[:, 4] + 256 * data_info_new[:, 5]
                widths_in_64_new = widths_new // 64
                heights_in_64_new = heights_new // 64
                i_frames_new = (data_info_new[:, 10] + 256 * data_info_new[:, 11]
                                + 256 * 256 * data_info_new[:, 12]
                                + 256 * 256 * 256 * data_info_new[:, 13])

                num_samples_valid_currentbatch = 0
                for i_sample_new in range(num_samples_new):
                    delta_frames_new = get_delta_ref_frames(i_frames_new[i_sample_new])
                    data_info_new[i_sample_new, 0] = len(delta_frames_new)
                    if (len(delta_frames_new) + 1 == LSTM_MAX_LENGTH
                            and get_is_print(i_frames_new[i_sample_new]) == True):
                        num_samples_saved_valid += 1
                        num_samples_valid_currentbatch += 1
                        f_out.write(data_info_new[i_sample_new, :])
                        f_out.write(labels_qps_new[i_sample_new,
                                                   0:(1 + NUM_LABEL_BYTES)].astype(np.float32))
                        f_out.write(vectors_lstm_new[i_sample_new,
                                                     0:VECTOR_LENGTH].astype(np.float32))
                        for i_delta_frame in range(len(delta_frames_new)):
                            i_sample_ref_new = (i_sample_new
                                                - i_delta_frame * widths_in_64_new[i_sample_new]
                                                * heights_in_64_new[i_sample_new])
                            if is_init == True:
                                i_sample_ref_total = i_sample_ref_new
                            else:
                                i_sample_ref_total = (i_sample_ref_new
                                                      + BATCH_SAMPLES - BATCH_SAMPLES_SWAP)
                            if i_sample_ref_total >= 0:
                                vectors_lstm_new[
                                    i_sample_new,
                                    VECTOR_LENGTH * (i_delta_frame + 1):
                                    VECTOR_LENGTH * (i_delta_frame + 2)] = \
                                    vectors_lstm[i_sample_ref_total, 0:VECTOR_LENGTH]
                                labels_qps_new[
                                    i_sample_new,
                                    (1 + NUM_LABEL_BYTES) * (i_delta_frame + 1):
                                    (1 + NUM_LABEL_BYTES) * (i_delta_frame + 2)] = \
                                    labels_qps[i_sample_ref_total, 0:(1 + NUM_LABEL_BYTES)]
                                f_out.write(labels_qps[i_sample_ref_total,
                                                       0:(1 + NUM_LABEL_BYTES)].astype(np.float32))
                                f_out.write(vectors_lstm[i_sample_ref_total,
                                                         0:VECTOR_LENGTH].astype(np.float32))

                is_init = False
                num_samples_saved += num_samples_new
                print('QP No.%d, %d (%d Valid) / %d Samples Completed' %
                      (i_qp + 1, num_samples_saved, num_samples_saved_valid,
                       num_samples_total * NUM_QPS))

    f_out.close()
    output_file = output_file_root + ('_%d.dat_lstm_%dqps' %
                                      (num_samples_saved_valid, NUM_QPS))
    os.rename(output_file_temp, output_file)
    shuffle_samples(output_file, NUM_SAMPLE_LENGTH_OUT * NUM_QPS)

    correct = 0.0
    total = len(test_set.keys())
    for key in test_set:
        real = test_set[key][-1]
        predicted = classifier.predict(test_set[key][0:-1])
        if real == predicted:
            correct += 1.0
    return correct / total


if __name__ == '__main__':
    print('Accuracy results:')
    file_name = "./dataset/amazon_cells_labelled_full.txt"
    train_file_name = "./dataset/amazon_cells_labelled_train.txt"
    test_file_name = "./dataset/amazon_cells_labelled_test.txt"
    data = file_reader.FileReader(file_name)

    # boolean
    train_set, _ = data.build_set("boolean", train_file_name)
    test_set, _ = data.build_set("boolean", test_file_name)
    classifier = rocchio_classifier.RocchioClassifier(train_set)
    print("Boolean:", '{:.3f}'.format(calc_accuracy(test_set, classifier)))

    # tf
    train_set, _ = data.build_set("tf", train_file_name)
    test_set, _ = data.build_set("tf", test_file_name)
    classifier = rocchio_classifier.RocchioClassifier(train_set)
    print("tf:", '{:.3f}'.format(calc_accuracy(test_set, classifier)))

    # tf-idf
    train_set, _ = data.build_set("tfidf", train_file_name)
    test_set, _ = data.build_set("tfidf", test_file_name)
    classifier = rocchio_classifier.RocchioClassifier(train_set)
    print("tfidf:", '{:.3f}'.format(calc_accuracy(test_set, classifier)))

import file_reader
import echo

fr = file_reader.FileReader("testfile.xml")
fr.attach(echo.Echo())
fr.start()
fr.thread.join()

""" correct = 0.0 total = len(test_set.keys()) for key in test_set: real = test_set[key][-1] predicted = classifier.predict(test_set[key][0:-1], similarity.CosineDistance) print(real, predicted) if real == predicted: correct += 1.0 return correct / total if __name__ == '__main__': file_reader = file_reader.FileReader(dataset.FULL_FILE, words_filter=dataset.stop_words, vector_type='tfidf') full_set = file_reader.build_set(dataset.FULL_FILE) train_set = file_reader.build_set(dataset.TRAIN_FILE) test_set = file_reader.build_set(dataset.TEST_FILE) classifier = rocchio_classifier.Rocchio_Classifier(train_set) print(calc_accuracy(test_set, classifier)) # # svm_light_format(full_set) # k, queryID, query, rep_method = int(sys.argv[1]), sys.argv[2],str(sys.argv[3]), int(sys.argv[4]) # eval = AdhocEval(k, query, rep_method) # output_file = "Output_"+str(queryID)+"_"+str(rep_method)+".txt" # output_dir = open(output_file, 'w') # sys.stdout = output_dir # eval.print_results()