import time

import variables_pb2                    # generated protobuf bindings (project-local)
from sample_logger import SampleLogger  # project-local helper; import path assumed


class ProtobufTest(object):

    def __init__(self, samp_num=100000, channels_num=25, log_interval=100):
        print("Samples:", samp_num, "channels:", channels_num,
              "log_interval:", log_interval)
        self.num_of_samples = samp_num
        self.log_interval = log_interval
        self.num_of_channels = channels_num
        self.logger = SampleLogger(self.log_interval)
        self.sample_vec = variables_pb2.SampleVector()
        # Build a single reference sample; restore the commented-out bound
        # to build one sample per channel instead.
        for x in range(1):  # self.num_of_channels
            samp = self.sample_vec.samples.add()
            for i in range(self.num_of_channels):
                samp.channels.append(float(x))
            samp.timestamp = time.time()
        self.msg = self.sample_vec.SerializeToString()
        print("Approx. serialized sample vector size:", len(self.msg))

    def perform_packing_test(self):
        start_time = time.time()
        print("Start packing test:")
        self.logger.mark_start()
        for i in range(self.num_of_samples):
            samp = self.sample_vec.samples[0]
            # Don't assign samp.channels[x] = float(x) without clearing samp
            # first; reusing the message without Clear() makes memory usage
            # grow linearly.
            samp.Clear()
            for x in range(self.num_of_channels):
                samp.channels.append(float(x))
            samp.timestamp = time.time()
            msg = self.sample_vec.SerializeToString()
            # self.logger.log_sample()
        self.logger.mark_end()
        end_time = time.time()
        print("End of packing test - time:", end_time - start_time,
              "approx. sample rate:",
              float(self.num_of_samples) / (end_time - start_time))
        data_size = len(self.msg) * self.num_of_samples
        print(float(data_size) / 1024 / 1024, "MiB")
        print(float(data_size) / (end_time - start_time) / 1000 / 1000 * 8, "Mbps")
        # self.logger.report()

    def perform_unpacking_test(self):
        start_time = time.time()
        test_vec = variables_pb2.SampleVector()
        msg = self.sample_vec.SerializeToString()
        print("Start deserializing test:")
        self.logger.mark_start()
        for i in range(self.num_of_samples):
            test_vec.ParseFromString(msg)
            # self.logger.log_sample()
        self.logger.mark_end()
        end_time = time.time()
        print("End of unpacking test - time:", end_time - start_time,
              "approx. sample rate:",
              float(self.num_of_samples) / (end_time - start_time))
        # self.logger.report()
        data_size = len(self.msg) * self.num_of_samples
        print(float(data_size) / 1024 / 1024, "MiB")
        print(float(data_size) / (end_time - start_time) / 1000 / 1000 * 8, "Mbps")
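# A minimal runner for the benchmark above (hypothetical invocation; the
# original source may drive it differently). The message layout sketched in
# this comment is an assumption inferred from the field accesses in the class
# (samples, channels, timestamp); the field numbers are made up:
#
#     message Sample {
#         repeated float channels = 1;
#         optional double timestamp = 2;
#     }
#     message SampleVector {
#         repeated Sample samples = 1;
#     }

if __name__ == '__main__':
    test = ProtobufTest(samp_num=100000, channels_num=25, log_interval=100)
    test.perform_packing_test()
    test.perform_unpacking_test()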
import uuid
from pathlib import Path

from sklearn.preprocessing import LabelEncoder

# The remaining names (Configuration, DataLoader, SimilarityDataset, Siamese,
# SimilarityClassifier, SampleLogger, TransformsComposer, Rescale, ToTensor,
# create_results_directories, and the *_DIR / *_FILENAME constants) come from
# project-local modules not shown here.


def main():
    config_filename = Path.cwd().joinpath(CONFIGS_DIR).joinpath(CONFIG_FILENAME)
    config = Configuration(config_filename)
    batch_size = 4
    epochs = 4

    results_dir_path = Path.cwd().joinpath(RESULTS_DIR)
    current_run_path = create_results_directories(results_dir_path)
    sample_logger_path = Path.cwd().joinpath(current_run_path).joinpath(
        SAMPLE_LOGGER_FILE)
    sample_logger = SampleLogger(sample_logger_path)

    transforms = TransformsComposer([Rescale(output_size=10000), ToTensor()])

    encoder = LabelEncoder()
    data_loader = DataLoader(config)
    x_train, y_train = data_loader.get_train_set()
    encoder.fit(y_train)
    classes = encoder.classes_
    # Map each integer label back to its original category name.
    classes_map = {i: category for i, category in enumerate(classes)}
    print(classes_map)
    y_train = encoder.transform(y_train)
    train_dataset = SimilarityDataset(x_train, y_train, classes_map,
                                      sample_logger, transforms)

    x_test, y_test = data_loader.get_test_set()
    y_test = encoder.transform(y_test)
    test_dataset = SimilarityDataset(x_test, y_test, classes_map,
                                     sample_logger, transforms)

    model = Siamese()
    # Note: states_dir is computed here but the snapshot below is stored
    # under current_run_path.
    states_dir = Path.cwd().joinpath(STATES_DIR)
    state_filename = f'{uuid.uuid1()}_state_{epochs}_epochs.pth'
    state_path = current_run_path.joinpath('best_snapshot').joinpath(
        state_filename)
    classifier = SimilarityClassifier(model=model, state_path=state_path)

    # Fit the model on the training data, validating against the test set.
    train_loss_history, val_loss_history = classifier.fit(
        train_dataset, batch_size=batch_size, epochs=epochs,
        validation_data=test_dataset)
    sample_logger.save()

    # plt.figure()
    # plt.title(f'Model Loss for {epochs} epochs')
    # plt.xlabel('epoch')
    # plt.ylabel('loss')
    # plt.plot(train_loss_history, label='train')
    # plt.plot(val_loss_history, label='test')
    # plt.legend()
    # plt.show()

    predictions_path = Path.cwd().joinpath('predicted.csv')
    validation_dataset = SimilarityDataset(x_test, y_test, classes_map,
                                           sample_logger, transforms)
    validation_model = Siamese(num_classes=len(classes_map))
    validation_classifier = SimilarityClassifier(validation_model,
                                                 state_path=state_path)
    validation_classifier.predict(validation_dataset, batch_size=batch_size,
                                  output_filepath=predictions_path)
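# For clarity: scikit-learn's LabelEncoder (used above) maps string
# categories to sorted integer indices, and classes_map inverts that mapping
# so predictions can be reported by name. A minimal, self-contained
# illustration with made-up labels:

from sklearn.preprocessing import LabelEncoder

labels = ['cat', 'dog', 'cat', 'bird']
encoder = LabelEncoder()
encoder.fit(labels)
print(encoder.classes_)           # ['bird' 'cat' 'dog'] -- unique, sorted
print(encoder.transform(labels))  # [1 2 1 0]
classes_map = {i: c for i, c in enumerate(encoder.classes_)}
print(classes_map)                # {0: 'bird', 1: 'cat', 2: 'dog'}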