def test(net):
    """Evaluate `net` on the global test set and return accuracy in percent."""
    global best_acc
    net.eval()
    running_loss = 0
    num_correct = 0
    num_seen = 0
    for step, (inputs, targets) in enumerate(test_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        running_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        num_seen += float(targets.size(0))
        num_correct += predicted.eq(targets.data).cpu().sum().type(
            torch.FloatTensor)
        progress_bar(
            step, len(test_loader),
            'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (running_loss / (step + 1), 100. * num_correct / num_seen,
             num_correct, num_seen))
    # Save checkpoint.
    acc = 100. * num_correct / num_seen
    return acc
def extract_xref(files_list):
    """Extract DATA/CODE XREF features from every .asm.gz file in files_list.

    Corrupted files are logged via log_exception and recorded; the collected
    features are pickled with save_obj and written to features/xref_features.csv.

    Returns:
        pandas.DataFrame with one row of XREF features per file.
    """
    # total number of files to calculate completion percentage
    total_files = len(files_list)
    # Bug fix: bad_files_idx was never initialized, so the first parsing
    # failure raised a NameError inside the except handler.
    bad_files_idx = []
    bad_files_names = []
    # Extract all features related to DATA and CODE XREF
    xref_dict = xref_initialization()
    for idx, file_name in enumerate(files_list):
        asm_file = DATASET_DIR + 'train/' + file_name + '.asm.gz'
        try:
            get_xref_features(asm_file, xref_dict)
        except Exception as e:
            # log corrupted files for future correction
            log_exception(e, sys.argv[0], asm_file)
            bad_files_idx.append(idx)
            bad_files_names.append(file_name)
        progress_bar(idx + 1, total_files, 50)
    xref_pd = pd.DataFrame.from_dict(xref_dict)
    # store xref features to avoid recalculation
    save_obj(xref_pd, 'xref_features')
    '''
    save_obj(bad_files_names, 'bad_asm_files')
    # drop corrupted files (if any) from the training set
    if len(bad_files_names) > 0:
        # log the number of corrupted files
        logging.info('XREF Feature Extraction completed: ' +
                     str(len(bad_files_names)) + ' file(s) are corrupted.')
        # store the corrupted files names in 'bad_asm_files.txt'
        with open('bad_asm_files.txt', 'w') as bfp:
            for name in bad_files_names:
                bfp.write(name + '.asm')
    '''
    # save xref features dataframe to csv file to keep results (optional)
    xref_pd.to_csv('features/xref_features.csv', index=False)
    return xref_pd
def test(epoch):
    """Evaluate netG on the test loader; return (avg_loss, avg_psnr).

    Every 20th epoch (and the final epoch) an [input | prediction | target]
    image grid from the last batch is written to Test_Prediction/.
    """
    netG.eval()
    psnr_sum = 0
    loss_sum = 0
    for it, batch in enumerate(test_data_loader, 1):
        with torch.no_grad():
            input, target = Variable(batch[0]), Variable(batch[1])
            if opt.cuda:
                input = input.cuda()
                target = target.cuda()
            prediction = netG(input)
            mse = criterionMSE(prediction, target)
            loss_sum += mse.item()
            # PSNR from the per-batch MSE
            psnr = 10 * log10(1 / mse.item())
            psnr_sum += psnr
            # display
            progress_bar(epoch, it, len(test_data_loader) + 1,
                         ': Avg.Loss: {:.4f}, Avg.PSNR: {:.4f} dB'.format(
                             mse.item(), psnr))
    if epoch % 20 == 0 or epoch == opt.nEpochs:
        testDir = 'Test_Prediction/'
        os.makedirs(testDir, exist_ok=True)
        # grid built from the last processed batch
        grid_img = make_grid([input[0], prediction[0], target[0]])
        save_image(grid_img, testDir + 'epoch_{}.png'.format(epoch))
    avg_loss = loss_sum / len(test_data_loader)
    avg_psnr = psnr_sum / len(test_data_loader)
    print('Test Avg.Loss: {:.4f}, Avg.PSNR: {:.4f} dB'.format(avg_loss,
                                                              avg_psnr))
    return avg_loss, avg_psnr
def run_new(country, csv_path, save_path, save_result):
    """Predict confirmed cases per country and emit js/json result files.

    Args:
        country: list of country names (also written to data/country.js).
        csv_path: directory holding one <country>.csv file each.
        save_path: unused, kept for interface compatibility with callers.
        save_result: path of the JSON file receiving the prediction dict.
    """
    # Save the country list as a js file (context manager instead of
    # manual open/close so the handle is released even on error).
    with open('data/country.js', 'w') as f:
        f.write('window.country=' + json.dumps(country))
    js_data = make_json()
    preConfirmed = list()
    distance = len(country)
    # get predict data for every country
    for step, item in enumerate(country):
        file = csv_path + item + '.csv'
        preConfirmed.append(pre_week(file)[0].astype(int))
        js_data["data"][item]["PreConfirmed"] = preConfirmed[-1].tolist()
        progress_bar(step, distance)
    # save the js_data as a js file
    with open('data/data.js', 'w') as f:
        f.write('window.data=' + json.dumps(js_data))
    result = {}
    for a, b in zip(country, preConfirmed):
        result[a] = b.tolist()
        print(a, b)
    # here to save the prediction data
    with open(save_result, 'w') as f:
        f.write(json.dumps(result))
def train(net, epoch):
    """Run one training epoch of `net` over the global train_loader."""
    print('\nEpoch: %d' % epoch)
    net.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0
    for step, (inputs, targets) in enumerate(train_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        n_seen += float(targets.size(0))
        n_correct += predicted.eq(targets.data).cpu().sum().type(
            torch.FloatTensor)
        progress_bar(
            step, len(train_loader),
            'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (running_loss / (step + 1), 100. * n_correct / n_seen,
             n_correct, n_seen))
def train(epoch):
    """Run one training epoch over per-channel temperature-encoded inputs.

    Each RGB channel is encoded separately with encoder.tempencoding and the
    three encoded tensors are passed to `net` as separate arguments.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # split the batch into its three channels as numpy arrays
        channel0, channel1, channel2 = inputs.numpy(
        )[:, 0, :, :], inputs.numpy()[:, 1, :, :], inputs.numpy()[:, 2, :, :]
        channel0, channel1, channel2 = encoder.tempencoding(
            channel0), encoder.tempencoding(channel1), encoder.tempencoding(
                channel2)
        channel0, channel1, channel2 = torch.Tensor(channel0), torch.Tensor(
            channel1), torch.Tensor(channel2)
        if use_cuda:
            channel0, channel1, channel2, targets = channel0.cuda(
            ), channel1.cuda(), channel2.cuda(), targets.cuda()
        optimizer.zero_grad()
        channel0, channel1, channel2, targets = Variable(channel0), Variable(
            channel1), Variable(channel2), Variable(targets)
        outputs = net(channel0, channel1, channel2)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Fix: loss.data[0] was removed in PyTorch 0.5; loss.item() is the
        # supported accessor (matches the other train/test loops in this repo).
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))
def train(epoch):
    """Run one training epoch over `trainloader` using the global net/Optimizer."""
    print('-- Current Training Epoch %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        Optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        Optimizer.step()
        # Fix: loss.data[0] was removed in PyTorch 0.5; loss.item() is the
        # supported accessor.
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        util.progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.3f | Acc: %.2f%% (%d/%d)' %
            (train_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))
def stream_from_source(self): self.cntr = 0 # training folder path training_path = os.path.join(self.root, "TrainingSet") if self.segment is None: patient_folders = os.listdir(training_path) else: patient_folders = self.segment # iterate through the patients for patient_folder in patient_folders: self.cntr += 1 patient_path = os.path.join(training_path, patient_folder) folders = os.listdir(patient_path) img_folder = [fd for fd in folders if fd.find('dicom') != -1][0] con_folder = [fd for fd in folders if fd.find('contours') != -1][0] img_path = os.path.join(patient_path, img_folder) con_path = os.path.join(patient_path, con_folder) # read the images and the contours dcm = dicom_reader.DCMreaderMC2(img_path) con = con_reader.CONreaderMC2(con_path) p = Patient(Origin.MC2) p.origin_path = patient_path p.patient_id = patient_folder p.gender = dcm.get_gender() p.height = dcm.get_height() p.weight = dcm.get_weight() p.pixel_spacing = dcm.get_pixelspacing() p.slice_thicknes = dcm.get_slicethickness() p.gap = dcm.get_gap() for k, v in dcm.get_imagepaths().items(): image = Image(Origin.MC2) image.origin_path = v['path'] image.patient_id = p.patient_id image.slice = v['slice'] image.frame = v['frame'] if k in con.get_contours().keys(): for part, cntr in con.get_contours()[k].items(): contour = Contour(Origin.MC2) contour.origin_path = cntr['path'] contour.patient_id = p.patient_id contour.slice = image.slice contour.frame = image.frame contour.length = cntr['contour'].shape[0] contour.contour_mtx = cntr['contour'] contour.corresponding_image = image if part == 'i': # i indicates right-endo contour.part = Region.RN elif part == 'o': contour.part = Region.RP image.ground_truths.append(contour) p.append_gt(contour) image.has_gt = True p.append_image(image) pickle_path = os.path.join(self.cache, patient_folder + '.pickle') with open(pickle_path, 'wb') as f: pickle.dump(p, f) # create cache util.progress_bar(self.cntr, len(patient_folders), 50) yield p
def test(epoch, model, criterion, testloader):
    """Evaluate `model` for one epoch, printing a running loss/accuracy bar."""
    model.eval()  # disable dropout for evaluation
    running_loss = 0.0
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for step, batch in enumerate(testloader):
            images, labels = batch[0], batch[1]
            # move to GPU when available
            if use_gpu:
                images = images.cuda()
                labels = labels.cuda()
            # forward
            outputs = model(images)
            _, preds = torch.max(outputs.data, 1)
            running_loss += criterion(outputs, labels).item()
            n_total += labels.size(0)
            n_correct += preds.eq(labels).sum().item()
            progress_bar(
                epoch, step, len(testloader),
                ': Loss: {:.4f}, Acc: {:.4f} % ({}/{})'.format(
                    running_loss / (step + 1), 100. * n_correct / n_total,
                    n_correct, n_total))
def print_test_info(cfg, step, metrics):
    """Print testing progress: banner on the first step, metrics on the last.

    Inputs:
        cfg: training options
        step: a list includes --> [per_step, total_step]
        metrics: the current batch testing metrics
    """
    report = ""
    message = ""
    info = ""
    log_path = os.path.join(cfg.CHECKPOINT_DIR, 'Test_log.txt')
    if step[0] == 0:
        equal_left, equal_right = cal_equal(15)
        info += "\n" + "=" * equal_left + " Start Testing " + "=" * equal_right
        print(info)
        if cfg.opts.save_test_log:
            wrote_txt_file(log_path, info, mode='w', show=False)
    progress_bar(step[0], step[1], display=False)
    # only report metrics when labels exist and this is the final step
    if cfg.opts.test_label != 'None' and step[0] + 1 >= step[1]:
        report, message = prepare_metrics(report, message, metrics)
        if cfg.opts.save_test_log:
            wrote_txt_file(log_path, message + "\n" + report, mode='a',
                           show=False)
def print_val_info(val_flag, cfg, step, metrics):
    """Print validation progress when val_flag is set; logs to Train_Log.txt.

    Inputs:
        val_flag: whether print the information or not (bool)
        cfg: training options
        step: a list includes --> [per_step, total_step]
        metrics: the current batch validating metrics
    """
    report = ""
    message = ""
    info = ""
    if not val_flag:
        return
    log_path = os.path.join(cfg.CHECKPOINT_DIR, 'Train_Log.txt')
    if step[0] - 1 == 0:
        info += ">>> Validate on the val dataset ..."
        print(info)
        if cfg.opts.save_train_log:
            wrote_txt_file(log_path, info, mode='a', show=False)
    progress_bar(step[0] - 1, step[1], display=False)
    # report the accumulated metrics on the final validation step
    if step[0] >= step[1]:
        report, message = prepare_metrics(report, message, metrics)
        if cfg.opts.save_train_log:
            wrote_txt_file(log_path, message + "\n" + report, mode='a',
                           show=False)
def extract_byte_ngram_features(files_list, n, addrlength=32):
    """Build byte n-gram frequency and tf-idf feature sets.

    Counts byte n-grams per .bytes.gz file, vectorizes the counts, writes
    frequency and tf-idf CSVs, and returns both DataFrames.

    NOTE(review): files that fail parsing are skipped with `continue`, so the
    output rows no longer line up 1:1 with files_list — confirm callers
    account for that.
    """
    ngram_d_list = []
    for idx, file_name in enumerate(files_list):
        byte_file = conf['dataset_dir'] + file_name + '.bytes.gz'
        byte_seq = parse_bytes(byte_file, addrlength)
        # check for successful byte file parsing
        if not byte_seq:
            continue
        ngram_cnt_d = count_byte_ngrams(byte_seq, n)
        ngram_d_list.append(ngram_cnt_d)
        progress_bar(idx + 1, len(files_list), 50)
    # convert list of dictionaries to a byte ngram count numpy array
    vec = DictVectorizer()
    ngram_freq = vec.fit_transform(ngram_d_list).toarray()
    ngram_freq_df = pd.DataFrame(ngram_freq, columns=vec.get_feature_names())
    # store frequency of each byte ngram
    ngram_freq_df.to_csv(conf['featsets_dir'] + str(n) + 'gram_byte_freq2.csv',
                         index=False)
    # transform ngram frequency array to ngram tfidf array
    transformer = TfidfTransformer(smooth_idf=False)
    ngram_tfidf = transformer.fit_transform(ngram_freq)
    # store tfidf of each byte ngram in CSV file
    # NOTE(review): x.decode("utf-8") assumes bytes feature names (Python 2
    # sklearn); under Python 3 these are str and decode() would fail — confirm
    # the target runtime.
    ngram_tfidf_df = pd.DataFrame(
        ngram_tfidf.todense(),
        columns=[x.decode("utf-8") + '_tf' for x in vec.get_feature_names()])
    ngram_tfidf_df.to_csv(conf['featsets_dir'] + str(n) +
                          'gram_byte_tfidf2.csv', index=False)
    return ngram_freq_df, ngram_tfidf_df
def retrain(net, epoch):
    """Run one fine-tuning epoch; re-quantizes weights when args.fixed is set."""
    print('\nEpoch: %d' % epoch)
    global best_acc
    net.train()
    running_loss = 0
    seen = 0
    hits = 0
    for step, (inputs, targets) in enumerate(train_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        # quantize before the optimizer step in fixed-point mode
        if args.fixed:
            net = util.quantize(net, args.pprec)
        optimizer.step()
        running_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        seen += targets.size(0)
        hits += float(predicted.eq(targets.data).cpu().sum())
        progress_bar(
            step, len(train_loader),
            'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (running_loss / (step + 1), 100. * hits / seen, hits, seen))
    acc = 100. * hits / seen
    # re-quantize once more after the epoch so stored weights stay fixed-point
    if args.fixed:
        net = util.quantize(net, args.pprec)
def stream_from_source(self): self.cntr = 0 # training folder path training_path = os.path.join(self.root, "CAP_challenge_training_set") if self.segment is None: patient_folders = os.listdir(training_path) else: patient_folders = self.segment patient_folders = [p for p in patient_folders if os.path.isdir(os.path.join(training_path, p))] # iterate through the patients for patient_folder in patient_folders: self.cntr += 1 patient_path = os.path.join(training_path, patient_folder) # read the images and the contours dcm = dicom_reader.DCMreaderST(patient_path) con = con_reader.CONreaderST(patient_path) p = Patient(Origin.ST11) p.origin_path = patient_path p.patient_id = patient_folder p.gap = dcm.get_gap() p.pixel_spacing = dcm.get_pixelspacing() p.slice_thicknes = dcm.get_slicethickness() for slice in dcm.get_imagepaths(): for frame in dcm.get_imagepaths()[slice]: image = Image(Origin.ST11) image.origin_path = dcm.get_imagepaths()[slice][frame]['path'] image.patient_id = p.patient_id image.slice = slice image.frame = frame if slice in con.get_contours(): if frame in con.get_contours()[slice]: for part in ['LN', 'LP']: cntr = con.get_contours()[slice][frame] contour = Contour(Origin.ST11) contour.origin_path = cntr['path'] contour.patient_id = p.patient_id contour.slice = image.slice contour.frame = image.frame contour.length = cntr['contours'][part].shape[0] contour.contour_mtx = cntr['contours'][part] contour.corresponding_image = image if part == 'LN': # i indicates left-endo contour.part = Region.LN elif part == 'LP': contour.part = Region.LP image.ground_truths.append(contour) p.append_gt(contour) image.has_gt = True p.append_image(image) pickle_path = os.path.join(self.cache, patient_folder + '.pickle') with open(pickle_path, 'wb') as f: pickle.dump(p, f) # create cache util.progress_bar(self.cntr, len(patient_folders), 50) yield p
def train(epoch):
    """Run one pix2pix-style GAN training epoch.

    Alternates a discriminator update (real vs generated pairs) and a
    generator update (GAN loss + L1 reconstruction weighted by opt.lamb).

    Returns:
        (avg_D_loss, avg_G_loss): mean losses over the epoch.
    """
    netD.train()
    netG.train()
    total_D_loss = 0
    total_G_loss = 0
    for iteration, batch in enumerate(training_data_loader, 1):
        # forward: copy the CPU batch into the persistent input Variables
        real_a_cpu, real_b_cpu = batch[0], batch[1]
        real_a.data.resize_(real_a_cpu.size()).copy_(real_a_cpu)
        real_b.data.resize_(real_b_cpu.size()).copy_(real_b_cpu)
        fake_b = netG(real_a)
        ############################
        # (1) Update D network: maximize log(D(x,y)) + log(1 - D(x,G(x)))
        ###########################
        optimizerD.zero_grad()
        # train with fake; detach() blocks gradients from reaching netG here
        fake_ab = torch.cat((real_a, fake_b), 1)
        pred_fake = netD.forward(fake_ab.detach())
        loss_d_fake = criterionGAN(pred_fake, False)
        # train with real
        real_ab = torch.cat((real_a, real_b), 1)
        pred_real = netD.forward(real_ab)
        loss_d_real = criterionGAN(pred_real, True)
        # Combined loss
        loss_d = (loss_d_fake + loss_d_real) * 0.5
        loss_d.backward()
        optimizerD.step()
        ############################
        # (2) Update G network: maximize log(D(x,G(x))) + L1(y,G(x))
        ##########################
        optimizerG.zero_grad()
        # First, G(A) should fake the discriminator
        fake_ab = torch.cat((real_a, fake_b), 1)
        pred_fake = netD.forward(fake_ab)
        loss_g_gan = criterionGAN(pred_fake, True)
        # Second, G(A) = B
        loss_g_l1 = criterionL1(fake_b, real_b) * opt.lamb
        loss_g = loss_g_gan + loss_g_l1
        loss_g.backward()
        optimizerG.step()
        total_D_loss += loss_d.item()
        total_G_loss += loss_g.item()
        # display
        progress_bar(epoch, iteration, len(training_data_loader)+1,
                     ': Loss_D: {:.4f}, Loss_G: {:.4f}'.format(loss_d.item(),
                                                               loss_g.item()))
    avg_D_loss = total_D_loss / len(training_data_loader)
    avg_G_loss = total_G_loss / len(training_data_loader)
    return avg_D_loss, avg_G_loss
def test(epoch, net, criterion, testLoader, device):
    """Run one evaluation pass and return the mean test loss."""
    net.eval()
    running = 0
    for step, (xb, yb) in enumerate(testLoader):
        xb, yb = xb.to(device), yb.to(device)
        preds = net(xb)
        running += criterion(preds, yb).item()
        util.progress_bar(step, len(testLoader),
                          'Test Loss: %.3f' % (running / (step + 1)))
    return running / len(testLoader)
def stream_from_source(self): self.cntr = 0 # training folder path training_path = os.path.join(self.root, "TrainingData_LVQuan19") if self.segment is None: patient_datas = os.listdir(training_path) else: patient_datas = self.segment patient_datas = [p for p in patient_datas if p.endswith('.mat')] # iterate through the patients for patient_file in patient_datas: self.cntr += 1 patient_path = os.path.join(training_path, patient_file) # read the images and the contours mat = loadmat(patient_path) p = Patient(Origin.ST19) p.origin_path = patient_path p.patient_id = patient_file[:-4] p.pixel_spacing = mat['pix_spacing'][0, 0] for frame in range(20): image = Image(Origin.ST19) image.origin_path = p.origin_path image.patient_id = p.patient_id image.frame = frame image.image = mat['image'][:, :, frame] image.size = image.image.shape lvq = LVQuant(Origin.ST19) lvq.origin_path = p.origin_path lvq.patient_id = p.patient_id lvq.frame = image.frame lvq.epi_mask = mat['epi'][:, :, frame] lvq.endo_mask = mat['endo'][:, :, frame] lvq.area_cavity = mat['areas'][0, frame] lvq.area_myo = mat['areas'][1, frame] lvq.dims = mat['dims'][:, frame] lvq.rwt = mat['rwt'][:, frame] lvq.phase = mat['lv_phase'] # systole: 1, diastole: 0 lvq.corresponding_image = image image.ground_truths.append(lvq) p.append_gt(lvq) image.has_gt = True p.append_image(image) pickle_path = os.path.join(self.cache, p.patient_id + '.pickle') with open(pickle_path, 'wb') as f: pickle.dump(p, f) # create cache util.progress_bar(self.cntr, len(patient_datas), 10) yield p
def main():
    """Extract XREF features for the whole training set.

    Reads trainLabels.csv, extracts DATA/CODE XREF features from every
    .asm.gz file, drops corrupted files from the combined dataset, logs
    them, and writes the result to results/xref_features.csv.
    """
    train_labels = pd.read_csv(DATASET_DIR + 'trainLabels.csv')
    files_list = train_labels['Id'].tolist()
    # total number of files to calculate completion percentage
    total_files = len(files_list)
    # do not count corrupted files
    bad_files_idx = []
    bad_files_names = []
    # Extract all features related to DATA and CODE XREF
    xref_dict = xref_initialization()
    for idx, file_name in enumerate(files_list):
        asm_file = DATASET_DIR + 'train/' + file_name + '.asm.gz'
        try:
            get_xref_features(asm_file, xref_dict)
        except Exception as e:
            # log corrupted files for future correction
            log_exception(e, sys.argv[0], asm_file)
            bad_files_idx.append(idx)
            bad_files_names.append(file_name)
        progress_bar(idx+1, total_files, 50)
    xref_pd = pd.DataFrame.from_dict(xref_dict)
    # store xref features to avoid recalculation
    save_obj(xref_pd, 'xref_features')
    save_obj(bad_files_names, 'bad_files')
    # concat features with classes and IDs to create the dataset
    data = pd.concat([train_labels, xref_pd], axis=1, sort=False)
    # drop corrupted files (if any) from the training set
    if len(bad_files_idx) > 0:
        data.drop(data.index[bad_files_idx], inplace=True)
        data = data.reset_index(drop=True)
        # log the number of corrupted files
        logging.info('XREF Feature Extraction completed: ' +
                     str(len(bad_files_idx)) + ' file(s) are corrupted.')
        # store the corrupted files names in 'bad_asm_files.txt'
        with open('bad_asm_files.txt', 'w') as bfp:
            for name in bad_files_names:
                bfp.write(name + '.asm.gz')
    # save xref features dataframe to csv file to keep results (optional)
    data.to_csv('results/xref_features.csv')


# NOTE(review): this opening triple-quote starts a commented-out region that
# appears to continue past this chunk — confirm it is closed later in the file.
'''
def extract_opcode_ngram(files_list, n):
    """Count opcode n-grams per .asm file; write freq/tfidf CSVs and pickles.

    Returns the tf-idf DataFrame.
    """
    dicts_list = []
    total_files = len(files_list)
    for idx, file_name in enumerate(files_list):
        asm_file = conf['dataset_dir'] + file_name + '.asm.gz'
        clean_asm_code = clean_asm_lines(asm_file)
        # The opcode mnemonic is the first token of each preprocessed line;
        # only 2-7 lowercase letters are accepted, which filters out most
        # outliers / extreme cases.
        opcode_sequence = [
            line.split(' ')[0].rstrip() for line in clean_asm_code
            if bool(re.match('^[a-z]{2,7}$', line.split(' ')[0].rstrip()))
        ]
        # slide a window of length n over the sequence, joining with '-'
        ngram_dict = {}
        for start in range(len(opcode_sequence) - n + 1):
            gram = '-'.join(opcode_sequence[start:start + n])
            ngram_dict[gram] = ngram_dict.get(gram, 0) + 1
        dicts_list.append(ngram_dict)
        # progress bars always save my sanity
        progress_bar(idx + 1, total_files, 50)
    # convert list of dictionaries to an opcode ngram count numpy array
    vec = DictVectorizer()
    ngram_freq = vec.fit_transform(dicts_list).toarray()
    ngram_freq_df = pd.DataFrame(ngram_freq, columns=vec.get_feature_names())
    ngram_freq_df.to_csv('features/' + str(n) + 'gram_opcode_freq1.csv',
                         index=False)
    save_obj(ngram_freq_df, str(n) + 'gram_opcode_freq')
    # transform ngram frequency array to ngram tfidf array
    transformer = TfidfTransformer(smooth_idf=False)
    ngram_tfidf = transformer.fit_transform(ngram_freq)
    # transform array to pandas dataframe
    freq_vec_df = pd.DataFrame(ngram_tfidf.todense(),
                               columns=vec.get_feature_names())
    freq_vec_df.to_csv('features/' + str(n) + 'gram_opcode_tfidf1.csv',
                       index=False)
    save_obj(freq_vec_df, str(n) + 'gram_opcode_tfidf')
    return freq_vec_df
def extract_byte_entropy_features(files_list, addrlength=32):
    """Compute sliding-window entropy statistics for every .bytes file.

    Returns a DataFrame of entropy features (also saved to CSV).
    """
    stats_per_file = []
    for pos, name in enumerate(files_list, start=1):
        bytes_file = conf['dataset_dir'] + name + '.bytes.gz'
        byte_stream = parse_bytes(bytes_file, addrlength)
        stats_per_file.append(
            extract_entropy_statistics(byte_stream, window_size=10000))
        progress_bar(pos, len(files_list), 50)
    # vectorize the per-file dictionaries into a DataFrame
    vec = DictVectorizer()
    matrix = vec.fit_transform(stats_per_file).toarray()
    ent_features = pd.DataFrame(matrix, columns=vec.get_feature_names())
    # store entropy feature set in CSV file
    ent_features.to_csv('features/entropy_features.csv', index=False)
    return ent_features
def train(epoch, net, criterion, optimizer, trainLoader, device):
    """Run one training epoch, printing a running mean of the loss."""
    print('\nEpoch: %d' % epoch)
    net.train()
    running = 0
    for step, (xb, yb) in enumerate(trainLoader):
        xb, yb = xb.to(device), yb.to(device)
        preds = net(xb)
        loss = criterion(preds, yb)
        running += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        util.progress_bar(step, len(trainLoader),
                          'Train Loss: %.3f' % (running / (step + 1)))
def stream_from_cache(self):
    """Yield Patient objects previously pickled into the cache directory."""
    # figure out which pickle files to load and how many (for progress)
    if self.segment is None:
        names = os.listdir(self.cache)
    else:
        names = list(self.segment)
    total = len(names)
    # stream the saved pickle files
    for done, name in enumerate(names, start=1):
        with open(os.path.join(self.cache, name), 'br') as f:
            patient = pickle.load(f)
        util.progress_bar(done, total, 50)
        yield patient
def main():
    """Poll service utilization forever, comparing micro vs macro CPU usage."""
    while True:
        micro_cfg = util.read_config_file(micro_config_file_name)
        macro_cfg = util.read_config_file(macro_config_file_name)
        micro_util = monitor.get_microservices_utilization()
        # micro_util = monitor.get_container_utilization()
        macro_util = monitor.get_macroservices_utilization()
        compare_cpu_util(micro_cfg, micro_util, macro_cfg, macro_util)
        print("****** Completed monitoring! Wait for " +
              str(monitoring_interval) + " seconds. *****")
        # progress_bar doubles as the sleep between polls
        util.progress_bar(monitoring_interval)
def test(epoch):
    """Evaluate on the per-channel encoded test set; checkpoint on a new best.

    Saves {'net', 'acc', 'epoch'} to ./checkpoint/ckpt.t7 whenever accuracy
    exceeds the global best_acc.
    """
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(testloader):
        # split the batch into its three channels as numpy arrays
        channel0, channel1, channel2 = inputs.numpy(
        )[:, 0, :, :], inputs.numpy()[:, 1, :, :], inputs.numpy()[:, 2, :, :]
        channel0, channel1, channel2 = encoder.tempencoding(
            channel0), encoder.tempencoding(channel1), encoder.tempencoding(
                channel2)
        channel0, channel1, channel2 = torch.Tensor(channel0), torch.Tensor(
            channel1), torch.Tensor(channel2)
        if use_cuda:
            channel0, channel1, channel2, targets = channel0.cuda(
            ), channel1.cuda(), channel2.cuda(), targets.cuda()
        channel0, channel1, channel2, targets = Variable(channel0), Variable(
            channel1), Variable(channel2), Variable(targets)
        outputs = net(channel0, channel1, channel2)
        loss = criterion(outputs, targets)
        # Fix: loss.data[0] was removed in PyTorch 0.5; loss.item() is the
        # supported accessor.
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        progress_bar(
            batch_idx, len(testloader),
            'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (test_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))
    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.module if use_cuda else net,
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.t7')
        best_acc = acc
def advtrain(epoch):
    """One adversarial-training epoch: attack inputs per channel, then train.

    On the first batch of each epoch, the last three adversarial examples are
    decoded back to image space and written to TensorBoard for inspection.
    """
    global attackstep
    global attacker
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # craft adversarial versions of the three channels
        channel0, channel1, channel2 = attacker.attackthreechannel(
            inputs, targets)
        channel0, channel1, channel2 = torch.Tensor(channel0), torch.Tensor(
            channel1), torch.Tensor(channel2)
        if use_cuda:
            channel0, channel1, channel2, targets = channel0.cuda(
            ), channel1.cuda(), channel2.cuda(), targets.cuda()
        optimizer.zero_grad()
        channel0, channel1, channel2, targets = Variable(channel0), Variable(
            channel1), Variable(channel2), Variable(targets)
        outputs = net(channel0, channel1, channel2)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Fix: loss.data[0] was removed in PyTorch 0.5; loss.item() is the
        # supported accessor.
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        if batch_idx == 0:
            # decode the last 3 adversarial samples back to image space
            advc0, advc1, advc2 = (
                channel[-3:].data.cpu().numpy()
                for channel in [channel0, channel1, channel2])
            advc0, advc1, advc2 = (encoder.temp2img(advc)
                                   for advc in [advc0, advc1, advc2])
            advc0, advc1, advc2 = (torch.Tensor(advc[:, np.newaxis, :, :])
                                   for advc in [advc0, advc1, advc2])
            advimg = torch.cat((advc0, advc1, advc2), dim=1)
            advimg = torchvision.utils.make_grid(advimg)
            writer.add_image('Image', advimg, epoch)
        progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))
def extract_reg_counts(files_list):
    """Count register usage per .asm file; write frequency and tfidf CSVs.

    Returns:
        (reg_freq, reg_tfidf) DataFrames. The original returned nothing,
        which was inconsistent with the other feature extractors (e.g.
        extract_byte_ngram_features) and forced callers to re-read the CSVs;
        returning the frames is backward-compatible.
    """
    reg_cnt_d_list = []
    for idx, file_name in enumerate(files_list):
        asm_file = conf['dataset_dir'] + file_name + '.asm.gz'
        reg_cnt = register_count(asm_file)
        reg_cnt_d_list.append(reg_cnt)
        progress_bar(idx + 1, len(files_list), 50)
    # convert list of dictionaries to a register count numpy array
    vec = DictVectorizer()
    reg_freq = vec.fit_transform(reg_cnt_d_list).toarray()
    reg_freq = pd.DataFrame(reg_freq, columns=vec.get_feature_names())
    csv_file = conf['featsets_dir'] + 'reg_freq.csv'
    reg_freq.to_csv(csv_file)
    # transform ngram frequency array to ngram tfidf array
    transformer = TfidfTransformer(smooth_idf=False)
    reg_tfidf = transformer.fit_transform(reg_freq)
    reg_tfidf = pd.DataFrame(reg_tfidf.todense(),
                             columns=vec.get_feature_names())
    csv_file = conf['featsets_dir'] + 'reg_tfidf.csv'
    reg_tfidf.to_csv(csv_file)
    return reg_freq, reg_tfidf
def extract_opcode_ngrams(files_list, n):
    """Count opcode n-grams per .asm file; write frequency and tfidf CSVs.

    Bug fixed: the tfidf column comprehension contained a stray '+' before
    'for' ("'_tf' + for x in ..."), which is a SyntaxError and made this
    module unimportable.
    """
    opcode_d_list = []
    for idx, file_name in enumerate(files_list):
        asm_file = conf['dataset_dir'] + file_name + '.asm.gz'
        opcode_count = opcode_ngram_count(asm_file, n)
        opcode_d_list.append(opcode_count)
        progress_bar(idx + 1, len(files_list), 50)
    # convert list of dictionaries to an opcode count numpy array
    vec = DictVectorizer()
    opc_freq = vec.fit_transform(opcode_d_list).toarray()
    opc_freq = pd.DataFrame(opc_freq, columns=vec.get_feature_names())
    csv_file = conf['featsets_dir'] + str(n) + 'gram_opcode_freq.csv'
    opc_freq.to_csv(csv_file)
    # transform ngram frequency array to ngram tfidf array
    transformer = TfidfTransformer(smooth_idf=False)
    opc_tfidf = transformer.fit_transform(opc_freq)
    # NOTE(review): x.decode("utf-8") assumes bytes feature names (Python 2
    # sklearn), matching extract_byte_ngram_features — confirm target runtime.
    opc_tfidf = pd.DataFrame(opc_tfidf.todense(),
                             columns=[x.decode("utf-8") + '_tf'
                                      for x in vec.get_feature_names()])
    csv_file = conf['featsets_dir'] + str(n) + 'gram_opcode_tfidf.csv'
    opc_tfidf.to_csv(csv_file)
def evaluate(self, sess, pre_score, saver=None):
    """Evaluate the model on the dev set and predict label classes.

    Runs batched inference, un-pads predictions with each sentence's mask,
    prints confusion matrix / accuracy / micro-F1 / classification report,
    and saves the model via `saver` when the F1 score beats `pre_score`.

    Returns:
        (all_predicted_labesl, new_score): flat predicted labels and the
        best F1 score seen so far.
    """
    new_score = pre_score
    preds = []  # predictions accumulated over the whole dev set
    for j, batch in enumerate(
            minibatches(self.dev_set, self.batch_size, shuffle=False)):
        inputs_batch, mask_batch = batch[0], batch[2]
        feed = self.create_feed_dict(inputs_batch=inputs_batch,
                                     mask_batch=mask_batch)
        preds_ = sess.run(tf.argmax(self.preds, axis=2),
                          feed_dict=feed)  # predictions for one batch
        preds += list(preds_)
        total_batch = 1 + int(len(self.dev_set) / self.batch_size)
        print(progress_bar(j, total_batch, "batch"))
    all_original_labels = []  # gold-standard labels
    all_predicted_labesl = []  # predicted labels
    for i, (sentence, labels) in enumerate(self.dev_vec):
        _, _, mask = self.dev_set[i]  # mask of this sentence
        # apply the mask: keep only predictions where mask is True
        labels_ = [l for l, m in zip(preds[i], mask) if m]
        # length check guards against the final (possibly ragged) batch
        if len(labels_) == len(labels):
            all_original_labels += labels
            all_predicted_labesl += labels_
    cm = confusion_matrix(all_original_labels,
                          all_predicted_labesl)  # confusion matrix
    acc_sorce = accuracy_score(all_original_labels, all_predicted_labesl)
    f_score = f1_score(all_original_labels, all_predicted_labesl,
                       average="micro")
    report = classification_report(all_original_labels, all_predicted_labesl,
                                   target_names=self.LBLS)
    print("准确率:", acc_sorce)
    print("F值:", f_score)
    print("混淆矩阵:\n", cm)
    print("分类结果:\n", report)
    if f_score > pre_score:
        new_score = f_score
        # save the best model during training; tests may pass saver=None
        if saver:
            logger.info("New best score! Saving model in %s",
                        self.model_output)
            saver.save(sess, self.model_output)
    return all_predicted_labesl, new_score
def load_data(self, mode, shuffle=False):
    """Load the train or val or test dataset.

    Selects the label/data directories for `mode` ('Train'/'Val'/anything
    else -> test), builds label entries (dummy label 0 when no label file),
    optionally shuffles, and feeds each entry to self._add_to_database.
    In 'Test' mode at most self.opts.num_test samples are loaded.
    """
    if mode == 'Train':
        label_name, label_dir, data_dir = [
            self.opts.train_label, self.cfg.TRAIN_LABEL_DIR,
            self.cfg.TRAIN_DATA_DIR
        ]
    elif mode == 'Val':
        label_name, label_dir, data_dir = [
            self.opts.val_label, self.cfg.VAL_LABEL_DIR,
            self.cfg.VAL_DATA_DIR
        ]
    else:
        label_name, label_dir, data_dir = [
            self.opts.test_label, self.cfg.TEST_LABEL_DIR,
            self.cfg.TEST_DATA_DIR
        ]
    if label_name == 'None':
        # no label file: list the data directory and assign dummy label 0
        label_data = []
        test_names = os.listdir(data_dir)
        for name in test_names:
            label_data.append([name, 0])
    else:
        label_data = self._open_data_file(label_name, label_dir)
    if shuffle:
        random.shuffle(label_data)
    for index, data_set in enumerate(label_data):
        if mode == 'Test':
            # cap the progress-bar length (and the loop) at num_test samples
            length = self.opts.num_test if self.opts.num_test < len(
                label_data) else len(label_data)
            if index + 1 > self.opts.num_test:
                break
        else:
            length = len(label_data)
        progress_bar(index, length, "Loading {} dataset".format(mode))
        self._add_to_database(index, data_set, data_dir)
    equal_left, equal_right = cal_equal(6)
    print('\n%s Done %s' % ('=' * equal_left, '=' * equal_right))
def print_test_info(cfg, step, loss=100.0, metric=0.0):
    """Print testing progress: banner on the first step, summary on the last.

    Inputs:
        cfg: training options
        step: a list includes --> [per_step, total_step]
        loss: accumulated test loss value (float)
        metric: accumulated testing metric
    """
    info = ""
    message = ""
    log_path = os.path.join(cfg.CHECKPOINT_DIR, 'Test_log.txt')
    if step[0] == 0:
        equal_left, equal_right = cal_equal(15)
        info += "\n" + "=" * equal_left + " Start Testing " + "=" * equal_right
        print(info)
        if cfg.opts.save_test_log:
            wrote_txt_file(log_path, info, mode='w', show=False)
    progress_bar(step[0], step[1], display=False)
    # only report the averages when labels exist and this is the final step
    if cfg.opts.test_label != 'None' and step[0] + 1 >= step[1]:
        message += "\n>>> Loss:{:.4f} ACC:{:.3f}% ".format(
            loss / step[1], metric / step[1] * 100)
        print(message)
        if cfg.opts.save_test_log:
            wrote_txt_file(log_path, message, mode='a', show=False)
# original LabVIEW code.
# Preprocessing pipeline: list raw data dirs, create python_data dirs,
# copy raw LabVIEW output, then smooth the x/y position data.
# print(...) with a single argument is valid in both Python 2 and 3, so the
# script stays runnable under the original Python 2 interpreter.
print('\rBuilding a list of directories with raw data in them.')

PATH_STRING_1 = '/Volumes/science/projects/kinesin_and_microtubules'
PATH_STRING_2 = 'water_isotope_study/data'
BASE_PATH = os.path.join(PATH_STRING_1, PATH_STRING_2)
ASSAY_TYPE = 'heavy_hydrogen_buffer'
PATH = os.path.join(BASE_PATH, ASSAY_TYPE)

raw_data_paths = util.list_data_paths(PATH, 'Full')
LENGTH_RAW = len(raw_data_paths)


def _run_stage(message, action):
    """Apply `action` to every raw data file, updating a percent progress bar.

    Factored out of three identical copy-pasted loops.
    """
    print(message)
    for i, raw_data_file in enumerate(raw_data_paths):
        action(raw_data_file)
        percent = float(i + 1) / LENGTH_RAW * 100
        util.progress_bar(percent)


# Create a new directory that will contain data for the python scripts.
_run_stage('\rCreating python_data directories.',
           util.create_python_data_directory)
# Create a copy of the raw data generated by LabVIEW into the python_data
# directory.
_run_stage('\rCopying raw data files.', util.copy_raw_data)
# Smooth the raw x and y position data.
_run_stage('\rSmoothing data and saving it to the python_data directory.',
           util.smooth_raw_data)