def test_minimal_required_args(self):
    """Checks that parsing succeeds when only the required flags are given."""
    # Wraps a value in single quotes, the way str values show up in str(args).
    quoted = "'{}'".format

    image_name = 'my-image'
    customization_script = '/tmp/my-script.sh'
    daisy_path = '/opt/daisy'
    zone = 'us-west1-a'
    gcs_bucket = 'gs://my-bucket'

    command_line = [
        '--image-name', image_name,
        '--customization-script', customization_script,
        '--daisy-path', daisy_path,
        '--zone', zone,
        '--gcs-bucket', gcs_bucket
    ]
    args = args_parser.parse_args(command_line)

    # Everything not passed above is expected to carry the literal value below.
    expected_result = self._make_expected_result(
        base_image_uri="None",
        customization_script=quoted(customization_script),
        daisy_path=quoted(daisy_path),
        dataproc_version="None",
        disk_size="15",
        extra_sources="{}",
        family="'dataproc-custom-image'",
        gcs_bucket=quoted(gcs_bucket),
        image_name=quoted(image_name),
        machine_type="'n1-standard-1'",
        network=quoted(''),
        no_smoke_test="False",
        oauth="None",
        project_id="None",
        service_account="'default'",
        shutdown_instance_timer_sec="300",
        subnetwork="''",
        zone=quoted(zone))

    self.assertEqual(str(args), expected_result)
def parse_args(raw_args):
    """Parses and infers command line arguments.

    Args:
      raw_args: The argument list handed to ``args_parser.parse_args``.

    Returns:
      The parsed arguments object, after ``infer_args`` has been applied to it.
    """
    args = args_parser.parse_args(raw_args)
    # Lazy %-style arguments: the message is only rendered if the record is
    # actually emitted.
    _LOG.info("Parsed args: %s", args)
    # infer_args is called for its effect on ``args``; its return value is
    # not used.
    infer_args(args)
    _LOG.info("Inferred args: %s", args)
    return args
def main():
    """Build a ``Data`` object from the CLI arguments, then open, process and save it."""
    options = parse_args()
    empty_pixels = np.array([])
    source = Data(empty_pixels, options)
    source.open_image()
    # ``process`` returns the object that is subsequently saved.
    result = process(source)
    result.save_image()
def eval_contrastive(model, mode="val", batch_size=8):
    """Evaluate a contrastive model on the pickled ``mode`` split.

    Two encodings and the labels are read from
    ``data/<args.data_type>/BERTContrastive*_<mode>.pkl``. Each pair is embedded
    with ``model`` and predicted positive (1) when the cosine similarity of the
    two embeddings exceeds 0.5, negative (0) otherwise.

    Args:
        model: Callable as ``model(input_ids, attention_mask)`` returning
            embeddings; it is switched to eval mode.
        mode: Split name used in the pickle file names and the printed header.
        batch_size: Batch size of the evaluation ``DataLoader``.

    Returns:
        The F-score produced by ``score(..., average='macro')``.
    """
    args = parse_args()

    def load_pickle(stem):
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # trusted files.
        path = "data/{}/{}_{}.pkl".format(args.data_type, stem, mode)
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    encoding1 = load_pickle("BERTContrastiveEncodings1").to(device)
    encoding2 = load_pickle("BERTContrastiveEncodings2").to(device)
    labels = load_pickle("BERTContrastiveLabels")

    test_dataset = TensorDataset(encoding1['input_ids'],
                                 encoding1['token_type_ids'],
                                 encoding1['attention_mask'],
                                 encoding2['input_ids'],
                                 encoding2['token_type_ids'],
                                 encoding2['attention_mask'], labels)
    # Iterate in dataset order: predictions are compared below against
    # ``labels`` in their stored order, so shuffling the batches would score
    # every prediction against the wrong label.
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    model = model.eval()
    predictions = []
    print('......................{} summary...................'.format(mode))
    with torch.no_grad():
        for input_ids1, _, attention_mask1, input_ids2, _, attention_mask2, _ in test_dataloader:
            emd1 = model(input_ids1, attention_mask1)
            emd2 = model(input_ids2, attention_mask2)
            cosine_sim = torch.nn.functional.cosine_similarity(
                emd1, emd2, dim=1).cpu().detach().numpy()
            # Binarise in place at the 0.5 similarity threshold.
            cosine_sim[cosine_sim > 0.5] = 1
            cosine_sim[cosine_sim <= 0.5] = 0
            predictions += list(cosine_sim)

    _, _, fscore, _ = score(labels.numpy(),
                            np.asarray(predictions).reshape(-1, 1),
                            average='macro')
    print(classification_report(labels.numpy(), predictions))
    sys.stdout.flush()
    return fscore
def eval_classification(model, mode="val", batch_size=8):
    """Evaluate a classification model on the pickled ``mode`` split.

    Encodings and labels are read from
    ``data/<args.data_type>/BERTClassification*_<mode>.pkl``. The predicted
    class of each example is the argmax of the logits returned by ``model``.
    Accuracy and a classification report are printed.

    Args:
        model: Callable as ``model(input_ids, attention_mask, labels)``
            returning ``(loss, logits)``; it is switched to eval mode.
        mode: Split name used in the pickle file names and the printed header.
        batch_size: Batch size of the evaluation ``DataLoader``.

    Returns:
        The F-score produced by ``score(..., average='macro')``.
    """
    args = parse_args()

    def load_pickle(stem):
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # trusted files.
        path = "data/{}/{}_{}.pkl".format(args.data_type, stem, mode)
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    encodings = load_pickle("BERTClassificationEncodings").to(device)
    labels = load_pickle("BERTClassificationLabels").to(device).long()

    test_dataset = TensorDataset(encodings['input_ids'],
                                 encodings['token_type_ids'],
                                 encodings['attention_mask'], labels)
    # Iterate in dataset order: ``preds`` is compared below against ``labels``
    # in their stored order, so shuffling the batches would score every
    # prediction against the wrong label.
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    model = model.eval()
    preds = []
    print('......................{} summary...................'.format(mode))
    with torch.no_grad():
        for input_ids, _, attention_mask, val_labels in test_dataloader:
            _, logits = model(input_ids, attention_mask, val_labels)
            preds += list(torch.argmax(logits, dim=1).cpu().detach().numpy())

    preds = np.asarray(preds).reshape(-1, 1)
    print("----------------------------------------------")
    labels = labels.cpu().detach().numpy()
    # NOTE(review): the comparison broadcasts; this assumes ``labels`` has
    # shape (N, 1) like ``preds`` — confirm against the pickled data.
    correct = (preds == labels)
    print('ACCURACY ================= ', correct.sum() / preds.shape[0])

    _, _, fscore, _ = score(labels, preds, average='macro')
    print(classification_report(labels, preds))
    sys.stdout.flush()
    return fscore
model_vars, model_metrics, model_losses, model_collections = get_model( u, p, params={ 'ARGS': ARGS, 'snr_legit': snr_legit, 'snr_adv': snr_adv }) session = tf.Session(config=session_config) saver = tf.compat.v1.train.Saver() save_path = save_dir + sim_slug + "/model.ckpt" if os.path.exists(save_path + ".index"): saver.restore(session, save_path) print("Model restored from: ", save_path) test_results = test(session) append_results(test_results) write_results() else: print("ERROR: Model not found.") else: print("Error: the only available options are 'train' or 'test'.") if __name__ == '__main__': ARGS = args_parser.parse_args(DATASETS) run_main(ARGS) print('SUCCESS: Program ended correctly.')
import torch from model_helper import Phase import model_helper as mh import args_parser if __name__ == '__main__': # Parse Argument args = args_parser.parse_args(Phase.train) print(args) # Use GPU if it's available device = torch.device( "cuda" if torch.cuda.is_available() and args.gpu else "cpu") print('### Using device: ', device) # Loading the data print('### Loading data') train_dataset, trainloader, train_transforms = mh.load_data(Phase.train) valid_dataset, validloader, valid_transforms = mh.load_data(Phase.valid) test_dataset, testloader, test_transforms = mh.load_data(Phase.test) # Building Model print('### Building the model') arch = args.arch nHiddens = args.hidden_units nOutputs = 102 pDropout = 0.2 lr = args.learning_rate model, optimizer, criterion = mh.build_model(arch, nHiddens=nHiddens, nOutputs=nOutputs,
def test_optional_args(self):
    """Checks that parsing succeeds when the optional flags are given as well."""
    # Wraps a value in single quotes, the way str values show up in str(args).
    quoted = "'{}'".format

    image_name = 'my-image'
    customization_script = '/tmp/my-script.sh'
    daisy_path = '/opt/daisy'
    zone = 'us-west1-a'
    gcs_bucket = 'gs://my-bucket'
    dataproc_version = '1.4.5-debian9'
    project_id = 'my-project'
    oauth = 'xyz'
    family = 'debian9'
    machine_type = 'n1-standard-4'
    disk_size = 40
    network = 'my-network'
    subnetwork = 'my-subnetwork'
    no_external_ip = True
    no_smoke_test = True
    dry_run = True
    service_account = "my-service-account"
    shutdown_instance_timer_sec = 567

    command_line = [
        '--customization-script', customization_script,
        '--daisy-path', daisy_path,
        '--dataproc-version', dataproc_version,
        '--disk-size', str(disk_size),
        '--dry-run',
        '--family', family,
        '--gcs-bucket', gcs_bucket,
        '--image-name', image_name,
        '--machine-type', machine_type,
        '--network', network,
        '--no-external-ip',
        '--no-smoke-test',
        '--oauth', oauth,
        '--project-id', project_id,
        '--service-account', service_account,
        '--shutdown-instance-timer-sec', str(shutdown_instance_timer_sec),
        '--subnetwork', subnetwork,
        '--zone', zone,
    ]
    args = args_parser.parse_args(command_line)

    # Non-string values appear unquoted, string values single-quoted.
    expected_result = self._make_expected_result(
        base_image_uri="None",
        customization_script=quoted(customization_script),
        daisy_path=quoted(daisy_path),
        dataproc_version=quoted(dataproc_version),
        disk_size=str(disk_size),
        dry_run=str(dry_run),
        extra_sources="{}",
        family=quoted(family),
        gcs_bucket=quoted(gcs_bucket),
        image_name=quoted(image_name),
        machine_type=quoted(machine_type),
        network=quoted(network),
        no_external_ip=str(no_external_ip),
        no_smoke_test=str(no_smoke_test),
        oauth=quoted(oauth),
        project_id=quoted(project_id),
        service_account=quoted(service_account),
        shutdown_instance_timer_sec=str(shutdown_instance_timer_sec),
        subnetwork=quoted(subnetwork),
        zone=quoted(zone)
    )

    self.assertEqual(str(args), expected_result)
def test_missing_required_args(self):
    """Verifies it fails if missing required args."""
    # Parsing an empty argument list is expected to exit the process; the
    # raised exception itself is not inspected, so it is not bound to a name.
    with self.assertRaises(SystemExit):
        args_parser.parse_args([])
def run_with_args(args):
    """Announce a test run, parse ``args`` and process the parsed result.

    Args:
        args: Raw arguments, forwarded unchanged to ``parse_args``.
    """
    # Wrap in a 1-tuple so that a tuple-valued ``args`` is printed as a whole
    # instead of being unpacked as several %-format arguments.
    print('Doing test run using args "%s"' % (args,))
    parsed_args = parse_args(args)
    process_inputs(parsed_args)
import os
# Set before the remaining imports are executed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import random

import numpy as np

from client import Client
from server import Server
from utils import get_dataset, plot_metric, prepare_output_dir
from utils import print_configuration, save_configuration
from args_parser import parse_args

if __name__ == '__main__':
    args = parse_args(is_federated=True)

    if args.seed:
        random.seed(int(args.seed))
        # np.random.RandomState(seed) only constructs a generator object that
        # would be thrown away here; np.random.seed is what seeds NumPy's
        # global generator.
        np.random.seed(int(args.seed))

    output_dir = prepare_output_dir()

    train_dataset, train_dataset_labels, clients_groups = get_dataset(args)

    print_configuration(args, train_dataset, True)
    save_configuration(args, train_dataset, output_dir, True)

    # Prepare clients
    clients = {}
    for idx_client in range(args.K):
        clients[idx_client] = Client(idx_client, train_dataset,
                                     clients_groups[idx_client])
import torch
import json

from model_helper import Phase
import model_helper as mh
import args_parser

if __name__ == '__main__':
    # Command-line options for the validation phase
    args = args_parser.parse_args(Phase.valid)
    print(args)

    # CUDA is used only when it is both available and requested
    use_cuda = torch.cuda.is_available() and args.gpu
    device = torch.device("cuda" if use_cuda else "cpu")
    print('### Using device: ', device)

    # Category -> name table read from the JSON file given on the command line
    with open(args.category_names, 'r') as f:
        cat_to_name = json.load(f)

    # Run the prediction from the stored checkpoint
    probs, classes = mh.predict_from_disk(device, args.image_path,
                                          args.checkpoint, args.top_k)
    flowers_names = []
    for cat in classes:
        flowers_names.append(cat_to_name[cat])

    # Result table
    print()
    print("PROBABILITY FLOWER NAME")
    print("=========== ====================================")
    row_template = '{:5.2f}% {}'
    for i in range(args.top_k):
        percent = probs[0][i] * 100
        print(row_template.format(percent, flowers_names[i].upper()))
def _draw_detection_panel(position, image, cmap, vmin, vmax, title, boxes, scores, label_anchor):
    """Draw one panel of the 2x2 overview figure.

    Shows ``image`` with a colorbar, outlines every box in green and writes its
    score in magenta. ``label_anchor`` receives ``(x1, y1, x2, y2)`` of a box
    and returns the ``(x, y)`` position of the score text.
    """
    plt.subplot(2, 2, position)
    ax = plt.gca()
    im = plt.imshow(image, cmap=cmap, vmin=vmin, vmax=vmax)
    for box, score in zip(boxes, scores):
        bbox_x1, bbox_y1, bbox_x2, bbox_y2 = box.astype(int)
        plt.plot([bbox_x1, bbox_x1, bbox_x2, bbox_x2, bbox_x1],
                 [bbox_y1, bbox_y2, bbox_y2, bbox_y1, bbox_y1],
                 color='green', linewidth=0.5)
        text_x, text_y = label_anchor(bbox_x1, bbox_y1, bbox_x2, bbox_y2)
        plt.text(text_x, text_y, '%.3f' % score, fontsize=6, color='magenta')
    plt.axis('off')
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    plt.colorbar(im, cax=cax)
    ax.set_title(title)


def main(args=None):
    """Run the detection model over every data file and store the results.

    For each file listed by the data generator the crops are run through the
    model in batches, the detected boxes are translated by their crop offsets,
    filtered by score, reduced with ``exclude_redundant_labelbbox_pair`` and
    finally written as a pickle plus a PNG overview plot below
    ``args.output_directory/<YYYYMMDD>/``. Files whose pickle already exists
    are skipped.

    Args:
        args: Command-line argument list; defaults to ``sys.argv[1:]``.
    """
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    generator = SAIL_inference_datagenerator(base_data_path=args.base_data_directory,
                                             interpolation_constants_directory=args.interpolation_constants_directory)

    keras.backend.set_session(get_session())
    model = models.load_model(os.path.abspath(args.model_snapshot), backbone_name=args.backbone)
    model = models.convert_model(model)

    # Raw string: the pattern contains regex escapes (\d, \.).
    fname_pattern = re.compile(r'.+(MSG\d).+(\d{14})\.nc')
    # Threshold rendered for use in file names, with '.' replaced by '_'.
    threshold_tag = ('%.5f' % args.proba_threshold).replace('.', '_')

    snapshots_processing_delta_t = np.zeros(len(generator), dtype=np.float64)
    prev_start_time = None
    for datafile_idx in range(len(generator)):
        start_time = time.time()
        if prev_start_time is not None:
            delta_t = start_time - prev_start_time
            snapshots_processing_delta_t[datafile_idx-1] = delta_t
            # Only durations above 0.1 s enter the average used for the estimate.
            print('prev. snapshot processed in %f s; estimated processing time: %f s' % (
                delta_t,
                ((len(generator)-(datafile_idx+1)) * np.mean(snapshots_processing_delta_t[snapshots_processing_delta_t > 0.1]))))
        prev_start_time = start_time

        curr_fname = generator.data_fnames[datafile_idx]
        curr_fname_basename = os.path.basename(curr_fname)
        match = fname_pattern.match(curr_fname_basename)
        sat_label = match.groups()[0]
        if sat_label == 'MSG1':
            # NOTE(review): unlike the "already processed" branch below, this
            # skip does not advance generator.current; confirm that the
            # generator cannot get out of step with datafile_idx here.
            continue
        dt_str = match.groups()[1]
        dt = datetime.strptime(dt_str, "%Y%m%d%H%M%S")

        day_directory = os.path.join(args.output_directory, datetime.strftime(dt, "%Y%m%d"))
        snapshot_stem = '%s_%s_p%s' % (sat_label, datetime.strftime(dt, "%Y%m%d%H%M%S"), threshold_tag)
        curr_snapshot_results_filename = os.path.join(day_directory, snapshot_stem + '.pkl')
        curr_snapshot_vis_plot_filename = os.path.join(day_directory, snapshot_stem + '.png')

        if os.path.exists(curr_snapshot_results_filename):
            generator.current += 1
            print('this file has been already processed earlier. Skipping.')
            continue

        example, shared_mask, crops, masks, data_fname, dt, crop_bboxes, sat_label = next(generator)
        print('%s : processing file %d of %d: %s' % (str(start_time), datafile_idx+1, len(generator), data_fname))

        curr_example_batch_generator = SAIL_batches_generator(crops, batch_size=args.batch_size)

        detected_boxes_per_crop = []
        scores_per_crop = []
        for _ in range(len(curr_example_batch_generator)):
            images_batch, _scales = next(curr_example_batch_generator)
            # prediction!
            boxes, scores, _pred_labels = model.predict_on_batch(images_batch)
            # Drop boxes equal to (-1, -1, -1, -1) and scores that are not
            # above -1 (presumably padding entries — TODO confirm).
            boxes = [np.array([box for box in curr_boxes
                               if np.square(box - np.array([-1., -1., -1., -1.])).sum() > 0.])
                     for curr_boxes in boxes]
            scores = [np.array([sc for sc in curr_scores if sc > -1.]) for curr_scores in scores]

            detected_boxes_per_crop = detected_boxes_per_crop + boxes
            scores_per_crop = scores_per_crop + scores

        if len(detected_boxes_per_crop) == 0:
            continue

        # translate these labels bboxes
        translated_detected_boxes_per_crop = [[box + np.array([left, bottom, left, bottom]) for box in curr_boxes]
                                              for (curr_boxes, (left, bottom, _right, _top))
                                              in zip(detected_boxes_per_crop, crop_bboxes)]
        # flat this list
        translated_detected_boxes_per_crop = [box[np.newaxis, :]
                                              for boxes_of_crop in translated_detected_boxes_per_crop
                                              for box in boxes_of_crop]
        if len(translated_detected_boxes_per_crop) == 0:
            continue
        # concat to one array
        translated_detected_boxes_per_crop_flat = np.concatenate(translated_detected_boxes_per_crop, axis=0)
        # concat scores to one array
        scores_per_crop_flat = np.concatenate(scores_per_crop)

        selected_indices = np.where((scores_per_crop_flat >= args.proba_threshold) & (scores_per_crop_flat < 1.))[0]
        if len(selected_indices) > 30:
            # More than 30 candidates: raise the threshold towards 1 in steps
            # until at most 30 remain, or give up once it is within 1e-3 of 1.
            print('adjusting proba_thresh...')
            curr_thresh = args.proba_threshold
            failed_searching_suitable_threshold = False
            while np.sum((scores_per_crop_flat >= curr_thresh) & (scores_per_crop_flat < 1.)) > 30:
                curr_thresh = (1. - 0.98*(1-curr_thresh))
                print('%f : %d bboxes' % (curr_thresh, np.sum((scores_per_crop_flat >= curr_thresh) & (scores_per_crop_flat < 1.))))
                if ((np.abs(curr_thresh-1.) < 1.e-3) & (np.sum((scores_per_crop_flat >= curr_thresh) & (scores_per_crop_flat < 1.)) > 30)):
                    failed_searching_suitable_threshold = True
                    break
            if failed_searching_suitable_threshold:
                print('failed searching suitable threshold. !!! Skipping this example !!!')
                continue
            selected_indices = np.where((scores_per_crop_flat >= curr_thresh) & (scores_per_crop_flat < 1.))[0]

        translated_detected_boxes_per_crop_flat_filtered = translated_detected_boxes_per_crop_flat[selected_indices]
        scores_per_crop_flat_filtered = scores_per_crop_flat[selected_indices]

        translated_detected_boxes_shrinked = np.copy(translated_detected_boxes_per_crop_flat_filtered)
        scores_shrinked = np.copy(scores_per_crop_flat_filtered)

        # Remove one box (and its score) per round until no index is reported
        # any more or fewer than two boxes are left.
        with tqdm(np.arange(len(translated_detected_boxes_per_crop_flat_filtered))) as prbr:
            while True:
                if translated_detected_boxes_shrinked.shape[0] < 2:
                    break
                item_to_exclude = exclude_redundant_labelbbox_pair(translated_detected_boxes_shrinked,
                                                                   iou_threshold=args.shrinking_iou_threshold)
                if item_to_exclude is None:
                    break
                translated_detected_boxes_shrinked = np.array([translated_detected_boxes_shrinked[i]
                                                               for i in range(translated_detected_boxes_shrinked.shape[0])
                                                               if i != item_to_exclude])
                scores_shrinked = np.array([scores_shrinked[i]
                                            for i in range(scores_shrinked.shape[0])
                                            if i != item_to_exclude])
                prbr.update(1)

        # The stored 'proba_threshold' is the configured value, not an
        # adjusted curr_thresh.
        curr_snapshot_detected_data_dict = {'data_fname': data_fname,
                                            'sat_label': sat_label,
                                            'dt': dt,
                                            'proba_threshold': args.proba_threshold,
                                            'shrinking_iou_threshold': args.shrinking_iou_threshold,
                                            'scores_shrinked': scores_shrinked,
                                            'translated_detected_boxes_shrinked': translated_detected_boxes_shrinked,
                                            'projection_shape': example.shape,
                                            'retinanet_backbone': args.backbone,
                                            'retinanet_snapshot_file': args.model_snapshot}
        EnsureDirectoryExists(os.path.dirname(curr_snapshot_results_filename))
        with open(curr_snapshot_results_filename, 'wb') as f:
            pickle.dump(curr_snapshot_detected_data_dict, f)

        # Masked views of the three channels of the first example. The builtin
        # ``bool`` is used because the ``np.bool`` alias no longer exists in
        # current NumPy releases.
        crop_ch5_normed = np.ma.asarray(example[0, :, :, 0])
        crop_ch5_normed.mask = shared_mask.astype(bool)
        crop_ch9_normed = np.ma.asarray(example[0, :, :, 1])
        crop_ch9_normed.mask = shared_mask.astype(bool)
        crop_btd_normed = np.ma.asarray(example[0, :, :, 2])
        crop_btd_normed.mask = shared_mask.astype(bool)

        #region debug_plot
        plt.figure(figsize=(8, 8), dpi=300)
        # The first panel puts the score just past the (x2, y2) corner, the
        # other two put it at (x1, y1).
        _draw_detection_panel(1, scale_ch5_back(crop_ch5_normed), cmap_ch5, 200., 320., 'ch5, K',
                              translated_detected_boxes_shrinked, scores_shrinked,
                              lambda x1, y1, x2, y2: (x2, y2 + 4))
        _draw_detection_panel(2, scale_ch9_back(crop_ch9_normed), cmap_ch9, 200., 320., 'ch9, K',
                              translated_detected_boxes_shrinked, scores_shrinked,
                              lambda x1, y1, x2, y2: (x1, y1))
        _draw_detection_panel(3, scale_btd_back(crop_btd_normed), cmap_btd, -80., 3.3, 'BTD, K',
                              translated_detected_boxes_shrinked, scores_shrinked,
                              lambda x1, y1, x2, y2: (x1, y1))

        plt.tight_layout()
        plt.savefig(curr_snapshot_vis_plot_filename, dpi=300, pad_inches=0)
        plt.close()
def main():
    """Entry point of the IPS patcher: run the sub-command named on the command line."""
    args = parse_args()
    # Each handler is registered under its own function name.
    handlers = {}
    for handler in (apply, create):
        handlers[handler.__name__] = handler
    selected = handlers[args.command]
    selected(args)
def _coerce_key(raw):
    """Return ``raw`` converted to ``int`` when it parses as one, else unchanged."""
    try:
        return int(raw)
    except ValueError:
        return raw


def main():
    """Run the storage command selected on the command line.

    ``init`` initialises the storage and returns; on ``StorageInitError`` the
    error text is written to stdout and the process exits with
    ``STORAGE_INIT_ERROR``. Every other command first calls ``check_path`` and
    then runs inside ``with storage``:

    * ``add``    - store consecutive ``key value`` pairs from ``args.items``
    * ``get``    - print the value of every key in ``args.keys``
    * ``del``    - delete every key in ``args.keys``
    * ``exist``  - print whether each key in ``args.keys`` is stored
    * ``keys``   - print all keys, space separated
    * ``values`` - print all values, space separated

    Keys and values given on the command line are converted to ``int`` when
    they look like integers.
    """
    args = parse_args()
    storage = Storage(args.storage)

    if args.command == "init":
        try:
            storage.init()
            return
        except StorageInitError as error:
            sys.stdout.write(error.text)
            sys.exit(STORAGE_INIT_ERROR)

    check_path(args.storage)

    with storage:
        if args.command == "add":
            if len(args.items) % 2 != 0:
                # TODO: raise an error
                return
            # Walk the flat list two items at a time: (key, value).
            items = iter(args.items)
            for key, value in zip(items, items):
                storage[_coerce_key(key)] = _coerce_key(value)

        elif args.command == "get":
            for key in args.keys:
                value = storage[_coerce_key(key)]
                print(value, file=sys.stdout)

        elif args.command == "del":
            for key in args.keys:
                # Coerce like add/get/exist do; otherwise an integer key
                # stored by "add" could never be addressed here.
                del storage[_coerce_key(key)]

        elif args.command == "exist":
            for key in args.keys:
                exist = _coerce_key(key) in storage
                print(exist)

        elif args.command == "keys":
            for key in storage:
                print(key, end=" ")

        elif args.command == "values":
            for key in storage:
                value = storage[key]
                print(value, end=" ")
for i in range(last, size - last): if pieces[i] == 0: q.put(queue_data(i, rs[i])) multi_thread.start(num, q, download_piece) print('\ndone.') os.remove(save_dir + ovd_file) if __name__ == '__main__': signal.signal(signal.SIGINT, quit_all) signal.signal(signal.SIGTERM, quit_all) arg = ap.parse_args() st_dir = arg[ap.ST_DIR] st_num = arg[ap.ST_NUM] setting_flag = st_dir or st_num if setting_flag is None and len(arg[ap.URL]) == 0: print('use \'python app.py -h\' or view README to get help') exit(-1) if setting_flag is not None: if st_dir is not None: cp.set_item(cp.DIR, st_dir) if st_num is not None: cp.set_item(cp.NUM, st_num) if len(arg[ap.URL]) > 0:
import os
# Set before the remaining imports are executed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import random

import numpy as np

from utils import get_dataset, plot_metric, prepare_output_dir
from utils import print_configuration, save_configuration
from args_parser import parse_args
from gmm import GaussianMixture

if __name__ == '__main__':
    args = parse_args(is_federated=False)

    if args.seed:
        random.seed(int(args.seed))
        # np.random.RandomState(seed) only constructs a generator object that
        # would be thrown away here; np.random.seed is what seeds NumPy's
        # global generator.
        np.random.seed(int(args.seed))

    output_dir = prepare_output_dir()

    train_dataset, train_dataset_labels, _ = get_dataset(args)

    print_configuration(args, train_dataset, False)
    save_configuration(args, train_dataset, output_dir, False)

    # Init the Gaussian Mixture Model
    seed = None
    if args.seed:
        seed = (int(args.seed))

    # Prepare server --> init_dataset is given by 0.5% of the train_dataset randomly sampled
    # init_dataset_size = int(train_dataset.shape[0] * 0.005)
def run():
    """Generate custom image.

    Parses ``sys.argv[1:]``, resolves the Dataproc base image, renders the
    Daisy workflow from ``constants.daisy_wf``, runs Daisy, labels the
    resulting image, optionally verifies it (unless ``--no-smoke-test`` was
    given) and finally logs the expiration notice.
    """
    args = args_parser.parse_args(sys.argv[1:])

    # get dataproc base image from dataproc version
    # (the project id is only looked up when none was passed explicitly)
    project_id = get_project_id() if not args.project_id else args.project_id
    _LOG.info("Getting Dataproc base image name...")
    parsed_image_version = False
    if args.base_image_uri:
        dataproc_base_image = get_partial_image_uri(args.base_image_uri)
        dataproc_version = get_dataproc_image_version(args.base_image_uri)
        parsed_image_version = True
    else:
        dataproc_base_image = get_dataproc_base_image(args.dataproc_version)
        dataproc_version = args.dataproc_version
    _LOG.info("Returned Dataproc base image: %s", dataproc_base_image)

    run_script_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "run.sh")

    # Optional OAuth entry, pre-rendered as a fragment of the workflow text.
    oauth = ""
    if args.oauth:
        oauth = "\n \"OAuthPath\": \"{}\",".format(
            os.path.abspath(args.oauth))

    daisy_sources = {
        "run.sh": run_script_path,
        "init_actions.sh": os.path.abspath(args.customization_script)
    }
    daisy_sources.update(args.extra_sources)

    sources = ",\n".join("\"{}\": \"{}\"".format(source, path)
                         for source, path in daisy_sources.items())

    network = args.network
    # When the user wants to create a VM in a shared VPC,
    # only the subnetwork argument has to be provided whereas
    # the network one has to be left empty.
    if not args.network and not args.subnetwork:
        network = 'global/networks/default'

    # create daisy workflow
    _LOG.info("Creating Daisy workflow...")
    workflow = constants.daisy_wf.format(
        image_name=args.image_name,
        project_id=project_id,
        sources=sources,
        zone=args.zone,
        oauth=oauth,
        gcs_bucket=args.gcs_bucket,
        family=args.family,
        dataproc_base_image=dataproc_base_image,
        machine_type=args.machine_type,
        network=network,
        subnetwork=args.subnetwork,
        service_account=args.service_account,
        disk_size=args.disk_size,
        shutdown_timer_in_sec=args.shutdown_instance_timer_sec)
    _LOG.info("Successfully created Daisy workflow...")

    # run daisy to build custom image
    _LOG.info("Creating custom image with Daisy workflow...")
    run_daisy(os.path.abspath(args.daisy_path), workflow)
    _LOG.info("Successfully created custom image with Daisy workflow...")

    # set custom image label
    _LOG.info("Setting label on custom image...")
    set_custom_image_label(args.image_name, dataproc_version, project_id,
                           parsed_image_version)
    _LOG.info("Successfully set label on custom image...")

    # perform test on the newly built image
    if not args.no_smoke_test:
        _LOG.info("Verifying the custom image...")
        verify_custom_image(
            args.image_name, project_id, args.zone, network, args.subnetwork)
        _LOG.info("Successfully verified the custom image...")

    _LOG.info("Successfully built Dataproc custom image: %s", args.image_name)

    # notify when the image will expire.
    creation_date = _parse_date_time(
        get_custom_image_creation_timestamp(args.image_name, project_id))
    expiration_date = creation_date + datetime.timedelta(days=60)
    _LOG.info(
        constants.notify_expiration_text.format(args.image_name,
                                                str(expiration_date)))
from functools import partial
from multiprocessing import Pool

from args_parser import parse_args
from http_parser.master_parser import MasterParser
from tools.general import create_dir, text_file_to_set, get_url_slug_tuples


def download(info, output_dir=None):
    """Parse a single URL and store the result under ``output_dir``.

    Args:
        info: ``(filename, url)`` pair.
        output_dir: Target directory. Falls back to the module-level
            ``OUTPUT_DIR`` when omitted (kept for existing callers).
    """
    filename, url = info
    if output_dir is None:
        output_dir = OUTPUT_DIR
    MasterParser.parse(url, output_dir, filename)


def main(txt_file_path, num_workers, output_dir=None):
    """Download every link listed in ``txt_file_path`` with a worker pool.

    Args:
        txt_file_path: Text file read via ``text_file_to_set``.
        num_workers: Number of processes in the pool.
        output_dir: Target directory handed to every worker. Falls back to the
            module-level ``OUTPUT_DIR`` when omitted (kept for existing
            callers).
    """
    if output_dir is None:
        output_dir = OUTPUT_DIR

    links = text_file_to_set(txt_file_path)
    try:
        filenames_urls = get_url_slug_tuples(links)
    except NotImplementedError:
        # No slug available: name each download after its running index.
        filenames_urls = [(str(index), link) for index, link in enumerate(links)]

    # The directory travels with the task instead of being read from a global:
    # OUTPUT_DIR is only assigned under ``__main__``, so worker processes that
    # re-import this module (the "spawn" start method) would not see it.
    worker = partial(download, output_dir=output_dir)
    with Pool(num_workers) as p:
        p.map(worker, filenames_urls)


if __name__ == '__main__':
    args = parse_args()
    OUTPUT_DIR = args.output_dir
    create_dir(OUTPUT_DIR)
    main(args.input, args.workers, OUTPUT_DIR)
def human_eval_ranking(model, mode="val", batch_size=8, rows=100):
    """Score the contrastive model on the human-evaluation CSV and export rows.

    Reads ``error_analysis/test_human_eval.csv``, tokenizes both abstract
    columns with the ``bert-base-cased`` tokenizer, embeds each pair with
    ``model`` and predicts 1 when the cosine similarity of the embeddings
    exceeds 0.9, else 0. A classification report is printed and the first
    ``rows`` predictions, labels, abstracts and raw similarities are passed to
    ``save_to_csv`` under the name ``"Contrastive"``.

    Args:
        model: Callable as ``model(input_ids, attention_mask)`` returning
            embeddings; it is switched to eval mode.
        mode: Only used in the printed header.
        batch_size: Batch size of the evaluation ``DataLoader``.
        rows: Maximum number of rows handed to ``save_to_csv``.

    Returns:
        The F-score produced by ``score(..., average='macro')``.
    """
    # NOTE(review): the parsed arguments are not used in this function; the
    # call is kept in case parse_args has effects callers rely on — confirm.
    parse_args()

    tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
    df = pd.read_csv('error_analysis/test_human_eval.csv', encoding='latin-1')
    df.dropna(inplace=True)
    print(df.head())
    abstract1 = list(df['paperAbstract1'])
    abstract2 = list(df['paperAbstract2'])
    encoding1 = tokenizer(abstract1,
                          padding=True,
                          truncation=True,
                          return_tensors="pt").to(device)
    encoding2 = tokenizer(abstract2,
                          padding=True,
                          truncation=True,
                          return_tensors="pt").to(device)
    labels = torch.tensor(list(df['label'])).unsqueeze(dim=1).long().to(device)

    test_dataset = TensorDataset(encoding1['input_ids'],
                                 encoding1['token_type_ids'],
                                 encoding1['attention_mask'],
                                 encoding2['input_ids'],
                                 encoding2['token_type_ids'],
                                 encoding2['attention_mask'], labels)
    # No sampler: batches come in CSV order so that predictions line up with
    # ``labels`` and the abstracts.
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    model = model.eval()
    predictions = []
    similarity = []
    print('......................{} summary...................'.format(mode))
    with torch.no_grad():
        for input_ids1, _, attention_mask1, input_ids2, _, attention_mask2, _ in test_dataloader:
            emd1 = model(input_ids1, attention_mask1)
            emd2 = model(input_ids2, attention_mask2)
            cosine_sim = torch.nn.functional.cosine_similarity(
                emd1, emd2, dim=1).cpu().detach().numpy()
            # Record the raw similarities before they are binarised in place.
            similarity += list(cosine_sim)
            cosine_sim[cosine_sim > 0.9] = 1
            cosine_sim[cosine_sim <= 0.9] = 0
            predictions += list(cosine_sim)

    labels = labels.cpu().detach().numpy()
    _, _, fscore, _ = score(labels,
                            np.asarray(predictions).reshape(-1, 1),
                            average='macro')
    print(classification_report(labels, predictions))
    sys.stdout.flush()

    # Export the predictions of ALL batches; ``cosine_sim`` only holds the
    # last batch and would not line up with the other columns.
    save_to_csv(
        np.asarray(predictions).reshape(-1)[:rows],
        labels.reshape(-1)[:rows],
        np.asarray(abstract1).reshape(-1)[:rows],
        np.asarray(abstract2).reshape(-1)[:rows],
        np.asarray(similarity).reshape(-1)[:rows], "Contrastive")
    return fscore
def human_eval_classification(model, mode="val", batch_size=8, rows=100):
    """Score the classification model on the human-evaluation CSV and export rows.

    Reads ``error_analysis/test_human_eval.csv``, tokenizes the two abstract
    columns as sentence pairs with the ``bert-base-cased`` tokenizer and takes
    the argmax of the logits returned by ``model`` as prediction. The softmax
    value of class 1 is recorded per example. Accuracy and a classification
    report are printed, and the first ``rows`` predictions, labels, abstracts
    and class-1 probabilities are passed to ``save_to_csv``.

    Args:
        model: Callable as ``model(input_ids, attention_mask, labels)``
            returning ``(loss, logits)``; it is switched to eval mode.
        mode: Only used in the printed header.
        batch_size: Batch size of the evaluation ``DataLoader``.
        rows: Maximum number of rows handed to ``save_to_csv``.

    Returns:
        The F-score produced by ``score(..., average='macro')``.
    """
    # NOTE(review): the parsed arguments are not used in this function; the
    # call is kept in case parse_args has effects callers rely on — confirm.
    parse_args()

    tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
    df = pd.read_csv('error_analysis/test_human_eval.csv', encoding='latin-1')
    df.dropna(inplace=True)
    print(df.head())
    abstract1 = list(df['paperAbstract1'])
    abstract2 = list(df['paperAbstract2'])
    encodings = tokenizer(abstract1,
                          abstract2,
                          padding=True,
                          truncation=True,
                          return_tensors="pt").to(device)
    labels = torch.tensor(list(df['label'])).unsqueeze(dim=1).long().to(device)

    test_dataset = TensorDataset(encodings['input_ids'],
                                 encodings['token_type_ids'],
                                 encodings['attention_mask'], labels)
    # No sampler: batches come in CSV order so that predictions line up with
    # ``labels`` and the abstracts.
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

    model = model.eval()
    preds = []
    probability = []
    print('......................{} summary...................'.format(mode))
    with torch.no_grad():
        for input_ids, _, attention_mask, val_labels in test_dataloader:
            _, logits = model(input_ids, attention_mask, val_labels)
            print("logits", logits)
            print(logits.size())
            probability += list(
                torch.nn.functional.softmax(logits,
                                            dim=1)[:, 1].cpu().detach().numpy())
            preds += list(torch.argmax(logits, dim=1).cpu().detach().numpy())

    preds = np.asarray(preds).reshape(-1, 1)
    print("----------------------------------------------")
    labels = labels.cpu().detach().numpy()
    correct = (preds == labels)
    print('ACCURACY ================= ', correct.sum() / preds.shape[0])

    _, _, fscore, _ = score(labels, preds, average='macro')
    print(classification_report(labels, preds))
    sys.stdout.flush()

    save_to_csv(
        preds.reshape(-1)[:rows],
        labels.reshape(-1)[:rows],
        np.asarray(abstract1).reshape(-1)[:rows],
        np.asarray(abstract2).reshape(-1)[:rows],
        np.asarray(probability).reshape(-1)[:rows])
    print(fscore)
    return fscore
from do_tournament import do_tournament
from args_parser import parse_args
import sys

if __name__ == "__main__":
    # parse_args receives the full argv and returns the positional arguments
    # that are spread into do_tournament.
    entry_point = do_tournament
    tournament_args = parse_args(sys.argv)
    entry_point(*tournament_args)