示例#1
0
    def test_minimal_required_args(self):
        """Parsing succeeds when only the required flags are supplied.

        The optional flags are all omitted, so the expectation handed to
        ``_make_expected_result`` spells out the values this test expects the
        parser to report for them.
        """
        quote = "'{}'".format

        required = {
            'image_name': 'my-image',
            'customization_script': '/tmp/my-script.sh',
            'daisy_path': '/opt/daisy',
            'zone': 'us-west1-a',
            'gcs_bucket': 'gs://my-bucket',
        }

        argv = [
            '--image-name', required['image_name'],
            '--customization-script', required['customization_script'],
            '--daisy-path', required['daisy_path'],
            '--zone', required['zone'],
            '--gcs-bucket', required['gcs_bucket'],
        ]
        parsed = args_parser.parse_args(argv)

        expected = self._make_expected_result(
            base_image_uri="None",
            customization_script=quote(required['customization_script']),
            daisy_path=quote(required['daisy_path']),
            dataproc_version="None",
            disk_size="15",
            extra_sources="{}",
            family="'dataproc-custom-image'",
            gcs_bucket=quote(required['gcs_bucket']),
            image_name=quote(required['image_name']),
            machine_type="'n1-standard-1'",
            network=quote(''),
            no_smoke_test="False",
            oauth="None",
            project_id="None",
            service_account="'default'",
            shutdown_instance_timer_sec="300",
            subnetwork="''",
            zone=quote(required['zone']))
        # The comparison is on the string form of the parsed namespace.
        self.assertEqual(str(parsed), expected)
示例#2
0
def parse_args(raw_args):
  """Parse the raw command line and then fill in inferred values.

  Args:
    raw_args: Argument list forwarded to ``args_parser.parse_args``.

  Returns:
    The parsed arguments object after ``infer_args`` has been applied to it.
    The object is logged once before and once after inference.
  """
  parsed = args_parser.parse_args(raw_args)
  _LOG.info("Parsed args: {}".format(parsed))

  # infer_args is called for its effect on `parsed`; its return value is unused.
  infer_args(parsed)
  _LOG.info("Inferred args: {}".format(parsed))

  return parsed
示例#3
0
def main():
    """Open an image, pass it through ``process`` and save the result."""
    cli_args = parse_args()

    # Data starts from an empty array; open_image() is what loads the image.
    source = Data(np.array([]), cli_args)
    source.open_image()

    result = process(source)
    result.save_image()
示例#4
0
def eval_contrastive(model, mode="val", batch_size=8):
    """Evaluate a contrastive model by thresholding pairwise cosine similarity.

    Loads the two pickled encodings and the labels for ``mode`` from
    ``data/<args.data_type>/``, embeds both sides of every pair with ``model``
    and predicts 1 when the cosine similarity of the two embeddings is above
    0.5, otherwise 0. A classification report is printed to stdout.

    Args:
        model: Callable as ``model(input_ids, attention_mask)`` returning one
            embedding per row; it is switched to eval mode.
        mode: Name of the split, used to build the pickle file names.
        batch_size: Batch size for the evaluation ``DataLoader``.

    Returns:
        The macro-averaged F-score returned by ``score``.
    """
    args = parse_args()

    # NOTE(review): pickle.load can execute arbitrary code; these files are
    # assumed to be locally generated and trusted.
    with open(
            "data/{}/BERTContrastiveEncodings1_{}.pkl".format(
                args.data_type, mode), 'rb') as handle:
        encoding1 = pickle.load(handle).to(device)
    with open(
            "data/{}/BERTContrastiveEncodings2_{}.pkl".format(
                args.data_type, mode), 'rb') as handle:
        encoding2 = pickle.load(handle).to(device)
    with open(
            "data/{}/BERTContrastiveLabels_{}.pkl".format(
                args.data_type, mode), 'rb') as handle:
        labels = pickle.load(handle)

    test_dataset = TensorDataset(encoding1['input_ids'],
                                 encoding1['token_type_ids'],
                                 encoding1['attention_mask'],
                                 encoding2['input_ids'],
                                 encoding2['token_type_ids'],
                                 encoding2['attention_mask'], labels)
    sampler = RandomSampler(test_dataset)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=sampler,
                                 batch_size=batch_size)
    model = model.eval()
    predictions = []
    # RandomSampler shuffles the rows, so the ground truth has to be collected
    # batch by batch; scoring against `labels` in stored order would compare
    # each prediction with the label of a different row.
    targets = []

    print('......................{} summary...................'.format(mode))
    with torch.no_grad():
        for (input_ids1, _, attention_mask1, input_ids2, _, attention_mask2,
             batch_labels) in test_dataloader:
            emd1 = model(input_ids1, attention_mask1)
            emd2 = model(input_ids2, attention_mask2)
            cosine_sim = torch.nn.functional.cosine_similarity(
                emd1, emd2, dim=1).cpu().detach().numpy()
            # Binarise in place: similarity above 0.5 counts as a positive pair.
            cosine_sim[cosine_sim > 0.5] = 1
            cosine_sim[cosine_sim <= 0.5] = 0
            predictions += list(cosine_sim)
            targets += list(batch_labels.cpu().detach().numpy().reshape(-1))

    y_true = np.asarray(targets)
    y_pred = np.asarray(predictions)
    _, _, fscore, _ = score(y_true, y_pred, average='macro')
    print(classification_report(y_true, y_pred))
    sys.stdout.flush()
    return fscore
示例#5
0
def eval_classification(model, mode="val", batch_size=8):
    """Evaluate a classification model on the pickled split ``mode``.

    Loads the pickled encodings and labels for ``mode`` from
    ``data/<args.data_type>/``, takes the arg-max of the logits the model
    returns for every batch, and prints the accuracy and a classification
    report to stdout.

    Args:
        model: Callable as ``model(input_ids, attention_mask, labels)``
            returning ``(loss, logits)``; it is switched to eval mode.
        mode: Name of the split, used to build the pickle file names.
        batch_size: Batch size for the evaluation ``DataLoader``.

    Returns:
        The macro-averaged F-score returned by ``score``.
    """
    args = parse_args()

    # NOTE(review): pickle.load can execute arbitrary code; these files are
    # assumed to be locally generated and trusted.
    with open(
            "data/{}/BERTClassificationEncodings_{}.pkl".format(
                args.data_type, mode), 'rb') as handle:
        encodings = pickle.load(handle).to(device)
    with open(
            "data/{}/BERTClassificationLabels_{}.pkl".format(
                args.data_type, mode), 'rb') as handle:
        labels = pickle.load(handle).to(device).long()

    test_dataset = TensorDataset(encodings['input_ids'],
                                 encodings['token_type_ids'],
                                 encodings['attention_mask'], labels)
    sampler = RandomSampler(test_dataset)
    test_dataloader = DataLoader(test_dataset,
                                 sampler=sampler,
                                 batch_size=batch_size)
    model = model.eval()
    preds = []
    # RandomSampler shuffles the rows, so the ground truth has to be collected
    # batch by batch; scoring against `labels` in stored order would compare
    # each prediction with the label of a different row.
    targets = []

    print('......................{} summary...................'.format(mode))
    with torch.no_grad():
        for input_ids, _, attention_mask, val_labels in test_dataloader:
            _loss, logits = model(input_ids, attention_mask, val_labels)
            preds += list(torch.argmax(logits, dim=1).cpu().detach().numpy())
            targets += list(val_labels.cpu().detach().numpy().reshape(-1))

    # Both arrays are kept 1-D so `==` compares element-wise and cannot
    # broadcast a column against a row.
    preds = np.asarray(preds).reshape(-1)
    y_true = np.asarray(targets).reshape(-1)
    print("----------------------------------------------")
    correct = (preds == y_true)
    print('ACCURACY ================= ', correct.sum() / preds.shape[0])
    _, _, fscore, _ = score(y_true, preds, average='macro')
    print(classification_report(y_true, preds))
    sys.stdout.flush()
    return fscore
示例#6
0
                model_vars, model_metrics, model_losses, model_collections = get_model(
                    u,
                    p,
                    params={
                        'ARGS': ARGS,
                        'snr_legit': snr_legit,
                        'snr_adv': snr_adv
                    })

                session = tf.Session(config=session_config)
                saver = tf.compat.v1.train.Saver()
                save_path = save_dir + sim_slug + "/model.ckpt"
                if os.path.exists(save_path + ".index"):
                    saver.restore(session, save_path)
                    print("Model restored from: ", save_path)
                    test_results = test(session)
                    append_results(test_results)
                    write_results()

                else:
                    print("ERROR: Model not found.")

    else:
        print("Error: the only available options are 'train' or 'test'.")


if __name__ == '__main__':
    # Script entry point: parse the command line (DATASETS is handed to the
    # project's args_parser), run the experiment, then report completion.
    ARGS = args_parser.parse_args(DATASETS)
    run_main(ARGS)
    print('SUCCESS: Program ended correctly.')
示例#7
0
import torch
from model_helper import Phase
import model_helper as mh
import args_parser

if __name__ == '__main__':
    # Parse Argument
    args = args_parser.parse_args(Phase.train)
    print(args)

    # Use GPU if it's available
    device = torch.device(
        "cuda" if torch.cuda.is_available() and args.gpu else "cpu")
    print('### Using device: ', device)

    # Loading the data
    print('### Loading data')
    train_dataset, trainloader, train_transforms = mh.load_data(Phase.train)
    valid_dataset, validloader, valid_transforms = mh.load_data(Phase.valid)
    test_dataset, testloader, test_transforms = mh.load_data(Phase.test)

    # Building Model
    print('### Building the model')
    arch = args.arch
    nHiddens = args.hidden_units
    nOutputs = 102
    pDropout = 0.2
    lr = args.learning_rate
    model, optimizer, criterion = mh.build_model(arch,
                                                 nHiddens=nHiddens,
                                                 nOutputs=nOutputs,
示例#8
0
  def test_optional_args(self):
    """Parsing succeeds when the optional flags are supplied as well.

    String-valued options are expected in the result wrapped in single
    quotes; numeric and boolean options are expected as their plain string
    form.
    """
    quote = "'{}'".format

    customization_script = '/tmp/my-script.sh'
    daisy_path = '/opt/daisy'
    dataproc_version = '1.4.5-debian9'
    disk_size = 40
    family = 'debian9'
    gcs_bucket = 'gs://my-bucket'
    image_name = 'my-image'
    machine_type = 'n1-standard-4'
    network = 'my-network'
    oauth = 'xyz'
    project_id = 'my-project'
    service_account = "my-service-account"
    shutdown_instance_timer_sec = 567
    subnetwork = 'my-subnetwork'
    zone = 'us-west1-a'

    argv = [
        '--customization-script', customization_script,
        '--daisy-path', daisy_path,
        '--dataproc-version', dataproc_version,
        '--disk-size', str(disk_size),
        '--dry-run',
        '--family', family,
        '--gcs-bucket', gcs_bucket,
        '--image-name', image_name,
        '--machine-type', machine_type,
        '--network', network,
        '--no-external-ip',
        '--no-smoke-test',
        '--oauth', oauth,
        '--project-id', project_id,
        '--service-account', service_account,
        '--shutdown-instance-timer-sec', str(shutdown_instance_timer_sec),
        '--subnetwork', subnetwork,
        '--zone', zone,
    ]
    parsed = args_parser.parse_args(argv)

    # The three switches passed without a value are expected to read True.
    expected = self._make_expected_result(
        base_image_uri="None",
        customization_script=quote(customization_script),
        daisy_path=quote(daisy_path),
        dataproc_version=quote(dataproc_version),
        disk_size=str(disk_size),
        dry_run=str(True),
        extra_sources="{}",
        family=quote(family),
        gcs_bucket=quote(gcs_bucket),
        image_name=quote(image_name),
        machine_type=quote(machine_type),
        network=quote(network),
        no_external_ip=str(True),
        no_smoke_test=str(True),
        oauth=quote(oauth),
        project_id=quote(project_id),
        service_account=quote(service_account),
        shutdown_instance_timer_sec=str(shutdown_instance_timer_sec),
        subnetwork=quote(subnetwork),
        zone=quote(zone)
    )
    self.assertEqual(str(parsed), expected)
示例#9
0
 def test_missing_required_args(self):
   """Parsing an empty argument list must abort with ``SystemExit``."""
   self.assertRaises(SystemExit, args_parser.parse_args, [])
示例#10
0
def run_with_args(args):
    """Announce a test run, parse ``args`` and process the parsed result.

    Args:
        args: Raw arguments; echoed to stdout and then handed to
            ``parse_args``.
    """
    banner = 'Doing test run using args "%s"' % args
    print(banner)
    process_inputs(parse_args(args))
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import random
import numpy as np

from client import Client
from server import Server
from utils import get_dataset, plot_metric, prepare_output_dir
from utils import print_configuration, save_configuration
from args_parser import parse_args

if __name__ == '__main__':
    # Entry point of the federated run: parse the options, seed the RNGs,
    # load the data, record the configuration and build one Client per index.
    args = parse_args(is_federated=True)
    if args.seed:
        seed_value = int(args.seed)
        random.seed(seed_value)
        # np.random.RandomState(seed) only builds a generator object that was
        # immediately discarded; np.random.seed is what seeds NumPy's global
        # RNG.
        np.random.seed(seed_value)

    output_dir = prepare_output_dir()

    train_dataset, train_dataset_labels, clients_groups = get_dataset(args)

    print_configuration(args, train_dataset, True)
    save_configuration(args, train_dataset, output_dir, True)

    # Prepare clients: args.K clients, each given the full train_dataset
    # together with its own entry of clients_groups.
    clients = {}
    for idx_client in range(args.K):
        clients[idx_client] = Client(idx_client, train_dataset,
                                     clients_groups[idx_client])
示例#12
0
import torch
import json
from model_helper import Phase
import model_helper as mh
import args_parser

if __name__ == '__main__':
    # Prediction script: parse the options, pick the device, load the
    # category-to-name mapping and print the top-k predictions for one image.
    args = args_parser.parse_args(Phase.valid)
    print(args)

    # CUDA is used only when it is available AND requested through args.gpu.
    device_name = "cuda" if torch.cuda.is_available() and args.gpu else "cpu"
    device = torch.device(device_name)
    print('### Using device: ', device)

    # Mapping from category id to a human-readable name.
    with open(args.category_names, 'r') as f:
        cat_to_name = json.load(f)

    # The checkpoint is loaded and the prediction is made inside the helper.
    probs, classes = mh.predict_from_disk(device, args.image_path,
                                          args.checkpoint, args.top_k)
    flowers_names = []
    for cat in classes:
        flowers_names.append(cat_to_name[cat])

    print()
    print("PROBABILITY   FLOWER NAME")
    print("===========   ====================================")
    for i in range(args.top_k):
        percent = probs[0][i] * 100
        label = flowers_names[i].upper()
        print('{:5.2f}%        {}'.format(percent, label))
def main(args=None):
    """Run detection over every snapshot of the data generator and store results.

    For each data file the satellite label and timestamp are parsed from the
    file name. Files labelled ``MSG1`` and files whose result pickle already
    exists are skipped. For the remaining files the crops are run through the
    model in batches, the detected boxes are translated by their crop offset,
    filtered by score (raising the threshold until at most 30 boxes remain),
    de-duplicated with ``exclude_redundant_labelbbox_pair``, pickled, and drawn
    onto a three-panel PNG.

    Args:
        args: Raw command-line arguments; defaults to ``sys.argv[1:]``.
    """
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    generator = SAIL_inference_datagenerator(base_data_path = args.base_data_directory,
                                             interpolation_constants_directory=args.interpolation_constants_directory)

    # examples = []
    # data_fnames = []
    # sat_labels = []
    # datetimes = []
    # for i in range(args.batch_size):

    keras.backend.set_session(get_session())
    model = models.load_model(os.path.abspath(args.model_snapshot), backbone_name=args.backbone)
    model = models.convert_model(model)

    snapshots_processing_delta_t = np.zeros(len(generator), dtype=np.float64)

    # Start time of the previous iteration; None until the first one has run.
    prev_start_time = None

    for datafile_idx in range(len(generator)):
        start_time = time.time()

        if prev_start_time is not None:
            delta_t = start_time - prev_start_time
            snapshots_processing_delta_t[datafile_idx-1] = delta_t
            print('prev. snapshot processed in %f s; estimated processing time: %f s' % (delta_t, ((len(generator)-(datafile_idx+1)) * np.mean(snapshots_processing_delta_t[snapshots_processing_delta_t > 0.1]))))
        prev_start_time = start_time

        curr_fname = generator.data_fnames[datafile_idx]
        curr_fname_basename = os.path.basename(curr_fname)
        # Raw string: "\d" and "\." in a plain literal are invalid escape
        # sequences and trigger warnings on current Python versions.
        reex = r'.+(MSG\d).+(\d{14})\.nc'
        match = re.match(reex, curr_fname_basename)
        sat_label = match.groups()[0]
        # NOTE(review): this skip does not advance generator.current, unlike
        # the "already processed" skip below - confirm that is intended.
        if sat_label == 'MSG1':
            continue
        dt_str = match.groups()[1]
        dt = datetime.strptime(dt_str, "%Y%m%d%H%M%S")
        curr_snapshot_results_filename = os.path.join(args.output_directory,
                                                      datetime.strftime(dt, "%Y%m%d"),
                                                      '%s_%s_p%s.pkl' % (sat_label, datetime.strftime(dt, "%Y%m%d%H%M%S"), ('%.5f' % args.proba_threshold).replace('.', '_')))
        curr_snapshot_vis_plot_filename = os.path.join(args.output_directory,
                                                       datetime.strftime(dt, "%Y%m%d"),
                                                       '%s_%s_p%s.png' % (sat_label, datetime.strftime(dt, "%Y%m%d%H%M%S"), ('%.5f' % args.proba_threshold).replace('.', '_')))
        if os.path.exists(curr_snapshot_results_filename):
            generator.current += 1
            print('this file has been already processed earlier. Skipping.')
            continue


        example,shared_mask,crops,masks,data_fname,dt,crop_bboxes,sat_label = next(generator)
        print('%s : processing file %d of %d: %s' % (str(start_time), datafile_idx+1, len(generator), data_fname))

        # examples.append(crops)
        # data_fnames.append(data_fname)
        # sat_labels.append(sat_label)
        # datetimes.append(dt)
        # examples = np.concatenate(examples, axis=0)

        #region debug_plot
        # crop_ch5_normed = example[0, :, :, 0]
        #
        # f = plt.figure(figsize=(6,6), dpi=300)
        # im = plt.imshow(scale_ch5_back(crop_ch5_normed), cmap=cmap_ch5, vmin=200., vmax=320.)
        # for idx in range(len(crop_bboxes)):
        #     x1,y1,x2,y2 = crop_bboxes[idx]
        #     # p = plt.subplot(3, 3, idx+1)
        #     # ax = plt.gca()
        #     _ = plt.plot([x1,x1,x2,x2,x1], [y1,y2,y2,y1,y1], color='green')
        #
        # _ = plt.axis('off')
        # # _ = plt.title(str(datetimes[idx]))
        # plt.show()
        #endregion debug_plot

        curr_example_batch_generator = SAIL_batches_generator(crops, batch_size=args.batch_size)
        detected_boxes_per_crop = []
        scores_per_crop = []
        for batch_idx in range(len(curr_example_batch_generator)):
            images_batch, scales = next(curr_example_batch_generator)

            # prediction!
            boxes, scores, pred_labels = model.predict_on_batch(images_batch)
            # Drop entries that are exactly (-1, -1, -1, -1) / have score -1.
            boxes = [np.array([box for box in curr_boxes if np.square(box - np.array([-1., -1., -1., -1.])).sum() > 0.]) for curr_boxes in boxes]
            scores = [np.array([sc for sc in curr_scores if sc > -1.]) for curr_scores in scores]
            detected_boxes_per_crop = detected_boxes_per_crop + boxes
            scores_per_crop = scores_per_crop + scores

        if len(detected_boxes_per_crop) == 0:
            continue

        # translate these labels bboxes
        translated_detected_boxes_per_crop = [[box + np.array([l, b, l, b]) for box in curr_boxes] for (curr_boxes, (l, b, r, t)) in zip(detected_boxes_per_crop, crop_bboxes)]
        # flat this list
        translated_detected_boxes_per_crop = [box[np.newaxis, :] for boxes_of_crop in translated_detected_boxes_per_crop for box in boxes_of_crop]
        if len(translated_detected_boxes_per_crop) == 0:
            continue
        # concat to one array
        translated_detected_boxes_per_crop_flat = np.concatenate(translated_detected_boxes_per_crop, axis=0)
        # concat scores to one array
        scores_per_crop_flat = np.concatenate(scores_per_crop)

        # indices1 = np.where(scores_per_crop_flat<1.)[0]
        # translated_detected_boxes_per_crop_flat = translated_detected_boxes_per_crop_flat[indices1]
        # scores_per_crop_flat = scores_per_crop_flat[indices1]

        selected_indices = np.where((scores_per_crop_flat >= args.proba_threshold) & (scores_per_crop_flat<1.))[0]

        if len(selected_indices) > 30:
            # Too many candidates: move the threshold towards 1 in 2% steps of
            # the remaining gap until at most 30 boxes pass, or give up.
            print('adjusting proba_thresh...')
            curr_thresh = args.proba_threshold
            failed_searching_suitable_threshold = False
            while np.sum((scores_per_crop_flat >= curr_thresh)&(scores_per_crop_flat<1.)) > 30:
                curr_thresh = (1.- 0.98*(1-curr_thresh))
                print('%f : %d bboxes' % (curr_thresh, np.sum((scores_per_crop_flat >= curr_thresh)&(scores_per_crop_flat<1.))))
                if ((np.abs(curr_thresh-1.)<1.e-3) & (np.sum((scores_per_crop_flat >= curr_thresh)&(scores_per_crop_flat<1.)) > 30)):
                    failed_searching_suitable_threshold = True
                    break
            if failed_searching_suitable_threshold:
                print('failed searching suitable threshold. !!! Skipping this example !!!')
                continue
            selected_indices = np.where((scores_per_crop_flat >= curr_thresh)&(scores_per_crop_flat<1.))[0]
        translated_detected_boxes_per_crop_flat_filtered = translated_detected_boxes_per_crop_flat[selected_indices]
        scores_per_crop_flat_filtered = scores_per_crop_flat[selected_indices]

        translated_detected_boxes_shrinked = np.copy(translated_detected_boxes_per_crop_flat_filtered)
        scores_shrinked = np.copy(scores_per_crop_flat_filtered)
        # Remove one redundant box per pass until the helper reports none left.
        with tqdm(np.arange(len(translated_detected_boxes_per_crop_flat_filtered))) as prbr:
            while True:
                if translated_detected_boxes_shrinked.shape[0] < 2:
                    break
                item_to_exclude = exclude_redundant_labelbbox_pair(translated_detected_boxes_shrinked, iou_threshold=args.shrinking_iou_threshold)
                if item_to_exclude is None:
                    break
                translated_detected_boxes_shrinked = np.array([translated_detected_boxes_shrinked[i] for i in range(translated_detected_boxes_shrinked.shape[0]) if i != item_to_exclude])
                scores_shrinked = np.array([scores_shrinked[i] for i in range(scores_shrinked.shape[0]) if i != item_to_exclude])
                prbr.update(1)


        curr_snapshot_detected_data_dict = {'data_fname': data_fname,
                                            'sat_label': sat_label,
                                            'dt': dt,
                                            'proba_threshold': args.proba_threshold,
                                            'shrinking_iou_threshold': args.shrinking_iou_threshold,
                                            'scores_shrinked': scores_shrinked,
                                            'translated_detected_boxes_shrinked': translated_detected_boxes_shrinked,
                                            'projection_shape': example.shape,
                                            'retinanet_backbone': args.backbone,
                                            'retinanet_snapshot_file': args.model_snapshot}
        EnsureDirectoryExists(os.path.dirname(curr_snapshot_results_filename))
        with open(curr_snapshot_results_filename, 'wb') as f:
            pickle.dump(curr_snapshot_detected_data_dict, f)



        # np.bool was removed from NumPy (1.24); the builtin bool is the
        # equivalent dtype.
        crop_ch5_normed = example[0, :, :, 0]
        crop_ch5_normed = np.ma.asarray(crop_ch5_normed)
        crop_ch5_normed.mask = shared_mask.astype(bool)
        crop_ch9_normed = example[0, :, :, 1]
        crop_ch9_normed = np.ma.asarray(crop_ch9_normed)
        crop_ch9_normed.mask = shared_mask.astype(bool)
        crop_btd_normed = example[0, :, :, 2]
        crop_btd_normed = np.ma.asarray(crop_btd_normed)
        crop_btd_normed.mask = shared_mask.astype(bool)

        #region debug_plot
        f = plt.figure(figsize=(8, 8), dpi=300)
        p = plt.subplot(2, 2, 1)
        ax = plt.gca()
        im = plt.imshow(scale_ch5_back(crop_ch5_normed), cmap=cmap_ch5, vmin=200., vmax=320.)
        for box, score in zip(translated_detected_boxes_shrinked, scores_shrinked):
            (bbox_x1, bbox_y1, bbox_x2, bbox_y2) = box.astype(int)
            plt.plot([bbox_x1, bbox_x1, bbox_x2, bbox_x2, bbox_x1], [bbox_y1, bbox_y2, bbox_y2, bbox_y1, bbox_y1], color='green', linewidth=0.5)
            plt.text(bbox_x2, bbox_y2 + 4, '%.3f' % score, fontsize=6, color='magenta')
        _ = plt.axis('off')
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = plt.colorbar(im, cax=cax)
        ax.set_title('ch5, K')

        p = plt.subplot(2, 2, 2)
        ax = plt.gca()
        im = plt.imshow(scale_ch9_back(crop_ch9_normed), cmap=cmap_ch9, vmin=200., vmax=320.)
        for box, score in zip(translated_detected_boxes_shrinked, scores_shrinked):
            (bbox_x1, bbox_y1, bbox_x2, bbox_y2) = box.astype(int)
            plt.plot([bbox_x1, bbox_x1, bbox_x2, bbox_x2, bbox_x1], [bbox_y1, bbox_y2, bbox_y2, bbox_y1, bbox_y1], color='green', linewidth=0.5)
            plt.text(bbox_x1, bbox_y1, '%.3f' % score, fontsize=6, color='magenta')
        _ = plt.axis('off')
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = plt.colorbar(im, cax=cax)
        # cbar.set_label('ch5, K', rotation=270)
        ax.set_title('ch9, K')

        p = plt.subplot(2, 2, 3)
        ax = plt.gca()
        # im = plt.imshow(scale_btd_back(crop_btd_normed), cmap='jet', vmin=scale_btd_back(btd_thresh))
        im = plt.imshow(scale_btd_back(crop_btd_normed), cmap=cmap_btd, vmin=-80., vmax=3.3)
        for box, score in zip(translated_detected_boxes_shrinked, scores_shrinked):
            (bbox_x1, bbox_y1, bbox_x2, bbox_y2) = box.astype(int)
            plt.plot([bbox_x1, bbox_x1, bbox_x2, bbox_x2, bbox_x1], [bbox_y1, bbox_y2, bbox_y2, bbox_y1, bbox_y1], color='green', linewidth=0.5)
            plt.text(bbox_x1, bbox_y1, '%.3f' % score, fontsize=6, color='magenta')
        _ = plt.axis('off')
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = plt.colorbar(im, cax=cax)
        # cbar.set_label('ch5, K', rotation=270)
        ax.set_title('BTD, K')

        # _ = plt.show()
        plt.tight_layout()
        plt.savefig(curr_snapshot_vis_plot_filename, dpi=300, pad_inches=0)
        plt.close()
示例#14
0
def main():
    """IPS Patcher entry point: run the sub-command chosen on the command line."""
    args = parse_args()

    # Each command function is registered under its own name.
    dispatch = {}
    for handler in (apply, create):
        dispatch[handler.__name__] = handler

    selected = dispatch[args.command]
    selected(args)
示例#15
0
def _int_if_numeric(token):
    """Return ``token`` converted to int when it parses as one, else unchanged."""
    try:
        return int(token)
    except ValueError:
        return token


def main():
    """Run one storage command: init, add, get, del, exist, keys or values.

    Keys and values given on the command line are converted to ``int`` when
    they parse as integers and are used as given otherwise. The same rule is
    applied by every command, including ``del``, so an entry added under a
    numeric key can also be deleted.
    """
    args = parse_args()

    storage = Storage(args.storage)
    if args.command == "init":
        try:
            storage.init()
            return
        except StorageInitError as error:
            sys.stdout.write(error.text)
            sys.exit(STORAGE_INIT_ERROR)

    check_path(args.storage)
    with storage:

        if args.command == "add":
            if len(args.items) % 2 != 0:
                # TODO: an error should be raised here; for now an odd number
                # of items makes the command do nothing.
                return

            # Items alternate key, value, key, value, ...; drawing from the
            # same iterator twice per step yields them pair by pair.
            tokens = iter(args.items)
            for key, value in zip(tokens, tokens):
                storage[_int_if_numeric(key)] = _int_if_numeric(value)

        elif args.command == "get":
            for key in args.keys:
                value = storage[_int_if_numeric(key)]
                print(value, file=sys.stdout)

        elif args.command == "del":
            for key in args.keys:
                # Same coercion as add/get/exist; without it an integer key
                # stored by "add" could not be addressed here.
                del storage[_int_if_numeric(key)]

        elif args.command == "exist":
            for key in args.keys:
                exist = _int_if_numeric(key) in storage
                print(exist)

        elif args.command == "keys":
            for key in storage:
                print(key, end=" ")

        elif args.command == "values":
            for key in storage:
                value = storage[key]
                print(value, end=" ")
示例#16
0
    for i in range(last, size - last):
        if pieces[i] == 0:
            q.put(queue_data(i, rs[i]))

    multi_thread.start(num, q, download_piece)
    print('\ndone.')
    os.remove(save_dir + ovd_file)


if __name__ == '__main__':

    signal.signal(signal.SIGINT, quit_all)
    signal.signal(signal.SIGTERM, quit_all)

    arg = ap.parse_args()
    st_dir = arg[ap.ST_DIR]
    st_num = arg[ap.ST_NUM]
    setting_flag = st_dir or st_num

    if setting_flag is None and len(arg[ap.URL]) == 0:
        print('use \'python app.py -h\' or view README to get help')
        exit(-1)

    if setting_flag is not None:
        if st_dir is not None:
            cp.set_item(cp.DIR, st_dir)
        if st_num is not None:
            cp.set_item(cp.NUM, st_num)

    if len(arg[ap.URL]) > 0:
示例#17
0
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import random
import numpy as np

from utils import get_dataset, plot_metric, prepare_output_dir
from utils import print_configuration, save_configuration
from args_parser import parse_args
from gmm import GaussianMixture

if __name__ == '__main__':
    # Entry point of the non-federated run: parse the options, seed the RNGs,
    # load the data and record the configuration.
    args = parse_args(is_federated=False)
    if args.seed:
        random.seed(int(args.seed))
        # np.random.RandomState(seed) only builds a generator object that was
        # immediately discarded; np.random.seed is what seeds NumPy's global
        # RNG.
        np.random.seed(int(args.seed))

    output_dir = prepare_output_dir()

    train_dataset, train_dataset_labels, _ = get_dataset(args)

    print_configuration(args, train_dataset, False)
    save_configuration(args, train_dataset, output_dir, False)

    # Init the Gaussian Mixture Model
    # `seed` stays None when no seed was given on the command line.
    seed = None
    if args.seed:
        seed = int(args.seed)

    # Prepare server --> init_dataset is given by 0.5% of the train_dataset randomly sampled
    # init_dataset_size = int(train_dataset.shape[0] * 0.005)
def run():
  """Build a custom image with Daisy, label it and optionally verify it.

  Reads its options from ``sys.argv[1:]``. The steps run in this order:
  resolve the project id and base image, render the Daisy workflow from
  ``constants.daisy_wf``, run Daisy, set the image label, run the smoke test
  unless ``--no-smoke-test`` was given, and finally log the expiration
  notice computed as the image creation time plus 60 days.
  """
  args = args_parser.parse_args(sys.argv[1:])

  # An explicit project id takes precedence over the looked-up one.
  project_id = args.project_id or get_project_id()

  # The base image comes either from an explicit image URI or from the
  # Dataproc version; the flag records which of the two was used.
  _LOG.info("Getting Dataproc base image name...")
  parsed_image_version = bool(args.base_image_uri)
  if parsed_image_version:
    dataproc_base_image = get_partial_image_uri(args.base_image_uri)
    dataproc_version = get_dataproc_image_version(args.base_image_uri)
  else:
    dataproc_base_image = get_dataproc_base_image(args.dataproc_version)
    dataproc_version = args.dataproc_version
  _LOG.info("Returned Dataproc base image: %s", dataproc_base_image)

  # run.sh is expected next to this file.
  script_dir = os.path.dirname(os.path.realpath(__file__))
  run_script_path = os.path.join(script_dir, "run.sh")

  # Workflow fragment for the OAuth path; empty when no path was given.
  if args.oauth:
    oauth = "\n    \"OAuthPath\": \"{}\",".format(
        os.path.abspath(args.oauth))
  else:
    oauth = ""

  # Built-in sources first; user-supplied extra sources are merged on top.
  daisy_sources = {}
  daisy_sources["run.sh"] = run_script_path
  daisy_sources["init_actions.sh"] = os.path.abspath(
      args.customization_script)
  daisy_sources.update(args.extra_sources)

  source_entries = []
  for source, path in daisy_sources.items():
    source_entries.append("\"{}\": \"{}\"".format(source, path))
  sources = ",\n".join(source_entries)

  # When the user wants to create a VM in a shared VPC,
  # only the subnetwork argument has to be provided whereas
  # the network one has to be left empty.
  network = args.network
  if not (args.network or args.subnetwork):
    network = 'global/networks/default'

  # create daisy workflow
  _LOG.info("Created Daisy workflow...")
  workflow_fields = dict(
      image_name=args.image_name,
      project_id=project_id,
      sources=sources,
      zone=args.zone,
      oauth=oauth,
      gcs_bucket=args.gcs_bucket,
      family=args.family,
      dataproc_base_image=dataproc_base_image,
      machine_type=args.machine_type,
      network=network,
      subnetwork=args.subnetwork,
      service_account=args.service_account,
      disk_size=args.disk_size,
      shutdown_timer_in_sec=args.shutdown_instance_timer_sec)
  workflow = constants.daisy_wf.format(**workflow_fields)

  _LOG.info("Successfully created Daisy workflow...")

  # run daisy to build custom image
  _LOG.info("Creating custom image with Daisy workflow...")
  daisy_binary = os.path.abspath(args.daisy_path)
  run_daisy(daisy_binary, workflow)
  _LOG.info("Successfully created custom image with Daisy workflow...")

  # set custom image label
  _LOG.info("Setting label on custom image...")
  set_custom_image_label(args.image_name, dataproc_version,
                         project_id, parsed_image_version)
  _LOG.info("Successfully set label on custom image...")

  # perform test on the newly built image
  if not args.no_smoke_test:
    _LOG.info("Verifying the custom image...")
    verify_custom_image(
        args.image_name, project_id, args.zone, network, args.subnetwork)
    _LOG.info("Successfully verified the custom image...")

  _LOG.info("Successfully built Dataproc custom image: %s",
            args.image_name)

  # notify when the image will expire.
  creation_timestamp = get_custom_image_creation_timestamp(
      args.image_name, project_id)
  creation_date = _parse_date_time(creation_timestamp)
  expiration_date = creation_date + datetime.timedelta(days=60)
  expiration_notice = constants.notify_expiration_text.format(
      args.image_name, str(expiration_date))
  _LOG.info(expiration_notice)
示例#19
0
from multiprocessing import Pool

from args_parser import parse_args
from http_parser.master_parser import MasterParser
from tools.general import create_dir, text_file_to_set, get_url_slug_tuples


def download(info):
    """Fetch one URL through ``MasterParser``.

    Args:
        info: A ``(filename, url)`` pair.

    The destination directory is read from the module-level ``OUTPUT_DIR``,
    which is assigned in the ``__main__`` section of this script.
    """
    target_name, link = info
    MasterParser.parse(link, OUTPUT_DIR, target_name)


def _set_output_dir(output_dir):
    """Pool initializer: publish ``output_dir`` as ``OUTPUT_DIR`` in a worker."""
    global OUTPUT_DIR
    OUTPUT_DIR = output_dir


def main(txt_file_path, num_workers):
    """Download every link listed in ``txt_file_path`` using a process pool.

    Args:
        txt_file_path: Path passed to ``text_file_to_set`` to obtain the links.
        num_workers: Number of worker processes in the pool.

    Each link is paired with a filename and dispatched to ``download``. When
    ``get_url_slug_tuples`` raises ``NotImplementedError``, the filenames fall
    back to the string indices "0", "1", ... in iteration order of the links.
    """
    links = text_file_to_set(txt_file_path)
    try:
        filenames_urls = get_url_slug_tuples(links)
    except NotImplementedError:
        indices_as_strings = map(str, range(len(links)))
        filenames_urls = zip(indices_as_strings, links)

    # ``download`` reads the module-level OUTPUT_DIR. With the "spawn" start
    # method the workers re-import this module without running the
    # ``__main__`` guard, so the global would be undefined there. Handing the
    # parent's value to every worker through the initializer keeps the
    # behaviour identical under "fork" and makes it work under "spawn".
    with Pool(num_workers,
              initializer=_set_output_dir,
              initargs=(OUTPUT_DIR,)) as p:
        p.map(download, filenames_urls)


# Script entry point: read the command-line options, prepare the output
# directory and start the downloads.
if __name__ == '__main__':
    args = parse_args()
    # Module-level global; download() reads it when calling MasterParser.parse.
    OUTPUT_DIR = args.output_dir

    # Called before any download starts (presumably creates the directory if
    # it is missing -- confirm against tools.general.create_dir).
    create_dir(OUTPUT_DIR)
    main(args.input, args.workers)
def human_eval_ranking(model, mode="val", batch_size=8, rows=100,
                       threshold=0.9):
    """Evaluate a contrastive encoder on the human-evaluation abstract pairs.

    Reads ``error_analysis/test_human_eval.csv``, tokenizes the two abstract
    columns separately, embeds each side with ``model`` and labels a pair as
    positive when the cosine similarity of the two embeddings exceeds
    ``threshold``. A classification report is printed and the per-pair
    results are handed to ``save_to_csv``.

    Args:
        model: Module called as ``model(input_ids, attention_mask)``; its two
            outputs are compared with cosine similarity along ``dim=1``.
        mode: Only used in the printed summary banner.
        batch_size: Batch size of the evaluation ``DataLoader``.
        rows: Maximum number of leading rows passed to ``save_to_csv``.
        threshold: Similarity strictly above this value yields prediction
            ``1``; anything else yields ``0``.

    Returns:
        The third value returned by ``score(..., average='macro')``
        (presumably the macro F-score -- confirm that ``score`` is
        scikit-learn's ``precision_recall_fscore_support``).
    """
    # The parsed arguments are not needed here; the call is kept so that the
    # command line is still processed exactly as before.
    parse_args()

    tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
    df = pd.read_csv('error_analysis/test_human_eval.csv', encoding='latin-1')
    df.dropna(inplace=True)
    print(df.head())
    abstract1 = list(df['paperAbstract1'])
    abstract2 = list(df['paperAbstract2'])
    encoding1 = tokenizer(abstract1,
                          padding=True,
                          truncation=True,
                          return_tensors="pt").to(device)
    encoding2 = tokenizer(abstract2,
                          padding=True,
                          truncation=True,
                          return_tensors="pt").to(device)
    labels = torch.tensor(list(df['label'])).unsqueeze(dim=1).long().to(device)
    test_dataset = TensorDataset(encoding1['input_ids'],
                                 encoding1['token_type_ids'],
                                 encoding1['attention_mask'],
                                 encoding2['input_ids'],
                                 encoding2['token_type_ids'],
                                 encoding2['attention_mask'], labels)
    # No sampler: rows must stay in file order so that predictions line up
    # with ``labels``, ``abstract1`` and ``abstract2`` below.
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size)
    model = model.eval()
    predictions = []
    similarity = []

    print('......................{} summary...................'.format(mode))
    with torch.no_grad():
        for (input_ids1, _, attention_mask1,
             input_ids2, _, attention_mask2, _) in test_dataloader:
            emd1 = model(input_ids1, attention_mask1)
            emd2 = model(input_ids2, attention_mask2)
            cosine_sim = torch.nn.functional.cosine_similarity(
                emd1, emd2, dim=1).cpu().detach().numpy()
            similarity.extend(cosine_sim)
            # Binarize without touching the raw similarities.
            predictions.extend(
                (cosine_sim > threshold).astype(cosine_sim.dtype))

    labels = labels.cpu().detach().numpy()
    predictions = np.asarray(predictions)
    _, _, fscore, _ = score(labels,
                            predictions.reshape(-1, 1),
                            average='macro')
    print(classification_report(labels, predictions))
    sys.stdout.flush()
    # Pass the predictions accumulated over *all* batches. The previous code
    # passed the last batch's ``cosine_sim`` array, which is at most
    # ``batch_size`` long and therefore did not line up with the other
    # columns.
    save_to_csv(
        predictions.reshape(-1)[:rows],
        labels.reshape(-1)[:rows],
        np.asarray(abstract1).reshape(-1)[:rows],
        np.asarray(abstract2).reshape(-1)[:rows],
        np.asarray(similarity).reshape(-1)[:rows], "Contrastive")
    return fscore
def human_eval_classification(model, mode="val", batch_size=8, rows=100):
    """Evaluate a pair classifier on the human-evaluation abstract pairs.

    Reads ``error_analysis/test_human_eval.csv``, tokenizes each
    (abstract1, abstract2) pair jointly, runs ``model`` batch by batch and
    takes the arg-max over the logits as the predicted class. Accuracy, a
    classification report and the F-score are printed, and the first
    ``rows`` results are passed to ``save_to_csv``.

    Args:
        model: Module called as ``model(input_ids, attention_mask, labels)``
            and unpacked as ``(loss, logits)``.
        mode: Only used in the printed summary banner.
        batch_size: Batch size of the evaluation ``DataLoader``.
        rows: Maximum number of leading rows passed to ``save_to_csv``.

    Returns:
        The third value returned by ``score(..., average='macro')``
        (presumably the macro F-score -- confirm what ``score`` is bound to).
    """
    args = parse_args()
    bert_tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
    frame = pd.read_csv('error_analysis/test_human_eval.csv',
                        encoding='latin-1')
    frame.dropna(inplace=True)
    print(frame.head())
    abstract1 = list(frame['paperAbstract1'])
    abstract2 = list(frame['paperAbstract2'])

    # Both abstracts go through the tokenizer together as one sentence pair.
    pair_encoding = bert_tokenizer(abstract1,
                                   abstract2,
                                   padding=True,
                                   truncation=True,
                                   return_tensors="pt").to(device)
    labels = torch.tensor(list(frame['label']))
    labels = labels.unsqueeze(dim=1).long().to(device)

    eval_dataset = TensorDataset(pair_encoding['input_ids'],
                                 pair_encoding['token_type_ids'],
                                 pair_encoding['attention_mask'], labels)
    # No sampler is used, so batches follow the row order of the CSV.
    eval_loader = DataLoader(eval_dataset, batch_size=batch_size)
    model = model.eval()
    predicted = []
    positive_probs = []

    # The tokenizer is loaded a second time here; nothing below uses it.
    model_name_or_path = "bert-base-cased"
    bert_tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    print('......................{} summary...................'.format(mode))
    with torch.no_grad():
        for input_ids, _, attention_mask, val_labels in eval_loader:
            loss, logits = model(input_ids, attention_mask, val_labels)

            print("logits", logits)
            print(logits.size())
            # Column 1 of the softmax is recorded as the pair's probability.
            class_probs = torch.nn.functional.softmax(logits, dim=1)
            positive_probs.extend(class_probs[:, 1].cpu().detach().numpy())
            batch_classes = torch.argmax(logits, dim=1)
            predicted.extend(batch_classes.cpu().detach().numpy())

    # Column vector, matching the (N, 1) layout of ``labels``.
    predicted = np.asarray(predicted).reshape(-1, 1)
    print("----------------------------------------------")
    labels = labels.cpu().detach().numpy()
    hits = (predicted == labels)
    print('ACCURACY ================= ', hits.sum() / predicted.shape[0])
    _, _, fscore, _ = score(labels, predicted, average='macro')
    print(classification_report(labels, predicted))
    sys.stdout.flush()
    save_to_csv(
        predicted.reshape(-1)[:rows],
        labels.reshape(-1)[:rows],
        np.asarray(abstract1).reshape(-1)[:rows],
        np.asarray(abstract2).reshape(-1)[:rows],
        np.asarray(positive_probs).reshape(-1)[:rows])
    print(fscore)
    return fscore
示例#22
0
from do_tournament import do_tournament
from args_parser import parse_args
import sys

# Script entry point: the values returned by parse_args are unpacked into
# positional arguments for do_tournament.
# NOTE(review): parse_args receives the full sys.argv, including the program
# name at index 0 -- confirm that parse_args expects that.
if __name__ == "__main__":
	do_tournament(*parse_args(sys.argv))