def get_validation_split(data_file,
                         training_file,
                         validation_file,
                         data_split=0.8,
                         overwrite=False):
    """
    Splits the data into the training and validation indices list.
    :param data_file: pytables hdf5 data file
    :param training_file:
    :param validation_file:
    :param data_split:
    :param overwrite:
    :return:
    """
    if overwrite or not os.path.exists(training_file):
        print("Creating validation split...")
        nb_samples = data_file.root.data.shape[0]
        sample_list = list(range(nb_samples))
        training_list, validation_list = split_list(sample_list,
                                                    split=data_split)
        pickle_dump(training_list, training_file)
        pickle_dump(validation_list, validation_file)
        return training_list, validation_list
    else:
        print("Loading previous validation split...")
        return pickle_load(training_file), pickle_load(validation_file)
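# A minimal usage sketch for get_validation_split with hypothetical file names; it assumes
# the pytables file exposes a root.data array and that the split_list/pickle_dump/pickle_load
# helpers used above are importable.
import tables

h5_file = tables.open_file("image_data.h5", "r")
train_ids, valid_ids = get_validation_split(h5_file,
                                            training_file="training_ids.pkl",
                                            validation_file="validation_ids.pkl",
                                            data_split=0.8,
                                            overwrite=False)
h5_file.close()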
def load_data(data_type,
              train_on_cv=False,
              cv_random_state=42,
              cv_fold=5,
              cv_index=0):
    if data_type == 'train':
        if train_on_cv:
            data = pickle_load(
                format_filename(PROCESSED_DATA_DIR,
                                TRAIN_CV_DATA_TEMPLATE,
                                random=cv_random_state,
                                fold=cv_fold,
                                index=cv_index))
        else:
            data = pickle_load(
                format_filename(PROCESSED_DATA_DIR, TRAIN_DATA_FILENAME))
    elif data_type == 'dev':
        if train_on_cv:
            data = pickle_load(
                format_filename(PROCESSED_DATA_DIR,
                                DEV_CV_DATA_TEMPLATE,
                                random=cv_random_state,
                                fold=cv_fold,
                                index=cv_index))
        else:
            data = pickle_load(
                format_filename(PROCESSED_DATA_DIR, DEV_DATA_FILENAME))
    elif data_type == 'test':
        data = pickle_load(
            format_filename(PROCESSED_DATA_DIR, TEST_DATA_FILENAME))
    else:
        raise ValueError('data type not understood: {}'.format(data_type))
    return data
Example 3
def fold_split(FG, seed=1234):
    labels = tuple(FG.labels)
    assert labels in [('AD', 'CN'), ('AD', 'MCI', 'CN')]
    subject_ids = pickle_load(FG.subject_ids_path)
    diagnosis = pickle_load(FG.diagnosis_path)

    X = np.array(
        [sid for sid in subject_ids['smri'] if diagnosis[sid] in labels])
    y = np.array([diagnosis[x] for x in X])

    skf = StratifiedKFold(n_splits=FG.fold, shuffle=True, random_state=seed)
    train, test = list(skf.split(X, y))[FG.running_fold]

    print(
        'Train ',
        tuple(
            zip(labels,
                [len(X[train][y[train] == label]) for label in labels])))
    print(
        'Test ',
        tuple(zip(labels,
                  [len(X[test][y[test] == label]) for label in labels])))

    ratio = [
        len(X[train][y[train] == label]) / len(X[train]) for label in labels
    ]
    ratio = torch.Tensor(2 * (1 - np.array(ratio)))

    return X, y, train, test, ratio
Example 4
def gen_product_sentences():

    order_products_prior = pickle_load(config.order_products_prior_path)[[
        'order_id', 'product_id', 'add_to_cart_order'
    ]].copy()
    order_products_train = pickle_load(config.order_products_train_path)[[
        'order_id', 'product_id', 'add_to_cart_order'
    ]].copy()

    order_products_train["product_id"] = order_products_train[
        "product_id"].astype(str)
    order_products_prior["product_id"] = order_products_prior[
        "product_id"].astype(str)

    product_sentences_train = order_products_train.sort_values([
        'order_id', 'add_to_cart_order'
    ]).groupby('order_id').apply(lambda order: order['product_id'].tolist())
    product_sentences_prior = order_products_prior.sort_values([
        'order_id', 'add_to_cart_order'
    ]).groupby('order_id').apply(lambda order: order['product_id'].tolist())

    product_sentences = product_sentences_prior.append(
        product_sentences_train).values

    return product_sentences
Example 5
def get_name_type_for_subject(entity2name_path, entity2type_path, triple_path,
                              save_dir):
    entity2name = pickle_load(entity2name_path)
    entity2type = pickle_load(entity2type_path)
    triples = pickle_load(triple_path)

    has_name_count = 0
    has_type_count = 0
    subject2name = {}

    subject2type = {}

    for index, subject in enumerate(triples.keys()):
        if subject in entity2name:
            subject2name[subject] = entity2name[subject]
            has_name_count += 1

        if subject in entity2type:
            subject2type[subject] = entity2type[subject]
            has_type_count += 1

    print(has_name_count, len(triples.keys()))
    print(has_type_count, len(triples.keys()))

    pickle_save(subject2name, os.path.join(save_dir, 'trim_subject2name.pkl'))
    pickle_save(subject2type, os.path.join(save_dir, 'trim_subject2type.pkl'))
Example 6
def load_agents_params(params, agents_list):
    agents, states, traces, trajectories, dataframes = {}, {}, {}, {}, {}
    # states['all_states'] = {}

    environment = create_test_env(params.env, n_envs=params.n_envs, is_atari=params.is_atari,
                                  stats_path=params.stats_path, seed=0, log_dir=params.log_dir,
                                  should_render=not params.no_render, hyperparams=params.hyperparams)
    for a in agents_list:
        """load agent model"""
        model_path = os.path.join(params.agents_dir, a, params.env + ".pkl")
        agents[a] = ALGOS[a].load(model_path, env=environment)
        """load agent states traces and trajectories"""
        params_path = os.path.join(params.stt_dir, a)
        for file in os.listdir(params_path):
            if file.startswith('States'):
                states[a] = pickle_load(os.path.join(params_path, file))
            elif file.startswith('Traces'):
                traces[a] = pickle_load(os.path.join(params_path, file))
            else:
                trajectories[a] = pickle_load(os.path.join(params_path, file))
        # states['all_states'] = {**states['all_states'], **states[a]}
        # The number of "same" states between algos is very small because the id of a state is based on the
        # full observation, therefore taking into account the current score.

        """importance dataframes"""
        data = {
            'state': list(states[a].keys()),
            'q_values': [x.observed_actions for x in states[a].values()]
        }
        df = pd.DataFrame(data)
        # top "important_state_percentage" of the states sorted by importance
        dataframes[a] = compute_states_importance(df, compare_to=params.state_importance).sort_values(
            ['importance'], ascending=False).head(int(df.shape[0] * params.important_state_percentage))

    return agents, states, traces, trajectories, dataframes
Example 7
def prepare_training_data():
    X_train, y_train = pickle_load('data/train.pkl')
    X_valid, y_valid = pickle_load('data/valid.pkl')
    X_test, y_test = pickle_load('data/test.pkl')

    trainer = Trainer(mp_pool=Pool(8))
    trainer.prepare_data(X_train, y_train, X_valid, y_valid, X_test, y_test)
    trainer.save('data2')
Example 8
def load_idx2cate():
    idx2cate1 = pickle_load(
        format_filename(PROCESSED_DATA_DIR, IDX2TOKEN_TEMPLATE, level='cate1'))
    idx2cate2 = pickle_load(
        format_filename(PROCESSED_DATA_DIR, IDX2TOKEN_TEMPLATE, level='cate2'))
    idx2cate3 = pickle_load(
        format_filename(PROCESSED_DATA_DIR, IDX2TOKEN_TEMPLATE, level='cate3'))
    return idx2cate1, idx2cate2, idx2cate3
    def __init__(self):
        ## FIXME: replace the static part with a dict precomputed offline
        from annoy import AnnoyIndex
        from utils import pickle_load
        self.ann_index = AnnoyIndex(300, metric='euclidean')
        self.ann_index.load('data/index/annoy.euclidean.idx')
        self.id2word = pickle_load('data/index/id2word.pkl')
        self.word2id = pickle_load('data/index/word2id.pkl')
Example 10
def get_pca_features(y, npca_comp=40, tss=0.1):
    '''
    Get PCA features.

    Args
        y - 1D array
            mono sound data
        npca_comp - int
            number of PCA components used to extract the features
        tss - float
            length of one time segment of the sound (sec)
    return :
        pca_features - 2D array (ntseg, npca_comp)
    '''
    # length of sound(y)
    sound_len = len(y)
    # length of time segment
    tseg_len = int(tss*SR)
    # number of time segments in the music
    ntseg = sound_len//tseg_len

    pca_basis_path = get_pca_basis_path(tss)

    try :
        content = pickle_load(pca_basis_path)
    # if pca basis with tss doesn't exist, generate pca basis with tss
    except FileNotFoundError:
        print("FileNotFoundError : {}".format(pca_basis_path))
        pca_generator(clip_sec = tss)
        content = pickle_load(pca_basis_path)

    w = content['w']  # eigenvalues
    v = content['v']  # eigenvectors

    print("Number of pca vectors : {}".format(len(w)))

    window = int(SR*tss)
    nfft = window
    win_len = window # win_len<=nfft
    hop_len = window

    D = librosa.core.stft(y,
                          n_fft=nfft,
                          win_length=win_len,
                          hop_length=hop_len)

    D_T = D.T
    ntime, nfreq = D_T.shape

    pca_features = np.zeros((ntseg, npca_comp))
    for i in range(min(ntime, ntseg)):
        for j in range(npca_comp):
            pca_features[i][j] = abs(complex_dot(D_T[i], v[j]))

    return pca_features
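# A short usage sketch for get_pca_features; the audio path is hypothetical and the
# module-level SR constant is assumed to match the sample rate used when the PCA basis
# was generated.
import librosa

y, _ = librosa.load("example_clip.wav", sr=SR, mono=True)
features = get_pca_features(y, npca_comp=40, tss=0.1)
print(features.shape)  # (number of 0.1 s segments, 40)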
Example 11
def load_input_data(data_folder, data_kind, level, use_text_input,
                    use_text_input_l, use_text_input_r,
                    use_text_input_r_with_pad, use_aspect_input,
                    use_aspect_text_input, use_loc_input, use_offset_input,
                    use_mask):
    dirname = os.path.join('./data', data_folder)
    input_data = []
    if use_text_input:
        input_data.append(
            pickle_load(
                os.path.join(dirname,
                             '{}_{}_input.pkl'.format(data_kind, level))))
    if use_text_input_l:
        input_data.append(
            pickle_load(
                os.path.join(dirname,
                             '{}_{}_input_l.pkl'.format(data_kind, level))))
    if use_text_input_r:
        input_data.append(
            pickle_load(
                os.path.join(dirname,
                             '{}_{}_input_r.pkl'.format(data_kind, level))))
    if use_text_input_r_with_pad:
        input_data.append(
            pickle_load(
                os.path.join(
                    dirname,
                    '{}_{}_input_r_with_pad.pkl'.format(data_kind, level))))
    if use_aspect_input:
        input_data.append(
            pickle_load(
                os.path.join(dirname,
                             '{}_aspect_input.pkl'.format(data_kind))))
    if use_aspect_text_input:
        input_data.append(
            pickle_load(
                os.path.join(dirname,
                             '{}_{}_aspect_input.pkl'.format(data_kind,
                                                             level))))
    if use_loc_input:
        input_data.append(
            pickle_load(
                os.path.join(dirname,
                             '{}_{}_pos_input.pkl'.format(data_kind, level))))
    if use_offset_input:
        input_data.append(
            pickle_load(
                os.path.join(dirname,
                             '{}_{}_offset_input.pkl'.format(data_kind,
                                                             level))))
    if use_mask:
        input_data.append(
            pickle_load(
                os.path.join(dirname,
                             '{}_{}_mask.pkl'.format(data_kind, level))))
    if len(input_data) == 1:
        input_data = input_data[0]
    if len(input_data) == 0:
        raise Exception('No Input!')
    return input_data
Example 12
    def __init__(self):
        self.cdir = os.path.abspath(os.path.dirname(__file__))
        self.unigram_fixing = UnigramFixing()
        self.vn_vocab, self.vn_bare_vocab, self.vn_long_vocab, self.vn_long_bare_vocab \
            = vnbarenorm.load_vn_vocab()
        self.vn_special_words = vnbarenorm.load_special_words()
        self.vn_true_bare_bigram_hie_ddict = utils.pickle_load("%s/models/data/out/hierachical_true_dict.pkl"%self.cdir)
        self.vn_true_bare_bigram_f_ddict = utils.pickle_load("%s/models/data/out/hierachical_true_first_ab_dict.pkl"%self.cdir)

        self.hard_fixing_map = vnbarenorm.load_hard_fixing()
        self.load_one_fix()
Example 13
def align(params, load_prefix, save_path):
    """
  Align depth and color images. Save everything into a single pickle object.

  Parameters
  ----------
  params: Camera intrinsic parameters.

  load_prefix: Path to load data. Will load color stream from
  `load_prefix`_color.avi, depth stream from `load_prefix`_depth.pkl, and body
  stream from `load_prefix`_body.pkl.

  save_path: Path to save result data.

  """
    color_src = cv2.VideoCapture(load_prefix + '_color.avi')
    depth_src = pickle_load(load_prefix + '_depth.pkl')
    body_src = pickle_load(load_prefix + '_body.pkl')

    depth_height = depth_src[0].shape[0]
    depth_width = depth_src[0].shape[1]

    h_coord = np.tile(np.reshape(np.arange(1, depth_width + 1), [1, -1]),
                      [depth_height, 1]) - params['cx_d']
    v_coord = np.tile(np.reshape(np.arange(1, depth_height + 1), [-1, 1]),
                      [1, depth_width]) - params['cy_d']

    pcloud_frames = []
    depth_frames = []
    color_frames = []
    body_frames = []
    for depth, body in tqdm(zip(depth_src, body_src)):
        _, color = color_src.read()
        pcloud = depth_to_world(depth, params, h_coord, v_coord)
        pcloud_frames.append(pcloud)
        color = world_to_color(params, pcloud, color)
        color_frames.append(color)
        body_frames.append(body)
        depth_frames.append(depth)

    data = {
        'pclouds': pcloud_frames,
        'depths': depth_frames,
        'colors': color_frames,
        'bodies': body_frames
    }

    pickle_save(save_path, data)
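# A hedged usage sketch for align. The intrinsics dict below only shows the keys the code
# above reads directly ('cx_d', 'cy_d'); depth_to_world/world_to_color may require further
# entries. Paths and values are hypothetical.
camera_params = {'cx_d': 254.878, 'cy_d': 205.395}  # extend with the remaining intrinsics
align(camera_params,
      load_prefix="recordings/session01",
      save_path="recordings/session01_aligned.pkl")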
Example 14
def load_model_with_weights(fname, custom_layers=None):
    """
    Loads model and scaler from .zip
    """
    if custom_layers is None:
        custom_layers = {}

    with zipfile.ZipFile(fname, mode='r') as zipf:
        # First, instantiate model from config
        json_str = zipf.read('config.json')
        config = json.loads(json_str)
        metadata = config['metadata']
        del config['metadata']
        model = keras.models.model_from_config(config,
                                               custom_objects=custom_layers)

        # Unzip weights.hdf5 to a temporary file and load it
        fd, tmpfname = tempfile.mkstemp()
        os.close(fd)
        # weights - extract to tmpfname (binary mode: zip members yield bytes)
        with open(tmpfname, 'wb') as wf:
            with zipf.open('weights.hdf5') as zwf:
                wf.write(zwf.read())
        model.load_weights(tmpfname)

        # scaler - extract to tmpfname
        with open(tmpfname, 'wb') as sf:
            with zipf.open('scaler.pickle') as zsf:
                sf.write(zsf.read())
        scaler = utils.pickle_load(sf.name)
        os.remove(tmpfname)

    return model, scaler, metadata
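# Usage sketch for load_model_with_weights; the archive is expected to contain config.json
# (with a 'metadata' entry), weights.hdf5 and scaler.pickle, as read above. The file name
# is hypothetical.
model, scaler, metadata = load_model_with_weights("model_bundle.zip")
print(metadata)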
Example 15
def show_results(res_paths):
    results = {}
    for path in res_paths:
        result = pickle_load(path)
        for k, v in result.items():
            if k not in results.keys():
                results[k] = result[k]
    results = collections.OrderedDict(sorted(results.items()))
    fig, ax = plt.subplots(figsize=(9, 5.5))
    colors = cm.Dark2(np.linspace(0, 1, len(results)))
    count = 0
    for k, res in results.items():
        mean, std = np.nanmean(res, axis=0), np.nanstd(res, axis=0)
        # ax.errorbar(np.arange(mean.shape[0]), mean, yerr=std, color=colors[count], label=k, fmt='-o')
        plt.plot(np.arange(mean.shape[0]) + 1,
                 mean,
                 '-o',
                 color=colors[count],
                 label=k)
        count += 1
        print(np.array_str(mean[8:], precision=3))
        print("Average precision of %s for future prediction: %f" %
              (k, mean[8:].mean()))

    # Now add the legend with some customizations.
    legend = ax.legend(loc='upper right')

    ax.set_xlabel("time step")
    ax.set_ylabel("average precision")

    plt.axvline(x=8.5, color='r', linestyle='--')
    plt.text(3, 0.1, 'tracking', fontsize=18, color='grey')
    plt.text(11, 0.1, 'prediction', fontsize=18, color='grey')

    plt.show()
Example 16
def stats():
    vocaburary = Vocabulary.load(Q_VOCAB_NAME)
    kvs = []
    for key, value in sorted(vocaburary.doc_freq.items(),
                             key=lambda kv: (kv[1], kv[0])):
        kvs.append([key, value])

    TOP_TOKEN = set()
    question_list = utils.pickle_load("question_tokens.dat")

    for i in range(1, 200):
        key, value = kvs[-i][0], kvs[-i][1]
        # print(vocaburary.id_2_token[key], value)
        TOP_TOKEN.add(key)

    f_pattern = open("q_pattern.dat", "w", encoding="utf-8")
    for question in question_list:
        pattern = []
        origin = []
        counting = 0
        for token in question:
            if token in TOP_TOKEN:
                s_token = vocaburary.id_2_token[token]
                counting += 1
            else:
                s_token = "*"
            pattern.append(s_token)
            w = vocaburary.id_2_token[token]
            origin.append(w)
        if counting > 0:
            pattern = " ".join(pattern)
            origin = " ".join(origin)
            f_pattern.write(u"%s\t|\t%s\n" % (pattern, origin))
    f_pattern.close()
Example 17
def debug_test_set():
    clf = pickle_load(os.path.join('models6', 'strong_classifier_276.pkl'))
    trainer = Trainer(mp_pool=Pool(8))
    trainer.load_data('data')
    print("Strong classifier test metrics:")
    predictions = clf.classify_batch(trainer.test_ds.X_integral)
    print(Metrics(predictions, trainer.test_ds.y))
Example 18
def find_papers(args):
    y, m, d = args.report_date.split('-')
    y = int(y)
    m = int(m)
    d = int(d)

    if 'none' in args.filter_primary_category:
        pcates = ['cs.CV', 'cs.AI', 'cs.LG', 'stat.ML', 'cs.RO']
    elif '+' in args.filter_primary_category:
        pcates = args.filter_primary_category.replace(' ', '').split('+')
    else:
        pcates = [args.filter_primary_category]
    assert type(pcates) == list, 'typeError: primary category list'

    sort_by = date_sort_by(args.date_sort_by)

    db = pickle_load(args.db_path)
    print('database has {} entries'.format(len(db)))

    result = {}
    for k, v in db.items():
        if (y == v[sort_by].tm_year) and (m == v[sort_by].tm_mon) and (
                d == v[sort_by].tm_mday):
            if v['arxiv_primary_category']['term'] in pcates:
                result[k] = v
    print('found {} entries'.format(len(result)))
    return result, pcates
Example 19
def load_model_with_weights(fname, custom_layers=None):
    """
    Loads model and scaler from .zip
    """
    if custom_layers is None:
        custom_layers = {}

    with zipfile.ZipFile(fname, mode='r') as zipf:
        # First, instantiate model from config
        json_str = zipf.read('config.json')
        config = json.loads(json_str)
        metadata = config['metadata']
        del config['metadata']
        model = keras.models.model_from_config(config,
            custom_objects=custom_layers)

        # Unzip weights.hdf5 to a temporary file and load it
        fd, tmpfname = tempfile.mkstemp()
        os.close(fd)
        # weights - extract to tmpfname (binary mode: zip members yield bytes)
        with open(tmpfname, 'wb') as wf:
            with zipf.open('weights.hdf5') as zwf:
                wf.write(zwf.read())
        model.load_weights(tmpfname)

        # scaler - extract to tmpfname
        with open(tmpfname, 'wb') as sf:
            with zipf.open('scaler.pickle') as zsf:
                sf.write(zsf.read())
        scaler = utils.pickle_load(sf.name)
        os.remove(tmpfname)

    return model, scaler, metadata
def load_input_data(data_folder, data_kind, level, use_text_input,
                    use_aspect_input, use_aspect_text_input):
    dirname = os.path.join('./data', data_folder) # ./data/car/
    input_data = []
    if use_text_input:
        input_data.append(pickle_load(os.path.join(dirname, '{}_{}_input.pkl'.format(data_kind, level))))
    if use_aspect_input:
        input_data.append(pickle_load(os.path.join(dirname, '{}_aspect_input.pkl'.format(data_kind))))
    if use_aspect_text_input:
        input_data.append(pickle_load(os.path.join(dirname, '{}_{}_aspect_input.pkl'.format(data_kind, level))))

    if len(input_data) == 1:
        input_data = input_data[0]
    if len(input_data) == 0:
        raise Exception('No Input!')
    return input_data
Example 21
def eval_deprecated():
    '''partially deprecated'''
    k = opt.k
    train_node = TrainNode(opt)
    idx2bin = {}
    dataset = utils.load_data('query') ### use dataset eventually
    dataset = dataset.to(device)
    dsnode_path = opt.dsnode_path + str(opt.n_clusters)
    #print('dsnode path {}'.format(dsnode_path))
    dsnode = utils.pickle_load(dsnode_path)
    print('dsnode {}'.format(dsnode))
    
    train_node.train(dataset, dsnode, idx2bin)
    #idx (of query) in entire dataset, bin is idx of leaf bin.
    
    eval_root = train_node.create_eval_tree()
    idx2bin = eval_root.idx2bin
    #eval root should contain dict for answers set indices and bin #, for evaluation.
        
    #serialize
    print('train.py - serializing model evaluation tree...')
    eval_root_path = osp.join(opt.data_dir, 'model_eval_root') ###########
    utils.pickle_dump(eval_root, eval_root_path)

    ## evaluate ##    
    queryset = utils.load_data('query')
    neighbors = utils.load_data('answers')
    acc, probe_count = eval_model(eval_root, queryset, neighbors, opt)
    print('train.py - Query set prediction acc {} probe count {}'.format(acc, probe_count))
Example 22
def run_validation_cases(validation_keys_file,
                         model_file,
                         training_modalities,
                         labels,
                         hdf5_file,
                         output_label_map=False,
                         output_dir=".",
                         threshold=0.7,
                         overlap=16,
                         permute=False):
    validation_indices = pickle_load(validation_keys_file)
    model = load_old_model(model_file)
    data_file = tables.open_file(hdf5_file, "r")
    for index in validation_indices:
        # if 'subject_ids' in data_file.root:
        #     case_directory = os.path.join(output_dir, data_file.root.subject_ids[index].decode('utf-8'))
        # else:
        case_directory = os.path.join(output_dir,
                                      "validation_case_{}".format(index))
        run_validation_case(data_index=index,
                            output_dir=case_directory,
                            model=model,
                            data_file=data_file,
                            training_modalities=training_modalities,
                            output_label_map=output_label_map,
                            labels=labels,
                            threshold=threshold,
                            overlap=overlap,
                            permute=permute)
    data_file.close()
def load_model(model_name):
    """
    """
    # Import model (HACK)
    sys.path.append(os.path.join(paths["trainer"], model_name))
    from capsnet import CapsuleNet

    sys.path.pop()

    # Load model
    try:
        model_params = pickle_load(
            os.path.join(paths["trainer"], model_name, "outs",
                         "model_params.pkl"))
    except FileNotFoundError:
        return None, None

    dataset = model_params.pop("dataset")
    _, model = CapsuleNet(**model_params)
    model_params["dataset"] = dataset

    # Clean modules (HACK)
    del sys.modules["capsnet"]

    return model, model_params
Example 24
    def __init__(self, config, sess):
        self.config = config
        self.sess = sess
        self.log = utils.Log()

        self.episode_length = int(self.config['META']['EPISODE_LENGTH'])
        self.action_dim = int(self.config['META']['ACTION_DIM'])
        self.statistic_dim = int(self.config['META']['STATISTIC_DIM'])
        self.reward_dim = int(self.config['META']['REWARD_DIM'])
        self.discount_factor = float(self.config['META']['DISCOUNT_FACTOR'])
        self.log_step = int(self.config['META']['LOG_STEP'])
        self.sample_episodes_per_batch = int(self.config['TPGR']['SAMPLE_EPISODES_PER_BATCH'])
        self.sample_users_per_batch = int(self.config['TPGR']['SAMPLE_USERS_PER_BATCH'])
        self.learning_rate = float(self.config['TPGR']['LEARNING_RATE'])
        self.l2_factor = float(self.config['TPGR']['L2_FACTOR'])
        self.entropy_factor = float(self.config['TPGR']['ENTROPY_FACTOR'])
        self.child_num = int(self.config['TPGR']['CHILD_NUM'])
        self.boundary_rating = float(self.config['ENV']['BOUNDARY_RATING'])
        self.eval_batch_size = int(self.config['TPGR']['EVAL_BATCH_SIZE'])
        self.train_batch_size = self.sample_episodes_per_batch * self.sample_users_per_batch

        self.result_file_path = '../data/result/result_log/' + time.strftime('%Y%m%d%H%M%S') + '_' + self.config['ENV']['ALPHA'] + '_' + self.config['ENV']['RATING_FILE']
        self.rnn_file_path = '../data/run_time/%s_rnn_model_%s' % (self.config['ENV']['RATING_FILE'], self.config['TPGR']['RNN_MODEL_VS'])
        self.load_model = self.config['TPGR']['LOAD_MODEL'] == 'T'
        self.load_model_path = '../data/model/%s_tpgr_model_%s' % (self.config['ENV']['RATING_FILE'], self.config['TPGR']['MODEL_LOAD_VS'])
        self.save_model_path = '../data/model/%s_tpgr_model_%s' % (self.config['ENV']['RATING_FILE'], self.config['TPGR']['MODEL_SAVE_VS'].split('s')[0])
        self.tree_file_path = '../data/run_time/%s_tree_model_%s_%s_c%d_%s' % (self.config['ENV']['RATING_FILE'], self.config['TPGR']['CLUSTERING_VECTOR_TYPE'].lower(), self.config['TPGR']['CLUSTERING_TYPE'].lower(), self.child_num, self.config['TPGR']['TREE_VS'])
        self.hidden_units = [int(item) for item in self.config['TPGR']['HIDDEN_UNITS'].split(',')] if self.config['TPGR']['HIDDEN_UNITS'].lower() != 'none' else []

        self.forward_env = Env(self.config)
        self.user_num, self.item_num, self.r_matrix, self.user_to_rele_num = self.forward_env.get_init_data()

        self.boundry_user_id = self.forward_env.boundry_user_id
        self.test_user_num = int(self.user_num/self.eval_batch_size)*self.eval_batch_size-self.boundry_user_id
        self.bc_dim = int(math.ceil(math.log(self.item_num, self.child_num)))

        self.env = [Env(self.config, self.user_num, self.item_num, self.r_matrix, self.user_to_rele_num) for i in range(max(self.train_batch_size, self.eval_batch_size * int(math.ceil(self.user_num / self.eval_batch_size))))]

        ###
        self.rnn_input_dim = self.action_dim + self.reward_dim + self.statistic_dim
        self.rnn_output_dim = self.rnn_input_dim
        self.layer_units = [self.statistic_dim + self.rnn_output_dim] + self.hidden_units + [self.child_num]

        self.is_eval = False
        self.qs_mean_list = []
        self.storage = []

        self.training_steps = 0
        if self.load_model:
            self.training_steps = int(self.config['TPGR']['MODEL_LOAD_VS'].split('s')[-1])

        tree_model = utils.pickle_load(self.tree_file_path)
        self.id_to_code, self.code_to_id = (tree_model['id_to_code'], tree_model['code_to_id'])
        self.aval_val = self.get_aval()
        self.log.log('making graph')
        self.make_graph()
        self.sess.run(tf.global_variables_initializer())
        self.log.log('graph made')
Example 25
def predict(model, model_prediction_dir, label):
    print("Predicting...")
    model.eval()
    try:
        data_file = tables.open_file(config["pred_data_file"], "r")
        #load test idx
        test_idxs = pickle_load(config["test_file"])
        # for index in range(len(data_file.root.data)):
        for index in test_idxs:
            if "subject_ids" in data_file.root:
                case_dir = os.path.join(model_prediction_dir, data_file.root.subject_ids[index].decode('utf-8'))
            else:
                case_dir = os.path.join(model_prediction_dir, "pred_case_{}".format(index))
            if not os.path.exists(case_dir):
                os.makedirs(case_dir)
                
            data_array = np.asarray(data_file.root.data[index])[np.newaxis]
            affine = data_file.root.affine[index]
            
            assert data_array.shape == config["input_shape"], "Wrong data shape! Expected {0}, but got {1}.".format(config["input_shape"], data_array.shape)
            if config["cuda_devices"] is not None:
                inputs = torch.from_numpy(data_array)
                inputs = inputs.type(torch.FloatTensor)
                inputs = inputs.cuda()
            with torch.no_grad():
                if config["VAE_enable"]:
                    outputs, distr = model(inputs)
                else:
                    outputs = model(inputs)   
            output_array = np.asarray(outputs.tolist())
            output_array = output_array > 0.5
            #for whole labels ben
            # output_image_whole = nib.Nifti1Image(output_array[0][0].astype('int'), affine)
            # output_image_enhancing = nib.Nifti1Image(output_array[0][1].astype('int'), affine)
            # output_image_core = nib.Nifti1Image(output_array[0][2].astype('int'), affine)
            # output_image_whole.to_filename(os.path.join(case_dir, "prediction_label_core.nii.gz"))
            # output_image_enhancing.to_filename(os.path.join(case_dir, "prediction_label_edema.nii.gz"))
            # output_image_core.to_filename(os.path.join(case_dir, "prediction_label_enhancing.nii.gz"))
            #for single label
            output_image = nib.Nifti1Image(output_array[0][0].astype('int'), affine)
            output_image.to_filename(os.path.join(case_dir, "prediction_label_{}.nii.gz".format(label)))
            #combine all the labels
            if len(os.listdir(case_dir))==3 and label==4:
                output_image_combined = np.zeros(output_array[0][0].shape)
                #get other two segs,i.e prediction_label_1.nii.gz, prediction_label_2.nii.gz
                output_image_core = nib.load(os.path.join(case_dir, "prediction_label_1.nii.gz")).get_data()
                output_image_whole = nib.load(os.path.join(case_dir, "prediction_label_2.nii.gz")).get_data()

                #merge 3 segs into 1
                output_image_combined[output_image_whole == 1] = 2
                output_image_combined[output_image_core == 1] = 1
                output_image_combined[output_array[0][0].astype('int') == 1] = 4
                output_image_combined = nib.Nifti1Image(output_image_combined, affine)
                output_image_combined.to_filename(os.path.join(case_dir, "prediction_label_seg_whole.nii.gz"))

    finally:
        data_file.close()
Example 26
    def get_style_images(self, _id=""):
        fname = 'style_imgs_{}'.format(self.rst)

        if _id:
            fname += "_" + str(_id)

        return utils.pickle_load(
            os.path.join(self.base_dir,
                         'dataset/{}.pkl'.format(fname)))[:self.max_size]
Example 27
def main():
    args = parse_args()
    set_seeds(args.seed)

    # Because I keep forgetting to select the correct env
    assert args.env in args.history_file

    env = create_env(args.env)
    print("Loading environment", args.env)
    print("Action space:", env.action_space)
    print("Observation space:", env.observation_space)

    history = pickle_load(args.history_file)
    if args.best_agent:
        agents = get_best_agents_dedup(history, 1)
        eval_outfile = 'eval_best_agent'
    elif args.best_agent_ensemble:
        agents = get_best_agents_dedup(history, 3)
        eval_outfile = 'eval_best_agent_ensemble'
    elif args.last_agent:
        agents = [history['agents'][-1][0]]
        eval_outfile = 'eval_last_agent'
    elif args.last_agent_ensemble:
        agents = history['agents'][-1][:3]
        eval_outfile = 'eval_last_agent_ensemble'
    else:
        assert False

    dim_in, dim_out = get_input_output_dim(env)
    policies = [a.get_policy(dim_in, dim_out) for a in agents]
    policy = EnsemblePolicy(policies)

    if args.evaluate:
        total_rew = []
        for i in range(args.evaluate):
            env.seed(args.seed + i)
            ep_rew, ep_len = run_episode(env, policy)
            total_rew.append(ep_rew)
            print(
                f"ep = {i + 1}/{args.evaluate} reward = {ep_rew:.2f} len = {ep_len}"
            )
        print(
            f"mean_reward = {np.mean(total_rew):.2f} +- {np.std(total_rew):.2f}"
        )
        np.save(Path(args.history_file).with_name(eval_outfile), total_rew)

    elif args.render:
        for i in count():
            env.seed(args.seed + i)
            ep_rew, ep_len = run_episode(env, policy, render_human)
            print(f"ep = {i + 1}/inf reward = {ep_rew:.2f} len = {ep_len}")

    elif args.gif:
        render_gif = RenderGif()
        env.seed(args.seed)
        run_episode(env, policy, render_gif)
        render_gif.save(args.gif)
Example 28
def debug_build_features(count_only=False):
    all_features = Trainer.build_features(2, 2)
    print(len(all_features))
    if count_only:
        return
    for feature in all_features:
        print(feature)
    pickle_save(all_features, "all_features.pkl")
    feature_reloaded = pickle_load('all_features.pkl')
    assert feature_reloaded == all_features
Example 29
    def __init__(self,
                 classifier_filename,
                 detect_window_size=24,
                 detect_window_step_size=24,
                 n_process=1):
        self.classifier = pickle_load(classifier_filename)
        self.window_size = detect_window_size
        self.step_size = detect_window_step_size
        self.n_process = n_process
        self.pool = None if self.n_process <= 1 else Pool(self.n_process)
Example 30
def get_validation_split(data_file,
                         training_file,
                         validation_file,
                         data_split=0.8,
                         overwrite=False):
    """
    """
    if overwrite or not os.path.exists(training_file):
        print("Creating validation split...")
        nb_samples = data_file.root.data.shape[0]
        sample_list = list(range(nb_samples))
        training_list, validation_list = split_list(sample_list,
                                                    split=data_split)
        pickle_dump(training_list, training_file)
        pickle_dump(validation_list, validation_file)
        return training_list, validation_list
    else:
        print("Loading previous validation split...")
        return pickle_load(training_file), pickle_load(validation_file)
Example 31
    def __init__(self,
                 base_dir,
                 batch_size,
                 mode=1,
                 cls=1,
                 prune=None,
                 de_norm=False):
        TRAIN = 1
        TEST = 2

        self.base_dir = base_dir
        self.batch_size = batch_size
        ds_dir = os.path.join(self.base_dir, 'dataset/class_{}'.format(cls))
        if mode == TRAIN:
            self.x = utils.pickle_load(ds_dir + '/imgs_train.pkl')
            self.y = utils.pickle_load(ds_dir + '/marks_train.pkl')
        elif mode == TEST:
            self.x = utils.pickle_load(ds_dir + '/imgs_test.pkl')
            self.y = utils.pickle_load(ds_dir + '/marks_test.pkl')
        else:
            raise ("Invalid option, should be one {} or {}".format(
                TRAIN, TEST))

        if de_norm:
            self.x = utils.de_norm(self.x)
            self.y = utils.de_norm(self.y)

        self.labels = np.array(
            [1 if np.sum(mask) > 0 else 0 for mask in self.y])

        if prune is not None:
            self.x, self.y, self.labels = utils.prune(self.x, self.y,
                                                      self.labels, prune)

        self.x = utils.norm(self.x)
        self.y = utils.norm(self.y)
        self.classes = np.unique(self.labels)
        self.per_class_ids = {}
        ids = np.array(range(len(self.x)))
        for c in self.classes:
            self.per_class_ids[c] = ids[self.labels == c]

        print(Counter(self.labels))
Example 32
def load_scaler(fname):
    """
    Load just the scaler from a model .zip
    """
    with zipfile.ZipFile(fname, mode='r') as zipf:
        # scaler - extract to tmpfname
        fd, tmpfname = tempfile.mkstemp()
        os.close(fd)
        with open(tmpfname, 'wb') as sf:
            with zipf.open('scaler.pickle') as zsf:
                sf.write(zsf.read())
        scaler = utils.pickle_load(sf.name)
        os.remove(tmpfname)
    return scaler
Example 33
    def __init__ (self, bus_name, bus_path):
        self.current_status = None
        self.app_ids = []
        self.applications = {}
        self.user_ids = []
        self.users = []
        self.status = {}
        self.updated_timestamp = None
        self.notification_num = 10 
        self.refresh_interval = 60
        self.refresh_hanedler = None
        self.notification = []
        self.session = None
        self.session_code = None
        self.api_key = '9103981b8f62c7dbede9757113372240'
        self.secret = '4cd1dffd6bf0d466bf9ffcf2dcf7805c'
        self.office_status = defs.NO_LOGIN
        self.last_nid = 0
        self.prepare_directories ()

        path = self.local_data_dir + "/cache.pickle"
        cache = utils.pickle_load (path)
        if cache != None:
            for skey in cache:
                setattr (self, skey, cache[skey])

        try:
            dbus.service.Object.__init__ (self, bus_name, bus_path)
        except KeyError:
            logging.debug ("DBus interface registration failed - other wallbox running somewhere")
            pass

        path = self.local_data_dir + "/auth.pickle"
        fb = utils.restore_auth_status (path, self.api_key, self.secret)
        if fb != None:
            self.uid = fb.uid
            self.status_changed (defs.IS_LOGIN)
            self.fb = fb

            if self.uid not in self.user_ids:
                self.user_ids.append (self.uid)

            self.refresh_hanedler = gobject.timeout_add (self.refresh_interval * 1000, self._refresh)
        else:
            self.status_changed (defs.NO_LOGIN)
Example 34
def latlng_to_shp(cityid):
    """
    Converts sampling CSV (lat, long) into shapefiles
    """
    if not override and path.isfile(path.join(LATLNGS_SHP_DIR, str(cityid) + '.shp')):
        print '%s.shp already exists. Skipping' % cityid
        return

    print 'Processing latlng data for city %s...' % cityid
    data = pickle_load(path.join(LATLNGS_DIR, str(cityid)))
    print 'Data loaded'

    writer = shapefile.Writer(shapefile.POINT)
    writer.autoBalance = 1
    writer.field('price', 'N')

    for make in MAKES:
        writer.field(make[:10], 'N', 19, 9)

    count = 0
    for (lat, lng) in data:
        writer.point(lng, lat)

        record = {
            'price': data[(lat, lng)]['price']
        }
        for make in MAKES:
            if 'makes' in data[(lat, lng)]:
                record[make[:10]] = str(data[(lat, lng)]['makes'].get(make, 0.0))[:19]
            else:
                record[make[:10]] = 0.0
        writer.record(**record)
        count += 1
        if (count % 5000) == 0:
            print '%s entries copied' % count

    shp_path = path.join(LATLNGS_SHP_DIR, str(cityid))

    print 'Writing shapefile...'
    writer.save(shp_path)
    print 'Defining projection...'
    if projection: arcpy.DefineProjection_management(shp_path + '.shp', PROJECTION_FILE)
    print 'Written shapefile to %s' % shp_path
Example 35
    def _load_state(self):
        obj = utils.pickle_load(self.wdir+"/layerwisetrainer_state")
        self.layer_index = obj["layer_index"]
        self.iter = obj["iter"]
        self.loss = obj["loss"]
        self.mlp_best = obj["mlp_best"]
        self.mlp_crrnt = obj["mlp_crrnt"]
        #self.iters_without_impr = obj["iters_without_impr"]
        self.train_sets.set_state(obj["train_sets"])

        out = StringIO.StringIO()
        print >>out, "\n********** Resuming from **********"
        print >>out, "layer_index", self.layer_index
        print >>out, "iter", self.iter
        print >>out, "loss", self.loss
        print >>out, "mlp_best", self.mlp_best
        print >>out, "mlp_crrnt", self.mlp_crrnt
        self.logger.info(out.getvalue())

        load(self.model, self.mlp_crrnt, gradients=True)
Example 36
    def _load_state(self):
        obj = utils.pickle_load(self.wdir+"/trainer_state")
        self.iter = obj["iter"]
        self.done = obj["done"]
        self.loss = obj["loss"]
        self.rate = obj["rate"]
        self.mlp_best = obj["mlp_best"]
        self.halving = obj["halving"]
        self.wasAccepted = obj["wasAccepted"]
        self.train_sets.set_state(obj["train_sets"])
        self.valid_sets.set_state(obj["valid_sets"])

        out = StringIO.StringIO()
        print >>out, "\n********** Resuming from **********"
        print >>out, "iter", self.iter
        print >>out, "done", self.done
        print >>out, "loss", self.loss
        print >>out, "rate", self.rate
        print >>out, "mlp_best", self.mlp_best
        print >>out, "halving", self.halving
        print >>out, "wasAccepted", self.wasAccepted
        self.logger.info(out.getvalue())
Example 37
def regist_amakan():
    amazon_url_list = utils.pickle_load('amazon_url_list.pickel')

    for url in amazon_url_list[22:]:
        try:
            # 個別ページにアクセス
            driver.get(amakan_product_url.format(url))
            print(driver.find_element_by_css_selector('.card-header').text)

            # 「読んだ」ボタンを取得
            elem = driver.find_element_by_css_selector('.wis-numbers > .waves-effect')

            # 既に「読んだ」がついていたらスキップ
            if 'active' not in elem.get_attribute('class'):
                elem.click()
        except Exception as e:
            # なんか失敗したらとりあえずエラーだけ出しておく
            print(url)
            print(e)

        # 感謝のsleep1秒
        sleep(1)
Example 38
def compute_resampling(city_id):
    log('Computing resample points for %s', city_id)

    output_dir = join(RESAMPLE_DIR, str(city_id))
    if not exists(output_dir):
        os.makedirs(output_dir)
        log('Created %s', output_dir)
    else:
        log('%s already exists!', output_dir)

    # find the county fips codes
    city = pickle_load(join(CITY_DIR, str(city_id)))
    city_name = city['name'].strip().title()
    state_name = city['state'].strip().title()
    log('Looking up state info for %s...', state_name)

    if 'Utah' in state_name:
        state_name = 'UT'  # weird ass corner case..

    state = us.states.lookup(state_name)
    fips_set = fips_codes_dict[(city['name'].lower(), state.fips)]
    log('County fips codes for %s, %s: %s', city_name, state.name, fips_set)

    road_shps = fetch_road_shp(output_dir, fips_set)

    all_roads_shp = join(output_dir, 'all_roads.shp')
    if not exists(all_roads_shp):
        log('Merging %s into %s', list(road_shps), all_roads_shp)
        arcpy.Merge_management(list(road_shps), all_roads_shp)
        log('Merge complete')

    boundary_shp = join(CITY_BOUNDARY_SHP_DIR, 'tl_2014_%s_place.shp' % state.fips)
    city_boundary_shp = join(output_dir, 'city_boundary.shp')
    if not exists(city_boundary_shp):
        log('Fetching city boundary from %s...', boundary_shp)
        query = "\"NAME\" Like '%%%s%%'" % city['name'].title()
        log('Using query %s', query)
        arcpy.MakeFeatureLayer_management(boundary_shp, 'temp', query)
        arcpy.CopyFeatures_management('temp', city_boundary_shp)
        log('City boundary %s fetched', city_boundary_shp)

    all_roads_clipped_shp = join(output_dir, 'all_roads_clipped.shp')
    if not exists(all_roads_clipped_shp):
        log('Clipping all_roads into city boundary...')
        arcpy.Clip_analysis(all_roads_shp, city_boundary_shp, all_roads_clipped_shp)
        log('Clip complete')

    all_roads_buffered_shp = join(output_dir, 'all_roads_buffered.shp')
    if not exists(all_roads_buffered_shp):
        log('Buffering into %s...', all_roads_buffered_shp)
        arcpy.Buffer_analysis(all_roads_clipped_shp, all_roads_buffered_shp, '20 Meter')
        log('Buffering complete')

    sample_grid_shp = join(output_dir, 'sample_grid.shp')
    sample_grid_labels_shp = join(output_dir, 'sample_grid_label.shp')
    if not exists(sample_grid_shp):
        log('Creating fishnet...')
        desc = arcpy.Describe(all_roads_buffered_shp)
        arcpy.CreateFishnet_management(
            sample_grid_shp,
            str(desc.extent.lowerLeft),
            str(desc.extent.XMin) + ' ' + str(desc.extent.YMax + 10),
            '0.0003', '0.0003', '0', '0', str(desc.extent.upperRight),
            'LABELS',
            '#',
            'POLYLINE'
        )
        log('Fishnet creation complete')

    sample_grid_clipped_shp = join(output_dir, 'sample_grid_clipped.shp')
    if not exists(sample_grid_clipped_shp):
        log('Clipping fishnet to buffered roads...')
        arcpy.Clip_analysis(sample_grid_labels_shp, all_roads_buffered_shp,
                            sample_grid_clipped_shp)
        log('Clip fishnet to buffered roads complete')

    '''
    sample_grid_erased_shp = join(output_dir, 'sample_grid_erased.shp')
    if not exists(sample_grid_erased_shp):
        print 'Projecting using', PROJECTION_FILE
        # arcpy.DefineProjection_management(sample_grid_clipped_shp, PROJECTION_FILE)
        print 'Erasing existing samples from fishnet...'
        samples_shp = join(LATLNGS_SHP_DIR, str(city_id) + '.shp')
        arcpy.Erase_analysis(sample_grid_clipped_shp, samples_shp,
                            sample_grid_erased_shp, '20 Meter')
        print 'Erasing existing samples from fishnet complete'
    '''

    log('FINISHED computing resampling for %s: %s, %s', city_id, city_name, state.name)
    log('======================END==========================')
    return sample_grid_clipped_shp
Example 39
import sys
from os import path
import time

from census import Census, ALL
from fcc.census_block_conversions import census_block_dict

import shapefile
from constants import CACHE_DIR, LATLNGS_SHP_DIR
from utils import pickle_save, pickle_load

YEAR = 2010
try:
    BLOCK_DATA_CACHE = pickle_load(path.join(CACHE_DIR, 'block_data'))
except Exception as e:
    print 'CANNOT LOAD BLOCK DATA CACHE: ', str(e)
    sys.exit(1)
CENSUS_DATA_CACHE = pickle_load(path.join(CACHE_DIR, 'census_data_cache'))
CENSUS_API_KEY = '3a7f0def09f356fa48f84558824b09a009c1f6e2'
INCOME_VARIABLE = 'B06011_001E'
AGE_VARIABLE = 'B05004_004E'
client = Census(CENSUS_API_KEY).acs


def get_block_data(lat, lng, data):
    if not data.get((lat, lng)):
        data[(lat, lng)] = census_block_dict(lat, lng, year=YEAR)
    return data[(lat, lng)]


def get_variable(lat, lng, data, var=INCOME_VARIABLE):
for cityid in CITY_IDS:
    city = City(cityid)
    print 'Processing', city.name, city.state
    name = city.name.title()
    state = city.state.title()
    area = city.area
    samples = city.samples

    polygon_path = path.join(SAMPLES_DIR, 'polygons', str(cityid))
    polygon = pickle.load(open(polygon_path, 'r'))
    sample_area = polygon['area']

    coverage = float(sample_area) / float(area)

    moran_indices = pickle_load(path.join(CACHE_DIR, 'moran_indices'))

    digest.append({
        'cityid': cityid,
        'name': name,
        'state': state,
        'area': area,
        'samples': samples,
        'sample_area': sample_area,
        'coverage': coverage,
        'moran_index': moran_indices[cityid]
    })

pickle.dump(digest, open(path.join(STATS_DIR, 'all_cities'), 'wb'))
print 'FINISHED'
Example 41
def compute_city(cityid):
    data = pickle_load(path.join(CACHE_DIR, 'MA_census_data'))
    print data
Example 42
from os import path
from collections import defaultdict

import shapefile

from utils import pickle_save, pickle_load
from constants import CACHE_DIR, LATLNGS_SHP_DIR

try:
    BLOCK_DATA_CACHE = pickle_load(path.join(CACHE_DIR, 'block_data'))
except Exception as e:
    print 'CANNOT LOAD BLOCK DATA CACHE: ', str(e)
    BLOCK_DATA_CACHE = {}


def get_block_id(data):
    return data['Block']['FIPS'][:-1]

def group_price_by_block(cityid):
    reader = shapefile.Reader(path.join(LATLNGS_SHP_DIR, str(cityid)))

    changed = False
    block_total = {}
    block_data = BLOCK_DATA_CACHE[cityid]

    for sr in reader.shapeRecords():
        (lng, lat) = sr.shape.points[0]
        data = block_data.get((lat, lng))

        if not data:
            continue
import sys

import shapefile

from constants import CACHE_DIR, SHAPEFILE_DIR, CONVEXHULL_DIR, CITY_IDS, PROJECTION_FILE
from utils import pickle_load, pickle_save


def format_zipcode(zipcode):
    zipcode = str(zipcode)
    while len(zipcode) < 5:
        zipcode = '0' + zipcode
    return zipcode
    
if __name__=='__main__':
    all_cars_data = pickle_load(path.join(CACHE_DIR, 'all_cars_data'))
    all_census_data = pickle_load(path.join(CACHE_DIR, 'all_census_data'))
    reader = shapefile.Reader(path.join(CACHE_DIR, 'zipcodes/zip_poly/zip_poly'))
    
    zipcodeList = {}
    for key, data in all_cars_data.iteritems():
        zipcode = data[94]
        cityid = int(data[95])
        formatted_zipcode = format_zipcode(zipcode)

        if formatted_zipcode not in zipcodeList:
            zipcodeList[formatted_zipcode] = 1
   

    print 'Reading polygon file...'
    zip_poly = {}
def comp_sketch(matrix, objective, load_N=False, save_N=False, N_dir='../N_file/', **kwargs):
    """
    Given matrix A, the function comp_sketch computes a sketch for A and performs further operations on PA.
    It returns the total running time and the desired quantity.

    parameter:
        matrix: a RowMatrix object storing the matrix [A b]
        objective: either 'x' or 'N'
            'x': the function returns the solution to the problem min_x || PA[:,:-1]x - PA[:,-1] ||_2
            'N': the function returns a square matrix N such that PA[:,:-1]*inv(N) is a matrix with orthonormal columns
        load_N: load the precomputed N matrices if possible (it reduces the actual running time for sampling sketches)
        save_N: save the computed N matrices for future use
        sketch_type: either 'projection' or 'sampling'
        projection_type: cw, gaussian, rademacher or srdht
        c: projection size
        s: sampling size (for sampling sketch only)
        k: number of independent trials to run
    """

    sketch_type = kwargs.get('sketch_type')

    if not os.path.exists(N_dir):
        os.makedirs(N_dir)

    if objective == 'x':
        
        if sketch_type == 'projection':
            projection = Projections(**kwargs)
            t = time.time()
            x = projection.execute(matrix, 'x', save_N)
            t = time.time() - t

            if save_N:
                logger.info('Saving N matrices from projections!')
                N = [a[0] for a in x]
                x = [a[1] for a in x]
                # saving N
                filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k')))+ '.dat'
                data = {'N': N, 'time': t}
                pickle_write(filename,data)
 
        elif sketch_type == 'sampling':
            s = kwargs.get('s')
            new_N_proj = 0
            N_proj_filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) +'.dat'

            if load_N and os.path.isfile(N_proj_filename):
                logger.info('Found N matrices from projections, loading them!')
                N_proj_filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) +'.dat'
                result = pickle_load(N_proj_filename)
                N_proj = result['N']
                t_proj = result['time']
            else: # otherwise, compute it
                t = time.time()
                projection = Projections(**kwargs)
                N_proj = projection.execute(matrix, 'N')
                t_proj = time.time() - t
                new_N_proj = 1

            sampling = Sampling(N=N_proj)
            t = time.time()
            x = sampling.execute(matrix, 'x', s, save_N )
            t = time.time() - t + t_proj

            if save_N and new_N_proj:
                logger.info('Saving N matrices from projections!')
                #filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat'
                data = {'N': N_proj, 'time': t_proj}
                pickle_write(N_proj_filename,data)

            if save_N:
                logger.info('Saving N matrices from sampling!')
                N = [a[0] for a in x]
                x = [a[1] for a in x]
                filename = N_dir + 'N_' + matrix.name + '_sampling_s' + str(int(kwargs.get('s'))) + '_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat'
                data = {'N': N, 'time': t}
                pickle_write(filename,data)

        else:
            raise ValueError('Please enter a valid sketch type!')
        return x, t

    elif objective == 'N':
        if sketch_type == 'projection':
            N_proj_filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat'

            if load_N and os.path.isfile(N_proj_filename):
                logger.info('Found N matrices from projections, loading them!')
                result = pickle_load(N_proj_filename)
                N = result['N']
                t = result['time']
            else:
                t = time.time()
                projection = Projections(**kwargs)
                N = projection.execute(matrix, 'N')
                t = time.time() - t

                if save_N:
                    logger.info('Saving N matrices from projections!')
                    data = {'N': N, 'time': t}
                    pickle_write(N_proj_filename,data)

        elif sketch_type == 'sampling':
            s = kwargs.get('s')
            new_N_proj = 0
            new_N_samp = 0

            N_samp_filename = N_dir + 'N_' + matrix.name + '_sampling_s' + str(int(kwargs.get('s'))) + '_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat'
            N_proj_filename = N_dir + 'N_' + matrix.name + '_projection_' + kwargs.get('projection_type') + '_c' + str(int(kwargs.get('c'))) + '_k' + str(int(kwargs.get('k'))) + '.dat'

            if load_N and os.path.isfile(N_samp_filename):
                logger.info('Found N matrices from sampling, loading them!')
                result = pickle_load(N_samp_filename)
                N = result['N']
                t = result['time']

            elif load_N and os.path.isfile(N_proj_filename):
                logger.info('Found N matrices from projections, loading them!')
                result = pickle_load(N_proj_filename)
                N_proj = result['N']
                t_proj = result['time']

                sampling = Sampling(N=N_proj)
                t = time.time()
                N = sampling.execute(matrix, 'N', s)
                t = time.time() - t + t_proj
                new_N_samp = 1

            else:
                t = time.time()
                projection = Projections(**kwargs)
                N_proj = projection.execute(matrix, 'N')
                t_proj = time.time() - t
                new_N_proj = 1

                t = time.time()
                sampling = Sampling(N=N_proj)
                N = sampling.execute(matrix, 'N', s)
                t = time.time() - t + t_proj
                new_N_samp = 1

            if save_N and new_N_proj:
                logger.info('Saving N matrices from projections!')
                data = {'N': N_proj, 'time': t_proj}
                pickle_write(N_proj_filename,data)

            if save_N and new_N_samp:
                logger.info('Saving N matrices from sampling!')
                data = {'N': N, 'time': t}
                pickle_write(N_samp_filename,data)

        else:
            raise ValueError('Please enter a valid sketch type!')
        return N, t
    else:
        raise ValueError('Please enter a valid objective!')
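# A hedged usage sketch for comp_sketch. The `matrix` object is assumed to be a RowMatrix
# built following the surrounding project's conventions, and the sketch sizes below are
# illustrative only.
x, t_x = comp_sketch(matrix, 'x',
                     load_N=True, save_N=True,
                     sketch_type='projection', projection_type='gaussian',
                     c=2000, k=5)

N, t_N = comp_sketch(matrix, 'N',
                     load_N=True, save_N=True,
                     sketch_type='sampling', projection_type='cw',
                     c=2000, s=5000, k=5)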