Example #1
def delta():
    asOfDay = '2018-10-11'
    loader = load_data.tdx_loader()
    
    print('load model...', datetime.datetime.now())
    model = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES + '_model_mkm.pickle')
    score_matrix = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES + '_df_mean.pickle')
    
    print('load sec list...', datetime.datetime.now())
    df_sec = pd.read_excel(DATA_FOLDER + REPORT_NAME_PD, converters={'code': str, 'market':int})
    #print(df_sec)
    df_sec = df_sec.set_index('code')
    
    df_result = df_sec.copy()
    df_result[asOfDay] = -1.000
             
    for index, row in df_sec.iterrows():
        try:
            df = prepareExampleAsOfDay(row['market'], index, asOfDay, loader)
            groups = model.predict(df['X'].tolist())

            group = groups[0]
            score = getScore(score_matrix, group)
        except Exception:
            df_result.loc[index, asOfDay] = -1
            print('%s,%s,%f' % (index, row['name'], -1))
        else:
            df_result.loc[index, asOfDay] = score
            print('%s,%s,%f' % (index, row['name'], score))
    
    writer = pd.ExcelWriter(DATA_FOLDER + REPORT_NAME_NEW)
    df_result.to_excel(writer,'Report')
    writer.save()
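
These examples all call pickle_load / pickle_dump (or pickle_save) from a project-local util module that is not shown on this page. As a rough, hypothetical sketch only (argument order and error handling differ between the projects above: some call pickle_dump(obj, path), others pickle_dump(path, obj, comment)), such helpers are typically thin wrappers around the standard pickle module:

import pickle


def pickle_load(path):
    # Load a pickled object; return None if the file is missing or unreadable.
    try:
        with open(path, 'rb') as f:
            return pickle.load(f)
    except (OSError, pickle.UnpicklingError):
        return None


def pickle_dump(obj, path):
    # Serialize obj to path with the highest available protocol.
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
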
Example #2
    def plot_query_sets(top_n, detection_cnt, quantile_size):
        """
        Plots and saves evaluation as .svg and .png files in "./output/evaluation/" folder

        :param top_n: Number of scores, from high to low, taken into account in the evaluation
        :param detection_cnt: Number of landmarks detected in the query image
        :param quantile_size: Size of quantile
        """

        print(f'{top_n}-{detection_cnt}')
        fig = plt.figure()
        ax = fig.add_subplot(111)

        quantile_q = int(1/quantile_size)

        ax.axhline(y=localization.POSITION_STEP_SIZE, linestyle='--', color='black', label='$\\Delta P$')

        for detections_path, scores_path, name in query_set_paths:
            detections = util.pickle_load(detections_path)
            scores = util.pickle_load(scores_path)

            assert(detections is not None)
            assert(scores is not None)

            data, n = iterate_queries(gt_estimator, possible_poses, detections, scores, detection_cnt, top_n)

            print(data.shape)
            median = np.median(data, axis=0)
            lower_quantile = np.quantile(data, quantile_size, axis=0)
            upper_quantile = np.quantile(data, 1 - quantile_size, axis=0)

            color = next(ax._get_lines.prop_cycler)['color']
            x = np.array(range(data.shape[1]))
            ax.plot(x, median, color=color, label=f'{name} ($n$={n})')
            ax.plot(x, lower_quantile, '--', linewidth=1, alpha=0.5, color=color, label=None)
            ax.plot(x, upper_quantile, '--', linewidth=1, alpha=0.5, color=color, label=None)
            #ax.fill_between(x, lower_quantile, upper_quantile, alpha=0.2)

        #ax.yaxis.set_major_locator(plticker.MultipleLocator(base=10.0))
        ax.grid()
        ax.set_ylim((0, 50))
        ax.set_xlabel('rank')
        ax.set_ylabel('median min. localization error (m)')
        ax.legend(loc='upper right')
        plural_s = 's' if detection_cnt > 1 else ''
        plt.suptitle(f'Localization accuracy (queries with {detection_cnt} detection{plural_s})', y=0.96)
        plt.title('$\\Delta P = 5$ m, $\\Delta\\theta = 10^{{\\circ}}$, {quantile_q}-quantiles'.format(quantile_q=quantile_q), fontsize=10)
        plt.savefig(f'./output/evaluation/png/evaluation-{top_n}-{detection_cnt}.png', bbox_inches='tight')
        plt.savefig(f'./output/evaluation/svg/evaluation-{top_n}-{detection_cnt}.svg', bbox_inches='tight')
        plt.show()
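
For reference (toy numbers, not taken from the query sets above): with quantile_size = 0.05 the code computes quantile_q = int(1/0.05) = 20, so the title reports 20-quantiles and the dashed curves are the 5th and 95th percentile bands around the median. A minimal numpy check of that banding:

import numpy as np

data = np.random.rand(1000, 10) * 50   # stand-in error matrix: 1000 queries x 10 ranks
quantile_size = 0.05
print(int(1 / quantile_size))                              # 20
print(np.median(data, axis=0).shape)                       # (10,) median curve
print(np.quantile(data, quantile_size, axis=0).shape)      # (10,) lower band
print(np.quantile(data, 1 - quantile_size, axis=0).shape)  # (10,) upper band
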
Example #3
 def _read_seqno(self, seq):
     try:
         return util.pickle_load(os.path.join(self.dirname, snp(seq)))
     except IOError as e:
         log.err("Warning: got exception reading sequence number: " +
                 str(e))
         return None
Example #4
def load_training_data():
    df1 = util.pickle_load(DATA_FOLDER + 'Data_0-500_df.pickle')
    print('df1.shape: ', df1.shape)
    df2 = util.pickle_load(DATA_FOLDER + 'Data_500-900_df.pickle')
    print('df2.shape: ', df2.shape)
    df3 = util.pickle_load(DATA_FOLDER + 'Data_900-1500_df.pickle')
    print('df3.shape: ', df3.shape)
    df4 = util.pickle_load(DATA_FOLDER + 'Data_1500-2000_df.pickle')
    print('df4.shape: ', df4.shape)

    print('Merging the DFs...')
    df = pd.concat([df1, df2, df3, df4], ignore_index=True)
    print('Merged DF shape:', df.shape, ' saving to pickle...')
    util.pickle_dump(df, DATA_FOLDER + TRAINING_EXAMPLES + '_df_all.pickle')

    return df
Example #5
    def __init__(self):
        # filenames are the keys that pair each image with its text
        self.train_file_names = u.pickle_load(c.train_filename_path)
        self.test_file_names = u.pickle_load(c.test_filename_path)

        self.train_file_names = sorted(self.train_file_names)
        self.test_file_names = sorted(self.test_file_names)

        self.train_class_id = c.train_class_info
        self.test_class_id = c.test_class_info
        # self.train_class_id = u.pickle_load2(c.train_class_id_txt_path)
        # self.test_class_id = u.pickle_load2(c.test_class_id_txt_path)

        # text
        self.tokenizer = RegexpTokenizer(r'\w+')

        if not os.path.isfile(c.vocab_path):
            train_text = self.__load_all_text(self.train_file_names)
            test_text = self.__load_all_text(self.test_file_names)
            all_text = train_text + test_text

            self.__word_count_statistics(all_text)

            vocab = v.vocab()
            vocab.create(all_text)

        self.word_2_index = u.json_load(c.vocab_path)
        self.index_2_word = u.json_load(c.index_2_word_path)
        # self.index_2_word = {v: k for k, v in self.word_2_index.items()}
        self.vocab_size = len(self.word_2_index)
        print("vocab_size : ", self.vocab_size)

        # image
        self.base_img_size = [64, 128, 256]
        label_image_size = 256
        rate = 76 / 64
        self.image_transform = transforms.Compose([
            transforms.Resize(int(label_image_size * rate)),
            transforms.RandomCrop(label_image_size),
            transforms.RandomHorizontalFlip()
        ])

        self.norm = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        self.filenames_bbox = self.__load_bbox()
Example #6
 def load_reports(self):
     self.subscribers = util.pickle_load(self.reportfile)
     if self.subscribers is None:
         self.subscribers = []
     for s in self.subscribers:
         s['Busy'] = False
         s['Paused'] = False
         if 'Format' not in s:
             s['Format'] = 'json'
Example #7
def evaluate():
    df1 = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES +
                           '_df_trained.pickle')

    writer = pd.ExcelWriter(DATA_FOLDER + '/_val_.xlsx')
    df = df1[df1['group'] == 1618]
    df.to_excel(writer, 'Sheet1')
    #df2.to_excel(writer,'Sheet2')
    writer.save()
Example #8
 def load_reports(self):
     self.subscribers = util.pickle_load(self.reportfile)
     if self.subscribers is None:
         self.subscribers = []
     for s in self.subscribers:
         s['Busy'] = False
         s['Paused'] = False
         if 'Format' not in s:
             s['Format'] = 'json'
Example #9
def tmp():
    df_train = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES +
                                '_df_trained.pickle')

    df_mean = df_train.groupby('group').agg(['count', 'mean']).reset_index()

    writer = pd.ExcelWriter(DATA_FOLDER + '/_mean_.xlsx')
    df = pd.DataFrame(df_mean)
    df.to_excel(writer, 'Sheet1')
    #df2.to_excel(writer,'Sheet2')
    writer.save()
Example #10
def plot_space_leakage(data, num_samples, normalize=False, features=None,
                       dumpfile=None, replot=False):
    """ Scatter plots spatial distance vs euclidean distance in feature space
        for specified features. If features is None all features excluding
        latitude/longitude are included. Since the total number of pairs of
        points is typically large pairs are picked by sampling the data set
        randomly.
    """
    raw_features = list(data)
    if replot:
        res = pickle_load(dumpfile)
        distances = res['distances']
    else:
        distance_features = ['lat', 'lon']
        if normalize:
            # normalize all features to [0, 1]
            for f in list(data):
                if f in distance_features:
                    continue
                data[f] = (data[f] - data[f].min()) / (data[f].max() - data[f].min())

        if features is None:
            non_features = distance_features + ['GHF']
            features = [x for x in list(data) if x not in non_features]

        distances = []
        sys.stderr.write('Sampling %d pairs of points: \n' % num_samples)
        for i in range(num_samples):
            if (i+1) % 100 == 0:
                sys.stderr.write('%d...\n' % (i+1))
            p1, p2 = np.random.randint(0, len(data), 2)
            p1, p2 = data.iloc[p1], data.iloc[p2]
            feature_d = np.linalg.norm(p1[features] - p2[features])
            spatial_d = np.linalg.norm([p1['lat'] - p2['lat'],
                                        p1['lon'] - p2['lon']])
            distances.append((spatial_d, feature_d))
        if dumpfile:
            res = {'distances': distances}
            pickle_dump(dumpfile, res, 'space leakage')

    fig = plt.figure(figsize=(8, 10))
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter([x[0] for x in distances], [x[1] for x in distances],
               edgecolor=None, facecolor='k', alpha=.5)
    ax.set_xlabel('Distance in latitude-longitude')
    ax.set_ylabel('Distance in feature space')
    ax.grid(True)
    ax.set_title('Opacity of selected features with respect to spatial coordinates')

    fig.tight_layout()
Example #11
    def __init__(self, read_file_name='../../data/raw/trade_new.csv', month=[2, 3, 4], train=True, save_pos='/234'):
        self.read_file_name = read_file_name
        self.month = month
        data = self.read_raw_data(read_file_name)
        self.data = data
        if train:
            path = params.pkl_train_path + save_pos
        else:
            path = params.pkl_test_path + save_pos
        try:
            print('Feature loading ... from ', path, ' month: ', month)
            self.user_bci_agg = pickle_load(path+'/user_bci_agg.pkl')
            self.bci_user_agg = pickle_load(path+'/bci_user_agg.pkl')

            self.m_action_cr_agg = pickle_load(path+'/m_action_cr_agg.pkl')
            self.m_action_cr = pickle_load(path+'/m_action_cr.pkl')

            self.m_pen_cr_agg = pickle_load(path+'/m_pen_cr_agg.pkl')
            self.m_pen_cr = pickle_load(path+'/m_pen_cr.pkl')

            self.m_pd_cr_agg = pickle_load(path+'/m_pd_cr_agg.pkl')
            self.m_pd_cr = pickle_load(path+'/m_pd_cr.pkl')

            self.repeat = pickle_load(path+'/repeat.pkl')

            self.items = pickle_load(path+'/items.pkl')
            self.users = pickle_load(path+'/users.pkl')
            self.brands = pickle_load(path+'/brands.pkl')
            self.cats = pickle_load(path+'/cats.pkl')
            print('loading finished')

        except Exception:
            print('init..')
            user_bci_agg = bci_agg(data, months=month, groupby1=['user_id'], groupby2=['item_id'])
            bci_user_agg = user_agg(data, months=month, groupby=['brand_id', 'cat_id', 'item_id'])
            self.user_bci_agg = user_bci_agg
            self.bci_user_agg = bci_user_agg

            pickle_dump(self.user_bci_agg, path+'/user_bci_agg.pkl')
            pickle_dump(self.bci_user_agg, path+'/bci_user_agg.pkl')

            m_action_cr = monthly_action_cr(data, month)
            m_action_cr_agg = monthly_action_cr_agg(m_action_cr)
            self.m_action_cr_agg = m_action_cr_agg
            self.m_action_cr = m_action_cr

            pickle_dump(self.m_action_cr_agg, path+'/m_action_cr_agg.pkl')
            pickle_dump(self.m_action_cr, path+'/m_action_cr.pkl')


            m_pen_cr = monthly_penetration_cr(data, month, groupby=['brand_id', 'cat_id', 'item_id'])
            m_pen_cr_agg = penetration_agg(m_pen_cr)
            self.m_pen_cr_agg = m_pen_cr_agg
            self.m_pen_cr = m_pen_cr

            pickle_dump(self.m_pen_cr_agg, path+'/m_pen_cr_agg.pkl')
            pickle_dump(self.m_pen_cr, path+'/m_pen_cr.pkl')

            m_pd_cr = monthly_product_diversity_cr(data, months=month, groupby=['user_id', 'brand_id', 'cat_id'])
            k_attrs = {
                'user_id': ['item'],  # ['cat', 'brand', 'item'],
                'brand_id': ['cat', 'item'],
                'cat_id': ['brand', 'item']
            }
            m_pd_cr_agg = monthly_product_diversity_agg(m_pd_cr, k_attrs)
            self.m_pd_cr_agg = m_pd_cr_agg
            self.m_pd_cr = m_pd_cr

            pickle_dump(self.m_pd_cr_agg, path+'/m_pd_cr_agg.pkl')
            pickle_dump(self.m_pd_cr, path+'/m_pd_cr.pkl')

            self.repeat = repeat_feature(data, months=[2, 3, 4], groupby=['brand_id', 'cat_id', 'item_id', 'user_id'])
            pickle_dump(self.repeat, path+'/repeat.pkl')

            self.item_profile()
            self.user_profile()
            self.brand_profile()
            self.cat_profile()

            pickle_dump(self.items, path+'/items.pkl')
            pickle_dump(self.users, path+'/users.pkl')
            pickle_dump(self.brands, path+'/brands.pkl')
            pickle_dump(self.cats, path+'/cats.pkl')

            print('init finished')
        finally:
            print('finished..')
Example #12
def load_all_training_data():
    df = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES + '_df_all.pickle')
    print('df.shape: ', df.shape)
    return df
Example #13
def load_trained_data():
    df = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES +
                          '_df_trained.pickle')

    return df
Example #14
 def load_mean_and_std(self, mean_and_std_path):
     self.y_mean, self.y_std = pickle_load(mean_and_std_path)
Example #15
    util.pickle_object(RESUlT_BACK_UP_PATH, avg_rouges)


if __name__ == '__main__':
    # Automatically preprocess the body-text corpus and pickle it
    # util.data_pre_process(DATA_PATH, DATA_BACKUP_PATH, pickle_data=True)

    # Build the trigger-word dictionary for a single class, save it as CSV and print statistics (for presenting results)
    # tr = TriggerDict(os.path.join(TRIGGER_PATH, '结果.txt'))
    # tr.save_trigger_model_as_csv(os.path.join(TRIGGER_SAVE_PATH, '结果.csv'))
    # tr.show_info()

    # Directly pickle the results of the three classes of trigger-word dictionaries
    # util.pickle_trigger_dict(TRIGGER_PATH, TRIGGER_BACKUP_PATH)

    # Load the pickled trigger-word dictionaries, stored as a dict
    triggers = util.pickle_load(TRIGGER_BACKUP_PATH)

    # Load the pickled body-text corpus
    ls = util.pickle_load(DATA_BACKUP_PATH)
    for article in ls:
        article.set_trigger_dict(triggers)

    # Generate automatic-summary results for evaluation
    for i, a in enumerate(ls):
        # a.create_baseline(12, os.path.join(BASELINE_PATH, 'baseline_{0}.txt'.format(i + 1)))
        a.create_auto_abstract(
            5, os.path.join(ABSTRACT_RESULT_PATH, '{0}.txt'.format(a.title)))
        # a.create_rouge_result(3, os.path.join(OUTPUT_PATH, 'hyp_{0}.txt'.format(i + 1)))
    # recalls_rouge = [rouge_result[i] for i in range(0, len(rouge_result), 2)]
Example #16
 def _load_config(self):
     infile = "{}/{}_workspace.pkl".format(
         self._nbconfig['config_dir'],
         self._nbconfig['experiment_info']["data"])
     self._nbconfig = util.pickle_load(infile)
Example #17
def cities_output():

    #pull 'ID' from input field and store it
    address = request.args.get('ID')

    import util
    import distances
    import getGeocodes
    from datetime import date

    #graph = util.pickle_load('/var/www/iAppreciateNYC/subwaydata/NYCsubway_network_graph_9-28.pkl')
    graph = util.pickle_load('subwaydata/NYCsubway_network_graph_9-28.pkl')
    geoObj = getGeocodes.getGeoObj(address)
    closestStation = distances.getClosestStationGraph(geoObj.latitude,
                                                      geoObj.longitude,
                                                      graph)

    closestStationName = graph.node[closestStation]['name']

    mymap = Map(
        identifier="view-side",
        lat=geoObj.latitude,
        lng=geoObj.longitude,
        markers=[(geoObj.latitude, geoObj.longitude)],
        zoom=15
    )

    mydb = mdb.connect(user="******", host="localhost",
                       db="iapp2",  charset='utf8')
    with mydb:
        cur = mydb.cursor()
        cur.execute("SELECT sellData, `%s` FROM stationPPSFT2;" %
                    closestStation[:60])
        query_results = cur.fetchall()

    sellDate1, ppsqf = zip(*query_results)
    with mydb:
        cur = mydb.cursor()
        resultTable = closestStation+'_GPprediction'
        cur.execute("SELECT sellData, `%s`, y_pred, sigma_pred FROM `%s`;" %
                    (closestStation[:60]+'_filtered', resultTable))
        query_results = cur.fetchall()

    sellDate2, smoothed, pred, sigma = zip(*query_results)

    from pygal.style import Style
    custom_style = Style(label_font_size=16, major_label_font_size=16,
                         colors=('#ff1100', '#E89B53', '#0000ff',
                                 '#E89B53', '#E89B53'))

    dateline = pygal.DateLine(disable_xml_declaration=True,
                              x_label_rotation=25,
                              x_title='Date',
                              y_title='Price per square foot',
                              style=custom_style,
                              show_x_guides=True,
                              show_legend=False)
    dateline.x_labels = [date(2008, 1, 1), date(2010, 1, 1),
                         date(2012, 1, 1), date(2014, 1, 1), date(2016, 1, 1),
                         date(2018, 1, 1), date(2020, 1, 1)]

    dateline.add(closestStation, zip(sellDate1, ppsqf))
    dateline.add('Forecast', zip(sellDate2, pred), show_dots=False,
                 stroke_style={'width': 5})
    dateline.add('Filtered', zip(sellDate2, smoothed), show_dots=False,
                 stroke_style={'width': 5})
    upperBound = (np.array(pred, dtype=float) +
                  1.96*np.array(sigma, dtype=float))
    lowerBound = (np.array(pred, dtype=float) -
                  1.96*np.array(sigma, dtype=float))
    dateline.add('Bound', zip(np.array(sellDate2)[np.isfinite(upperBound)],
                 upperBound[np.isfinite(upperBound)]),
                 stroke_style={'width': 5,
                 'dasharray': '3, 6, 12, 24'},
                 show_dots=False)
    dateline.add('Bound', zip(np.array(sellDate2)[np.isfinite(lowerBound)],
                 lowerBound[np.isfinite(lowerBound)]),
                 stroke_style={'width': 5,
                 'dasharray': '3, 6, 12, 24'},
                 show_dots=False)

    return render_template("output.html",
                           address=address,
                           station=closestStationName,
                           mymap=mymap,
                           line_chart=dateline)
Example #18
def load_test_data():
    X = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES + '_X_test.pickle')
    y = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES + '_y_test.pickle')
    return X, y
Example #19
 def _read_meta(self):
     self.meta = util.pickle_load(os.path.join(self.dirname, 'META'))
     if self.meta is None: self._blank_meta()
Example #20
            ax.arrow(position[0], position[1], dx, dy, width=arrow_width, color='black')

    visualize_landmarks(ax, landmark_list, sign_types, 2*arrow_width)

    # Visualize actual position
    ax.scatter(actual_position[0], actual_position[1], s=200, marker='x', color='red')

    tick_steps = 10.0
    ax.xaxis.set_major_locator(plticker.MultipleLocator(base=tick_steps))
    ax.yaxis.set_major_locator(plticker.MultipleLocator(base=tick_steps))

    plt.show()


if __name__ == '__main__':
    landmark_list = util.pickle_load(MAP_PATH)
    query_image = cv2.imread(QUERY_IMAGE_PATH)
    assert(query_image is not None)
    query_image_name = basename(QUERY_IMAGE_PATH)

    camera = get_camera_malaga_extract_07_right()

    sign_types = detection.ALL_SIGN_TYPES
    print('Detecting traffic signs in query image...')
    query_detections, detection_debug_image = detection.detect_traffic_signs_in_image(query_image, sign_types)
    plt.figure()
    plt.imshow(util.bgr_to_rgb(detection_debug_image))
    plt.show()

    print('Calculating possible poses...')
    possible_camera_poses = get_possible_poses(landmark_list, POSITION_STEP_SIZE, ANGLE_STEP_SIZE, LANDMARK_MARGIN)
Example #21
def plot_error_by_radius(data, roi_density, radii, ncenters, region='NA-WE',
                         replot=False, dumpfile=None, **gbrt_params):
    """ ncenters random centers are picked and over all given radii.
        Cross-validation errors (normalized RMSE and r2) are averaged over
        ncenters. One standard deviation mark is shown by a shaded region.
    """
    fig = plt.figure(figsize=(11,5))
    ax_rmse, ax_r2 = fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2)

    if replot:
        results = pickle_load(dumpfile)
    else:
        centers = [
            # HACK there's no easy way to check if for a given center the
            # demanded density is attainable for circles of all desired radii.
            # Ask for twice the density we need on the largest radius and hope
            # for the best!
            random_prediction_ctr(data, max(radii), region=region, min_density=2*roi_density)
            for _ in range(ncenters)
        ]
        shape = (ncenters, len(radii))
        # blank error matrix (keyed by center number and roi density index),
        # used to initialize multiple components of the results dictionary.
        blank = np.zeros(shape)

        results = {
            'ncenters': ncenters,
            'radii': radii,
            'errors': {
                'gbrt': {'rmse': blank.copy(), 'r2': blank.copy()},
                'linear': {'rmse': blank.copy(), 'r2': blank.copy()},
                'constant': {'rmse': blank.copy(), 'r2': blank.copy()},
            },
        }
        for idx_radius, radius in enumerate(radii):
            for idx_ctr, center in enumerate(centers):
                sys.stderr.write('# radius = %.0f, center %d/%d ' % (radius, idx_ctr + 1, ncenters))
                comp = compare_models(data, roi_density, radius, center, **gbrt_params)
                for k in results['errors'].keys():
                    # k is one of gbrt, linear, or constant
                    results['errors'][k]['r2'][idx_ctr][idx_radius] = comp[k][0]
                    results['errors'][k]['rmse'][idx_ctr][idx_radius] = comp[k][1]
        if dumpfile:
            pickle_dump(dumpfile, results, comment='GBRT performance results')

    errors = results['errors']
    radii = results['radii']
    ncenters = results['ncenters']

    num_sigma = 1

    # Plot GBRT results
    kw = {'alpha': .9, 'lw': 1, 'marker': 'o', 'markersize': 4, 'color': 'b'}
    mean_rmse = errors['gbrt']['rmse'].mean(axis=0)
    sd_rmse = np.sqrt(errors['gbrt']['rmse'].var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax_rmse.plot(radii, mean_rmse, label='GBRT', **kw)
    ax_rmse.fill_between(radii, lower_rmse, higher_rmse, facecolor='b', edgecolor='b', alpha=.3)

    mean_r2 = errors['gbrt']['r2'].mean(axis=0)
    sd_r2 = np.sqrt(errors['gbrt']['r2'].var(axis=0))
    lower_r2 = mean_r2 - num_sigma * sd_r2
    higher_r2 = mean_r2 + num_sigma * sd_r2
    ax_r2.plot(radii, errors['gbrt']['r2'].mean(axis=0), **kw)
    ax_r2.fill_between(radii, lower_r2, higher_r2, facecolor='b', edgecolor='b', alpha=.2)

    # Plot Linear Regression results
    kw = {'alpha': .7, 'lw': 1, 'marker': 'o', 'markersize': 4, 'markeredgecolor': 'r', 'color': 'r'}
    mean_rmse = errors['linear']['rmse'].mean(axis=0)
    sd_rmse = np.sqrt(errors['linear']['rmse'].var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax_rmse.plot(radii, mean_rmse, label='linear regression', **kw)
    ax_rmse.fill_between(radii, lower_rmse, higher_rmse, facecolor='r', edgecolor='r', alpha=.3)

    mean_r2 = errors['linear']['r2'].mean(axis=0)
    sd_r2 = np.sqrt(errors['linear']['r2'].var(axis=0))
    lower_r2 = mean_r2 - num_sigma * sd_r2
    higher_r2 = mean_r2 + num_sigma * sd_r2
    ax_r2.plot(radii, errors['linear']['r2'].mean(axis=0), **kw)
    ax_r2.fill_between(radii, lower_r2, higher_r2, facecolor='r', edgecolor='r', alpha=.2)

    # Plot constant predictor results
    kw = {'alpha': .7, 'lw': 1, 'ls': '--', 'marker': 'o', 'markersize': 4, 'color': 'k', 'markeredgecolor': 'k'}
    ax_rmse.plot(radii, errors['constant']['rmse'].mean(axis=0), label='constant predictor', **kw)
    ax_r2.plot(radii, errors['constant']['r2'].mean(axis=0), **kw)

    # Style plot
    ax_rmse.set_ylabel('Normalized RMSE', fontsize=14)
    ax_r2.set_ylabel('$r^2$', fontsize=16)
    ax_r2.set_ylim(-.05, 1)
    ax_r2.set_xlim(min(radii) - 100, max(radii) + 100)
    ax_r2.set_yticks(np.arange(0, 1.01, .1))
    ax_rmse.set_ylim(0, .5)
    ax_rmse.set_yticks(np.arange(0, .51, .05))
    ax_rmse.set_xlim(*ax_r2.get_xlim())
    for ax in [ax_rmse, ax_r2]:
        # FIXME force xlims to be the same
        ax.set_xlabel('radius of ROI (km)', fontsize=14)
        ax.grid(True)
    ax_rmse.legend(prop={'size':15}, numpoints=1)
    fig.tight_layout()
Example #22
import glob
import numpy as np
from os.path import join, basename

import util


def get_pickle_files(dir_path):
    return sorted(glob.glob(join(dir_path, '*.pickle')))


DATASET_NAME = '10_right'
DIR_PATH = join('./output/scores/', DATASET_NAME)
OUTPUT_PATH = join('./output/scores/merged', DATASET_NAME + '.pickle')

pickle_files = get_pickle_files(DIR_PATH)

result = {}

for pickle_file in pickle_files:
    image_name = basename(pickle_file)[:-len('.pickle')]
    scores = util.pickle_load(pickle_file)
    result[image_name] = scores

#print(result[list(result.keys())[0]].shape)

util.pickle_save(OUTPUT_PATH, result)
Example #23
 def _read_meta(self):
     self.meta = util.pickle_load(os.path.join(self.dirname, 'META'))
     if self.meta is None: self._blank_meta()
Example #24
def plot_feature_importance_analysis(data, roi_density, radius, ncenters,
                                     dumpfile=None, replot=False, **gbrt_params):
    """ Plots feature importance results (cf. Friedman 2001 or ESL) averaged
        over ncenters rounds of cross validation for given ROI training density
        and radius.
    """
    raw_features = list(data)
    for f in ['lat', 'lon', 'GHF']:
        raw_features.pop(raw_features.index(f))

    # a map to collapse categorical dummies for feature importances. The dict
    # has keys in `raw_features` indices, and values in `features` indices.
    decat_by_raw_idx = {}
    features = []
    for idx, f in enumerate(raw_features):
        match = [c for c in CATEGORICAL_FEATURES if c == f[:len(c)]]
        if match:
            assert len(match) == 1
            try:
                i = features.index(match[0])
            except ValueError:
                features.append(match[0])
                i = len(features) - 1
            decat_by_raw_idx[idx] = i
            continue
        features.append(f)
        decat_by_raw_idx[idx] = len(features) - 1

    if replot:
        res = pickle_load(dumpfile)
        gbrt_importances = res['gbrt_importances']
    else:
        # at this point features contains original feature names and raw_features
        # contains categorical dummies, in each round we map
        # feature_importances_, which has the same size as raw_features, to feature
        # importances for original features by adding the importances of each
        # categorical dummy.

        centers = [random_prediction_ctr(data, radius, min_density=roi_density) for _ in range(ncenters)]
        gbrt_importances = np.zeros([ncenters, len(features)])
        for center_idx, center in enumerate(centers):
            sys.stderr.write('%d / %d ' % (center_idx + 1, ncenters))
            X_train, y_train, X_test, y_test = \
                split_with_circle(data, center, roi_density=roi_density, radius=radius)
            X_train = X_train.drop(['lat', 'lon'], axis=1)
            X_test = X_test.drop(['lat', 'lon'], axis=1)
            assert not X_test.empty

            gbrt = train_gbrt(X_train, y_train, **gbrt_params)
            raw_importances = gbrt.feature_importances_
            for idx, value in enumerate(raw_importances):
                gbrt_importances[center_idx][decat_by_raw_idx[idx]] += value

        if dumpfile:
            res = {'gbrt_importances': gbrt_importances, 'features': features}
            pickle_dump(dumpfile, res, 'feature importances')

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    means = gbrt_importances.mean(axis=0)
    sds = np.sqrt(gbrt_importances.var(axis=0))
    sort_order = list(np.argsort(means))

    feature_names = [FEATURE_NAMES[features[i]] for i in sort_order]

    means, sds = [means[i] for i in sort_order], [sds[i] for i in sort_order]
    _yrange = [i-0.4 for i in range(len(features))] # labels in the middle of bars
    ax.barh(_yrange, means, color='k', ecolor='k', alpha=.3, xerr=sds[::-1])
    ax.set_ylim(-1, len(features))
    ax.grid(True)
    ax.set_yticks(range(len(features)))
    ax.set_yticklabels(feature_names, rotation=0, fontsize=10)
    ax.set_title('GBRT feature importances')
    fig.subplots_adjust(left=0.3) # for vertical xtick labels
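
The dummy-collapsing map built at the top of the function above (decat_by_raw_idx) can be illustrated on a toy feature list; the names below are made up, and CATEGORICAL_FEATURES is assumed to hold the categorical prefixes, as in the snippet:

import numpy as np

CATEGORICAL_FEATURES = ['soil']                     # assumed prefix list
raw_features = ['age', 'soil_A', 'soil_B']          # dummies share the 'soil' prefix
raw_importances = np.array([0.5, 0.2, 0.3])

features, decat_by_raw_idx = [], {}
for idx, f in enumerate(raw_features):
    match = [c for c in CATEGORICAL_FEATURES if c == f[:len(c)]]
    if match:
        if match[0] not in features:
            features.append(match[0])
        decat_by_raw_idx[idx] = features.index(match[0])
    else:
        features.append(f)
        decat_by_raw_idx[idx] = len(features) - 1

# sum dummy importances back onto the original categorical feature
collapsed = np.zeros(len(features))
for idx, value in enumerate(raw_importances):
    collapsed[decat_by_raw_idx[idx]] += value
print({f: float(v) for f, v in zip(features, collapsed)})   # {'age': 0.5, 'soil': 0.5}
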
Example #25
def plot_generalization_analysis(data, roi_density, radius, ncenters,
                                 ns_estimators, replot=False, dumpfile=None):
    """ For all given values for n_estimators (number of trees) for GBRT,
        perform cross-validation over ncenters ROIs with given radius and
        sample density. The average training and validation error for each
        number of trees is plotted. This is the standard plot to detect
        overfitting, defined as the turning point beyond which validation error
        starts increasing while training error is driven down to zero. As
        expected, GBRT does not overfit (validation error plateaus).

        One standard deviation is indicated by a shaded region.
    """
    fig, ax = plt.subplots()

    if replot:
        res = pickle_load(dumpfile)
        # exec() cannot rebind local names in Python 3, so unpack explicitly
        roi_density = res['roi_density']
        radius = res['radius']
        ns_estimators = res['ns_estimators']
        train_rmses = res['train_rmses']
        test_rmses = res['test_rmses']
        assert len(train_rmses) == len(test_rmses), \
               'array length (# of centers) should be the same for training and test'
    else:
        sys.stderr.write('=> Experiment: Generalization ' + \
                         '(roi_density: %.2f, radius: %.2f,' % (roi_density, radius) +
                         ' no. centers: %d, no. of n_estimators: %d)\n' % (ncenters, len(ns_estimators)))
        centers = [random_prediction_ctr(data, radius, min_density=roi_density)
                   for _ in range(ncenters)]

        train_rmses = np.zeros([ncenters, len(ns_estimators)])
        test_rmses = np.zeros([ncenters, len(ns_estimators)])
        for center_idx, center in enumerate(centers):
            sys.stderr.write('# center %d/%d\n' % (center_idx + 1, ncenters))
            X_train, y_train, X_test, y_test = \
                split_with_circle(data, center, roi_density=roi_density, radius=radius)
            X_train = X_train.drop(['lat', 'lon'], axis=1)
            X_test = X_test.drop(['lat', 'lon'], axis=1)
            assert not X_test.empty

            for n_idx, n in enumerate(ns_estimators):
                sys.stderr.write('  # n_estimators: %d ' % n)
                gbrt = train_gbrt(X_train, y_train, n_estimators=n)
                _, train_rmse = error_summary(y_train, gbrt.predict(X_train))
                _, test_rmse  = error_summary(y_test, gbrt.predict(X_test))
                train_rmses[center_idx][n_idx] = train_rmse
                test_rmses[center_idx][n_idx] = test_rmse

        if dumpfile:
            res = {'roi_density': roi_density,
                   'radius': radius,
                   'ns_estimators': ns_estimators,
                   'train_rmses': train_rmses,
                   'test_rmses': test_rmses}
            pickle_dump(dumpfile, res, comment='generalization errors')

    num_sigma = 1

    mean_rmse = test_rmses.mean(axis=0)
    sd_rmse = np.sqrt(test_rmses.var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax.plot(ns_estimators, mean_rmse, 'r', marker='o', markersize=3, alpha=.9, label='validation')
    ax.fill_between(ns_estimators, lower_rmse, higher_rmse, facecolor='r', edgecolor='r', alpha=.3)

    mean_rmse = train_rmses.mean(axis=0)
    sd_rmse = np.sqrt(train_rmses.var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax.plot(ns_estimators, mean_rmse, 'g', marker='o', markersize=3, alpha=.9, label='training')
    ax.fill_between(ns_estimators, lower_rmse, higher_rmse, facecolor='g', edgecolor='g', alpha=.3)

    ax.grid(True)
    ax.set_xlim(ns_estimators[0] - 100, ns_estimators[-1] + 100)
    ax.set_ylim(0, .3)
    ax.set_yticks(np.arange(0, .31, .05))
    ax.set_xlabel('Number of trees')
    ax.set_ylabel('Normalized RMSE')
    ax.legend(prop={'size':12.5})
    fig.tight_layout()
Example #26
def plot_sensitivity_analysis(data, roi_density, radius, noise_amps, ncenters,
                              replot=False, dumpfile=None):
    """ For each given noise amplitude, performs cross-validation on ncenters
        with given radius and density, the average over ncenters of
        normalized rmse between noise-free predictions and predictions based on
        noisy GHF is calculated. This perturbation in predictions is plotted
        against the expected absolute value of applied noise (amplitude).

        Both GBRT and linear regression are considered.
        One standard deviation is indicated by a shaded region.
        The case of Greenland is considered separately and overlayed.
    """
    fig = plt.figure(figsize=(10, 5))
    ax_gbrt = fig.add_subplot(1, 2, 1)
    ax_lin = fig.add_subplot(1, 2, 2)

    def _predict(X_train, y_train, X_test, noise_amp):
        # If noise ~ N(0, s^2), then mean(|noise|) = s * sqrt(2/pi),
        # cf. https://en.wikipedia.org/wiki/Half-normal_distribution
        # To get noise with mean(|noise|) / mean(y) = noise_ampl, we need to
        # have noise ~ N(0, s*^2) with s* = mean(y) * noise_ampl * sqrt(pi/2).
        noise = np.mean(y_train) * noise_amp * np.sqrt(np.pi/ 2) * np.random.randn(len(y_train))
        gbrt = train_gbrt(X_train.drop(['lat', 'lon'], axis=1),
                          y_train + noise)
        lin_reg = train_linear(X_train.drop(['lat', 'lon'], axis=1),
                               y_train + noise)
        gbrt_pred = gbrt.predict(X_test.drop(['lat', 'lon'], axis=1))
        lin_pred = lin_reg.predict(X_test.drop(['lat', 'lon'], axis=1))
        return gbrt_pred, lin_pred

    if replot:
        res = pickle_load(dumpfile)
        rmses_gbrt, rmses_lin = res['rmses_gbrt'], res['rmses_lin']
        noise_amps = res['noise_amps']
    else:
        centers = [random_prediction_ctr(data, radius, min_density=roi_density)
                   for _ in range(ncenters)]
        y0 = []
        centers = [None] + centers # one extra "center" (Greenland)
        rmses_gbrt = np.zeros((len(centers), len(noise_amps)))
        rmses_lin = np.zeros((len(centers), len(noise_amps)))
        for idx_ctr, center in enumerate(centers):
            if center is None:
                # Greenland case
                X_train, y_train, X_test = greenland_train_test_sets()
            else:
                X_train, y_train, X_test, _ = \
                    split_with_circle(data, center, roi_density=roi_density, radius=radius)
            sys.stderr.write('(ctr %d) noise_amp = 0.00 ' % (idx_ctr + 1))
            y0_gbrt, y0_lin = _predict(X_train, y_train, X_test, 0)
            for idx_noise, noise_amp in enumerate(noise_amps):
                sys.stderr.write('(ctr %d) noise_amp = %.2f ' % (idx_ctr + 1, noise_amp))
                y_gbrt, y_lin = _predict(X_train, y_train, X_test, noise_amp)
                rmse_gbrt = sqrt(mean_squared_error(y0_gbrt, y_gbrt)) / np.mean(y0_gbrt)
                rmse_lin = sqrt(mean_squared_error(y0_lin, y_lin)) / np.mean(y0_lin)
                rmses_gbrt[idx_ctr][idx_noise] = rmse_gbrt
                rmses_lin[idx_ctr][idx_noise] = rmse_lin

        if dumpfile:
            res = {'rmses_lin': rmses_lin, 'rmses_gbrt': rmses_gbrt, 'noise_amps': noise_amps}
            pickle_dump(dumpfile, res, 'sensitivity analysis')

    kw = dict(alpha=.6, lw=2, marker='o', color='k', label='global average')
    noise_amps = np.append([0], noise_amps)

    num_sigma = 1
    mean_rmse = rmses_lin[1:].mean(axis=0)
    sd_rmse = np.sqrt(rmses_lin[1:].var(axis=0))
    lower_rmse = np.append([0], mean_rmse - num_sigma * sd_rmse)
    higher_rmse = np.append([0], mean_rmse + num_sigma * sd_rmse)
    mean_rmse = np.append([0], mean_rmse)
    ax_lin.plot(noise_amps, mean_rmse, **kw)
    ax_lin.fill_between(noise_amps, lower_rmse, higher_rmse, facecolor='k', edgecolor='k', alpha=.2)

    mean_rmse = rmses_gbrt[1:].mean(axis=0)
    sd_rmse = np.sqrt(rmses_gbrt[1:].var(axis=0))
    lower_rmse = np.append([0], mean_rmse - num_sigma * sd_rmse)
    higher_rmse = np.append([0], mean_rmse + num_sigma * sd_rmse)
    mean_rmse = np.append([0], mean_rmse)
    ax_gbrt.plot(noise_amps, mean_rmse, **kw)
    ax_gbrt.fill_between(noise_amps, lower_rmse, higher_rmse, facecolor='k', edgecolor='k', alpha=.2)

    # Greenland case
    kw = dict(color='g', alpha=.5, lw=2.5, marker='o',
              markeredgewidth=0.0, label='Greenland')
    ax_lin.plot(noise_amps, np.append([0], rmses_lin[0]), **kw)
    ax_gbrt.plot(noise_amps, np.append([0], rmses_gbrt[0]), **kw)

    for ax in [ax_gbrt, ax_lin]:
        ax.set_xlabel('Relative magnitude of noise in training GHF', fontsize=12)
        ax.set_xlim(0, max(noise_amps) * 1.1)
        ax.set_aspect('equal')
        ax.grid(True)
        ax.set_xticks(np.arange(0, .35, .05))
        ax.set_yticks(np.arange(0, .35, .05))
        ax.set_xlim(-.025, .325)
        ax.set_ylim(-.025, .325)
        ax.legend(loc=1, fontsize=12)
    ax_gbrt.set_ylabel(r'Normalized RMSE difference in $\widehat{GHF}_{\mathrm{GBRT}}$', fontsize=12)
    ax_lin.set_ylabel(r'Normalized RMSE difference in $\widehat{GHF}_{\mathrm{lin}}$', fontsize=12)

    fig.tight_layout()
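
The noise scaling inside _predict relies on the half-normal identity E|N(0, s^2)| = s * sqrt(2/pi): drawing noise with s = mean(y) * noise_amp * sqrt(pi/2) therefore gives mean(|noise|) / mean(y) approximately equal to noise_amp. A quick standalone check of that identity (numpy only; the numbers below are arbitrary, not GHF data):

import numpy as np

np.random.seed(0)
y_mean, noise_amp, n = 60.0, 0.1, 1_000_000   # arbitrary mean level and noise amplitude
s = y_mean * noise_amp * np.sqrt(np.pi / 2)
noise = s * np.random.randn(n)
print(np.mean(np.abs(noise)) / y_mean)        # ~0.1, i.e. recovers noise_amp
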
Example #27
 def _read_seqno(self, seq):
     try:
         return util.pickle_load(os.path.join(self.dirname, snp(seq)))
     except IOError as e:
         log.err("Warning: got exception reading sequence number: " + str(e))
         return None
Example #28
def plot_error_by_density(data, roi_densities, radius, ncenters, region='NA-WE',
                          replot=False, dumpfile=None, **gbrt_params):
    """ ncenters random centers are picked and over all given ROI densities.
        Cross-validation errors (normalized RMSE and r2) are averaged over
        ncenters. One standard deviation mark is shown by a shaded region.
    """
    sys.stderr.write('=> Experiment: Error by Density (region: %s, no. centers: %d, no. densities: %d)\n' %
                     (region, ncenters, len(roi_densities)))
    fig = plt.figure(figsize=(11,5))
    ax_rmse, ax_r2 = fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2)

    if replot:
        results = pickle_load(dumpfile)
    else:
        centers = [
            random_prediction_ctr(data, radius, region=region, min_density=max(roi_densities))
            for _ in range(ncenters)
        ]
        shape = (ncenters, len(roi_densities))
        # blank error matrix (keyed by center number and roi density index),
        # used to initialize multiple components of the results dictionary.
        blank = np.zeros(shape)

        results = {
            'ncenters': ncenters,
            'roi_densities': roi_densities,
            'errors': {
                'gbrt': {'rmse': blank.copy(), 'r2': blank.copy()},
                'linear': {'rmse': blank.copy(), 'r2': blank.copy()},
                'constant': {'rmse': blank.copy(), 'r2': blank.copy()},
            },
        }
        for idx_density, roi_density in enumerate(roi_densities):
            for idx_ctr, center in enumerate(centers):
                sys.stderr.write('# density = %.2f, center %d/%d ' % (roi_density, idx_ctr + 1, ncenters))
                comp = compare_models(data, roi_density, radius, center, **gbrt_params)
                for k in results['errors'].keys():
                    # k is one of gbrt, linear, or constant
                    results['errors'][k]['r2'][idx_ctr][idx_density] = comp[k][0]
                    results['errors'][k]['rmse'][idx_ctr][idx_density] = comp[k][1]
        if dumpfile:
            pickle_dump(dumpfile, results, comment='GBRT performance results')

    errors = results['errors']
    roi_densities = results['roi_densities']
    ncenters = results['ncenters']
    num_sigma = 1

    # Plot GBRT results
    kw = {'alpha': .9, 'lw': 1, 'marker': 'o', 'markersize': 4, 'color': 'b'}
    mean_rmse = errors['gbrt']['rmse'].mean(axis=0)
    sd_rmse = np.sqrt(errors['gbrt']['rmse'].var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax_rmse.plot(roi_densities, mean_rmse, label='GBRT', **kw)
    ax_rmse.fill_between(roi_densities, lower_rmse, higher_rmse, facecolor='b', edgecolor='b', alpha=.3)

    mean_r2 = errors['gbrt']['r2'].mean(axis=0)
    sd_r2 = np.sqrt(errors['gbrt']['r2'].var(axis=0))
    lower_r2 = mean_r2 - num_sigma * sd_r2
    higher_r2 = mean_r2 + num_sigma * sd_r2
    ax_r2.plot(roi_densities, errors['gbrt']['r2'].mean(axis=0), **kw)
    ax_r2.fill_between(roi_densities, lower_r2, higher_r2, facecolor='b', edgecolor='b', alpha=.2)

    # Plot Linear Regression results
    kw = {'alpha': .7, 'lw': 1, 'marker': 'o', 'markersize': 4, 'markeredgecolor': 'r', 'color': 'r'}
    mean_rmse = errors['linear']['rmse'].mean(axis=0)
    sd_rmse = np.sqrt(errors['linear']['rmse'].var(axis=0))
    lower_rmse = mean_rmse - num_sigma * sd_rmse
    higher_rmse = mean_rmse + num_sigma * sd_rmse
    ax_rmse.plot(roi_densities, mean_rmse, label='linear regression', **kw)
    ax_rmse.fill_between(roi_densities, lower_rmse, higher_rmse, facecolor='r', edgecolor='r', alpha=.3)

    mean_r2 = errors['linear']['r2'].mean(axis=0)
    sd_r2 = np.sqrt(errors['linear']['r2'].var(axis=0))
    lower_r2 = mean_r2 - num_sigma * sd_r2
    higher_r2 = mean_r2 + num_sigma * sd_r2
    ax_r2.plot(roi_densities, errors['linear']['r2'].mean(axis=0), **kw)
    ax_r2.fill_between(roi_densities, lower_r2, higher_r2, facecolor='r', edgecolor='r', alpha=.2)

    # Plot constant predictor results
    kw = {'alpha': .7, 'lw': 1, 'ls': '--', 'marker': 'o', 'markersize': 4, 'color': 'k', 'markeredgecolor': 'k'}
    ax_rmse.plot(roi_densities, errors['constant']['rmse'].mean(axis=0), label='constant predictor', **kw)
    ax_r2.plot(roi_densities, errors['constant']['r2'].mean(axis=0), **kw)

    # Style plot
    ax_rmse.set_ylabel('Normalized RMSE', fontsize=14)
    ax_r2.set_ylabel('$r^2$', fontsize=16)
    ax_r2.set_ylim(-.05, 1)
    ax_r2.set_xlim(min(roi_densities) - 5, max(roi_densities) + 5)
    ax_r2.set_yticks(np.arange(0, 1.01, .1))
    ax_rmse.set_ylim(0, .5)
    ax_rmse.set_yticks(np.arange(0, .51, .05))
    ax_rmse.set_xlim(*ax_r2.get_xlim())
    for ax in [ax_rmse, ax_r2]:
        # FIXME force xlims to be the same
        ax.set_xlabel('density of training points in ROI ($10^{-6}$ km $^{-2}$)',
                      fontsize=14)
        ax.grid(True)
    ax_rmse.legend(prop={'size':15}, numpoints=1)
    fig.tight_layout()

Example #29
def print_heading(heading):
    heading_width = 80
    print('')
    print('=' * heading_width)
    print(heading)
    print('=' * heading_width)
    print('')


if __name__ == '__main__':
    print_heading('Feature detection')

    # Check if there are cached detections available.
    detections = util.pickle_load(DETECTION_CACHE_PATH)
    if detections is None:
        print(f'No saved detections found at \'{DETECTION_CACHE_PATH}\'. Running detection...')
        detections = detection.detect_traffic_signs(IMAGE_DIR_PATH, debug_output_path=DETECTION_DEBUG_PATH)
        if (len(detections) > 0):
            util.pickle_save(DETECTION_CACHE_PATH, detections)
    else:
        print(f'Loaded detections from \'{DETECTION_CACHE_PATH}\'.\n')

    detection_count = sum([len(detections[x]) for x in detections])
    print(f'Detected {detection_count} traffic signs in total.')


    print_heading('Feature matching')

    matches = matching.match_detections(IMAGE_DIR_PATH, detections)
Example #30
if __name__ == '__main__':
    # opt = argparse.ArgumentParser()
    # opt.add_argument('log_file', help="Path to tfevents file")
    # opt = vars(opt.parse_args())

    DP_arcface_log_val_path = "insightface/recognition/logs/DP_arcface_delta-0.001_lr-0.005_emb-512/20191209-212154/valid/events.out.tfevents.1575944514.c42a-s29.ufhpc.152218.143.v2"
    DP_arcface_log_tr_path = "insightface/recognition/logs/DP_arcface_delta-0.001_lr-0.005_emb-512/20191209-212154/train/events.out.tfevents.1575944514.c42a-s29.ufhpc.152218.135.v2"
    arcface_log_val_path = "insightface/recognition/logs/arcface_lr-0.005_emb-512/20191209-195158/valid/events.out.tfevents.1575939118.c42a-s29.ufhpc.128458.141.v2"
    arcface_log_tr_path = "insightface/recognition/logs/arcface_lr-0.005_emb-512/20191209-195158/train/events.out.tfevents.1575939118.c42a-s29.ufhpc.128458.133.v2"

    if not os.path.exists('figs'):
        os.makedirs('figs')

    process_DP_eps_acc(DP_arcface_log_tr_path, DP_arcface_log_val_path, "DP_arcface")
    # process_DP_eps_acc(DP_triplet_log_path, "DP_triplet")
    process_acc(arcface_log_tr_path, arcface_log_val_path, "arcface")
    # process_acc(triplet_log_path, "triplet")

    for tag in os.listdir('save'):
        adv_sim, adv_label, queries = pickle_load(os.path.join('save', tag, 'adv_simlabelqueries.pkl'))
        clean_sim, clean_label = pickle_load(os.path.join('save', tag, 'clean_simlabel.pkl'))
        
        draw_curve(adv_sim, adv_label, f"adv_{tag}")
        draw_curve(clean_sim, clean_label, f"clean_{tag}")
        
        with open(f'figs/hsja_queries_{tag}.txt', 'w') as f:
            mean, std = np.mean(queries), np.std(queries)
            f.write(f'{mean} +- {std}\n')

Example #31
    tick_steps = 10.0
    ax.xaxis.set_major_locator(plticker.MultipleLocator(base=tick_steps))
    ax.yaxis.set_major_locator(plticker.MultipleLocator(base=tick_steps))
    plt.show()


GPS_MEASUREMENTS_PATH = '07/gps.csv'
IMU_MEASUREMENTS_PATH = '07/imu.csv'
gps_full_data, imu_full_data = load_gps_and_imu_data(GPS_MEASUREMENTS_PATH,
                                                     IMU_MEASUREMENTS_PATH)
gt_estimator = GroundTruthEstimator(gps_full_data,
                                    imu_full_data,
                                    print_kf_progress=True)

landmark_list = util.pickle_load('./map_07.pickle')
possible_camera_poses = util.pickle_load('./map_07_possible_poses.pickle')
scores = util.pickle_load('./output/scores/merged/07_right_map.pickle'
                          )['img_CAMERA1_1261230000.908327_right.jpg']
detections = util.pickle_load(
    './detections_07_right.pickle')['img_CAMERA1_1261230000.908327_right.jpg']

# image = cv2.imread('../final/images/localization/img_CAMERA1_1261230000.908327_right.jpg')
# detection.detect_traffic_signs_in_image(image, {detection.TrafficSignType.CROSSING})

camera = get_camera_malaga_extract_07_right()
#pose = np.array([149.,         -12.,           0.,          -0.53446911,   0.46297167, -0.46297167,   0.53446911])
pose = ImagePose(position=np.array([149., -12., 0.]),
                 orientation=np.array(
                     [-0.53446911, 0.46297167, -0.46297167, 0.53446911]))
#pose = ImagePose(position=np.array([149., -17., 0.]), orientation=np.array([-0.53446911, 0.46297167, -0.46297167, 0.53446911]))
Example #32
        #print(image_name)
        ground_truth_pos = get_ground_truth(image_name, gt_estimator)
        score_arr = scores[image_name]
        detection_list = detections[image_name]

        if len(detection_list) == detection_cnt:
            rank_result = get_rank(possible_poses, score_arr, ground_truth_pos, top_n)
            rank[i,:] = rank_result
            i += 1

    return rank, count


if __name__ == '__main__':
    print("Loading general data...")
    possible_poses = util.pickle_load(POSES_PATH)
    gps_full_data = np.load(GPS_PATH)
    imu_full_data = None  # np.genfromtxt(IMU_PATH, skip_header=1)
    gt_estimator = GroundTruthEstimator(gps_full_data, imu_full_data, print_kf_progress=True)

    assert(possible_poses is not None)

    print("Done")


    def plot_query_sets(top_n, detection_cnt, quantile_size):
        """
        Plots and saves evaluation as .svg and .png files in "./output/evaluation/" folder

        :param top_n: Number of scores, from high to low, taken into account in the evaluation
        :param detection_cnt: Number of landmarks detected in the query image
Example #33
def main():
    # Set up command line arguments
    parser = argparse.ArgumentParser(
        description='Parse the PrIMe database.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '-o',
        '--outdir',
        type=str,
        metavar='DIR',
        help='Directory to save pickled dictionaries and lists in')
    parser.add_argument('depository',
                        type=str,
                        nargs='?',
                        default='depository',
                        metavar='PATH',
                        help='Path to PrIMe depository')
    args = parser.parse_args()
    out_dir = args.outdir
    depository = args.depository

    prime_species_dict = prime_reactions_dict = prime_species_in_reactions_dict = rmg_species_dict = None
    prime_species_path = os.path.join(out_dir, 'prime_species_dict.pickle')
    prime_reactions_path = os.path.join(out_dir, 'prime_reactions_dict.pickle')
    prime_species_in_reactions_path = os.path.join(
        out_dir, 'prime_species_in_reactions_dict.pickle')
    rmg_species_path = os.path.join(out_dir, 'rmg_species_dict.pickle')
    if out_dir is not None:
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        else:
            # Check if we can load existing dictionaries, which could save a lot of time
            print('Trying to load dictionaries...')
            try:
                prime_species_dict = pickle_load(prime_species_path)
            except IOError:
                print('Could not find prime_species_dict')
            else:
                print('Successfully loaded prime_species_dict')
            try:
                prime_reactions_dict = pickle_load(prime_reactions_path)
            except IOError:
                print('Could not find prime_reactions_dict')
            else:
                print('Successfully loaded prime_reactions_dict')
            try:
                prime_species_in_reactions_dict = pickle_load(
                    prime_species_in_reactions_path)
            except IOError:
                print('Could not find prime_species_in_reactions_dict')
            else:
                print('Successfully loaded prime_species_in_reactions_dict')
            try:
                rmg_species_dict = pickle_load(rmg_species_path)
            except IOError:
                print('Could not find rmg_species_dict')
            else:
                print('Successfully loaded rmg_species_dict')

    if prime_species_dict is None:
        print('Parsing species...')
        prime_species_dict = parse_species(depository)

    parsed_reactions = False
    nrxn_pre_kinetics = None
    if prime_reactions_dict is None:
        print('Parsing reactions...')
        prime_reactions_dict = parse_reactions(depository, prime_species_dict)
        nrxn_pre_kinetics = len(prime_reactions_dict)
        print('Parsing kinetics...')
        prime_reactions_dict = get_kinetics(depository, prime_reactions_dict)
        parsed_reactions = True
        # Note: The reactions are not necessarily in the correct direction at this point.
        #       Have to run match_direction first.

    print('Number of valid PrIMe species: {}'.format(len(prime_species_dict)))
    if parsed_reactions:
        print('Number of valid PrIMe reactions: {}'.format(nrxn_pre_kinetics))
    print('Number of valid PrIMe reactions with kinetics: {}'.format(
        len(prime_reactions_dict)))

    if out_dir is not None:
        print('Saving PrIMe species and reactions dictionaries to {}'.format(
            out_dir))
        pickle_dump(prime_species_path, prime_species_dict)
        pickle_dump(prime_reactions_path, prime_reactions_dict)

    if prime_species_in_reactions_dict is None:
        print('Extracting species in reactions...')
        # Only convert species actually involved in reactions
        prime_species_in_reactions_dict = {}
        for rxn in prime_reactions_dict.itervalues():
            for spc in rxn.reactants:
                prime_species_in_reactions_dict[spc.prime_id] = spc
            for spc in rxn.products:
                prime_species_in_reactions_dict[spc.prime_id] = spc

    if out_dir is not None:
        print('Saving PrIMe species in reactions dictionary to {}'.format(
            out_dir))
        pickle_dump(prime_species_in_reactions_path,
                    prime_species_in_reactions_dict)

    print('Converting species to RMG types...')
    if rmg_species_dict is None:
        rmg_species_dict = {}
    count_resolve_errors = 0
    for prime_id, spc in prime_species_in_reactions_dict.iteritems():
        # Don't bother converting if we already did so in a previous run
        if prime_id in rmg_species_dict:
            continue

        try:
            rmg_species_dict[prime_id] = spc.get_rmg_species()
        except ConversionError as e:
            count_resolve_errors += 1
            warnings.warn('Skipped {}: {}'.format(prime_id, e))
            continue
        except (ValueError, AttributeError, AtomTypeError) as e:
            warnings.warn('Skipped {}: {}'.format(prime_id, e))
            continue
        except KeyError as e:
            warnings.warn('Skipped {}: Atom type {} is not supported.'.format(
                prime_id, e))
            continue
        except urllib2.URLError as e:
            warnings.warn(
                'URLError encountered for {}: {}, retrying...'.format(
                    prime_id, e))
            try:
                rmg_species_dict[prime_id] = spc.get_rmg_species()
            except urllib2.URLError as e:
                warnings.warn(
                    'URLError encountered for {}: {}, retrying...'.format(
                        prime_id, e))
                try:
                    rmg_species_dict[prime_id] = spc.get_rmg_species()
                except urllib2.URLError as e:
                    warnings.warn('Skipped {}: {}'.format(prime_id, e))
                    continue
        except Exception as e:
            if "Couldn't parse" in str(e):
                warnings.warn('Skipped {}: {}'.format(prime_id, e))
                continue
            else:
                print('Error encountered during conversion of species {}.'.
                      format(prime_id),
                      file=sys.stderr)
                # Save output regardless, so we don't have to do all the work again next time
                if out_dir is not None:
                    print(
                        'Saving RMG species dictionary to {}'.format(out_dir))
                    pickle_dump(rmg_species_path, rmg_species_dict)
                raise
        else:
            print('Converted {}.'.format(prime_id))

    print('Number of PrIMe species in reactions: {}'.format(
        len(prime_species_in_reactions_dict)))
    print('Number of RMG species in reactions: {}'.format(
        len(rmg_species_dict)))
    print('Number of CIRpy resolve errors: {}'.format(count_resolve_errors))

    if out_dir is not None:
        print('Saving RMG species dictionary to {}'.format(out_dir))
        pickle_dump(rmg_species_path, rmg_species_dict)

    print('Converting reactions to RMG types...')
    reactions = []
    for rxn in prime_reactions_dict.itervalues():
        try:
            rxn.get_rmg_species_from_dict(rmg_species_dict)
        except KeyError:
            continue
        else:
            reactions.append(rxn.get_rmg_reaction())

    print('Number of RMG reactions: {}'.format(len(reactions)))

    if out_dir is not None:
        print('Saving RMG reactions list to {}'.format(out_dir))
        pickle_dump(os.path.join(out_dir, 'reactions.pickle'), reactions)
Example #34
def load_training_data():
    X = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES + '_X.pickle')
    y = util.pickle_load(DATA_FOLDER + TRAINING_EXAMPLES + '_y.pickle')
    return X, y