Example No. 1
    def initial_call(self, modulo, nb_calls, dataset, model):
        print_info('Init Callback: ' + str(self))
        self.modulo = modulo
        self.nb_calls = nb_calls
        if self.dataset is None:
            self.dataset = dataset
        self.model = model.module if type(model) is DataParallel else model
Example No. 2
def do_extraction(dataset, labels_index, file_name='representation_tsne'):
    representation, colors, labels = extract_representation(dataset, model, labels_index=labels_index)

    representation_embedded = TSNE(n_components=2).fit_transform(representation)

    # group the embedded points by label, keeping their colours alongside
    zipped = list(zip(representation_embedded, colors, labels))
    zipped.sort(key=lambda tup: tup[2])
    c = zipped[0][2]
    artists = []
    col, rep = [], []

    artists.append((rep, col, c))

    # start a new group whenever the label changes
    for row in zipped:
        if row[2] != c:
            col, rep = [], []
            c = row[2]
            artists.append((rep, col, c))
        col.append(row[1])
        rep.append(row[0])

    # converting to numpy
    for i in range(len(artists)):
        artists[i] = (np.array(artists[i][0]), np.array(artists[i][1]), artists[i][2])

    path = output_path(file_name + '.dump')
    with open(path, 'wb') as f:
        pickle.dump(artists, f)
    print_info('Representation saved at: ' + path)
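The example above depends on project helpers (extract_representation, output_path, print_info). As a minimal standalone sketch of the same idea, assuming only scikit-learn and NumPy are available and using illustrative names, the 2-D t-SNE embedding and per-label grouping can be reproduced like this:

# Standalone sketch: embed vectors with t-SNE and group them by label.
import pickle
import numpy as np
from sklearn.manifold import TSNE

def embed_and_group(representation, labels, dump_path='representation_tsne.dump'):
    embedded = TSNE(n_components=2).fit_transform(representation)
    artists = []
    for label in np.unique(labels):
        artists.append((embedded[labels == label], label))  # one group per label
    with open(dump_path, 'wb') as f:
        pickle.dump(artists, f)
    return artists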
Example No. 3
def create_model(model_class, model_params=None, model_name='model'):
    """
    create and eventually load model
    :param model_name:
    :param model_class:
    :param model_params:
    :param model_name:
    :return:
    """

    model_params = {} if model_params is None else model_params

    model = model_class(**model_params)

    if special_parameters.load_model:  # recover from checkpoint
        _load_model(model, model_name)

    # configure usage on GPU
    if use_gpu():
        model.to(first_device())
        model = torch.nn.DataParallel(model, device_ids=all_devices())

    # print info about devices
    print_info('Device(s): ' + str(device_description()))

    return model
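As a hedged, self-contained illustration of the GPU handling done by create_model (use_gpu, first_device and all_devices are project-specific helpers), plain PyTorch achieves the same effect with:

# Minimal sketch of the GPU wrapping step, using only the PyTorch API.
import torch

def to_devices(model):
    if torch.cuda.is_available():
        model = model.to(torch.device('cuda:0'))
        # replicate the model over all visible GPUs for data-parallel execution
        model = torch.nn.DataParallel(model, device_ids=list(range(torch.cuda.device_count())))
    return model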
Example No. 4
def create_ign_sparse(source_occ,
                      source_ign,
                      patch_size=64,
                      error_path=output_path("error_extract/"),
                      **kwargs):

    r = check_source(source_occ)
    occurrences = r['occurrences']
    r = check_source(source_ign)
    ign_images = r['maps']

    la93 = Proj(init='epsg:2154')

    # extract manager
    im_manager = IGNImageManager(ign_images)
    extract_size = patch_size
    extract_step = 1

    # loading the occurrence file
    df = pd.read_csv(occurrences, header='infer', sep=';', low_memory=False)
    max_lat = df['Latitude'].max()
    print(max_lat)

    # sorting the dataset to optimise the extraction
    df.sort_values('Latitude', inplace=True)

    print_info(str(len(df)) + ' occurrences to extract!')
Example No. 5
def plot_corr(model, X, absolute_value, threshold, figure_name='pcr'):
    activations, _ = get_activations(model, X)
    activations, _ = np.unique(activations, return_inverse=True, axis=0)

    min_activations = 10

    # partitions that are not on the domain are removed (constant activations are zeroed out)

    for i, v in enumerate(np.all(activations == activations[0, :], axis=0)):
        if v:
            activations[:, i] = 0
    # unique after corrections
    activations, _ = np.unique(activations, return_inverse=True, axis=0)

    nb_activations = activations.shape[0]
    nb_c_activations = min(nb_activations, min_activations)

    nb_params = -1
    for n, p in model.named_parameters():
        if len(p.shape) == 2:
            nb_params += 1
    plt(figure_name, figsize=(nb_c_activations * 6.4, nb_params * 4.8))

    vmin = 0. if absolute_value else -1.
    vmax = 1.
    print_info(str(nb_activations) + ' affine spaces used')
    for idx in range(nb_c_activations):
        tc = 0
        c = 0
        for name, params in model.named_parameters():

            if len(params.shape) == 2:

                A = params.detach().numpy()
                B = np.zeros((A.shape[0], A.shape[0]))
                plt(figure_name).subplot(nb_params, nb_c_activations,
                                         1 + c * nb_c_activations + idx)
                for i in range(A.shape[0]):

                    for j in range(A.shape[0]):
                        if activations[idx,
                                       tc + i] != 0 and activations[idx, tc +
                                                                    j] != 0:
                            B[i,
                              j] = np.dot(A[i, :], A[j, :]) / np.linalg.norm(
                                  A[i, :]) / np.linalg.norm(A[j, :])
                        else:
                            B[i, j] = None
                if absolute_value:
                    B = np.abs(B)
                if type(threshold) is not bool:
                    B = (B > threshold).astype(int)
                plt(figure_name).imshow(B, vmin=vmin, vmax=vmax)
                plt(figure_name).title(name)
                plt(figure_name).colorbar()
                tc += A.shape[0]
                c += 1
            if tc >= activations.shape[1]:
                break
Example No. 6
def save_classifier_weight(model):
    w = model.state_dict()['fc.weight']
    w = w.numpy()
    print(w)
    print(type(w))
    print_info("save weight")
    result_path = output_path('weight.npy')
    np.save(result_path, w)
    print_info("saved !")
Example No. 7
def fit(train,
        test,
        export=False,
        training_params=None,
        export_params=None,
        **kwargs):
    if not use_gpu():
        print_errors('XGBoost can only be executed on a GPU for the moment',
                     do_exit=True)

    training_params = {} if training_params is None else training_params
    export_params = {} if export_params is None else export_params

    d_test = xgb.DMatrix(np.asarray(test.get_vectors()),
                         label=np.asarray(test.labels))

    if not validation_only:
        print_h1('Training: ' + special_parameters.setup_name)
        print_info("get vectors...")

        X = np.asarray(train.get_vectors())
        y = np.asarray(train.labels)

        d_train = xgb.DMatrix(X, label=y)

        gpu_id = first_device().index

        kwargs['verbosity'] = verbose_level()
        kwargs['gpu_id'] = gpu_id

        eval_list = [(d_test, 'eval'), (d_train, 'train')]

        print_info("fit model...")

        bst = xgb.train(kwargs,
                        d_train,
                        num_boost_round=kwargs["num_boost_round"],
                        verbose_eval=kwargs["verbose_eval"],
                        evals=eval_list)

        save_model(bst)

    else:
        bst = load_model()

    print_h1('Validation/Export: ' + special_parameters.setup_name)
    predictions = bst.predict(d_test, ntree_limit=bst.best_ntree_limit)
    res = validate(predictions,
                   np.array(test.labels),
                   training_params['metrics']
                   if 'metrics' in training_params else tuple(),
                   final=True)
    print_notification(res, end='')
    if export:
        export_results(test, predictions, **export_params)
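For readers without the surrounding framework (special_parameters, save_model, validate, export_results), a minimal XGBoost run with the same DMatrix/train/predict structure looks roughly like the following sketch on toy CPU data; the parameter values are illustrative only:

# Standalone sketch of the xgb.DMatrix / xgb.train / predict flow.
import numpy as np
import xgboost as xgb

X_train, y_train = np.random.rand(100, 5), np.random.randint(0, 2, 100)
X_test, y_test = np.random.rand(20, 5), np.random.randint(0, 2, 20)

d_train = xgb.DMatrix(X_train, label=y_train)
d_test = xgb.DMatrix(X_test, label=y_test)

params = {'objective': 'binary:logistic', 'verbosity': 1}
bst = xgb.train(params, d_train, num_boost_round=20,
                evals=[(d_test, 'eval'), (d_train, 'train')])
predictions = bst.predict(d_test)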
Example No. 8
def check_machine():
    """
    Execute a few shell commands and print information about the machine.
    """

    # list of commands to be executed
    commands = ('env', 'module list', 'pwd', 'hostname')

    for c in commands:
        print(('[' + c + ']' + ' ' + '*' * 80)[:80])
        print_info(os.popen(c).read())
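check_machine streams command output through os.popen; a safer standalone variant using subprocess, sketched below with an illustrative command list, captures the output instead:

# Sketch: run a few diagnostic commands and print their output.
import subprocess

def check_machine_subprocess(commands=('env', 'pwd', 'hostname')):
    for c in commands:
        print(('[' + c + ']' + ' ' + '*' * 80)[:80])
        result = subprocess.run(c, shell=True, capture_output=True, text=True)
        print(result.stdout)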
Example No. 9
    def last_call(self):
        step = 0.005
        x = np.arange(-1, 1. + step, step)
        y = np.sqrt(np.maximum(1. - x**2, np.zeros(x.shape)))
        plt('circle').plot(x, y)
        y = -np.sqrt(np.maximum(1. - x**2, np.zeros(x.shape)))
        plt('circle').plot(x, y)
        labels = self.dataset.labels
        dataset = self.dataset.dataset
        plt('circle').scatter(dataset[labels == 0][:, 0],
                              dataset[labels == 0][:, 1])
        plt('circle').scatter(dataset[labels == 1][:, 0],
                              dataset[labels == 1][:, 1])
        for i, p in enumerate(self.parameters[0]):
            norm = np.sqrt(p[0]**2 + p[1]**2)
            if norm > self.coef_norm:
                self.coef_norm = norm

        for i, p in enumerate(self.parameters[0]):
            p /= self.coef_norm
            norm = np.sqrt(p[0]**2 + p[1]**2)

            new_norm = norm * self.wk[0][i] if self.use_wk else norm

            b = -self.bias[0][i] if self.use_bias else 0.
            b /= norm
            dx, dy = p[0] * new_norm / norm, p[1] * new_norm / norm

            x, y = (0, 0) if not self.use_bias else (p[0] * b / norm,
                                                     p[1] * b / norm)

            self.arrows.append(
                plt('circle').arrow(x,
                                    y,
                                    dx,
                                    dy,
                                    shape='full',
                                    head_width=0.04,
                                    head_length=0.08))

        fig = get_figure('circle')
        self.axis = fig.gca()

        anim = FuncAnimation(fig,
                             self.update,
                             frames=np.arange(0, len(self.parameters)),
                             interval=200)
        path = output_path('circle.gif')
        print_info('Saving GIF at ' + path)
        anim.save(path, dpi=80, writer='imagemagick')
        delete_figure('circle')
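The last_call callback above draws on several project wrappers (plt(name), get_figure, output_path, delete_figure). A bare-bones matplotlib FuncAnimation that redraws an arrow each frame and saves a GIF, which is the core mechanism used here, could look like this sketch (file name and frame count are illustrative):

# Sketch: animate a rotating arrow on the unit circle and save it as a GIF.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

fig, ax = plt.subplots()
theta = np.linspace(0, 2 * np.pi, 200)
ax.plot(np.cos(theta), np.sin(theta))  # unit circle
state = {'arrow': None}

def update(frame):
    if state['arrow'] is not None:
        state['arrow'].remove()  # drop the arrow drawn for the previous frame
    angle = 2 * np.pi * frame / 40
    state['arrow'] = ax.arrow(0, 0, np.cos(angle), np.sin(angle),
                              head_width=0.04, head_length=0.08)

anim = FuncAnimation(fig, update, frames=np.arange(40), interval=200)
anim.save('circle.gif', dpi=80, writer='pillow')  # 'imagemagick' also works if installed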
Example No. 10
def detect_machine():
    hostname = socket.gethostname()
    found_machine = False
    for k, v in clusters.items():
        for h in v:
            if hostname.startswith(h):
                special_parameters.machine = k
                found_machine = True
                break
        if found_machine:
            break
    if not found_machine:
        special_parameters.machine = hostname
    print_info('The machine was identified as ' + special_parameters.machine)
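A self-contained version of this hostname-to-cluster matching (the clusters dictionary and special_parameters are project globals) might read as follows; the cluster names are placeholders:

# Sketch: map a hostname prefix to a cluster name, falling back to the hostname itself.
import socket

CLUSTERS = {'jeanzay': ('jean-zay',), 'local': ('laptop', 'desktop')}  # illustrative

def detect_machine_name(clusters=CLUSTERS):
    hostname = socket.gethostname()
    for name, prefixes in clusters.items():
        if any(hostname.startswith(p) for p in prefixes):
            return name
    return hostname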
Example No. 11
def plot_occurrences(train, val, test):

    # df_train = pd.read_csv("/home/bdeneu/data/occurrences_glc18.csv", header='infer', sep=';', low_memory=False)
    # df_test = pd.read_csv("/home/bdeneu/data/occurrences_glc18_test_withlabel.csv", header='infer', sep=';', low_memory=False)
    # d_train = df_train[['Latitude', 'Longitude']].to_numpy()
    # d_test = df_test[['Latitude', 'Longitude']].to_numpy()

    d_train = np.asarray(train.dataset)
    d_test = np.asarray(test.dataset)
    d_val = np.asarray(val.dataset)

    geo_tr = project(d_train[:, 0], d_train[:, 1])
    #geo_te = project(d_test[:, 0], d_test[:, 1])
    #geo_va = project(d_val[:, 0], d_val[:, 1])

    #print(geo_te)
    s = 0.8
    plt.style.use('classic')
    fig, ax = plt.subplots()
    #ax.scatter(geo_tr[0][:], geo_tr[1][:], color='#00cc99', marker='s', s=s, label="train")
    ax.scatter(geo_tr[0][:], geo_tr[1][:], color='#93c47d', marker='s', s=s, label="train")
    #ax.scatter(geo_va[0][:], geo_va[1][:], color='#33ff33', marker='s', s=s, label="val")
    #ax.scatter(geo_te[0][:], geo_te[1][:], color='#d9ff66', marker='s', s=s, label="test")
    # ax = fig.add_subplot(111, axisbg='white')

    ax.set_xlim(3200, 4400)
    ax.set_ylim(2000, 3200)
    ax.spines['bottom'].set_color('#dddddd')
    ax.spines['top'].set_color('#dddddd')
    ax.spines['right'].set_color('#dddddd')
    ax.spines['left'].set_color('#dddddd')
    ax.tick_params(axis='x', colors='#dddddd')
    ax.tick_params(axis='y', colors='#dddddd')
    ax.yaxis.label.set_color('#dddddd')
    ax.xaxis.label.set_color('#dddddd')
    ax.title.set_color('#dddddd')
    #plt.legend(loc=1, markerscale=0.8, facecolor='#00FFFFFF')
    print("here")
    plt.show()
    print_info('figure saved at: ' + output_path('occurrences.png'))
    fig.savefig(output_path('occurrences.png'), transparent=True)
Example No. 12
def extract_7z(source, extension='.7z'):

    # loading a specific source
    r = check_source(source)

    dir_name = r['archive']
    dest_name = r['maps']

    os.chdir(dir_name)  # change directory from working dir to dir with files

    n = len(os.listdir(dir_name))

    for i, item in enumerate(
            os.listdir(dir_name)):  # loop through items in dir
        print_info(
            '\n------------------------------------------------------------------------------'
        )
        print_info(str(i + 1) + '/' + str(n))
        if item.endswith(extension):  # only extract files with the expected extension
            file_name = os.path.abspath(item)  # get full path of files
            print_h2(file_name)
            print_info('\n')

            os.system('7z x ' + file_name + ' -o' + dest_name)
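extract_7z builds a shell command string for os.system; a sketch of the same loop using subprocess with an argument list (which avoids shell quoting issues) is shown below. The '7z' binary and its '-o' output flag are assumed to be available exactly as in the original call.

# Sketch: extract every .7z archive of a directory with the 7z command-line tool.
import os
import subprocess

def extract_archives(dir_name, dest_name, extension='.7z'):
    items = [i for i in os.listdir(dir_name) if i.endswith(extension)]
    for i, item in enumerate(items, start=1):
        file_name = os.path.join(dir_name, item)
        print('{}/{} extracting {}'.format(i, len(items), file_name))
        # '-o' must be glued to the destination path, as in the original call
        subprocess.run(['7z', 'x', file_name, '-o' + dest_name], check=True)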
Example No. 13
def get_species_neurons_correlations():
    activations = np.load(output_path('activations.npy'))
    logits = np.load(output_path('logits.npy'))
    print_info("calculate correlation matrix between features and species")

    mean_act = np.mean(activations, axis=0)
    std_act = np.std(activations, axis=0)
    norm_act = (activations - mean_act) / std_act

    mean_log = np.mean(logits, axis=0)
    std_log = np.std(logits, axis=0)
    norm_log = (logits - mean_log) / std_log

    size = activations.shape[0] * activations.shape[1]
    c = size - np.count_nonzero(activations)
    print(str(c) + "/" + str(size) + " (" + str(c * 100.0 / size) + "%)")

    matrix = np.zeros((activations.shape[1], logits.shape[1]), dtype=float)

    for i in progressbar.progressbar(range(activations.shape[0])):
        act = norm_act[i]
        log = norm_log[i]
        for j in range(norm_act.shape[1]):
            matrix[j] += (log * act[j]) / activations.shape[0]

    result_path = output_path('correlation_activations.npy')
    print_info("save activations for species:", result_path)
    np.save(result_path, matrix)
    print_info("saved !")
Example No. 14
def print_model_parameters(model):
    for name, param in model.named_parameters():
        print_info(name + ' ' + str(param.shape))
    print_info('\n' + '*' * 50 + '\n')
    for name, param in model.named_parameters():
        print_info(name + ' *' * 10 + '\n' +
                   str(param.data.detach().numpy()).replace('array', ''))
Example No. 15
def export_bigdata(model, test, batch_size, buffer_size, size):
    num_workers = special_parameters.nb_workers
    test_loader = torch.utils.data.DataLoader(test,
                                              shuffle=False,
                                              batch_size=batch_size,
                                              num_workers=num_workers)

    results = []

    model.eval()
    export_path = output_path('predictions.csv')
    # check if labels have been indexed
    index_path = output_path('index.json')

    indexed_labels = get_index(index_path)

    with open(export_path, 'w') as f:
        print_info('Exporting predictions at ' + export_path)
        f.write('id,class_id,rank,proba\n')  # header

        warnings.simplefilter('ignore')  # silence warnings caused by old imports in progressbar
        bar = progressbar.ProgressBar(max_value=len(test_loader))
        warnings.simplefilter('default')
        for idx, data in enumerate(test_loader):
            # get the inputs
            inputs, labels = data

            outputs = model(inputs)

            results.append(outputs.detach().cpu().numpy())
            if len(results) >= buffer_size:
                _export_bigdata(f, results, test, indexed_labels, size)
                results = []
            bar.update(idx)
        if len(results) > 0:  # flush the remaining buffered predictions
            _export_bigdata(f, results, test, indexed_labels, size)
        bar.finish()
Example No. 16
def extract_patch(source, offset=0, check_file=True):
    """
    Extract IGN patch from IGN maps.
    :param source:
    :param offset:
    :param check_file:
    :return:
    """

    # checking the source
    r = check_source(source)

    # extract manager
    im_manager = IGNImageManager(r['maps'])
    extract_size = 64
    extract_step = 1

    # loading the occurrence file
    df = pd.read_csv(r['occurrences'],
                     header='infer',
                     sep=';',
                     low_memory=False)

    # sorting the dataset to optimise the extraction
    df.sort_values('Latitude', inplace=True)

    # offset management
    df = df.iloc[offset:]

    print_info(str(len(df)) + ' occurrences to extract!')

    im_manager.extract_patches(
        df[[r['longitude'], r['latitude'], r['id_name']]],
        r['patches'],
        size=extract_size,
        step=extract_step,
        check_file=check_file)
Example No. 17
    def wrapper(*args, **kwargs):
        start = time.time()
        print_info('[Executing ' + func.__name__ + ']')

        # check changeable parameters (command line and more)
        if func.__name__ in hp.overriding_parameters():
            for arg, name in zip(args, func.__code__.co_varnames):
                kwargs[name] = arg
            args = tuple()
            merge(kwargs, hp.overriding_parameters()[func.__name__])
        if len(args) > 0 or len(kwargs) > 0:
            add_config_elements('[' + func.__name__ + ']')
        if len(args) > 0:
            print_info('Args: ' + format_dict_and_tuple(args))
            add_config_elements('Args: ' + format_dict_and_tuple(args))

        if len(kwargs) > 0:
            print_info('Kwargs: ' + format_dict_and_tuple(kwargs))
            add_config_elements('Kwargs: ' + format_dict_and_tuple(kwargs))

        results = func(*args, **kwargs)
        print_durations(time.time() - start)
        return results
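wrapper is the inner function of a decorator that also merges command-line overrides (hp.overriding_parameters, merge and add_config_elements are project helpers). Stripped of that machinery, the timing-and-logging core corresponds to a standard decorator, sketched here with plain prints:

# Sketch: minimal timing/logging decorator with the same structure.
import time
import functools

def timed(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        print('[Executing ' + func.__name__ + ']')
        if args:
            print('Args: ' + repr(args))
        if kwargs:
            print('Kwargs: ' + repr(kwargs))
        result = func(*args, **kwargs)
        print('Duration: {:.3f}s'.format(time.time() - start))
        return result
    return wrapper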
Example No. 18
def compute_neural_directions(model,
                              X,
                              absolute_value,
                              threshold,
                              min_activations=10):
    # this method only works on fully connected models
    if type(model) is not fully_connected.Net:
        print_errors(str(type(model)) + ' must be of type ' +
                     str(fully_connected.Net) + '.',
                     do_exit=True)

    layers = [m for m in model.modules()
              if type(m) in (BatchNorm1d, Linear)][:-1]
    final_layers = []
    it = 0

    while it < len(layers):
        # linear layer
        M = layers[it]
        it += 1

        linear_app = M.weight.detach().cpu().numpy()

        if it < len(layers) and type(layers[it]) is BatchNorm1d:
            A = layers[it]
            var = np.diag(A.running_var.cpu().numpy())

            gamma = np.diag(A.weight.detach().cpu().numpy())
            bn = np.matmul(gamma, np.linalg.inv(var))

            linear_app = np.matmul(bn, linear_app)
            it += 1
        final_layers.append(linear_app)

    # get activations
    activations, _ = get_activations(model, X)
    activations, _ = np.unique(activations, return_inverse=True, axis=0)

    # partitions that are not on the domain are removed (constant activations are zeroed out)

    for i, v in enumerate(np.all(activations == activations[0, :], axis=0)):
        if v:
            activations[:, i] = 0

    # unique after corrections
    activations, _ = np.unique(activations, return_inverse=True, axis=0)

    vmin = 0. if absolute_value else -1.
    vmax = 1.

    vectors = [[] for _ in range(len(final_layers))]
    n_act = min(min_activations, len(activations))
    print_info("n_act: %d" % n_act)
    for i in range(n_act):

        la = None
        for li, l in enumerate(final_layers):
            activated = activations[i][li * l.shape[0]:(li + 1) * l.shape[0]]

            if la is None:
                la = final_layers[li] * activated[:, np.newaxis]
            else:
                la = np.matmul(final_layers[li], la) * activated[:, np.newaxis]

            for n in la:
                vectors[li].append(n)

    return vectors, vmin, vmax
Example No. 19
from datascience.visu.patch import pplot_patch
import numpy as np

# with option --more idx=12 to change the index from the command line...
from engine.logging import print_info
from engine.parameters.special_parameters import get_parameters

# load the idx + 1 first elements

idx = get_parameters('idx', 0)

train, _, _ = occurrence_loader(EnvironmentalIGNDataset,
                                source='full_ign',
                                id_name='X_key',
                                label_name='glc19SpId',
                                validation_size=0,
                                test_size=0,
                                limit=idx + 1)

patch, _ = train[idx]

patch = [l.int() for l in patch]

patch = patch[:-3] + [np.transpose(np.stack(patch[-3:], axis=0), (1, 2, 0))]

print_info('Printing patch at ' + str(train.dataset[idx]))

pplot_patch(patch, header=train.named_dimensions)

save_fig()
Example No. 20
def get_species_neurons_activations(model, grid_points, batch_size=32):
    activations = predict_grid(model, grid_points, batch_size=batch_size, features_activation=True)
    predictions = predict_grid(model, grid_points, batch_size=batch_size)
    logits = predict_grid(model, grid_points, batch_size=batch_size, logit=True)

    result_path = output_path('activations.npy')
    print_info("save activations:", result_path)
    np.save(result_path, activations)
    result_path = output_path('predictions.npy')
    print_info("save predictions:", result_path)
    np.save(result_path, predictions)
    result_path = output_path('logits.npy')
    print_info("save logits", result_path)
    np.save(result_path, logits)
    print_info("saved !")

    print_info("save weight")
    w = model.state_dict()['fc.weight']
    w = w.numpy()
    result_path = output_path('weight.npy')
    np.save(result_path, w)
    print_info("saved !")
Example No. 21
def predict(model,
            loader,
            loss,
            export=False,
            filters=tuple(),
            validation_size=10000,
            compute_loss=False):
    """
        Give the prediction of the model on a test set
        :param compute_loss:
        :param filters: set some output to 0
        :param validation_size:
        :param model: the model
        :param loader: the test set loader
        :param loss: the loss function
        :param export: if False the predictions are not saved, otherwise the results are exported on file.
                       if export is true the loader must not be shuffled...
        :return: the arrays of predictions and corresponding labels
        """

    if len(loader) > _memory_overflow_size and (
            validation_size == -1 or validation_size > _memory_overflow_size):
        print_warning(
            '[predict] The dataset size is {}. Large datasets can cause memory '
            'overflow during standard prediction...'.format(len(loader)))

    with torch.no_grad():
        total = 0
        model.eval()

        y_preds = []
        y_labels = []
        running_loss = 0.0
        idx = 0
        if hasattr(model, 'last_sigmoid') and compute_loss:
            model.last_sigmoid = False
        elif hasattr(model, 'last_sigmoid'):
            model.last_sigmoid = True

        for idx, data in enumerate(loader):

            inputs, labels = data
            if use_gpu():
                labels = labels.cuda()
            # wrap them in Variable
            labels_variable = loss.output(labels)
            outputs = model(inputs)

            # if not test set
            if compute_loss and labels[0] != -1:
                loss_value = loss(outputs, labels)
                running_loss += loss_value.item()
            outputs = loss.output(outputs)

            total += labels_variable.size(0)

            y_preds.extend(outputs.data.tolist())
            y_labels.extend(labels_variable.data.tolist())

            if total >= validation_size != -1 and not export:
                break
        running_loss /= (idx + 1)  # normalizing the loss
        if compute_loss:
            print_info('Validation loss: ' + str(running_loss))
            add_scalar('Loss/Validation', running_loss)
        predictions, labels = np.asarray(y_preds), np.asarray(y_labels)

        # filtering some predicted labels
        for f in filters:
            f(predictions)

        # TODO filtering official labels

    return predictions, labels, running_loss
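predict relies on framework-specific loss wrappers and GPU helpers. A stripped-down evaluation loop with the same no-grad / eval-mode structure, sketched in plain PyTorch with illustrative names, is:

# Sketch: collect model outputs and labels over a DataLoader without gradients.
import numpy as np
import torch

def simple_predict(model, loader):
    model.eval()
    y_preds, y_labels = [], []
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs)
            y_preds.extend(outputs.cpu().tolist())
            y_labels.extend(labels.cpu().tolist())
    return np.asarray(y_preds), np.asarray(y_labels)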
Example No. 22
def plot_on_map(activations,
                map_ids,
                n_cols=1,
                n_rows=1,
                figsize=4,
                log_scale=False,
                mean_size=1,
                selected=tuple(),
                legend=None,
                output="activations",
                style="grey",
                exp_scale=False,
                cmap=None,
                alpha=None,
                bad_alpha=1.,
                font_size=12,
                color_text='black',
                color_tick='black'):
    if log_scale:
        print_info("apply log...")
        activations = activations + 1.0
        activations = np.log(activations)
    elif exp_scale:
        print_info("apply exp...")
        p = np.full(activations.shape, 1.2)
        activations = np.power(p, activations)

    print_info("construct array activation map...")
    pos = []
    max_x = 0
    max_y = 0
    for id_ in map_ids:
        x, y = id_.split("_")
        x, y = int(x), int(y)
        pos.append((x, y))
        if x > max_x:
            max_x = x
        if y > max_y:
            max_y = y
    size = max(max_x + 1, max_y + 1)
    while size % mean_size != 0:
        size += 1
    nb = n_cols * n_rows
    act_map = np.ndarray((nb, size, size))
    act_map[:] = np.nan

    print_info("select neurons to print...")
    if len(selected) > 0:
        list_select = selected
    else:
        list_select = random.sample(list(range(activations.shape[1])), nb)

    print_info("fill activation map array...")
    for k, act in enumerate(activations):
        for idx, j in enumerate(list_select):
            x, y = pos[k][0], pos[k][1]
            act_map[idx, x, y] = act[j]
    """
    fig, axs = plt.subplots(n_rows, n_cols, sharex='col', sharey='row',
                            figsize=(n_cols*figsize*1.2, n_rows*figsize))
    fig.subplots_adjust(wspace=0.5)
    plt.tight_layout(pad=1.5)

    print_info("make figure...")
    for j in range(nb):
        if mean_size != 1:
            height, width = act_map[j].shape
            act_map_j = np.ma.average(np.split(np.ma.average(np.split(act_map[j], width // mean_size, axis=1),
                                               axis=2), height // mean_size, axis=1), axis=2)
        else:
            act_map_j = act_map[j]

        masked_array = np.ma.array(act_map_j, mask=np.isnan(act_map_j))
        cmap = matplotlib.cm.inferno
        cmap.set_bad('grey', 1.)
        im = axs[j // n_cols, j % n_cols].imshow(masked_array, cmap=cmap, interpolation='none')
        axs[j // n_cols, j % n_cols].set_title(str(list_select[j]))
        divider = make_axes_locatable(axs[j // n_cols, j % n_cols])
        cax = divider.append_axes("right", size="5%", pad=0.05)
        fig.colorbar(im, cax=cax)
    """

    font = {'family': 'normal', 'weight': 'bold', 'size': font_size}

    matplotlib.rc('font', **font)

    if legend is None:
        legend = list(map(str, list_select))

    mplt.rcParams['text.color'] = color_text
    mplt.rcParams['axes.labelcolor'] = color_tick
    mplt.rcParams['xtick.color'] = color_tick
    mplt.rcParams['ytick.color'] = color_tick

    plt(output, figsize=(n_cols * figsize * 1.2, n_rows * figsize))
    fig = get_figure(output)
    fig.subplots_adjust(wspace=0.05)

    print_info("make figure...")
    for j in range(nb):
        if mean_size != 1:
            height, width = act_map[j].shape
            act_map_j = np.nanmean(np.split(np.nanmean(np.split(act_map[j],
                                                                width //
                                                                mean_size,
                                                                axis=1),
                                                       axis=2),
                                            height // mean_size,
                                            axis=1),
                                   axis=2)
        else:
            act_map_j = act_map[j]
        masked_array = np.ma.array(act_map_j, mask=np.isnan(act_map_j))
        if cmap is None:
            if style == "grey":
                cmap = matplotlib.cm.inferno
                cmap.set_bad('grey', bad_alpha)
            elif style == "white":
                cmap = matplotlib.cm.inferno
                cmap.set_bad('white', bad_alpha)
        ax = plt(output).subplot(n_rows, n_cols, j + 1)
        ax.set_facecolor((0, 0, 0, 0))
        im = plt(output).imshow(masked_array,
                                cmap=cmap,
                                interpolation='none',
                                alpha=alpha)
        plt(output).title(legend[j])
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)
        plt(output).colorbar(im, cax=cax)

    fig.tight_layout(pad=0.05)
    fig.patch.set_alpha(0.0)

    save_fig(figure_name=output, extension='png')
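The key plotting trick in plot_on_map is to mask missing cells and give the colormap a 'bad' colour so that untouched pixels appear grey or white. A minimal standalone version of that trick, with made-up data, could be:

# Sketch: show a grid with NaN cells rendered in grey via a masked array.
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

act_map = np.full((10, 10), np.nan)
act_map[2:8, 3:7] = np.random.rand(6, 4)  # only part of the grid is filled

masked = np.ma.array(act_map, mask=np.isnan(act_map))
cmap = matplotlib.cm.inferno.copy()       # copy before mutating the colormap
cmap.set_bad('grey', 1.)
plt.imshow(masked, cmap=cmap, interpolation='none')
plt.colorbar()
plt.show()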
Example No. 23
def export_config():
    path = output_path('config.txt')
    print_info('Writing config at: ' + path)
    with open(path, 'a') as f:
        f.write(' '.join(sys.argv) + '\n')
Example No. 24
from datascience.data.util.source_management import check_source
import json
import pandas as pd

from engine.logging import print_info

source = check_source('glc20')
raw_occurrences_path = source['raw_source']
occurrences_path = source['occurrences']  # destination

with open(raw_occurrences_path, 'rb') as f:
    d = json.load(f)

data = {'id': [], 'lat': [], 'lon': [], 'species_id': [], 'species_name': []}

for row in d:
    if row['results']['status'] == 'BEST_REF':
        data['id'].append(row['id'])
        data['lat'].append(row['lat'])
        data['lon'].append(row['lon'])
        data['species_id'].append(row['results']['id'])
        data['species_name'].append(row['results']['name'])

df = pd.DataFrame(data=data)

print_info('Saving file')

df.to_csv(occurrences_path, header=True, sep=';', index=False)
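As a self-contained sketch of the same JSON-to-CSV conversion without the project's source management (the file names below are placeholders):

# Sketch: keep only 'BEST_REF' records from a JSON export and write them as CSV.
import json
import pandas as pd

with open('raw_occurrences.json') as f:   # placeholder path
    rows = json.load(f)

records = [
    {'id': r['id'], 'lat': r['lat'], 'lon': r['lon'],
     'species_id': r['results']['id'], 'species_name': r['results']['name']}
    for r in rows if r['results']['status'] == 'BEST_REF'
]

pd.DataFrame(records).to_csv('occurrences.csv', sep=';', index=False)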
Example No. 25
def _save_fig(path_name, figure):

    print_info('Saving figure at: ' + path_name)
    figure.savefig(path_name)
Example No. 26
def check_extraction(source,
                     save_errors=True,
                     save_filtered=True,
                     id_name='X_key'):
    """
    check if all patches from an occurrences file have been extracted. Can save the list of errors and
    filtered the dataset keeping the correctly extracted data.

    :param id_name: the column that contains the patch id that will be used to construct its path
    :param save_filtered: save the dataframe filtered from the error
    :param save_errors: save the errors found in a file
    :param source: the source referring the occurrence file and the patches path
    """

    # retrieve details of the source
    r = check_source(source)
    if 'occurrences' not in r or 'patches' not in r:
        print_errors(
            'Only sources with occurrences and patches can be checked',
            do_exit=True)

    df = pd.read_csv(r['occurrences'],
                     header='infer',
                     sep=';',
                     low_memory=False)
    nb_errors = 0
    errors = []
    for idx, row in progressbar.progressbar(enumerate(df.iterrows())):
        patch_id = str(int(row[1][id_name]))

        # constructing the path of a patch given its id
        path = os.path.join(r['patches'], patch_id[-2:], patch_id[-4:-2],
                            patch_id + '.npy')

        # if the path does not correspond to a file, then it's an error
        if not os.path.isfile(path):
            errors.append(row[1][id_name])
            nb_errors += 1

    if nb_errors > 0:
        # summary of the error
        print_info(str(nb_errors) + ' errors found during the check...')

        if save_errors:
            # filter the dataframe using the errors
            df_errors = df[df[id_name].isin(errors)]

            error_path = output_path('_errors.csv')
            print_info('Saving error file at: ' + error_path)

            # save dataframe to the error file
            df_errors.to_csv(error_path, header=True, index=False, sep=';')
        if save_filtered:
            # filter the dataframe keeping the non errors
            df_filtered = df[~df[id_name].isin(errors)]
            filtered_path = r['occurrences'] + '.tmp'
            print_info('Saving filtered dataset at: ' + filtered_path)
            df_filtered.to_csv(filtered_path,
                               header=True,
                               index=False,
                               sep=';')
    else:
        print_info('No error has been found!')