Exemplo n.º 1
0
def _update_data_by_remove_repeated_ids(first_loop_data: DataBase,
                                        light_curve_data: DataBase,
                                        id_key_name: str,
                                        pool_labels_class: str = 'Ia') -> Tuple[
        DataBase, DataBase]:
    """
    Updates first loop and initial data by removing repetitive id indices

    Parameters
    ----------
    first_loop_data
        first loop light curve data
    light_curve_data
        initial light curve training data
    id_key_name
        object identification key name
    pool_labels_class
        pool labels class name
    """
    repeated_id_flags = np.in1d(
        first_loop_data.pool_metadata[id_key_name].values,
        light_curve_data.train_metadata[id_key_name].values)
    first_loop_data.pool_metadata = first_loop_data.pool_metadata[
        ~repeated_id_flags]
    first_loop_data.pool_features = first_loop_data.pool_features[
        ~repeated_id_flags]
    pool_labels = (
            first_loop_data.pool_metadata['type'].values == pool_labels_class)
    first_loop_data.pool_labels = pool_labels.astype(int)
    light_curve_data.pool_features = first_loop_data.pool_features
    light_curve_data.pool_metadata = first_loop_data.pool_metadata
    light_curve_data.pool_labels = first_loop_data.pool_labels
    return first_loop_data, light_curve_data
Exemplo n.º 2
0
def _update_light_curve_data_for_next_epoch(
        light_curve_data: DataBase, next_day_data: DataBase,
        canonical_data: DataBase, is_queryable: bool, strategy: str,
        is_separate_files: bool) -> DataBase:
    """
    Updates samples for next epoch

    Parameters
    ----------
    light_curve_data
        light curve learning data
    next_day_data
        next day light curve data
    canonical_data
        canonical strategy light curve data
    is_queryable
        If True, allow queries only on objects flagged as queryable.
        Default is True.
    strategy
        Query strategy. Options are (all can be run with budget):
        "UncSampling", "UncSamplingEntropy", "UncSamplingLeastConfident",
        "UncSamplingMargin", "QBDMI", "QBDEntropy", "RandomSampling",
    is_separate_files
        If True, consider samples separately read
        from independent files. Default is False.
    """
    light_curve_data.pool_metadata = next_day_data.pool_metadata
    light_curve_data.pool_features = next_day_data.pool_features
    light_curve_data.pool_labels = next_day_data.pool_labels

    if not is_separate_files:
        light_curve_data.test_metadata = next_day_data.test_metadata
        light_curve_data.test_features = next_day_data.test_features
        light_curve_data.test_labels = next_day_data.test_labels

        light_curve_data.validation_metadata = next_day_data.validation_metadata
        light_curve_data.validation_features = next_day_data.validation_features
        light_curve_data.validation_labels = next_day_data.validation_labels

    if strategy == 'canonical':
        light_curve_data.queryable_ids = canonical_data.queryable_ids

    if is_queryable:
        queryable_flag = light_curve_data.pool_metadata['queryable'].values
        light_curve_data.queryable_ids = light_curve_data.pool_metadata[
            'id'].values[queryable_flag]
    else:
        light_curve_data.queryable_ids = light_curve_data.pool_metadata[
            'id'].values
    return light_curve_data
Exemplo n.º 3
0
def _update_next_day_pool_data(next_day_data: DataBase,
                               next_day_pool_metadata_indices) -> DataBase:
    """
    Removes metadata value data from next day pool sample

    Parameters
    ----------
    next_day_data
        next day light curve data
    next_day_pool_metadata_indices
        indices of metadata value in next day light curve data
    """
    # remove obj from pool sample
    next_day_data.pool_metadata = next_day_data.pool_metadata.drop(
        next_day_data.pool_metadata.index[next_day_pool_metadata_indices])
    next_day_data.pool_labels = np.delete(
        next_day_data.pool_labels, next_day_pool_metadata_indices, axis=0)
    next_day_data.pool_features = np.delete(
        next_day_data.pool_features, next_day_pool_metadata_indices, axis=0)
    return next_day_data