# Example 1
# 0
def print_percentages(what, df_sel, df, color='red'):
    """Print the size of a selection and its percentage of the full sample.

    Args:
        what (str): label describing the selection.
        df_sel (pd.DataFrame): selected subset.
        df (pd.DataFrame): full sample; must be non-empty
            (an empty df raises ZeroDivisionError, as before).
        color (str): 'red' or 'blue'; any other value prints nothing
            (unchanged behavior of the original if/elif chain).
    """
    # dispatch table avoids duplicating the formatted message per color
    printers = {'red': lu.print_red, 'blue': lu.print_blue}
    printer = printers.get(color)
    if printer is not None:
        printer(f"{what} ",
                f"{len(df_sel)} = {round(100*len(df_sel)/len(df),1)}%")
# Example 2
# 0
def do_classification(skim_dir, list_models, sntypes):
    """Run SuperNNova (SNN) RNN classification over a skimmed data directory.

    For each trained model: rebuild SNN settings, fetch RNN predictions,
    compute metrics, plot early-prediction light curves, then evaluate the
    classifications and plot efficiency. After the loop, pair-plots the
    prediction info of the last model processed.

    Args:
        skim_dir (str): directory holding the skimmed data; also used as
            the SNN raw and dump directory.
        list_models (list): paths of trained SNN model files; may be empty,
            in which case only the database settings are prepared.
        sntypes (dict): SN type code -> label mapping passed to SNN.
    """
    import SuperNNova.supernnova.conf as conf
    from SuperNNova.supernnova.data import make_dataset
    from SuperNNova.supernnova.visualization import early_prediction
    from SuperNNova.supernnova.validation import validate_rnn, metrics

    lu.print_blue(f"Classifying {skim_dir}")
    # get config args
    snn_args = conf.get_args()

    # create database configuration: testing-only data, dumped in place
    snn_args.data = True
    snn_args.data_testing = True
    snn_args.dump_dir = f"{skim_dir}/"
    snn_args.raw_dir = f"{skim_dir}/"
    snn_args.fits_dir = "./"
    snn_args.sntypes = sntypes
    settings = conf.get_settings(snn_args)

    # # make dataset (deliberately disabled step, kept for reference)
    # make_dataset.make_dataset(settings)

    df = None
    path_plot = None
    for model in list_models:
        # add model file to settings and rebuild them
        snn_args.model_files = model
        settings = conf.get_settings(snn_args)

        model_settings = conf.get_settings_from_dump(
            settings,
            snn_args.model_files,
            override_source_data=settings.override_source_data,
        )
        # fetch predictions
        prediction_file = validate_rnn.get_predictions(
            model_settings, model_file=snn_args.model_files)
        # Compute metrics
        metrics.get_metrics_singlemodel(model_settings,
                                        prediction_file=prediction_file,
                                        model_type="rnn")
        # plot early-classification light curves
        model_settings.model_files = snn_args.model_files
        early_prediction.make_early_prediction(model_settings, nb_lcs=20)

        # evaluate classifications
        df = eu.fetch_prediction_info(settings, model_settings, skim_dir)

        # plots init
        path_plot = f"{snn_args.dump_dir}/figures/"
        eu.plot_efficiency(df, skim_dir, path_plot)

    # "the classified sample": pair plots for the last evaluated model.
    # Guarded so an empty list_models no longer raises NameError on df.
    if df is not None:
        eu.pair_plots(df, path_plot)
# Example 3
# 0
def skim_data(raw_dir, dump_dir, fits_file, timevar, debug=False):
    """Skim PHOT and HEAD.FITS files found in ``raw_dir``.

    Each *PHOT.FITS file is read together with its header, optionally
    merged (on SNID) with Bazin fit parameters, cleaned of pandas
    'Unnamed' index-artifact columns, and passed through apply_cut_save.

    Args:
        raw_dir (str): directory scanned for *PHOT.FITS files.
        dump_dir (str): output directory forwarded to apply_cut_save.
        fits_file (str): Bazin fits file; merged in when it exists.
        timevar (str): time-variable name forwarded to apply_cut_save.
        debug (bool): if True, process only the first file found.

    Returns:
        tuple: (tmp_type_list, filenames) — accumulated SN types and the
        files written by apply_cut_save. (These were previously computed
        but silently discarded; returning them is backward-compatible.)
    """
    # f-string wrapper around raw_dir was a no-op; use the value directly
    list_files = glob.glob(os.path.join(raw_dir, "*PHOT.FITS"))
    if debug:
        lu.print_yellow('Debugging mode')
        list_files = list_files[:1]
    lu.print_green(
        f"Starting data skimming, found {len(list_files)} to operate on")

    # load Bazin fit parameters if available
    df_fits = None
    if Path(fits_file).exists():
        df_fits = du.load_fits(fits_file)

    tmp_type_list = []
    filenames = []
    # skim each FITS file
    for fname in list_files:
        # fetch data year as prefix
        dump_prefix = Path(fname).name.split("_")[0]
        lu.print_blue(f"Processing: {dump_prefix}")

        df_header, df_phot = du.read_fits(fname)
        if df_fits is not None:
            df_header = pd.merge(df_header, df_fits, on='SNID')
        # drop pandas 'Unnamed' artifact columns
        df_header = df_header[[
            k for k in df_header.keys() if 'Unnamed' not in k
        ]]
        # apply cuts and dump the skimmed file
        unique_types, filename = apply_cut_save(df_header,
                                                df_phot,
                                                timevar=timevar,
                                                dump_dir=dump_dir,
                                                dump_prefix=dump_prefix)
        tmp_type_list += unique_types
        filenames.append(filename)

    return tmp_type_list, filenames
def get_sample_stats_and_plots(df_pred,
                               photo_Ia,
                               photo_nonIa,
                               skim_dir,
                               model_files=None,
                               out_dir=None,
                               plot=False):
    """Summarize a photometric Ia sample: histograms, stats and CSV dumps.

    Args:
        df_pred: predictions DataFrame for all light curves.
        photo_Ia (dict): photometric-Ia subsets keyed by 'all', 'spec_Ia',
            'spec_nonIa', 'all_no_spec_nonIa'.
        photo_nonIa (dict): photometric non-Ia subsets ('all', 'spec_Ia').
        skim_dir (str): skim directory; figures go to <skim_dir>/figures/;
            if it contains 'fake', fake-SN salt2 variables are plotted too.
        model_files: optional model files forwarded to light-curve plots.
        out_dir: CSV dump directory; defaults to <skim_dir>/sample/.
        plot (bool): if True, also plot light curves and extra histograms.

    NOTE(review): this function reads the module-level globals ``cut_type``
    and ``dtype`` (set in the script loop below) — confirm before reusing
    it outside that loop.
    """

    # inspect sample
    path_plots = f"{skim_dir}/figures/"
    Path(path_plots).mkdir(parents=True, exist_ok=True)
    # keep only the variables actually present in the photo-Ia sample
    vars_to_plot = [
        k for k in [
            'REDSHIFT_FINAL', 'PRIVATE(DES_numepochs_ml)', 'all_class0',
            'PRIVATE(DES_cand_type)', 'TYPE', 'PRIVATE(DES_mjd_trigger)',
            'PKMJDINI'
        ] if k in photo_Ia['all'].keys()
    ]
    if 'fake' in skim_dir:
        # fakes: no spec subsets, but simulated salt2 parameters exist
        df_dic = {'all_lcs': df_pred, 'photo Ia sample': photo_Ia['all']}
        vars_to_plot += [
            'PRIVATE(DES_fake_salt2x1)', 'PRIVATE(DES_fake_salt2c)'
        ]
    else:
        df_dic = {
            'all_lcs': df_pred,
            'photo Ia sample': photo_Ia['all'],
            'contaminants': photo_Ia['spec_nonIa'],
            'photo other but spec Ia ': photo_nonIa['spec_Ia']
        }
    for var in [k for k in vars_to_plot]:
        vu.plot_superimposed_hist(df_dic,
                                  var,
                                  nameout=f"{path_plots}/hist_{var}_dist.png",
                                  log=True)

    for var in ['FLUXCAL_max', 'SNRMAX1']:
        # photo sample zoom
        vu.plot_superimposed_hist(df_dic,
                                  var,
                                  nameout=f"{path_plots}/hist_{var}_dist.png",
                                  log=True,
                                  limits_from_photo_sample=True)

    # Stats (cut_type is a module-level global, see NOTE in docstring)
    lu.print_green(cut_type)
    eu.print_percentages(f"photo Ias          ",
                         photo_Ia['all'],
                         df_pred,
                         color='blue')
    # dtype is also a module-level global; spec breakdown only for real data
    if dtype == 'real':
        lu.print_blue(f"      are spec Ias  ", len(photo_Ia['spec_Ia']))
        lu.print_blue(f'      are spec other', len(photo_Ia['spec_nonIa']))
        lu.print_blue(
            f'               gals ',
            len(photo_Ia['spec_nonIa'][photo_Ia['spec_nonIa']['TYPE'] == 81]))
        lu.print_red(f"missed Ias          ", len(photo_nonIa['spec_Ia']))

    # dump sample
    if not out_dir:
        # directory is only created for the default path; a caller-supplied
        # out_dir is assumed to exist already
        out_dir = f"{skim_dir}/sample/"
        Path(out_dir).mkdir(parents=True, exist_ok=True)
    # filla na
    photo_Ia['all_no_spec_nonIa'] = photo_Ia['all_no_spec_nonIa'].fillna(0)
    photo_Ia['spec_nonIa'] = photo_Ia['spec_nonIa'].fillna(0)
    photo_Ia['all_no_spec_nonIa'][[
        'SNID', 'HOSTGAL_OBJID', 'DEC', 'RA', 'TYPE', 'REDSHIFT_FINAL',
        'HOSTGAL_PHOTOZ', 'HOSTGAL_SPECZ', 'all_class0', 'c', 'x1'
    ]].to_csv(f'{out_dir}/photo_Ia.csv')
    # dump contaminants
    photo_Ia['spec_nonIa'][[
        'SNID', 'HOSTGAL_OBJID', 'DEC', 'RA', 'TYPE', 'REDSHIFT_FINAL',
        'HOSTGAL_PHOTOZ', 'HOSTGAL_SPECZ', 'all_class0', 'c', 'x1'
    ]].to_csv(f'{out_dir}/photo_Ia_spec_contamination.csv')
    # dump missed Ias
    photo_nonIa['spec_Ia'][[
        'SNID', 'HOSTGAL_OBJID', 'DEC', 'RA', 'TYPE', 'REDSHIFT_FINAL',
        'HOSTGAL_PHOTOZ', 'HOSTGAL_SPECZ', 'all_class0', 'c', 'x1'
    ]].to_csv(f'{out_dir}/photo_nonIa_spec_Ia.csv')

    # plot lcs
    if plot:
        vu.plot_early_classification(skim_dir,
                                     prefix='photo_Ia_',
                                     df=photo_Ia['all'],
                                     model_files=model_files,
                                     out_dir=out_dir)
        vu.plot_early_classification(skim_dir,
                                     prefix='photo_Ia_spec_contamination',
                                     df=photo_Ia['spec_nonIa'],
                                     model_files=model_files,
                                     out_dir=out_dir)
        vu.plot_early_classification(skim_dir,
                                     prefix='photo_nonIa_',
                                     df=photo_nonIa['all'],
                                     model_files=model_files,
                                     out_dir=out_dir)
        vu.plot_early_classification(skim_dir,
                                     prefix='photo_nonIa_spec_Ia_',
                                     df=photo_nonIa['spec_Ia'],
                                     model_files=model_files,
                                     out_dir=out_dir)

        # sample histograms of type
        vu.plot_hist(photo_Ia['all'],
                     'TYPE',
                     nameout=f"{path_plots}/photo_Ia_hist_type.png",
                     log=True)
        vu.plot_hist(photo_nonIa['all'],
                     'TYPE',
                     nameout=f"{path_plots}/photo_nonIa_hist_type.png",
                     log=True)

        # spec-subset comparison histograms
        df_dic = {
            'photo Ia sample': photo_Ia['all'],
            'photo & spec Ia ': photo_Ia['spec_Ia'],
            'photo other but spec Ia ': photo_nonIa['spec_Ia']
        }
        for var in [
                'REDSHIFT_FINAL', 'HOSTGAL_PHOTOZ', 'HOSTGAL_SPECZ',
                'all_class0'
        ]:
            vu.plot_superimposed_hist(
                df_dic,
                var,
                nameout=f"{path_plots}/hist_{var}_dist_spec.png",
                log=True,
                only_positive_x=True)
        for var in ['FLUXCAL_max', 'c', 'x1']:
            vu.plot_superimposed_hist(
                df_dic,
                var,
                nameout=f"{path_plots}/hist_{var}_dist_spec.png",
                log=True,
                only_positive_x=False,
                bins=20)
# DES data location taken from the environment (None if unset)
path_des_data = os.environ.get("DES_DATA")
# SNN model configuration name used to locate prediction files
model_name = "vanilla_S_0_CLF_2_R_None_photometry_DF_1.0_N_global_lstm_32x2_0.05_128_True_mean_C"
# all trained model files to evaluate
list_models = glob.glob("../SuperNNova_general/trained_models_mutant/*/*.pt")

for model in list_models:
    model_files = [model]

    for dtype in ["real", "fake"]:
        df_pred = {}
        photo_Ia = {}
        photo_nonIa = {}

        for cut_type in ['clump']:  #['bazin','clump','trigger']:
            print()
            lu.print_blue(
                f'_____STATS FOR {dtype} with window {cut_type} model {Path(model).name.split("_")[0]}_____'
            )
            print()

            skim_dir = f"./dumps/{dtype}/{cut_type}/"

            # fetch predictions
            df_pred[cut_type] = du.load_predictions_and_info(
                skim_dir, model_name)

            # add salt2 fit parameters
            raw_dir = f"{path_des_data}/DESALL_forcePhoto_{dtype}_snana_fits/"
            saltfit = du.load_fitres(raw_dir)
            saltfit = saltfit[['SNID'] + [
                k for k in saltfit.keys() if k not in df_pred[cut_type].keys()
            ]]
        # load preds & enrich
        df_pred = du.load_predictions(fname_preds)
        df_pred = du.enrich_predictions(df_pred, path_dtype_data)

        # save the preds
        dic_pred[dtype][name_model] = df_pred

        # Select a default "photometric" sample
        photo_sample = df_pred[df_pred['predicted_target'] == 0]
        dic_pred[dtype][name_model]['photo_sample'] = np.array([df_pred['predicted_target'] == 0])[0]

        # save photo sample
        cols_to_save = ['SNID','HOSTGAL_OBJID', 'DEC', 'RA', 'SNTYPE', 'REDSHIFT_FINAL', 'HOSTGAL_PHOTOZ', 'HOSTGAL_SPECZ']
        cols_to_save += ['all_class0'] if 'vanilla' in name_model else ['all_class0_median','all_class0_std']
        photo_sample[cols_to_save].to_csv(f"{out_dir}/photo_sample.csv")
        lu.print_blue(name_model)
        print(f'photo sample {len(photo_sample)} representing {int(len(photo_sample)/len(df_pred)*100)}%')

        # metrics
        if dtype == 'real':
            # Ias
            dic_pred[dtype][name_model]['photo_spec_Ia'] = np.array([(df_pred['predicted_target'] == 0) & ((df_pred['SNTYPE'] == 1) | (
                df_pred['SNTYPE'] == 101))])[0]
            spec_Ia = df_pred[(df_pred['SNTYPE'] == 1) |
                              (df_pred['SNTYPE'] == 101)]
            print('spec Ia', len(dic_pred[dtype][name_model][dic_pred[dtype][name_model]['photo_spec_Ia']==True]), f"from {len(spec_Ia)}")
            # non Ias
            dic_pred[dtype][name_model]['photo_spec_nonIa'] = np.array([(df_pred['predicted_target'] == 0) & (df_pred['SNTYPE'] != 0) & (df_pred['SNTYPE'] != 1) & (
                df_pred['SNTYPE'] != 101)])[0]
            spec_non_Ia = df_pred[(df_pred['SNTYPE'] != 0) & (df_pred['SNTYPE'] != 1) &
                              (df_pred['SNTYPE'] != 101)]