예제 #1
0
 def run_step_encode(self, df_norm):
     """Encode ``df_norm`` with ``get_encode_stream`` and optionally save it.

     The full stream is saved under the name ``'stream'`` when
     ``self.save['stream']`` is truthy. Otherwise, if ``self.idx`` is set,
     only the slice ``[self.idx[0]:self.idx[1]]`` is saved, under a name
     suffixed with ``self.idx[-1]``. The complete stream is returned in
     every case.
     """
     encoded = get_encode_stream(df_norm, self.base, self.dtype)

     if self.save['stream']:
         self.save_txt(encoded, 'stream')
         return encoded

     if self.idx is not None:
         window = encoded[self.idx[0]:self.idx[1]]
         self.save_txt(window, f'stream{self.idx[-1]}')

     return encoded
예제 #2
0
def process_rebin(df_norm, base, dtype):
    """Return the stream that ``get_encode_stream`` builds from ``df_norm``.

    ``base`` and ``dtype`` are forwarded to ``get_encode_stream`` unchanged.
    """
    return get_encode_stream(df_norm, base, dtype)
예제 #3
0
def main():
    """Run the photo/spec pretraining pipeline according to the module flags.

    Every stage is guarded by a module-level boolean. When the flag is set
    the stage is recomputed and its output is written under
    ``PRETRAIN_PATH``; otherwise the output of an earlier run is read back
    from that directory.

    Stages, in order:

    1. Photo HH table (``PREPRO_HH_PHOTO``): optionally normalise
       (``PREPRO_NORM_PHOTO`` / ``PREPRO_NORM_PARAMS``) and encode
       (``PREPRO_STREAM_PHOTO``) the photo data, then count with
       ``get_HH_pd``.
    2. Spec stream (``PREPRO_STREAM_SPEC``): encode the normalised spec
       table and attach the encoding to the label table.
    3. Spec HH table (``PREPRO_HH_SPEC``).
    4. Either project the photo HH table through the spec UMAP
       (``MAP_PHOTO``), or project the spec table through the photo UMAP
       and save it joined with the labels.

    Returns:
        None. All results are written to files under ``PRETRAIN_PATH``.
    """
    try:
        os.mkdir(PRETRAIN_PATH)
    except OSError:
        # Most commonly the directory already exists; carry on regardless.
        print('here we go!')

    # Only assigned if prepro_specs() runs below. Later stages reload the
    # normalised spec table from disk when this is still None, instead of
    # failing with a NameError.
    dfspec = None

    if PREPRO_HH_PHOTO:
        if PREPRO_STREAM_PHOTO:
            if PREPRO_NORM_PHOTO:
                if PREPRO_NORM_PARAMS:
                    print('=====================PREPRO SPECS====================')
                    dfspec, vmin, vrng, df_lbl = prepro_specs(
                        SPEC_DATA, ftr, r=0.01, w=True, wpath=PRETRAIN_PATH)
                else:
                    # Reuse the normalisation parameters of an earlier run.
                    vmin = np.loadtxt(f'{PRETRAIN_PATH}/vmin.txt')
                    vrng = np.loadtxt(f'{PRETRAIN_PATH}/vrng.txt')
                print('=====================PREPRO PHOTO====================')
                dfphoto = prepro_photos(PHOTO_DATA, vmin, vrng, base, ftr,
                                        w=True, wpath=PRETRAIN_PATH)
            else:
                print('=====================LOADING PHOTO NORM ====================')
                # read_csv() has no ``index`` keyword (that one belongs to
                # to_csv()); passing it raised a TypeError.
                dfphoto = pd.read_csv(f'{PRETRAIN_PATH}/photo_norm_{base}.csv')
            print('=====================ENCODE PHOTO ====================')
            photo_stream = get_encode_stream(dfphoto, base, dtype)
            np.savetxt(f'{PRETRAIN_PATH}/photo_stream.txt', photo_stream)
        else:
            print('=====================LOADING PHOTO STREAM ====================')
            photo_stream = np.loadtxt(f'{PRETRAIN_PATH}/photo_stream.txt')
        print('===================== COUNTING PHOTO HH==================')
        photoHH_pd = get_HH_pd(photo_stream, base, ftr_len, dtype,
                               EXACT_COUNTING, topk,
                               r=16, d=1000000, c=None, device=None)
        photoHH_pd.to_csv(f'{PRETRAIN_PATH}/photo_HH.csv', index=False)
    else:
        photoHH_pd = pd.read_csv(f'{PRETRAIN_PATH}/photo_HH.csv')
    print('photoHH_pd', photoHH_pd)

    if PREPRO_STREAM_SPEC:
        # Also reload when the flag claims the tables were computed in this
        # run but the nested photo flags above skipped prepro_specs().
        if not PREPRO_NORM_PARAMS or dfspec is None:
            dfspec = pd.read_csv(f'{PRETRAIN_PATH}/spec_norm.csv')
            df_lbl = pd.read_csv(f'{PRETRAIN_PATH}/spec_lbl.csv')
        print('=====================ENCODING SPEC ====================')
        spec_stream = get_encode_stream(dfspec, base, dtype)
        np.savetxt(f'{PRETRAIN_PATH}/spec_stream.txt', spec_stream)
        df_lbl['encode'] = spec_stream
        df_lbl.to_csv(f'{PRETRAIN_PATH}/spec_lbl_encode.csv', index=False)
    else:
        df_lbl = pd.read_csv(f'{PRETRAIN_PATH}/spec_lbl_encode.csv')
        if PREPRO_HH_SPEC:
            # The stream itself is only needed for the HH count below.
            spec_stream = np.loadtxt(f'{PRETRAIN_PATH}/spec_stream.txt')

    if PREPRO_HH_SPEC:
        # Message fixed: this stage counts the spec stream, not the photo one.
        print('=====================COUNTING SPEC HH====================')
        specHH_pd = get_HH_pd(spec_stream, base, ftr_len, dtype, True, topk)
        print('=====================UMAPPING SPEC ====================')
        specHH_pd.to_csv(f'{PRETRAIN_PATH}/specHH_pd.csv', index=False)
    elif MAP_PHOTO:
        specHH_pd = pd.read_csv(f'{PRETRAIN_PATH}/specHH_pd.csv')
        print('specHH_pd', specHH_pd)

    if MAP_PHOTO:
        print('=============MAPPING PHOTO============')
        if PREPRO_SPEC_UMAP:
            HH_pdQS, umapT_spec = get_spec_mapping(
                specHH_pd, ftr, df_lbl, base, name, umap_comp, HH_cut=20000)
            print('HH_pdQS', HH_pdQS)
            joblib.dump(umapT_spec, f'{PRETRAIN_PATH}/umap_spec_b{base}.sav')
            HH_pdQS.to_csv(f'{PRETRAIN_PATH}/spec_HHQS.csv', index=False)
        else:
            umapT_spec = joblib.load(f'{PRETRAIN_PATH}/umap_spec_b{base}.sav')
        print('=====================UMAP PROJECTING PHOTO ====================')
        photo_mapped = get_mapping_pd(photoHH_pd, umapT_spec,
                                      list(range(ftr_len)))
        print('=====================SAVING SMAPPED PHOTO ====================')
        photo_mapped.to_csv(f'{PRETRAIN_PATH}/photoUTe{EXACT_COUNTING}.csv',
                            index=False)
    else:
        if not PREPRO_HH_SPEC:
            df_lbl = pd.read_csv(f'{PRETRAIN_PATH}/spec_lbl_encode.csv')
        print('=============MAPPING SPEC============')
        if PREPRO_SPEC_UT:
            if PREPRO_PHOTO_UMAP:
                photoHH_pdh = photoHH_pd[:topk]
                print(photoHH_pdh)
                # Try integer column labels first, then fall back to ftr_str.
                # NOTE(review): presumably the labels become strings once the
                # table has been round-tripped through CSV -- confirm.
                try:
                    umapT_photo = get_umap_pd(photoHH_pdh,
                                              list(range(ftr_len)), umap_comp)
                except Exception:
                    umapT_photo = get_umap_pd(photoHH_pdh, ftr_str, umap_comp)
                joblib.dump(umapT_photo,
                            f'{PRETRAIN_PATH}/umap_photo_b{base}.sav')
                photoHH_pdh.to_csv(f'{PRETRAIN_PATH}/photoHH_pdh.csv',
                                   index=False)
            else:
                # Load from the directory the model is dumped to above; the
                # path used to be hard-coded as 'pretrain/'.
                umapT_photo = joblib.load(
                    f'{PRETRAIN_PATH}/umap_photo_b{base}.sav')
            if not PREPRO_NORM_PARAMS or dfspec is None:
                dfspec = pd.read_csv(f'{PRETRAIN_PATH}/spec_norm.csv')
            # Scale by (base - 1) and round before projecting.
            dfspec = (dfspec * (base - 1)).round()
            spec_pm = get_mapping_pd(dfspec, umapT_photo, dfspec.keys())
            spec_pm.to_csv(f'{PRETRAIN_PATH}/spec_pm_e{EXACT_COUNTING}.csv',
                           index=False)
        else:
            spec_pm = pd.read_csv(
                f'{PRETRAIN_PATH}/spec_pm_e{EXACT_COUNTING}.csv')
        # Column-wise join of the projected spec table and its labels.
        specUT_lbled = pd.concat([spec_pm, df_lbl], axis=1)
        specUT_lbled.to_csv(
            f'{PRETRAIN_PATH}/spec_pm_e{EXACT_COUNTING}_lbl.csv', index=False)
예제 #4
0
def main():
    """Run the photo/spec pipeline: normalise, count HH, fit UMAP, map spec.

    Each stage is guarded by a module-level flag (``PRE_NORM``, ``PRE_HH``,
    ``PRE_UMAP``, ``MAP_SPEC``, ``UPLOAD_SCI``). A stage that is switched on
    recomputes its output and writes it under ``PRETRAIN``; a stage that is
    switched off reads the previously written output from there.

    Returns:
        None. Results are written to files under ``PRETRAIN`` and, when
        ``UPLOAD_SCI`` is set, ``photo_HH`` is uploaded through ``CasJobs``.
    """
    try:
        os.mkdir(PRETRAIN)
    except OSError:
        # Most commonly the directory already exists; carry on regardless.
        print('here we go!')

    if PRE_NORM:
        dfphoto, dfspec, df_lbl = prepro_photo_spec(PHOTO_DATA,
                                                    SPEC_DATA,
                                                    base,
                                                    ftr,
                                                    wpath=PRETRAIN)
    if PRE_HH:
        # NOTE(review): this branch uses dfphoto/dfspec/df_lbl from the
        # PRE_NORM step; with PRE_HH set and PRE_NORM unset it raises a
        # NameError. The file holding the normalised photo table is not
        # visible here, so no fallback load is added.
        print('=====================ENCODE PHOTO ====================')
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        spec_stream = get_encode_stream(dfspec, base, dtype)
        df_lbl['encode'] = spec_stream
        df_lbl.to_csv(f'{PRETRAIN}/spec_lbl_encode.csv', index=False)
        photo_HH = get_HH_pd(photo_stream,
                             base,
                             ftr_len,
                             dtype,
                             EXACT,
                             topk,
                             r=16,
                             d=1000000,
                             c=None,
                             device=None)
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            # The exact count is truncated here to its first topk rows.
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
        spec_HH = get_HH_pd(spec_stream, base, ftr_len, dtype, True, topk)
        spec_HH.to_csv(f'{PRETRAIN}/spec_HH.csv', index=False)
    else:
        # Always reload when the tables are not recomputed: the prints, the
        # final concat and the upload below need them even when neither
        # PRE_UMAP nor MAP_SPEC is set (that case used to hit a NameError).
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')
        spec_HH = pd.read_csv(f'{PRETRAIN}/spec_HH.csv')
        df_lbl = pd.read_csv(f'{PRETRAIN}/spec_lbl_encode.csv')
    print('photo_HH', photo_HH)
    print('spec_HH', spec_HH)

    if PRE_UMAP:
        print('=============GETTING UMAP============')
        # Try integer column labels first, then fall back to ftr_str.
        # NOTE(review): presumably the labels become strings once the table
        # has been round-tripped through CSV -- confirm.
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except Exception:
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT_b{base}.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # Load from the directory the model is dumped to above; the path
        # used to be hard-coded as 'pretrain/'.
        photo_uT = joblib.load(f'{PRETRAIN}/photo_uT_b{base}.sav')

    if MAP_SPEC:
        if not PRE_NORM:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        # Scale by (base - 1) and round; the assert checks the result stays
        # inside [0, base - 1].
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <=
                                                  base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    # Column-wise join of the projected spec table and its labels.
    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        # NOTE(review): credentials are hard-coded placeholders; consider
        # prompting with getpass.getpass() or reading them from the
        # environment instead of keeping them in the source.
        username = '******'
        password = '******'
        # The token returned by login() is not used in this function.
        Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")
예제 #5
0
def main():
    """Run the standardised photo/spec pipeline according to the module flags.

    Each stage is guarded by a module-level flag (``PRE_SPEC``,
    ``PRE_PHOTO_HH``, ``PRE_UMAP``, ``MAP_SPEC``, ``UPLOAD_SCI``). A stage
    that is switched on recomputes its output and writes it under
    ``PRETRAIN``; a stage that is switched off reads the previously written
    output from there.

    Returns:
        None. Results are written to files under ``PRETRAIN`` and, when
        ``UPLOAD_SCI`` is set, ``photo_HH`` is uploaded through ``CasJobs``.
    """
    try:
        os.mkdir(PRETRAIN)
    except OSError:
        # Most commonly the directory already exists; carry on regardless.
        print('here we go!')

    if PRE_SPEC:
        dfspec, vmean, vstd, df_lbl = prepro_std_specs(SPEC_DATA,
                                                       ftr=ftr,
                                                       sig=3.0,
                                                       w=True,
                                                       wpath=PRETRAIN)
    elif PRE_PHOTO_HH:
        # Read the parameters from PRETRAIN, the directory every other path
        # in this function uses (these two lines referred to PRETRAIN_PATH).
        vmean = np.loadtxt(f'{PRETRAIN}/vmean.txt')
        vstd = np.loadtxt(f'{PRETRAIN}/vstd.txt')

    if PRE_PHOTO_HH:
        print('=====================PREPRO PHOTO====================')
        dfphoto = prepro_std_photos(PHOTO_DATA, vmean, vstd, ftr=ftr, sig=3.0)
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        photo_HH = get_HH_pd(photo_stream,
                             base,
                             ftr_len,
                             dtype,
                             EXACT,
                             topk,
                             r=16,
                             d=1000000,
                             c=None,
                             device=None)
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            # The exact count is truncated here to its first topk rows.
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif PRE_UMAP or UPLOAD_SCI:
        # read_csv() has no ``columns`` keyword; passing it raised a
        # TypeError. The upload below needs photo_HH as well, so it is
        # reloaded for that case too.
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')

    if PRE_UMAP:
        print('=============GETTING UMAP============')
        # Try integer column labels first, then fall back to ftr_str.
        # NOTE(review): presumably the labels become strings once the table
        # has been round-tripped through CSV -- confirm.
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except Exception:
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # Load from the directory the model is dumped to above; the path
        # used to be hard-coded as 'pretrain/'.
        photo_uT = joblib.load(f'{PRETRAIN}/photo_uT.sav')

    if MAP_SPEC:
        if not PRE_SPEC:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        # Scale by (base - 1) and round; the assert checks the result stays
        # inside [0, base - 1].
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <=
                                                  base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    # NOTE(review): df_lbl is only assigned when PRE_SPEC is set; with
    # PRE_SPEC unset the concat below raises a NameError. The file the label
    # table should be reloaded from is not visible here -- confirm and add
    # the load.
    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        # NOTE(review): credentials are hard-coded placeholders; consider
        # prompting with getpass.getpass() or reading them from the
        # environment instead of keeping them in the source.
        username = '******'
        password = '******'
        # The token returned by login() is not used in this function.
        Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")