Example #1
0
    def test_CasJobs_uploadPandasDataFrameToTable_uploadCSVDataToTable(self):
        try:

            df = pandas.read_csv(StringIO(CasJobs_TestTableCSV),
                                 index_col=None)

            result = CasJobs.uploadPandasDataFrameToTable(
                dataFrame=df, tableName=CasJobs_TestTableName2, context="MyDB")
            table = CasJobs.executeQuery(sql="select * from " +
                                         CasJobs_TestTableName2,
                                         context="MyDB",
                                         format="pandas")
            result2 = CasJobs.executeQuery(sql="DROP TABLE " +
                                           CasJobs_TestTableName2,
                                           context="MyDB",
                                           format="csv")
            self.assertEqual(result, True)
            self.assertItemsEqual(table, df)

            result = CasJobs.uploadCSVDataToTable(
                csvData=CasJobs_TestTableCSV,
                tableName=CasJobs_TestTableName2,
                context="MyDB")
            df2 = CasJobs.executeQuery(sql="select * from " +
                                       CasJobs_TestTableName2,
                                       context="MyDB",
                                       format="pandas")
            result2 = CasJobs.executeQuery(sql="DROP TABLE " +
                                           CasJobs_TestTableName2,
                                           context="MyDB",
                                           format="csv")
            self.assertEqual(result, True)
            self.assertItemsEqual(df, df2)

        finally:
            try:
                csv = CasJobs.executeQuery(sql="DROP TABLE " +
                                           CasJobs_TestTableName2,
                                           context="MyDB",
                                           format="csv")
            except:
                pass
def testUploadDataFrame(df, token, tablename):
    """Upload *df* to the CasJobs table *tablename* using *token*.

    Returns the CasJobs response so callers can check the outcome
    (the original discarded it, making the call unverifiable).
    """
    response = CasJobs.uploadPandasDataFrameToTable(df, tablename, token=token)
    return response

# In[ ]:

# Fetch the test query results directly as a numpy array.

array = CasJobs.getNumpyArrayFromQuery(queryString=CasJobs_TestQuery,
                                       context=CasJobs_TestDatabase)
print(array)


# In[ ]:

# Round-trip a pandas DataFrame through a CasJobs database table.

df = pandas.read_csv(StringIO(CasJobs_TestTableCSV), index_col=None)
result = CasJobs.uploadPandasDataFrameToTable(dataFrame=df,
                                              tableName=CasJobs_TestTableName2,
                                              context="MyDB")
select_sql = "select * from " + CasJobs_TestTableName2
table = CasJobs.executeQuery(sql=select_sql, context="MyDB", format="pandas")
print(result)
print(table)


# In[ ]:

# Clean up: drop the table created above.

drop_sql = "DROP TABLE " + CasJobs_TestTableName2
result2 = CasJobs.executeQuery(sql=drop_sql,
                               context=CasJobs_TestDatabase,
                               format="pandas")
print(result2)


# In[ ]:
Example #4
0
def main():
    """Run the enabled stages of the image preprocessing/clustering pipeline.

    Module-level flags (PREPRO_NORM, PREPRO_STREAM, PREPRO_HH, PREPRO_UMAP,
    PREPRO_KMEAN, SAVE_ALL, PREDICT_ALL, UPLOAD_SCI) select which stages run.
    Stages hand data forward either through local variables (when run in the
    same invocation) or through files saved under PRETRAIN.
    """
    # Ensure the working directory exists; tolerate an already-existing
    # directory (or other OS-level failure) so reruns proceed. The original
    # used a bare `except:` with a no-op string expression.
    try:
        os.mkdir(PRETRAIN)
    except OSError:
        pass
    # Log which stages are active for this run.
    print(PREPRO_CUTOFF, PREPRO_NORM, PREPRO_STREAM, PREPRO_HH, PREPRO_UMAP,
          PREPRO_KMEAN, SAVE_ALL, PREDICT_ALL, UPLOAD_SCI)

    if PREPRO_NORM:
        # Stage 1: load the raw dataset, run PCA, and normalize intensities.
        print(
            f'=================LOADING N={num} Smoothing {ISSMTH} ================='
        )
        data1Ds, pc = process_dataset_pc(data_dir, num, pca_comp, ISSMTH, SMTH,
                                         TEST)
        intensity, pca_results = process_pca(data1Ds, pc, num)
        df_norm, mask, ftr_len0 = process_intensity(pca_results,
                                                    intensity,
                                                    pca_comp,
                                                    PREPRO_CUTOFF,
                                                    ONPCA,
                                                    ONINT,
                                                    r=0.01,
                                                    wdir=PRETRAIN)
        # The produced feature length must match the configured ftr_len
        # that the decode stage below relies on.
        assert ftr_len0 == ftr_len
        # One row per image; each image is flattened to 1004*1344 pixels.
        mask2d = mask.reshape((num, 1004 * 1344))
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/mask_all.txt', mask)
        else:
            # Keep only image pidx; idxii/idxjj locate its pixels within
            # the flattened all-image stream.
            mask0 = mask2d[pidx]
            idxii = int(mask2d[:pidx].sum())
            idxjj = int(mask2d[:(pidx + 1)].sum())
            assert idxjj - idxii == mask0.sum()
            print(mask0.shape, mask.sum(), 'saving mask')
            np.savetxt(f'{PRETRAIN}/mask{pidx}.txt', mask0)

    if PREPRO_STREAM:
        # Stage 2: re-bin the normalized data into an encoded stream.
        # NOTE(review): df_norm (and idxii/idxjj when SAVE_ALL is false)
        # exist only if PREPRO_NORM ran in the same invocation — enabling
        # PREPRO_STREAM alone raises NameError. Confirm intended flag
        # combinations.
        print(f'=================ENCODING Base={base} =================')
        stream = process_rebin(df_norm, base, dtype)
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/stream_b{base}.txt', stream)
        else:
            stream0 = stream[idxii:idxjj]
            np.savetxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt', stream0)
    elif PREPRO_HH:
        # Reload the encoded stream(s) produced by a previous run.
        print(f'=================LOADING STREAM =================')
        stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        if not PREDICT_ALL:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')

    if PREPRO_HH:
        # Stage 3: build the heavy-hitter table from the encoded stream.
        assert EXACT == 0
        topk = 20000
        print(f'=================DECODE {ftr_len} DIM =================')
        HH_pd = get_HH_pd(stream,
                          base,
                          ftr_len,
                          dtype,
                          EXACT,
                          topk,
                          r=16,
                          d=1000000,
                          c=None,
                          device=None)
        HH_pd.to_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_UMAP:
        print(f'=================LOADING HH_pd==============')
        HH_pd = pd.read_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv')
        print(HH_pd.head())

    if PREPRO_UMAP:
        # Stage 4: embed the 20k most frequent patterns with UMAP.
        print(f'=================GETTING UMAP =================')
        HH_pdc = HH_pd[:20000]
        print(len(HH_pdc), len(HH_pd), HH_pd['freq'][0])
        print(f'=================LOADING HH_pd==============')
        umapT = get_umap_pd(HH_pdc, list(range(ftr_len)))
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_KMEAN:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')

    if PREPRO_KMEAN:
        # Stage 5: k-means cluster the UMAP embedding; persist the model
        # and the relabelled table.
        print(f'=================KMEAN CLUSTERING =================')
        kmap = get_kmean_lbl(HH_pdc, N_cluster, u1='u1', u2='u2')
        joblib.dump(kmap, f'{PRETRAIN}/kmap_k{N_cluster}e{EXACT}.sav')
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    else:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')

    if PREDICT_ALL:
        # Stage 6a: predict cluster labels for every image.
        print(f'=================PREDICTING ALL {num} LABEL==============')
        if not PREPRO_NORM:
            mask = np.loadtxt(f'{PRETRAIN}/mask_all.txt')
        if not PREPRO_HH:
            stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        pred_k = get_pred_stream(stream,
                                 mask,
                                 HH_pdc,
                                 f'k{N_cluster}',
                                 val='HH',
                                 bg=0,
                                 color=0,
                                 sgn=1)
        pred_k = pred_k.reshape((num, 1004, 1344))
        print(
            f'=================SAVING PREDICTION of ALL {num} LABEL=============='
        )
        # NOTE(review): np.savetxt rejects arrays with more than two
        # dimensions, so saving this (num, 1004, 1344) array would raise —
        # confirm the intended output format.
        np.savetxt(f'{PRETRAIN}/pred_k{N_cluster}e{EXACT}.txt', pred_k)
    else:
        # Stage 6b: predict labels for the single image pidx only.
        print(f'=================PREDICTING id{pidx} LABEL==============')
        if not PREPRO_NORM:
            mask0 = np.loadtxt(f'{PRETRAIN}/mask{pidx}.txt')
        if not PREPRO_HH:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')
        pred_k = get_pred_stream(stream0,
                                 mask0,
                                 HH_pdc,
                                 f'k{N_cluster}',
                                 val='HH',
                                 bg=0,
                                 color=0,
                                 sgn=1)
        pred_k = pred_k.reshape((1004, 1344))
        print(
            f'=================SAVING PREDICTION of id{pidx} LABEL=============='
        )
        np.savetxt(
            f'{PRETRAIN}/pred_k{N_cluster}{pidx}_f{name}b{base}sm1c3sige{EXACT}.txt',
            pred_k)

    if UPLOAD_SCI:
        # Stage 7: push the labelled heavy-hitter table to SciServer.
        # NOTE(review): placeholder credentials — restore the getpass
        # prompt before running this stage.
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=HH_pdc,
            tableName=f'b{base}sm{SMTH}f{name}sig3e{EXACT}_v1',
            context="MyDB")
Example #5
0
def main():
    """Run the enabled stages of the photo/spec heavy-hitter pipeline.

    Module-level flags (PRE_NORM, PRE_HH, PRE_UMAP, MAP_SPEC, UPLOAD_SCI)
    select which stages run; stages communicate through local variables or
    through files saved under PRETRAIN.
    """
    # Ensure the working directory exists; the print preserves the
    # original's behavior when the directory is already there.
    try:
        os.mkdir(PRETRAIN)
    except OSError:
        print('here we go!')
    if PRE_NORM:
        # Stage 1: normalize the photometric and spectroscopic catalogs.
        dfphoto, dfspec, df_lbl = prepro_photo_spec(PHOTO_DATA,
                                                    SPEC_DATA,
                                                    base,
                                                    ftr,
                                                    wpath=PRETRAIN)
    if PRE_HH:
        # Stage 2: encode both catalogs and extract heavy hitters.
        print('=====================ENCODE PHOTO ====================')
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        spec_stream = get_encode_stream(dfspec, base, dtype)
        df_lbl['encode'] = spec_stream
        df_lbl.to_csv(f'{PRETRAIN}/spec_lbl_encode.csv', index=False)
        photo_HH = get_HH_pd(photo_stream,
                             base,
                             ftr_len,
                             dtype,
                             EXACT,
                             topk,
                             r=16,
                             d=1000000,
                             c=None,
                             device=None)
        if not EXACT:
            # The sketch-based estimate returns at most topk rows.
            assert len(photo_HH) <= topk
        else:
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
        # Spectra are counted exactly (EXACT flag forced True here).
        spec_HH = get_HH_pd(spec_stream, base, ftr_len, dtype, True, topk)
        spec_HH.to_csv(f'{PRETRAIN}/spec_HH.csv', index=False)
    elif PRE_UMAP or MAP_SPEC:
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')
        spec_HH = pd.read_csv(f'{PRETRAIN}/spec_HH.csv')
        df_lbl = pd.read_csv(f'{PRETRAIN}/spec_lbl_encode.csv')
    # NOTE(review): photo_HH/spec_HH are unbound unless PRE_HH, PRE_UMAP,
    # or MAP_SPEC is set — these prints raise NameError otherwise.
    print('photo_HH', photo_HH)
    print('spec_HH', spec_HH)

    if PRE_UMAP:
        # Stage 3: fit a UMAP transform on the photo heavy hitters.
        print('=============GETTING UMAP============')
        # Integer column labels come back as strings after a CSV round
        # trip, so fall back to the string feature names.
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except Exception:
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT_b{base}.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # Load the transform saved above. The original hard-coded the
        # 'pretrain/' directory, which diverges from where the file is
        # written whenever PRETRAIN differs; use PRETRAIN consistently.
        photo_uT = joblib.load(f'{PRETRAIN}/photo_uT_b{base}.sav')

    if MAP_SPEC:
        # Stage 4: map the quantized spectra through the photo UMAP.
        if not PRE_NORM:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        # Quantize normalized spectra onto the 0..base-1 integer grid.
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <=
                                                  base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    # Attach labels and persist the mapped spectra.
    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        # Stage 5: upload the photo heavy-hitter table to SciServer.
        # NOTE(review): placeholder credentials — restore the getpass
        # prompt before running this stage.
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")
Example #6
0
 def upload_df(self, df, tablename='mytable'):
     """Upload *df* into the CasJobs table *tablename* (context MyDB).

     Prints a confirmation message when CasJobs reports success;
     returns None either way.
     """
     uploaded = CasJobs.uploadPandasDataFrameToTable(dataFrame=df,
                                                     tableName=tablename,
                                                     context="MyDB")
     if not uploaded:
         return
     print("Table [{}] uploaded successfully.".format(tablename))
Example #7
0
def main():
    """Run the enabled stages of the standardized photo/spec pipeline.

    Module-level flags (PRE_SPEC, PRE_PHOTO_HH, PRE_UMAP, MAP_SPEC,
    UPLOAD_SCI) select which stages run; stages communicate through local
    variables or through files saved under PRETRAIN.
    """
    # Ensure the working directory exists; the print preserves the
    # original's behavior when the directory is already there.
    try:
        os.mkdir(PRETRAIN)
    except OSError:
        print('here we go!')
    if PRE_SPEC:
        # Stage 1: standardize the spectra, keeping the mean/std vectors
        # so the photos can be standardized identically.
        dfspec, vmean, vstd, df_lbl = prepro_std_specs(SPEC_DATA,
                                                       ftr=ftr,
                                                       sig=3.0,
                                                       w=True,
                                                       wpath=PRETRAIN)
    elif PRE_PHOTO_HH:
        # NOTE(review): these load from PRETRAIN_PATH while everything
        # else uses PRETRAIN — confirm the two globals point to the same
        # place.
        vmean = np.loadtxt(f'{PRETRAIN_PATH}/vmean.txt')
        vstd = np.loadtxt(f'{PRETRAIN_PATH}/vstd.txt')

    if PRE_PHOTO_HH:
        # Stage 2: standardize photos with the spec statistics, encode,
        # and extract heavy hitters.
        print('=====================PREPRO PHOTO====================')
        dfphoto = prepro_std_photos(PHOTO_DATA, vmean, vstd, ftr=ftr, sig=3.0)
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        photo_HH = get_HH_pd(photo_stream,
                             base,
                             ftr_len,
                             dtype,
                             EXACT,
                             topk,
                             r=16,
                             d=1000000,
                             c=None,
                             device=None)
        if not EXACT:
            # The sketch-based estimate returns at most topk rows.
            assert len(photo_HH) <= topk
        else:
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif PRE_UMAP:
        # pandas.read_csv has no `columns` keyword, so the original call
        # raised TypeError. Read the full table; the feature columns are
        # selected when the UMAP transform is computed below.
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')

    if PRE_UMAP:
        # Stage 3: fit a UMAP transform on the photo heavy hitters.
        print('=============GETTING UMAP============')
        # Integer column labels come back as strings after a CSV round
        # trip, so fall back to the string feature names.
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except Exception:
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # Load the transform saved above. The original hard-coded the
        # 'pretrain/' directory, which diverges from where the file is
        # written whenever PRETRAIN differs; use PRETRAIN consistently.
        photo_uT = joblib.load(f'{PRETRAIN}/photo_uT.sav')

    if MAP_SPEC:
        # Stage 4: map the quantized spectra through the photo UMAP.
        if not PRE_SPEC:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        # Quantize normalized spectra onto the 0..base-1 integer grid.
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <=
                                                  base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    # Attach labels and persist the mapped spectra.
    # NOTE(review): df_lbl is unbound unless PRE_SPEC ran — confirm the
    # intended flag combinations.
    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        # Stage 5: upload the photo heavy-hitter table to SciServer.
        # NOTE(review): placeholder credentials — restore the getpass
        # prompt before running this stage.
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")