def test_CasJobs_uploadPandasDataFrameToTable_uploadCSVDataToTable(self):
    """Round-trip test: upload a table to CasJobs MyDB from a pandas
    DataFrame and from raw CSV, read each back, and compare.

    The table is dropped between the two uploads and again in ``finally``
    so a failed run does not leave ``CasJobs_TestTableName2`` behind.
    """
    try:
        df = pandas.read_csv(StringIO(CasJobs_TestTableCSV), index_col=None)

        # Upload from a DataFrame, read it back, then drop the table.
        result = CasJobs.uploadPandasDataFrameToTable(
            dataFrame=df, tableName=CasJobs_TestTableName2, context="MyDB")
        table = CasJobs.executeQuery(
            sql="select * from " + CasJobs_TestTableName2,
            context="MyDB", format="pandas")
        result2 = CasJobs.executeQuery(
            sql="DROP TABLE " + CasJobs_TestTableName2,
            context="MyDB", format="csv")
        self.assertEqual(result, True)
        # assertItemsEqual is Python-2-only; assertCountEqual is the
        # Python 3 name for the same order-insensitive comparison.
        self.assertCountEqual(table, df)

        # Upload the same data as raw CSV text and verify it matches.
        result = CasJobs.uploadCSVDataToTable(
            csvData=CasJobs_TestTableCSV,
            tableName=CasJobs_TestTableName2, context="MyDB")
        df2 = CasJobs.executeQuery(
            sql="select * from " + CasJobs_TestTableName2,
            context="MyDB", format="pandas")
        result2 = CasJobs.executeQuery(
            sql="DROP TABLE " + CasJobs_TestTableName2,
            context="MyDB", format="csv")
        self.assertEqual(result, True)
        self.assertCountEqual(df, df2)
    finally:
        # Best-effort cleanup: the table may already have been dropped,
        # in which case the DROP fails and we deliberately ignore it.
        try:
            csv = CasJobs.executeQuery(
                sql="DROP TABLE " + CasJobs_TestTableName2,
                context="MyDB", format="csv")
        except Exception:
            pass
def testUploadDataFrame(df, token, tablename):
    """Upload *df* to a CasJobs table named *tablename* using *token*.

    Returns the CasJobs upload result so callers can check success;
    the original discarded it (always returning ``None``), which made the
    helper impossible to verify. Returning the value is backward-compatible.
    """
    response = CasJobs.uploadPandasDataFrameToTable(df, tablename, token=token)
    return response
# In[ ]: # get numpy array containing the results of a query array = CasJobs.getNumpyArrayFromQuery(queryString=CasJobs_TestQuery, context=CasJobs_TestDatabase) print(array) # In[ ]: #uploads a Pandas dataframe into a Database table df = pandas.read_csv(StringIO(CasJobs_TestTableCSV), index_col=None) result = CasJobs.uploadPandasDataFrameToTable(dataFrame=df, tableName=CasJobs_TestTableName2, context="MyDB") table = CasJobs.executeQuery(sql="select * from " + CasJobs_TestTableName2, context="MyDB", format="pandas") print(result) print(table) # In[ ]: # drop or delete table just created: result2 = CasJobs.executeQuery(sql="DROP TABLE " + CasJobs_TestTableName2, context=CasJobs_TestDatabase, format="pandas") print(result2) # In[ ]:
def main():
    """Flag-driven image-segmentation pipeline: normalize/PCA the raw data,
    encode it to an integer stream, find heavy hitters (HH), UMAP-embed and
    k-means-cluster them, predict per-pixel labels, and optionally upload
    the HH table to SciServer.

    Each stage is gated by a module-level flag (PREPRO_NORM, PREPRO_STREAM,
    PREPRO_HH, PREPRO_UMAP, PREPRO_KMEAN, PREDICT_ALL, UPLOAD_SCI); when a
    stage is skipped, its inputs are reloaded from files under PRETRAIN.
    """
    try:
        os.mkdir(PRETRAIN)
    except OSError:
        # Directory already exists (or is not creatable). The original used a
        # bare `except:` whose handler was the no-op string 'lets GO'.
        pass
    print(PREPRO_CUTOFF, PREPRO_NORM, PREPRO_STREAM, PREPRO_HH, PREPRO_UMAP,
          PREPRO_KMEAN, SAVE_ALL, PREDICT_ALL, UPLOAD_SCI)

    if PREPRO_NORM:
        print(
            f'=================LOADING N={num} Smoothing {ISSMTH} ================='
        )
        data1Ds, pc = process_dataset_pc(data_dir, num, pca_comp, ISSMTH, SMTH, TEST)
        intensity, pca_results = process_pca(data1Ds, pc, num)
        # df_pca=pd.DataFrame(pca_results, columns=list(range(pca_comp)))
        df_norm, mask, ftr_len0 = process_intensity(pca_results, intensity,
                                                    pca_comp, PREPRO_CUTOFF,
                                                    ONPCA, ONINT,
                                                    r=0.01, wdir=PRETRAIN)
        assert ftr_len0 == ftr_len
        # One row per image; 1004*1344 appears to be the per-image pixel
        # count — TODO confirm against the data source.
        mask2d = mask.reshape((num, 1004 * 1344))
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/mask_all.txt', mask)
        else:
            # Save only image `pidx`; idxii/idxjj are the cumulative masked
            # pixel counts delimiting that image's rows in the flat stream.
            mask0 = mask2d[pidx]
            idxii = int(mask2d[:pidx].sum())
            idxjj = int(mask2d[:(pidx + 1)].sum())
            assert idxjj - idxii == mask0.sum()
            print(mask0.shape, mask.sum(), 'saving mask')
            np.savetxt(f'{PRETRAIN}/mask{pidx}.txt', mask0)
        # df_norm.to_csv(f'{PRETRAIN}/df_norm.csv',index=False)
        # df_normt=df_norm[idxii:idxjj]
        # df_normt.to_csv(f'{PRETRAIN}/df_norm{pidx}.csv',index=False)
    # elif PREPRO_STREAM:
    #     print(f'=================LOADING df_norm =================')
    #     df_norm=pd.read_csv(f'{PRETRAIN}/df_norm.csv')

    if PREPRO_STREAM:
        # Encode the normalized features into one integer per pixel, base `base`.
        print(f'=================ENCODING Base={base} =================')
        stream = process_rebin(df_norm, base, dtype)
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/stream_b{base}.txt', stream)
        else:
            stream0 = stream[idxii:idxjj]
            np.savetxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt', stream0)
    elif PREPRO_HH:
        print(f'=================LOADING STREAM =================')
        stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        if not PREDICT_ALL:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')

    if PREPRO_HH:
        # Heavy-hitter extraction over the encoded stream.
        assert EXACT == 0
        topk = 20000
        print(f'=================DECODE {ftr_len} DIM =================')
        HH_pd = get_HH_pd(stream, base, ftr_len, dtype, EXACT, topk,
                          r=16, d=1000000, c=None, device=None)
        HH_pd.to_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_UMAP:
        print(f'=================LOADING HH_pd==============')
        HH_pd = pd.read_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv')
        print(HH_pd.head())

    if PREPRO_UMAP:
        print(f'=================GETTING UMAP =================')
        # # lb,ub=int(HH_pd['freq'][0]*lbr),int(HH_pd['freq'][0])
        # HH_pdc=HH_pd[HH_pd['freq']>lb]
        # # print(len(HH_pdc),len(HH_pd),HH_pd['freq'][0],'lb',lb,'HHratio',lbr)
        # if len(HH_pdc)>20000:
        HH_pdc = HH_pd[:20000]
        print(len(HH_pdc), len(HH_pd), HH_pd['freq'][0])
        print(f'=================LOADING HH_pd==============')
        umapT = get_umap_pd(HH_pdc, list(range(ftr_len)))
        # print(HH_pdc.keys())
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_KMEAN:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')

    if PREPRO_KMEAN:
        print(f'=================KMEAN CLUSTERING =================')
        kmap = get_kmean_lbl(HH_pdc, N_cluster, u1='u1', u2='u2')
        joblib.dump(kmap, f'{PRETRAIN}/kmap_k{N_cluster}e{EXACT}.sav')
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    else:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')

    if PREDICT_ALL:
        print(f'=================PREDICTING ALL {num} LABEL==============')
        if not PREPRO_NORM:
            mask = np.loadtxt(f'{PRETRAIN}/mask_all.txt')
        if not PREPRO_HH:
            stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        pred_k = get_pred_stream(stream, mask, HH_pdc, f'k{N_cluster}',
                                 val='HH', bg=0, color=0, sgn=1)
        pred_k = pred_k.reshape((num, 1004, 1344))
        print(
            f'=================SAVING PREDICTION of ALL {num} LABEL=============='
        )
        # NOTE(review): np.savetxt only supports 1-D/2-D arrays; pred_k is
        # 3-D here, so this line likely raises — confirm and reshape if so.
        np.savetxt(f'{PRETRAIN}/pred_k{N_cluster}e{EXACT}.txt', pred_k)
    else:
        print(f'=================PREDICTING id{pidx} LABEL==============')
        if not PREPRO_NORM:
            mask0 = np.loadtxt(f'{PRETRAIN}/mask{pidx}.txt')
        if not PREPRO_HH:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')
        pred_k = get_pred_stream(stream0, mask0, HH_pdc, f'k{N_cluster}',
                                 val='HH', bg=0, color=0, sgn=1)
        pred_k = pred_k.reshape((1004, 1344))
        print(
            f'=================SAVING PREDICTION of id{pidx} LABEL=============='
        )
        np.savetxt(
            f'{PRETRAIN}/pred_k{N_cluster}{pidx}_f{name}b{base}sm1c3sige{EXACT}.txt',
            pred_k)

    if UPLOAD_SCI:
        # NOTE(review): hardcoded credential placeholders — replace with the
        # commented-out getpass prompt (or env vars) before real use.
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=HH_pdc,
            tableName=f'b{base}sm{SMTH}f{name}sig3e{EXACT}_v1',
            context="MyDB")
def main():
    """Flag-driven photo/spec pipeline: normalize photometric and spectral
    data, encode to integer streams, extract heavy hitters (HH), UMAP-embed
    the photo HH table, map the spectra into that embedding, and optionally
    upload the photo HH table to SciServer.

    Stages are gated by module-level flags (PRE_NORM, PRE_HH, PRE_UMAP,
    MAP_SPEC, UPLOAD_SCI); skipped stages reload their inputs from files
    under PRETRAIN.
    """
    try:
        os.mkdir(PRETRAIN)
    except OSError:
        # Directory already exists (or cannot be created); the original
        # bare `except:` swallowed everything — narrowed to OSError.
        print('here we go!')

    if PRE_NORM:
        dfphoto, dfspec, df_lbl = prepro_photo_spec(PHOTO_DATA, SPEC_DATA,
                                                    base, ftr, wpath=PRETRAIN)

    if PRE_HH:
        print('=====================ENCODE PHOTO ====================')
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        spec_stream = get_encode_stream(dfspec, base, dtype)
        # np.savetxt(f'{PRETRAIN}/photo_stream.txt',photo_stream)
        # np.savetxt(f'{PRETRAIN}/spec_stream.txt',spec_stream)
        df_lbl['encode'] = spec_stream
        df_lbl.to_csv(f'{PRETRAIN}/spec_lbl_encode.csv', index=False)
        photo_HH = get_HH_pd(photo_stream, base, ftr_len, dtype, EXACT, topk,
                             r=16, d=1000000, c=None, device=None)
        # Approximate sketch caps its output at topk; the exact count does
        # not, so truncate it to keep the table size comparable.
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
        spec_HH = get_HH_pd(spec_stream, base, ftr_len, dtype, True, topk)
        spec_HH.to_csv(f'{PRETRAIN}/spec_HH.csv', index=False)
    elif PRE_UMAP or MAP_SPEC:
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')
        spec_HH = pd.read_csv(f'{PRETRAIN}/spec_HH.csv')
        df_lbl = pd.read_csv(f'{PRETRAIN}/spec_lbl_encode.csv')
        print('photo_HH', photo_HH)
        print('spec_HH', spec_HH)

    if PRE_UMAP:
        print('=============GETTING UMAP============')
        # Integer column labels when the CSV round-trip preserved them,
        # otherwise fall back to the string feature names.
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except Exception:
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT_b{base}.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # NOTE(review): loads from literal 'pretrain/' rather than
        # f'{PRETRAIN}/' used everywhere else — confirm this is intended.
        photo_uT = joblib.load(f'pretrain/photo_uT_b{base}.sav')

    if MAP_SPEC:
        if not PRE_NORM:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        # Quantize normalized spectra onto the same base-`base` grid.
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <= base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        # NOTE(review): hardcoded credential placeholders — replace with the
        # commented-out getpass prompt (or env vars) before real use.
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")
def upload_df(self, df, tablename='mytable'):
    """Upload *df* to the CasJobs MyDB context as table *tablename*.

    Prints a confirmation message when the upload reports success.
    """
    uploaded = CasJobs.uploadPandasDataFrameToTable(dataFrame=df,
                                                    tableName=tablename,
                                                    context="MyDB")
    if not uploaded:
        return
    print("Table [{}] uploaded successfully.".format(tablename))
def main():
    """Flag-driven pipeline variant using sigma-clipped standardization:
    preprocess spectra, standardize photos with the spectra's mean/std,
    heavy-hitter (HH) the photo stream, UMAP-embed the HH table, map the
    spectra into that embedding, and optionally upload to SciServer.

    Stages are gated by module-level flags (PRE_SPEC, PRE_PHOTO_HH,
    PRE_UMAP, MAP_SPEC, UPLOAD_SCI); skipped stages reload inputs from
    files under PRETRAIN.
    """
    try:
        os.mkdir(PRETRAIN)
    except OSError:
        # Directory already exists (or cannot be created); the original
        # bare `except:` swallowed everything — narrowed to OSError.
        print('here we go!')

    if PRE_SPEC:
        dfspec, vmean, vstd, df_lbl = prepro_std_specs(SPEC_DATA, ftr=ftr,
                                                       sig=3.0, w=True,
                                                       wpath=PRETRAIN)
    elif PRE_PHOTO_HH:
        # NOTE(review): reads from PRETRAIN_PATH while everything else uses
        # PRETRAIN — confirm these constants point at the same directory.
        vmean = np.loadtxt(f'{PRETRAIN_PATH}/vmean.txt')
        vstd = np.loadtxt(f'{PRETRAIN_PATH}/vstd.txt')

    if PRE_PHOTO_HH:
        print('=====================PREPRO PHOTO====================')
        dfphoto = prepro_std_photos(PHOTO_DATA, vmean, vstd, ftr=ftr, sig=3.0)
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        photo_HH = get_HH_pd(photo_stream, base, ftr_len, dtype, EXACT, topk,
                             r=16, d=1000000, c=None, device=None)
        # Approximate sketch caps its output at topk; the exact count does
        # not, so truncate it to keep the table size comparable.
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif PRE_UMAP:
        # BUG FIX: pandas.read_csv has no `columns` keyword — the original
        # `pd.read_csv(..., columns=list(range(ftr_len)))` raises TypeError.
        # Read the full table; get_umap_pd below selects the feature columns.
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')

    if PRE_UMAP:
        print('=============GETTING UMAP============')
        # Integer column labels when the CSV round-trip preserved them,
        # otherwise fall back to the string feature names.
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except Exception:
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # NOTE(review): loads from literal 'pretrain/' rather than
        # f'{PRETRAIN}/' used everywhere else — confirm this is intended.
        photo_uT = joblib.load(f'pretrain/photo_uT.sav')

    if MAP_SPEC:
        if not PRE_SPEC:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        # Quantize normalized spectra onto the same base-`base` grid.
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <= base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')

    # NOTE(review): df_lbl is only defined when PRE_SPEC is set — running
    # with MAP_SPEC but without PRE_SPEC would raise NameError here; verify
    # the intended flag combinations.
    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)

    if UPLOAD_SCI:
        # NOTE(review): hardcoded credential placeholders — replace with the
        # commented-out getpass prompt (or env vars) before real use.
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")