def retrieve_sdss_photometry(ra: float, dec: float):
    """Fetch SDSS DR16 photometry in a 0.2 x 0.2 degree box centred on (ra, dec).

    The half-width is 0.1 deg on each axis; the RA width is not corrected for
    spherical distortion.

    :param ra: Right Ascension of the centre of the desired field, in degrees.
    :param dec: Declination of the centre of the desired field, in degrees.
    :return: pandas DataFrame of photometry if rows were found; otherwise None
        (also None when SciServer is not importable).
    """
    try:
        from SciServer import Authentication, CasJobs
    except ImportError:
        # SciServer is an optional dependency; point the user at the manual tool.
        print("It seems that SciScript/SciServer is not installed, or not accessible to this environment. "
              "\nIf you wish to automatically download SDSS data, please install "
              "\nSciScript (https://github.com/sciserver/SciScript-Python); "
              "\notherwise, retrieve the data manually from "
              "\nhttp://skyserver.sdss.org/dr16/en/tools/search/sql.aspx")
        return None

    print(f"Querying SDSS DR16 archive for field centring on RA={ra}, DEC={dec}")

    # Credentials come from the module-level `keys` mapping.
    Authentication.login(UserName=keys['sciserver_user'], Password=keys["sciserver_pwd"])

    # Column list: objid/ra/dec plus eight magnitude columns per SDSS filter.
    filter_columns = [
        f",psfMag_{f},psfMagErr_{f},fiberMag_{f},fiberMagErr_{f},"
        f"fiber2Mag_{f},fiber2MagErr_{f},petroMag_{f},petroMagErr_{f} "
        for f in sdss_filters
    ]
    query = "".join(
        ["SELECT objid,ra,dec"]
        + filter_columns
        + ["FROM PhotoObj ",
           f"WHERE ra BETWEEN {ra - 0.1} AND {ra + 0.1} ",
           f"AND dec BETWEEN {dec - 0.1} AND {dec + 0.1} "])

    print(f"Retrieving photometry from SDSS DR16 via SciServer for field at {ra}, {dec}...")
    df = CasJobs.executeQuery(sql=query, context='DR16')
    # Callers expect None, not an empty frame, when nothing was found.
    return None if len(df.index) == 0 else df
def login(username, password):
    """Log in to SciServer and print the authenticated user's details.

    :param username: SciServer account name.
    :param password: SciServer account password.
    """
    token1 = Authentication.login(username, password)
    user = Authentication.getKeystoneUserWithToken(token1)
    # The original print here was mangled by a credential scrubber
    # (`print("userName="******"id=" + user.id)`) and did not parse;
    # reconstructed as two prints matching the surrounding print style.
    print("userName=" + user.userName)
    print("id=" + user.id)
    iden = Authentication.identArgIdentifier()
    print("ident=" + iden)
def test_Authentication_allMethods(self):
    """Exercise every Authentication accessor in one pass: log in, read the
    token through all four channels, resolve the user, then overwrite the
    stored tokens and confirm the setters took effect."""
    replacement_token = "myToken1"
    replacement_keystone = "myToken2"
    primary = Authentication.login(Authentication_loginName, Authentication_loginPassword)
    via_getter = Authentication.getToken()
    via_keystone = Authentication.getKeystoneToken()
    via_attribute = Authentication.token.value
    account = Authentication.getKeystoneUserWithToken(primary)
    ident_flag = Authentication.identArgIdentifier()
    self.assertEqual(ident_flag, "--ident=")
    self.assertNotEqual(primary, "")
    self.assertIsNot(primary, None)
    # Every retrieval channel must hand back the same session token.
    for alternate in (via_getter, via_keystone, via_attribute):
        self.assertEqual(primary, alternate)
    self.assertEqual(account.userName, Authentication_loginName)
    self.assertIsNot(account.id, None)
    self.assertNotEqual(account.id, "")
    Authentication.setToken(replacement_token)
    self.assertEqual(replacement_token, Authentication.getToken())
    Authentication.setKeystoneToken(replacement_keystone)
    self.assertEqual(replacement_keystone, Authentication.getKeystoneToken())
def __setupserver(self):
    """Authenticate this instance's credentials against SciServer and probe
    the token/identity helpers to confirm the session is usable."""
    print("Setting up SkyServer...")
    login_name = self.username
    login_password = self.password
    token_from_login = Authentication.login(login_name, login_password)
    token_from_getter = Authentication.getToken()
    token_from_keystone = Authentication.getKeystoneToken()
    token_from_attribute = Authentication.token.value
    account = Authentication.getKeystoneUserWithToken(token_from_login)
    ident_flag = Authentication.identArgIdentifier()
def _authenticate(username, password):
    """Log *username* in to SciServer and return the session token.

    Parameters
    ----------
    username : str
        SciServer username.
    password : str
        SciServer password.

    Returns
    -------
    token : str
        Authentication token from SciServer.
    """
    token = Authentication.login(username, password)
    return token
def do(self,user='******',password='******',search=1,path_to_model='YSE_App/data_ingest/YSE_DNN_photoZ_model_315.hdf5'):
    """ Predicts photometric redshifts from RA and DEC points in SDSS

    An outline of the algorithem is:
    first pull from SDSS u,g,r,i,z magnitudes from SDSS; should be able to handle a list/array of RA and DEC
    place u,g,r,i,z into a vector, append the derived information into the data array
    predict the information from the model
    return the predictions in the same order to the user

    inputs:
        Ra: list or array of len N, right ascensions of target galaxies in decimal degrees
        Dec: list or array of len N, declination of target galaxies in decimal degrees
        search: float, arcmin tolerance to search for the object in SDSS Catalogue
        path_to_model: str, filepath to saved model for prediction

    Returns:
        predictions: array of len N, photometric redshift of input galaxy
    """
    try:
        # Timestamp used only to report elapsed time at the end (minus one day).
        nowdate = datetime.datetime.utcnow() - datetime.timedelta(1)
        from django.db.models import Q #HAS To Remain Here, I dunno why
        print('Entered the photo_z cron') #save time b/c the other cron jobs print a time for completion
        # Only transients that have a host but no photo-z yet.
        transients = (Transient.objects.filter(Q(host__photo_z__isnull=True) & Q(host__isnull=False)))
        #print('Number of test transients:', len(transients))
        RA=[] #Needs to be list b/c don't know how many hosts are None
        DEC=[]
        outer_mask = [] #create an integer index mask that we will place values into because some hosts dont have a RA and DEC assigned
        transients_withhost = []  # NOTE(review): never used below
        for i,transient_obj in enumerate(transients):
            if transient_obj.host != None:
                RA.append(transient_obj.host.ra)
                DEC.append(transient_obj.host.dec)
                outer_mask.append(i) #provides integer index mask
        outer_mask = np.array(outer_mask) #make that an array
        N_outer = len(transients) #gives size of returned array
        Ra = np.array(RA)
        Dec = np.array(DEC)
        N = len(Ra)#gives size of query array
        Q = N//1000#decompose the length of transients needing classification
        if N%1000 != 0: Q=Q+1 #catch remainder and start a new batch
        total_job = [] #store all pandas job dataframes
        for j in range(Q): #iterate over batches
            if j == (Q-1):
                Ra_batch = Ra[j*1000:((j+1)*1000 + N%1000)] #grab batch remainder
                Dec_batch = Dec[j*1000:((j+1)*1000 + N%1000)]
            else:
                Ra_batch = Ra[j*1000:(j+1)*1000] #other wise grab batch of 1000
                Dec_batch = Dec[j*1000:(j+1)*1000]
            hold=[] #a list for holding the strings that I want to place into an sql query
            for val in range(len(Ra_batch)):
                # NOTE(review): indexes the full Ra/Dec arrays rather than
                # Ra_batch/Dec_batch, so batches after the first appear to
                # re-upload the first rows — looks like a bug; confirm.
                string = '({},{},{}),|'.format(str(val),str(Ra[val]),str(Dec[val]))
                hold.append(string)
            #Now construct the full query ('|' is a placeholder turned into newlines below)
            sql = "CREATE TABLE #UPLOAD(|id INT PRIMARY KEY,|up_ra FLOAT,|up_dec FLOAT|)|INSERT INTO #UPLOAD| VALUES|"
            for data in hold:
                sql = sql + data
            #there is a comma that needs to be deleted from the last entry for syntax to work
            sql = sql[0:(len(sql)-2)] + '|'
            #append the rest to it
            sql = sql + "SELECT|p.u,p.g,p.r,p.i,p.z,p.extinction_u,p.extinction_g,p.extinction_r,p.extinction_i,p.extinction_z,p.petroRad_u,p.petroRad_g,p.petroRad_r,p.petroRad_i,p.petroRad_z,p.petroR50_r,p.petroR90_r,zi.e_bv_sfd|FROM #UPLOAD as U|OUTER APPLY dbo.fGetNearestObjEq((U.up_ra),(U.up_dec),{}) as N|LEFT JOIN PhotoObjAll AS p ON N.objid=p.objID|JOIN SpecObjAll za on (p.objID = za.bestObjID)|JOIN galSpecInfo zi ON (zi.SpecObjID = za.specObjID)".format(str(search))
            #change all | to new line: when we change to Unix system will need to change this new line
            sql = sql.replace('|','\n')
            #login, change to some other credentials later
            # NOTE(review): hard-coded credentials checked into source; move to config/secrets.
            Authentication.login('awe2','StandardPassword')
            job = CasJobs.executeQuery(sql,'DR12','pandas') #this line sends and retrieves the result
            print('Query {} of {} complete'.format(j+1,Q))
            # De-redden the magnitudes and derive the colours used as model features.
            job['dered_u'] = job['u'].values - job['extinction_u'].values
            job['dered_g'] = job['g'].values - job['extinction_g'].values
            job['dered_r'] = job['r'].values - job['extinction_r'].values
            job['dered_i'] = job['i'].values - job['extinction_i'].values
            job['dered_z'] = job['z'].values - job['extinction_z'].values
            job['u-g']= job['dered_u'].values - job['dered_g'].values
            job['g-r']= job['dered_g'].values - job['dered_r'].values
            job['r-i']= job['dered_r'].values - job['dered_i'].values
            job['i-z']= job['dered_i'].values - job['dered_z'].values
            job['u_over_z']= job['dered_u'].values / job['dered_z'].values
            # Concentration index: ratio of 90% to 50% Petrosian radii.
            job['C'] = job['petroR90_r'].values/job['petroR50_r'].values
            total_job.append(job)
        print('left the query loop')
        query_result = pd.concat(total_job)
        #now feed to a RF model for prediction
        X = query_result[['dered_u','dered_g','dered_r','dered_i','dered_z','u-g','g-r','r-i','i-z','u_over_z','petroRad_u','petroRad_g','petroRad_r','petroRad_i','petroRad_z','petroR50_r','petroR90_r','C','e_bv_sfd']].values
        print(X.shape)
        #define and load in the model
        def create_model(learning_rate):
            # 19 input features in, 315 softmax bins out.
            model = keras.Sequential([])
            model.add(keras.layers.Dense(input_shape=(19,),units=19,activation=keras.activations.linear)) #tried relu
            #model.add(keras.layers.Dropout(rate=0.1))
            model.add(keras.layers.Dense(units=19,activation=tf.nn.relu))
            #model.add(keras.layers.Dropout(rate=0.1))
            model.add(keras.layers.Dense(units=19,activation=tf.nn.relu))
            #model.add(keras.layers.Dropout(rate=0.1))
            model.add(keras.layers.Dense(units=19,activation=tf.nn.relu)) #tf.nn.relu
            #model.add(keras.layers.Dropout(rate=0.1))
            model.add(keras.layers.Dense(units=315,activation=keras.activations.softmax))
            #RMS = keras.optimizers.RMSprop(learning_rate=learning_rate)
            adam = keras.optimizers.Adam(lr=learning_rate)
            model.compile(optimizer=adam, loss='categorical_crossentropy')
            return(model)
        keras.backend.clear_session()
        model = create_model(learning_rate = 1e-3)#couldve been anything for this, just gonna predict
        model.load_weights(path_to_model)
        #Need to deal with NANs now since many objects are outside the SDSS footprint, later models will learn to deal with this
        #ideas: need to retain a mask of where the nans are in the row
        mask = np.invert((query_result.isna().any(1).values)) #true was inside SDSS footprint
        #also will want this mask in indices so we can insert the predicted data correctly
        indices=[]
        for i,val in enumerate(mask):
            if val == True:
                indices.append(i)
        #predict on data that is not NAN
        predictions = model.predict(X[mask,:], verbose=2)
        #make nan array with size of what user asked for
        return_me = np.ones(N)*np.nan
        #now replace nan with the predictions in order
        # NOTE(review): predict() on a 315-way softmax head returns an (n, 315)
        # array; assigning it into 1-D slots looks shape-mismatched — confirm
        # whether an argmax / expectation step is missing here.
        return_me[indices] = predictions
        return_me_outer = np.ones(N_outer) * np.nan
        return_me_outer[outer_mask] = return_me
        print('time taken:', datetime.datetime.utcnow() - nowdate)
        print('uploading now')
        tz,mpz = [],[]
        for t,pz in zip(transients,return_me):
            if pz != pz: continue  # NaN != NaN: skip hosts with no prediction
            host = t.host
            #import pdb; pdb.set_trace()
            host.photo_z = pz
            host.save()
            tz += [host.redshift]
            mpz += [pz]
        plt.plot(tz,mpz,'.')
        plt.savefig('test.png')
        print('time taken with upload:', datetime.datetime.utcnow() - nowdate)
    except Exception as e:
        # Broad catch so a cron failure is logged rather than crashing the scheduler.
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print("""Photo-z cron failed with error %s at line number %s"""%(e,exc_tb.tb_lineno))
class TestFileService(unittest.TestCase):
    """Live integration tests for the SciServer Files service."""

    # Logging in at class-definition time makes the session token available
    # to every test method below.
    token1 = Authentication.login(Authentication_loginName, Authentication_loginPassword)

    def setUp(self):
        pass

    # *******************************************************************************************************
    # Files section

    def test_Files_getFileServices(self):
        """At least one file service is advertised."""
        fileServices = Files.getFileServices()
        self.assertTrue(fileServices.__len__() > 0)

    # NOTE(review): this method is shadowed by the identically-named method
    # below and never runs under unittest; one of the two should be renamed.
    def test_Files_getFileServicesNames(self):
        fileServiceNames = Files.getFileServicesNames()
        self.assertTrue(fileServiceNames.__len__() > 0)

    def test_Files_getFileServicesNames(self):
        """The configured file service name appears in the advertised list."""
        fileServiceNames = Files.getFileServicesNames()
        found = False
        for fileService in fileServiceNames:
            if fileService.get('name') == Files_FileServiceName:
                found = True
        self.assertTrue(found)

    def test_Files_getFileServiceFromName(self):
        """Lookup by name returns the matching service descriptor."""
        fileService = Files.getFileServiceFromName(Files_FileServiceName)
        self.assertTrue(fileService.get('name') == Files_FileServiceName)

    def test_Files_getRootVolumesInfo(self):
        """Both configured root volumes are listed for the service."""
        fileService = Files.getFileServiceFromName(Files_FileServiceName)
        rootVolumes = Files.getRootVolumesInfo(fileService)
        self.assertTrue(rootVolumes.__len__() > 0)
        found = False
        for rootVolume in rootVolumes:
            if rootVolume.get('rootVolumeName') == Files_RootVolumeName1:
                found = True
        self.assertTrue(found)
        found = False
        for rootVolume in rootVolumes:
            if rootVolume.get('rootVolumeName') == Files_RootVolumeName2:
                found = True
        self.assertTrue(found)

    def test_Files_getUserVolumesInfo(self):
        """The service reports at least one user volume."""
        fileService = Files.getFileServiceFromName(Files_FileServiceName)
        userVolumesInfo = Files.getUserVolumesInfo(fileService)
        self.assertTrue(userVolumesInfo.__len__() > 0)

    def test_Files_createUserVolume_deleteUserVolume(self):
        """A user volume can be created and immediately deleted."""
        fileService = Files.getFileServiceFromName(Files_FileServiceName)
        Files.createUserVolume(fileService, "/".join([
            Files_RootVolumeName1, Authentication_loginName,
            Files_UserVolumeName1
        ]), quiet=False)
        Files.deleteUserVolume(fileService, "/".join([
            Files_RootVolumeName1, Authentication_loginName,
            Files_UserVolumeName1
        ]), quiet=False)

    def test_Files_createDir_upload_dirList_download_download_shareUserVolume(
            self):
        """End-to-end round trip: create volumes/dirs, upload, list, download,
        delete, move, share with a second account, and clean up."""
        # Best-effort cleanup of leftovers from previous runs.
        try:
            fileService = Files.getFileServiceFromName(Files_FileServiceName)
            os.remove(Files_LocalFileName)
            Files.deleteUserVolume(fileService, Files_RootVolumeName1,
                                   Files_UserVolumeName1, quiet=True)
            Files.deleteUserVolume(fileService, Files_RootVolumeName1,
                                   Files_UserVolumeName2, quiet=True)
        except:
            pass
        try:
            fileService = Files.getFileServiceFromName(Files_FileServiceName)
            Files.createUserVolume(fileService, "/".join([
                Files_RootVolumeName1, Authentication_loginName,
                Files_UserVolumeName1
            ]), quiet=False)
            # NOTE(review): volume 2 is created under root volume 1, but the
            # directory below is created under root volume 2 — confirm intended.
            Files.createUserVolume(fileService, "/".join([
                Files_RootVolumeName1, Authentication_loginName,
                Files_UserVolumeName2
            ]), quiet=False)
            Files.createDir(
                fileService, "/".join([
                    Files_RootVolumeName1, Authentication_loginName,
                    Files_UserVolumeName1, Files_NewDirectoryName1
                ]))
            Files.createDir(
                fileService, "/".join([
                    Files_RootVolumeName2, Authentication_loginName,
                    Files_UserVolumeName2, Files_NewDirectoryName2
                ]))
            dirList = Files.dirList(fileService, "/".join([
                Files_RootVolumeName1, Authentication_loginName,
                Files_UserVolumeName1, Files_NewDirectoryName1
            ]), level=2)
            self.assertTrue(
                dirList.get('root').get('name') == Files_NewDirectoryName1)
            # Upload from an in-memory string, then verify it is listed.
            Files.upload(fileService, "/".join([
                Files_RootVolumeName1, Authentication_loginName,
                Files_UserVolumeName1, Files_NewDirectoryName1,
                Files_LocalFileName
            ]), data=Files_LocalFileContent)
            dirList = Files.dirList(fileService, "/".join([
                Files_RootVolumeName1, Authentication_loginName,
                Files_UserVolumeName1, Files_NewDirectoryName1
            ]), level=2)
            self.assertTrue(
                dirList.get('root').get('files')[0].get('name') ==
                Files_LocalFileName)
            # Download to a local file and compare contents byte-for-byte.
            Files.download(fileService, "/".join([
                Files_RootVolumeName1, Authentication_loginName,
                Files_UserVolumeName1, Files_NewDirectoryName1,
                Files_LocalFileName
            ]), localFilePath=Files_LocalFileName)
            with open(Files_LocalFileName, 'r') as myfile:
                downloadedFileContent = myfile.read()
            assert (downloadedFileContent == Files_LocalFileContent)
            Files.delete(
                fileService, "/".join([
                    Files_RootVolumeName1, Authentication_loginName,
                    Files_UserVolumeName1, Files_NewDirectoryName1,
                    Files_LocalFileName
                ]))
            dirList = Files.dirList(fileService, "/".join([
                Files_RootVolumeName1, Authentication_loginName,
                Files_UserVolumeName1, Files_NewDirectoryName1
            ]), level=2)
            self.assertIsNone(dirList.get('root').get('files'))
            # Re-upload from disk, then move it across volumes.
            Files.upload(fileService, "/".join([
                Files_RootVolumeName1, Authentication_loginName,
                Files_UserVolumeName1, Files_NewDirectoryName1,
                Files_LocalFileName
            ]), localFilePath=Files_LocalFileName, quiet=False)
            Files.move(
                fileService, "/".join([
                    Files_RootVolumeName1, Authentication_loginName,
                    Files_UserVolumeName1, Files_NewDirectoryName1,
                    Files_LocalFileName
                ]), fileService, "/".join([
                    Files_RootVolumeName2, Authentication_loginName,
                    Files_UserVolumeName2, Files_NewDirectoryName2,
                    Files_LocalFileName
                ]))
            # Share read access with a second account and verify it can read.
            Files.shareUserVolume(
                fileService, "/".join([
                    Files_RootVolumeName2, Authentication_loginName,
                    Files_UserVolumeName2
                ]),
                sharedWith=Authentication_login_sharedWithName,
                type="USER",
                allowedActions=["read"])
            token1 = Authentication.login(
                Authentication_login_sharedWithName,
                Authentication_login_sharedWithPassword)
            string = Files.download(fileService, "/".join([
                Files_RootVolumeName2, Authentication_loginName,
                Files_UserVolumeName2, Files_NewDirectoryName2,
                Files_LocalFileName
            ]), format="txt")
            # NOTE(review): assertTrue(x, msg) only checks truthiness of x;
            # assertEqual was probably intended here.
            self.assertTrue(string, Files_LocalFileContent)
            # Log back in as the primary account for the remaining tests.
            token1 = Authentication.login(Authentication_loginName,
                                          Authentication_loginPassword)
        finally:
            # Best-effort teardown; failures here must not mask test results.
            try:
                os.remove(Files_LocalFileName)
                Files.deleteUserVolume(fileService, "/".join([
                    Files_RootVolumeName1, Authentication_loginName,
                    Files_UserVolumeName1
                ]), quiet=True)
                Files.deleteUserVolume(fileService, "/".join([
                    Files_RootVolumeName1, Authentication_loginName,
                    Files_UserVolumeName2
                ]), quiet=True)
            except:
                pass
class TestSciDrive(unittest.TestCase):
    """Live integration tests for the SciDrive storage service."""

    # Logging in at class-definition time makes the session token available
    # to every test method below.
    token1 = Authentication.login(Authentication_loginName, Authentication_loginPassword)

    def setUp(self):
        pass

    # *******************************************************************************************************
    # SciDrive section:

    def test_SciDrive_createContainer_directoryList_delete(self):
        """A container can be created, listed, and deleted."""
        # Best-effort removal of leftovers from previous runs.
        try:
            responseDelete = SciDrive.delete(SciDrive_Directory)
        except:
            pass
        try:
            responseCreate = SciDrive.createContainer(SciDrive_Directory)
            self.assertEqual(responseCreate, True)
            dirList = SciDrive.directoryList(SciDrive_Directory)
            self.assertTrue(dirList["path"].__contains__(SciDrive_Directory))
        finally:
            responseDelete = SciDrive.delete(SciDrive_Directory)
            self.assertEqual(responseDelete, True)

    def test_SciDrive_publicUrl(self):
        """publicUrl returns an http(s) link for a fresh container."""
        try:
            responseDelete = SciDrive.delete(SciDrive_Directory)
        except:
            pass
        responseCreate = SciDrive.createContainer(SciDrive_Directory)
        url = SciDrive.publicUrl(SciDrive_Directory)
        responseDelete = SciDrive.delete(SciDrive_Directory)
        isUrl = url.startswith("http")
        self.assertEqual(responseCreate, True)
        self.assertEqual(isUrl, True)
        self.assertEqual(responseDelete, True)

    def test_SciDrive_upload_download_delete(self):
        """Round trip: upload from file and from memory, download both ways,
        and confirm the content survives unchanged."""
        try:
            # Write the fixture file; mode differs between Python 2 and 3.
            if (sys.version_info > (3, 0)):
                #python3
                file = open(SciDrive_FileName, "w")
            else:
                #python2
                file = open(SciDrive_FileName, "wb")
            file.write(SciDrive_FileContent)
            file.close()
            responseUpload = SciDrive.upload(path=SciDrive_Directory + "/" +
                                             SciDrive_FileName,
                                             localFilePath=SciDrive_FileName)
            stringio = SciDrive.download(path=SciDrive_Directory + "/" +
                                         SciDrive_FileName,
                                         format="StringIO")
            fileContent = stringio.read()
            responseDelete = SciDrive.delete(SciDrive_Directory)
            self.assertEqual(responseUpload["path"],
                             SciDrive_Directory + "/" + SciDrive_FileName)
            self.assertEqual(fileContent, SciDrive_FileContent)
            self.assertEqual(responseDelete, True)
            # Second round trip: upload from an in-memory string instead.
            responseUpload = SciDrive.upload(path=SciDrive_Directory + "/" +
                                             SciDrive_FileName,
                                             data=SciDrive_FileContent)
            fileContent = SciDrive.download(path=SciDrive_Directory + "/" +
                                            SciDrive_FileName,
                                            format="text")
            responseDelete = SciDrive.delete(SciDrive_Directory)
            self.assertEqual(responseUpload["path"],
                             SciDrive_Directory + "/" + SciDrive_FileName)
            self.assertEqual(fileContent, SciDrive_FileContent)
            self.assertEqual(responseDelete, True)
        finally:
            # Remove the local fixture regardless of outcome.
            try:
                os.remove(SciDrive_FileName)
            except:
                pass
class TestCasJobs(unittest.TestCase):
    """Live integration tests for the CasJobs SQL service."""

    # Logging in at class-definition time makes the session token available
    # to every test method below.
    token1 = Authentication.login(Authentication_loginName, Authentication_loginPassword)

    def setUp(self):
        pass

    # *******************************************************************************************************
    # CasJobs section:

    def test_CasJobs_getSchemaName(self):
        """The caller's MyDB schema name is non-empty."""
        casJobsId = CasJobs.getSchemaName()
        self.assertNotEqual(casJobsId, "")

    def test_CasJobs_getTables(self):
        """getTables on MyDB does not raise (no assertion on contents)."""
        tables = CasJobs.getTables(context="MyDB")

    def test_CasJobs_executeQuery(self):
        """A synchronous query returns the expected CSV-serialised table."""
        df = CasJobs.executeQuery(sql=CasJobs_TestQuery,
                                  context=CasJobs_TestDatabase,
                                  format="pandas")
        self.assertEqual(CasJobs_TestTableCSV, df.to_csv(index=False))

    def test_CasJobs_submitJob(self):
        """An async SELECT INTO job completes; the table is dropped after."""
        jobId = CasJobs.submitJob(sql=CasJobs_TestQuery + " into MyDB." +
                                  CasJobs_TestTableName1,
                                  context=CasJobs_TestDatabase)
        jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
        df = CasJobs.executeQuery(sql="DROP TABLE " + CasJobs_TestTableName1,
                                  context="MyDB",
                                  format="csv")
        self.assertNotEqual(jobId, "")

    def test_CasJobs_getJobStatus(self):
        """Job status lookup echoes the submitted job id."""
        jobId = CasJobs.submitJob(sql=CasJobs_TestQuery,
                                  context=CasJobs_TestDatabase)
        jobDescription = CasJobs.getJobStatus(jobId)
        self.assertEqual(jobDescription["JobID"], jobId)

    def test_CasJobs_cancelJob(self):
        """A freshly-submitted job can be cancelled."""
        jobId = CasJobs.submitJob(sql=CasJobs_TestQuery,
                                  context=CasJobs_TestDatabase)
        isCanceled = CasJobs.cancelJob(jobId=jobId)
        self.assertEqual(isCanceled, True)

    def test_CasJobs_waitForJob(self):
        """waitForJob blocks until the job reaches a terminal status (>= 3)."""
        jobId = CasJobs.submitJob(sql=CasJobs_TestQuery,
                                  context=CasJobs_TestDatabase)
        jobDescription = CasJobs.waitForJob(jobId=jobId, verbose=True)
        self.assertGreaterEqual(jobDescription["Status"], 3)

    def test_CasJobs_writeFitsFileFromQuery(self):
        """Query results can be written to a local FITS file."""
        #CasJobs.getFitsFileFromQuery
        try:
            result = CasJobs.writeFitsFileFromQuery(
                fileName=CasJobs_TestFitsFile,
                queryString=CasJobs_TestQuery,
                context="MyDB")
            self.assertEqual(result, True)
            self.assertEqual(os.path.isfile(CasJobs_TestFitsFile), True)
        finally:
            # Remove the local FITS file regardless of outcome.
            try:
                os.remove(CasJobs_TestFitsFile)
            except:
                pass

    def test_CasJobs_getPandasDataFrameFromQuery(self):
        """Query results can be fetched directly as a pandas DataFrame."""
        #CasJobs.getPandasDataFrameFromQuery
        df = CasJobs.getPandasDataFrameFromQuery(queryString=CasJobs_TestQuery,
                                                 context=CasJobs_TestDatabase)
        self.assertEqual(df.to_csv(index=False), CasJobs_TestTableCSV)

    def test_CasJobs_getNumpyArrayFromQuery(self):
        """Query results can be fetched directly as a numpy array."""
        #CasJobs.getNumpyArrayFromQuery
        array = CasJobs.getNumpyArrayFromQuery(queryString=CasJobs_TestQuery,
                                               context=CasJobs_TestDatabase)
        # NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0;
        # this needs .to_numpy() on modern pandas.
        newArray = pandas.read_csv(StringIO(CasJobs_TestTableCSV),
                                   index_col=None).as_matrix()
        # NOTE(review): comparing .all() to .all() is a weak equality check.
        self.assertEqual(array.all(), newArray.all())

    def test_CasJobs_uploadPandasDataFrameToTable_uploadCSVDataToTable(self):
        """Round trip: upload a DataFrame and raw CSV into MyDB tables and
        read them back."""
        try:
            df = pandas.read_csv(StringIO(CasJobs_TestTableCSV),
                                 index_col=None)
            result = CasJobs.uploadPandasDataFrameToTable(
                dataFrame=df, tableName=CasJobs_TestTableName2, context="MyDB")
            table = CasJobs.executeQuery(sql="select * from " +
                                         CasJobs_TestTableName2,
                                         context="MyDB",
                                         format="pandas")
            result2 = CasJobs.executeQuery(sql="DROP TABLE " +
                                           CasJobs_TestTableName2,
                                           context="MyDB",
                                           format="csv")
            self.assertEqual(result, True)
            # NOTE(review): assertItemsEqual is Python-2-only unittest;
            # Python 3 renamed it assertCountEqual.
            self.assertItemsEqual(table, df)
            result = CasJobs.uploadCSVDataToTable(
                csvData=CasJobs_TestTableCSV,
                tableName=CasJobs_TestTableName2,
                context="MyDB")
            df2 = CasJobs.executeQuery(sql="select * from " +
                                       CasJobs_TestTableName2,
                                       context="MyDB",
                                       format="pandas")
            result2 = CasJobs.executeQuery(sql="DROP TABLE " +
                                           CasJobs_TestTableName2,
                                           context="MyDB",
                                           format="csv")
            self.assertEqual(result, True)
            self.assertItemsEqual(df, df2)
        finally:
            # Best-effort drop in case an assertion fired mid-test.
            try:
                csv = CasJobs.executeQuery(sql="DROP TABLE " +
                                           CasJobs_TestTableName2,
                                           context="MyDB",
                                           format="csv")
            except:
                pass
def login():
    '''Log in to SciServer and return the session token.'''
    # NOTE(review): credentials appear scrubbed ('******'); supply real ones
    # from configuration rather than source.
    user, secret = '******', '******'
    return Authentication.login(user, secret)
return np.exp(val / 512.0) else: return val num_lines = 25 LOGIN_NAME = 'loranoosterhaven' LOGIN_PASSWORD = '******' IMG_WIDTH = 512 IMG_HEIGHT = 512 with open("GalaxyZoo1_DR_table7.csv") as csv_file: # login token1 = Authentication.login(LOGIN_NAME, LOGIN_PASSWORD) csv_reader = csv.reader(csv_file, delimiter=',') line_count = 0 for x in range(0, num_lines): row = next(csv_reader) if line_count == 0: line_count += 1 else: ra_sx, dec_sx = row[1], row[2] c = SkyCoord(ra_sx, dec_sx, unit=u.degree) print(f'object {row[0]}', c) line_count += 1 img = SkyServer.getJpegImgCutout(ra=c.ra.deg,
def main():
    """Run the staged photo/spec pipeline controlled by the module-level
    PRE_SPEC / PRE_PHOTO_HH / PRE_UMAP / MAP_SPEC / UPLOAD_SCI flags.
    Each stage either computes its output or loads the previous stage's
    artefact from PRETRAIN.

    NOTE(review): this body was reconstructed from a whitespace-mangled
    source; the if/elif/else nesting below follows the compute-or-load
    pattern of the earlier stages — confirm against the original file.
    """
    try:
        os.mkdir(PRETRAIN)
    except:
        # Directory already exists (or is uncreatable); proceed either way.
        print('here we go!')
    if PRE_SPEC:
        # Stage 1: standardise the spectra, keeping the normalisation stats.
        dfspec, vmean, vstd, df_lbl = prepro_std_specs(SPEC_DATA, ftr=ftr, sig=3.0, w=True, wpath=PRETRAIN)
    elif PRE_PHOTO_HH:
        # Stage 1 skipped: load previously saved normalisation stats.
        vmean = np.loadtxt(f'{PRETRAIN_PATH}/vmean.txt')
        vstd = np.loadtxt(f'{PRETRAIN_PATH}/vstd.txt')
    if PRE_PHOTO_HH:
        # Stage 2: encode the photometry stream and keep the heavy hitters.
        print('=====================PREPRO PHOTO====================')
        dfphoto = prepro_std_photos(PHOTO_DATA, vmean, vstd, ftr=ftr, sig=3.0)
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        photo_HH = get_HH_pd(photo_stream, base, ftr_len, dtype, EXACT, topk, r=16, d=1000000, c=None, device=None)
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif PRE_UMAP:
        # Stage 2 skipped: load the saved heavy-hitter table.
        # NOTE(review): pandas.read_csv has no `columns` keyword (that is
        # DataFrame's); this call would raise TypeError — confirm intent
        # (probably `names=` or `usecols=`).
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv', columns=list(range(ftr_len)))
    if PRE_UMAP:
        # Stage 3: fit the UMAP embedding on the heavy hitters.
        print('=============GETTING UMAP============')
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except:
            # Fall back to string column labels if integer keys are absent.
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # Stage 3 skipped: load the saved UMAP transformer.
        # NOTE(review): hard-coded 'pretrain' here instead of {PRETRAIN};
        # confirm these are the same location.
        photo_uT = joblib.load(f'pretrain/photo_uT.sav')
    if MAP_SPEC:
        # Stage 4: map the (quantised) spectra through the photo embedding.
        if not PRE_SPEC:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        dfspec_block = (dfspec * (base - 1)).round()
        # Quantised values must stay within the encoding alphabet [0, base-1].
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <= base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        # Stage 4 skipped: load the saved mapping.
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')
    # Attach labels and persist the labelled mapping.
    # NOTE(review): df_lbl only exists when PRE_SPEC ran — confirm the flag
    # combinations under which this is reached.
    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)
    if UPLOAD_SCI:
        # Optional upload of the heavy-hitter table to SciServer MyDB.
        # NOTE(review): credentials appear scrubbed ('******'); use getpass
        # or configuration instead of literals.
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")
class TestJobs(unittest.TestCase):
    """Live integration tests for the SciServer Jobs service (Docker and RDB)."""

    # Logging in at class-definition time makes the session token available
    # to every test method below.
    token1 = Authentication.login(Authentication_loginName, Authentication_loginPassword)

    def setUp(self):
        pass

    # *******************************************************************************************************
    # Jobs section

    # Docker Jobs ################################################################################################

    def test_Jobs_getDockerComputeDomains(self):
        """The configured Docker compute domain is advertised."""
        dockerComputeDomains = Jobs.getDockerComputeDomains()
        self.assertTrue(dockerComputeDomains.__len__() > 0)
        found = False
        for dockerComputeDomain in dockerComputeDomains:
            if dockerComputeDomain.get('name') == Jobs_DockerComputeDomainName:
                found = True
        self.assertTrue(found)

    def test_Jobs_getDockerComputeDomainsNames(self):
        """The configured domain name appears in the names list."""
        dockerComputeDomainsNames = Jobs.getDockerComputeDomainsNames()
        self.assertTrue(
            Jobs_DockerComputeDomainName in dockerComputeDomainsNames)

    def test_Jobs_getDockerComputeDomainFromName(self):
        """Lookup by name returns the matching domain descriptor."""
        dockerComputeDomain = Jobs.getDockerComputeDomainFromName(
            Jobs_DockerComputeDomainName)
        self.assertTrue(
            dockerComputeDomain.get('name') in Jobs_DockerComputeDomainName)

    def test_Jobs_submitNotebookJob_cancel_waitForJob_getJobStatus_getJobDescription_submitShellCommandJob(
            self):
        """Full notebook-job lifecycle: submit+cancel, submit+wait, inspect
        the description and output, then run a shell-command job."""
        fileService = Files.getFileServiceFromName(Jobs_FileServiceName)
        # Best-effort cleanup of leftovers from previous runs.
        try:
            Files.deleteUserVolume(fileService, Jobs_RootVolumeName,
                                   Jobs_UserVolumeName)
        except:
            pass
        Files.createUserVolume(fileService, Jobs_RootVolumeName,
                               Jobs_UserVolumeName)
        Files.upload(fileService, Jobs_RootVolumeName, Jobs_UserVolumeName,
                     Jobs_DirectoryName + "/" + Jobs_NotebookName,
                     localFilePath=Jobs_NotebookName)
        dockerComputeDomain = Jobs.getDockerComputeDomainFromName(
            Jobs_DockerComputeDomainName)
        # First job is cancelled immediately; status 128 == cancelled.
        jobId_1 = Jobs.submitNotebookJob(
            '/home/idies/workspace/' + Jobs_UserVolumeName + '/' +
            Jobs_DirectoryName + '/' + Jobs_NotebookName, dockerComputeDomain,
            Jobs_DockerImageName, Jobs_UserVolumes, Jobs_DataVolumes,
            Jobs_Parameters, Jobs_Alias)
        Jobs.cancelJob(jobId_1)
        jobStatus = Jobs.getJobStatus(jobId_1)
        self.assertTrue(jobStatus.get('status') == 128)
        # Second job runs to completion; status 32 == finished successfully.
        jobId_2 = Jobs.submitNotebookJob(Jobs_RemoteNotebookPath,
                                         dockerComputeDomain,
                                         Jobs_DockerImageName,
                                         Jobs_UserVolumes, Jobs_DataVolumes,
                                         Jobs_Parameters, Jobs_Alias)
        jobStatus = Jobs.waitForJob(jobId_2)
        self.assertTrue(jobStatus == Jobs.getJobStatus(jobId_2))
        self.assertTrue(jobStatus.get('status') == 32)
        job = Jobs.getJobDescription(jobId_2)
        self.assertTrue(job.get('username') == Authentication_loginName)
        self.assertTrue(job.get('dockerImageName') == Jobs_DockerImageName)
        self.assertTrue(job.get('scriptURI') == Jobs_RemoteNotebookPath)
        self.assertTrue(job.get('submitterDID') == Jobs_Alias)
        # Fetch the notebook's output file from the scratch results folder.
        jobDirectory = job.get('resultsFolderURI')
        relativePath = jobDirectory.split(
            'scratch/')[1] + Jobs_NoteBookOutPutFile
        string = Files.download(fileService,
                                'scratch',
                                '',
                                relativePath,
                                format="txt",
                                userVolumeOwner=Authentication_loginName)
        # NOTE(review): rstrip's result is discarded, and assertTrue(x, msg)
        # only checks truthiness — assertEqual was probably intended.
        string.rstrip("\n")
        self.assertTrue(string, job.get('resultsFolderURI'))
        # Both jobs should be among the two most recent in the listing.
        jobs = Jobs.getJobsList(top=2)
        found = False
        for job in jobs:
            if jobId_1 == job.get("id"):
                found = True
        self.assertTrue(found)
        found = False
        for job in jobs:
            if jobId_2 == job.get("id"):
                found = True
        self.assertTrue(found)
        # Shell-command job lifecycle, mirroring the notebook flow above.
        jobId = Jobs.submitShellCommandJob(Jobs_ShellCommand,
                                           dockerComputeDomain,
                                           Jobs_DockerImageName,
                                           Jobs_UserVolumes, Jobs_DataVolumes,
                                           Jobs_Alias)
        jobStatus = Jobs.waitForJob(jobId)
        self.assertTrue(jobStatus == Jobs.getJobStatus(jobId))
        self.assertTrue(jobStatus.get('status') == 32)
        job = Jobs.getJobDescription(jobId)
        self.assertTrue(job.get('username') == Authentication_loginName)
        self.assertTrue(job.get('dockerImageName') == Jobs_DockerImageName)
        self.assertTrue(job.get('command') == Jobs_ShellCommand)
        self.assertTrue(job.get('submitterDID') == Jobs_Alias)
        jobDirectory = job.get('resultsFolderURI')
        relativePath = jobDirectory.split('scratch/')[1] + "command.txt"
        string = Files.download(fileService,
                                'scratch',
                                '',
                                relativePath,
                                format="txt",
                                userVolumeOwner=Authentication_loginName)
        string.rstrip("\n")
        self.assertTrue(string, job.get('resultsFolderURI'))
        Files.deleteUserVolume(fileService, Jobs_RootVolumeName,
                               Jobs_UserVolumeName)

    # RDB Jobs ################################################################################################

    def test_Jobs_getRDBComputeDomains(self):
        """The configured RDB compute domain is advertised."""
        rdbComputeDomains = Jobs.getRDBComputeDomains()
        self.assertTrue(rdbComputeDomains.__len__() > 0)
        found = False
        for rdbComputeDomain in rdbComputeDomains:
            if rdbComputeDomain.get('name') == Jobs_RDBComputeDomainName:
                found = True
        self.assertTrue(found)

    def test_Jobs_getRDBComputeDomainsNames(self):
        """The configured RDB domain name appears in the names list."""
        rdbComputeDomainsNames = Jobs.getRDBComputeDomainsNames()
        self.assertTrue(Jobs_RDBComputeDomainName in rdbComputeDomainsNames)

    def test_Jobs_getRDBComputeDomainFromName(self):
        """Lookup by name returns the matching RDB domain descriptor."""
        rdbComputeDomain = Jobs.getRDBComputeDomainFromName(
            Jobs_RDBComputeDomainName)
        self.assertTrue(
            rdbComputeDomain.get('name') in Jobs_RDBComputeDomainName)

    def test_Jobs_submitRDBJob(self):
        """An RDB query job completes and its CSV result is downloadable."""
        rdbComputeDomain = Jobs.getRDBComputeDomainFromName(
            Jobs_RDBComputeDomainName)
        jobId = Jobs.submitRDBQueryJob(Jobs_SqlQuery, rdbComputeDomain,
                                       Jobs_DatabaseContextName,
                                       Jobs_QueryResultsFile, Jobs_Alias)
        jobStatus = Jobs.waitForJob(jobId)
        self.assertTrue(jobStatus == Jobs.getJobStatus(jobId))
        self.assertTrue(jobStatus.get('status') == 32)
        job = Jobs.getJobDescription(jobId)
        self.assertTrue(job.get('username') == Authentication_loginName)
        self.assertTrue(job.get('rdbDomainName') == Jobs_RDBComputeDomainName)
        self.assertTrue(
            job.get('databaseContextName') == Jobs_DatabaseContextName)
        self.assertTrue(job.get('inputSql') == Jobs_SqlQuery)
        self.assertTrue(job.get('submitterDID') == Jobs_Alias)
        fileService = Files.getFileServiceFromName(Jobs_FileServiceName)
        jobDirectory = job.get('resultsFolderURI')
        relativePath = jobDirectory.split(
            'scratch/')[1] + Jobs_QueryResultsFile + '.csv'
        string = Files.download(fileService,
                                'scratch',
                                '',
                                relativePath,
                                format="txt",
                                userVolumeOwner=Authentication_loginName)
        string.rstrip("\n")
        self.assertTrue(string, Jobs_SqlQueryResult)

    def test_Jobs_getJobDirectory(self):
        #TBD
        pass
    def test_Files_createDir_upload_dirList_download_download_shareUserVolume(
            self):
        """End-to-end Files workflow: create two user volumes, make a
        directory in each, upload a file, list it, download it and compare
        contents, delete it, re-upload from disk, move it into the second
        volume, share that volume with a second account, and download the
        file while logged in as them.  Cleanup runs in the ``finally`` block.
        """
        # Best-effort removal of leftovers from an earlier (failed) run.
        try:
            fileService = Files.getFileServiceFromName(Files_FileServiceName)
            os.remove(Files_LocalFileName)
            Files.deleteUserVolume(fileService, Files_RootVolumeName1, Files_UserVolumeName1, quiet=True)
            Files.deleteUserVolume(fileService, Files_RootVolumeName1, Files_UserVolumeName2, quiet=True)
        except:
            pass
        try:
            fileService = Files.getFileServiceFromName(Files_FileServiceName)
            Files.createUserVolume(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1]), quiet=False)
            Files.createUserVolume(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName2]), quiet=False)
            # NOTE(review): the directory for volume 2 is created under
            # Files_RootVolumeName2, although the volume itself was created
            # under Files_RootVolumeName1 — verify both constants name the
            # same root volume.
            Files.createDir(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1, Files_NewDirectoryName1]))
            Files.createDir(fileService, "/".join([Files_RootVolumeName2, Authentication_loginName, Files_UserVolumeName2, Files_NewDirectoryName2]))
            dirList = Files.dirList(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1, Files_NewDirectoryName1]), level=2)
            self.assertTrue(dirList.get('root').get('name') == Files_NewDirectoryName1)
            # Upload from an in-memory string, then verify it shows up.
            Files.upload(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1, Files_NewDirectoryName1, Files_LocalFileName]), data=Files_LocalFileContent)
            dirList = Files.dirList(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1, Files_NewDirectoryName1]), level=2)
            self.assertTrue(dirList.get('root').get('files')[0].get('name') == Files_LocalFileName)
            # Download to disk and compare the round-tripped contents.
            Files.download(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1, Files_NewDirectoryName1, Files_LocalFileName]), localFilePath=Files_LocalFileName)
            with open(Files_LocalFileName, 'r') as myfile:
                downloadedFileContent = myfile.read()
            assert (downloadedFileContent == Files_LocalFileContent)
            # Delete the remote copy and check the directory is empty again.
            Files.delete(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1, Files_NewDirectoryName1, Files_LocalFileName]))
            dirList = Files.dirList(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1, Files_NewDirectoryName1]), level=2)
            self.assertIsNone(dirList.get('root').get('files'))
            # Re-upload, this time from the local file, then move it into the
            # second user volume.
            Files.upload(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1, Files_NewDirectoryName1, Files_LocalFileName]), localFilePath=Files_LocalFileName, quiet=False)
            Files.move(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1, Files_NewDirectoryName1, Files_LocalFileName]), fileService, "/".join([Files_RootVolumeName2, Authentication_loginName, Files_UserVolumeName2, Files_NewDirectoryName2, Files_LocalFileName]))
            # Share volume 2 read-only with a second account and download the
            # moved file while authenticated as them.
            Files.shareUserVolume(fileService, "/".join([Files_RootVolumeName2, Authentication_loginName, Files_UserVolumeName2]), sharedWith=Authentication_login_sharedWithName, type="USER", allowedActions=["read"])
            token1 = Authentication.login(Authentication_login_sharedWithName, Authentication_login_sharedWithPassword)
            string = Files.download(fileService, "/".join([Files_RootVolumeName2, Authentication_loginName, Files_UserVolumeName2, Files_NewDirectoryName2, Files_LocalFileName]), format="txt")
            # NOTE(review): assertTrue(a, b) only tests truthiness of `a`
            # (`b` is the failure message) — assertEqual may have been intended.
            self.assertTrue(string, Files_LocalFileContent)
            # Log back in as the primary account before cleanup.
            token1 = Authentication.login(Authentication_loginName, Authentication_loginPassword)
        finally:
            # Best-effort cleanup of local file and both user volumes.
            try:
                os.remove(Files_LocalFileName)
                Files.deleteUserVolume(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName1]), quiet=True)
                Files.deleteUserVolume(fileService, "/".join([Files_RootVolumeName1, Authentication_loginName, Files_UserVolumeName2]), quiet=True)
            except:
                pass
from sys import argv
import os
from SciServer import CasJobs
from SciServer import Authentication

# Log in to SciServer (credentials redacted in this copy) and resolve the
# session token to the account record.
username = "******"
password = "******"
token = Authentication.login(username, password)
user = Authentication.getKeystoneUserWithToken(token)

# Earlier CSV-based upload path, kept for reference:
#csv = open("./metadata_table.csv", 'r')
#csv_str = csv.read()
#csv.close()
#success = CasJobs.uploadCSVDataToTable(csv_str, "osc_metadata")
#print(success)

# Create the (empty) metadata table in the user's default CasJobs context.
query1 = "create table osc_metadata (FileName varchar(255), FileSize varchar(255))"
CasJobs.executeQuery(sql=query1, format='json')

# get file name and size of the file passed on the command line
f = argv[1]
f_name = os.path.basename(f)
f_size = os.path.getsize(f)
print(f_name)
print(f_size)
# ******************************************************************************************************* # Authentication section # ******************************************************************************************************* # In[ ]: #help(Authentication) # In[ ]: #logging in and getting current token from different ways token1 = Authentication.login(Authentication_loginName, Authentication_loginPassword); token2 = Authentication.getToken() token3 = Authentication.getKeystoneToken() token4 = Authentication.token.value print("token1=" + token1)# print("token2=" + token2)# print("token3=" + token3)# print("token4=" + token4)# # In[ ]: #getting curent user info user = Authentication.getKeystoneUserWithToken(token1) print("userName=" + user.userName)
warnings.filterwarnings("ignore", category=UserWarning)

import numpy as np
from SciServer import Authentication, CasJobs
from SciServer import Config
import urllib3
import pandas as pd
import sys
import os
from astropy.io import ascii
from astropy.table import Table

urllib3.disable_warnings()

token = '27d25960dc5f4f0aa838612a18897e23'
#Authentication.setToken(token)
# SECURITY NOTE(review): hard-coded SciServer credentials (and token above)
# are checked into source — move to environment variables or a config file.
Authentication.login('pwang55', '4a552d')
# Point CasJobs at the public SDSS SkyServer REST endpoint, DR16 context.
Config.CasJobsRESTUri = 'http://skyserver.sdss.org/CasJobs/RestApi'
context = 'DR16'

# SDSS query code will create another argv, so the argv count check has to +1.
# If a path is given there are 7 args in total, and the first arg split by '/'
# should have multiple components.
if (len(sys.argv) == 7) and (len(sys.argv[1].split('/')) > 1):
    datapath = sys.argv[1]
    clustername = sys.argv[2]
    ra = sys.argv[3]
    dec = sys.argv[4]
    radius = sys.argv[5]
    # if path argument doesn't end in /, add it
    if datapath[-1:] != '/':
        datapath = datapath + '/'
class TestSkyServer(unittest.TestCase):
    """Integration tests for SciServer's SkyServer module: SQL search,
    JPEG image cutouts, radial/rectangular searches and object lookup
    against the data release named by ``SkyServer_DataRelease``."""

    # One login for the whole class; SciScript stores the token globally.
    token1 = Authentication.login(Authentication_loginName,
                                  Authentication_loginPassword)

    def setUp(self):
        pass

    # *******************************************************************************************************
    # SkyServer section:

    def test_SkyServer_sqlSearch(self):
        # SQL search: the result set must round-trip to the expected CSV.
        df = SkyServer.sqlSearch(sql=SkyServer_TestQuery,
                                 dataRelease=SkyServer_DataRelease)
        self.assertEqual(SkyServer_QueryResultCSV, df.to_csv(index=False))

    def test_SkyServer_getJpegImgCutout(self):
        # Image cutout: the fetched JPEG must match the checked-in reference
        # image byte for byte.
        img = SkyServer.getJpegImgCutout(
            ra=197.614455642896,
            dec=18.438168853724,
            width=512,
            height=512,
            scale=0.4,
            dataRelease=SkyServer_DataRelease,
            opt="OG",
            query=
            "SELECT TOP 100 p.objID, p.ra, p.dec, p.r FROM fGetObjFromRectEq(197.6,18.4,197.7,18.5) n, PhotoPrimary p WHERE n.objID=p.objID"
        )
        im = skimage.io.imread("./TestGalaxy.jpeg")
        self.assertEqual(img.tobytes(), im.tobytes())

    def test_SkyServer_radialSearch(self):
        # Radial search around a fixed point; compare as CSV with fixed float
        # formatting so the diff is stable across platforms.
        df = SkyServer.radialSearch(ra=258.25,
                                    dec=64.05,
                                    radius=0.1,
                                    dataRelease=SkyServer_DataRelease)
        self.maxDiff = None
        self.assertEqual(SkyServer_RadialSearchResultCSV,
                         df.to_csv(index=False, float_format="%.6f"))

    def test_SkyServer_rectangularSearch(self):
        # Rectangular (RA/Dec box) search.
        df = SkyServer.rectangularSearch(min_ra=258.3,
                                         max_ra=258.31,
                                         min_dec=64,
                                         max_dec=64.01,
                                         dataRelease=SkyServer_DataRelease)
        self.maxDiff = None
        self.assertEqual(SkyServer_RectangularSearchResultCSV,
                         df.to_csv(index=False, float_format="%.6f"))

    def test_SkyServer_objectSearch(self):
        # Object search.  The local is named `obj` (was `object`) so the
        # builtin `object` is not shadowed.
        obj = SkyServer.objectSearch(ra=258.25,
                                     dec=64.05,
                                     dataRelease=SkyServer_DataRelease)
        self.maxDiff = None
        self.assertEqual(SkyServer_ObjectSearchResultObjID,
                         obj[0]["Rows"][0]["id"])
def main():
    """Flag-driven pipeline: normalize photo/spec data, encode to integer
    streams, find heavy hitters (HH), project to UMAP space, map spectra
    onto that projection, and optionally upload results to SciServer.
    Stage flags (PRE_NORM, PRE_HH, PRE_UMAP, MAP_SPEC, UPLOAD_SCI) and all
    other configuration come from module-level globals.
    """
    # Make sure the working directory exists; ignore "already exists".
    try:
        os.mkdir(PRETRAIN)
    except:
        print('here we go!')
    if PRE_NORM:
        # Normalize the raw photometric/spectroscopic inputs.
        dfphoto, dfspec, df_lbl = prepro_photo_spec(PHOTO_DATA, SPEC_DATA, base, ftr, wpath=PRETRAIN)
    if PRE_HH:
        # Encode both tables into integer streams, then compute heavy
        # hitters for each and persist them as CSV.
        print('=====================ENCODE PHOTO ====================')
        photo_stream = get_encode_stream(dfphoto, base, dtype)
        spec_stream = get_encode_stream(dfspec, base, dtype)
        # np.savetxt(f'{PRETRAIN}/photo_stream.txt',photo_stream)
        # np.savetxt(f'{PRETRAIN}/spec_stream.txt',spec_stream)
        df_lbl['encode'] = spec_stream
        df_lbl.to_csv(f'{PRETRAIN}/spec_lbl_encode.csv', index=False)
        photo_HH = get_HH_pd(photo_stream, base, ftr_len, dtype, EXACT, topk, r=16, d=1000000, c=None, device=None)
        # Approximate counting may return fewer than topk rows; exact
        # counting is truncated to topk.
        if not EXACT:
            assert len(photo_HH) <= topk
        else:
            photo_HH = photo_HH[:topk]
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
        spec_HH = get_HH_pd(spec_stream, base, ftr_len, dtype, True, topk)
        spec_HH.to_csv(f'{PRETRAIN}/spec_HH.csv', index=False)
    elif PRE_UMAP or MAP_SPEC:
        # Later stages reload the HH tables produced by a previous PRE_HH run.
        photo_HH = pd.read_csv(f'{PRETRAIN}/photo_HH.csv')
        spec_HH = pd.read_csv(f'{PRETRAIN}/spec_HH.csv')
        df_lbl = pd.read_csv(f'{PRETRAIN}/spec_lbl_encode.csv')
        print('photo_HH', photo_HH)
        print('spec_HH', spec_HH)
    if PRE_UMAP:
        print('=============GETTING UMAP============')
        # Fall back to ftr_str column names if integer columns are absent.
        try:
            photo_uT = get_umap_pd(photo_HH, list(range(ftr_len)), umap_comp)
        except:
            photo_uT = get_umap_pd(photo_HH, ftr_str, umap_comp)
        joblib.dump(photo_uT, f'{PRETRAIN}/photo_uT_b{base}.sav')
        photo_HH.to_csv(f'{PRETRAIN}/photo_HH.csv', index=False)
    elif MAP_SPEC:
        # NOTE(review): this loads from the literal 'pretrain/' directory,
        # not f'{PRETRAIN}/' like every other path — confirm intended.
        photo_uT = joblib.load(f'pretrain/photo_uT_b{base}.sav')
    if MAP_SPEC:
        if not PRE_NORM:
            dfspec = pd.read_csv(f'{PRETRAIN}/spec_norm.csv')
        # Quantize the normalized spectra onto the [0, base-1] grid and map
        # them into the photo UMAP space.
        dfspec_block = (dfspec * (base - 1)).round()
        assert (dfspec_block.min().min() >= 0) & (dfspec_block.max().max() <= base - 1)
        spec_pm = get_mapping_pd(dfspec_block, photo_uT, dfspec.keys())
        spec_pm.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv', index=False)
    else:
        spec_pm = pd.read_csv(f'{PRETRAIN}/spec_pm_e{EXACT}.csv')
    # Attach labels to the mapped spectra and persist.
    spec_pmlbl = pd.concat([spec_pm, df_lbl], axis=1)
    spec_pmlbl.to_csv(f'{PRETRAIN}/spec_pm_e{EXACT}_lbl.csv', index=False)
    if UPLOAD_SCI:
        # SECURITY NOTE(review): credentials are redacted placeholders here;
        # use getpass or environment variables rather than literals.
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=photo_HH,
            tableName=f'{name}b{base}e{EXACT}std',
            context="MyDB")
class TestSkyQuery(unittest.TestCase):
    """Exercises SciServer's SkyQuery module: queue inspection, job
    submission/monitoring/cancellation, CSV table round-trips, and
    dataset metadata queries."""

    # Single login for the whole class; SciScript keeps the token globally.
    token1 = Authentication.login(Authentication_loginName,
                                  Authentication_loginPassword)

    def setUp(self):
        pass

    # *******************************************************************************************************
    # SkyQuery section:

    #-- submitting jobs:

    def test_SkyQuery_listQueues(self):
        # Listing the available queues must simply succeed.
        _ = SkyQuery.listQueues()

    def test_SkyQuery_getQueueInfo(self):
        # Both standard queues must be inspectable.
        for queue_name in ('quick', 'long'):
            _ = SkyQuery.getQueueInfo(queue_name)

    def test_SkyQuery_submitJob(self):
        # A submitted job must come back with a non-empty id.
        new_job_id = SkyQuery.submitJob(query=SkyQuery_Query, queue="quick")
        self.assertNotEqual(new_job_id, "")

    def test_SkyQuery_getJobStatus(self):
        # Status lookup for a freshly submitted job must succeed.
        new_job_id = SkyQuery.submitJob(query=SkyQuery_Query, queue="quick")
        _ = SkyQuery.getJobStatus(jobId=new_job_id)

    def test_SkyQuery_waitForJob(self):
        # Waiting on a quick-queue job must end in the 'completed' state.
        new_job_id = SkyQuery.submitJob(query=SkyQuery_Query, queue="quick")
        description = SkyQuery.waitForJob(jobId=new_job_id, verbose=True)
        self.assertEqual(description["status"], "completed")

    def test_SkyQuery_cancelJob(self):
        # A job on the slow queue can be cancelled right after submission.
        pending_id = SkyQuery.submitJob(query=SkyQuery_Query, queue="long")
        self.assertEqual(SkyQuery.cancelJob(pending_id), True)

    #-- uploading and downloading csv tables:

    def test_SkyQuery_uploadTable_getTable_getTableInfo_listTableColumns_dropTable(self):
        # Drop any leftover table from a previous run, ignoring failures.
        try:
            SkyQuery.dropTable(tableName=SkyQuery_TestTableName,
                               datasetName="MyDB")
        except:
            pass
        # Upload the CSV fixture, read it back, and compare the round trip.
        uploaded = SkyQuery.uploadTable(uploadData=SkyQuery_TestTableCSV,
                                        tableName=SkyQuery_TestTableName,
                                        datasetName="MyDB",
                                        format="csv")
        self.assertEqual(uploaded, True)
        fetched = SkyQuery.getTable(tableName=SkyQuery_TestTableName,
                                    datasetName="MyDB",
                                    top=10)
        self.assertEqual(SkyQuery_TestTableCSVdownloaded,
                         fetched.to_csv(index=False))
        # Metadata endpoints must accept the schema-qualified name.
        qualified_name = "webuser." + SkyQuery_TestTableName
        _ = SkyQuery.getTableInfo(tableName=qualified_name, datasetName="MyDB")
        _ = SkyQuery.listTableColumns(tableName=qualified_name,
                                      datasetName="MyDB")
        dropped = SkyQuery.dropTable(tableName=SkyQuery_TestTableName,
                                     datasetName="MyDB")
        self.assertEqual(dropped, True)

    #-- getting database info

    def test_SkyQuery_listJobs(self):
        # Job listings for both queues must be retrievable.
        for queue_name in ('quick', 'long'):
            _ = SkyQuery.listJobs(queue_name)

    def test_SkyQuery_listAllDatasets(self):
        _ = SkyQuery.listAllDatasets()

    def test_SkyQuery_getDatasetInfo(self):
        _ = SkyQuery.getDatasetInfo("MyDB")

    def test_SkyQuery_listDatasetTables(self):
        _ = SkyQuery.listDatasetTables("MyDB")
def main():
    """Flag-driven image-segmentation pipeline: PCA-normalize the dataset,
    rebin/encode it into an integer stream, compute heavy hitters (HH),
    project them with UMAP, k-means cluster the projection, predict a label
    per pixel (1004x1344 frames), and optionally upload the HH table to
    SciServer.  All stage flags and parameters are module-level globals.
    """
    # Make sure the working directory exists; ignore "already exists".
    try:
        os.mkdir(PRETRAIN)
    except:
        'lets GO'
    print(PREPRO_CUTOFF, PREPRO_NORM, PREPRO_STREAM, PREPRO_HH, PREPRO_UMAP,
          PREPRO_KMEAN, SAVE_ALL, PREDICT_ALL, UPLOAD_SCI)
    if PREPRO_NORM:
        # Load, (optionally) smooth, PCA-reduce and intensity-normalize the
        # raw dataset; `mask` marks pixels that survive the cutoff.
        print(f'=================LOADING N={num} Smoothing {ISSMTH} =================')
        data1Ds, pc = process_dataset_pc(data_dir, num, pca_comp, ISSMTH, SMTH, TEST)
        intensity, pca_results = process_pca(data1Ds, pc, num)
        # df_pca=pd.DataFrame(pca_results, columns=list(range(pca_comp)))
        df_norm, mask, ftr_len0 = process_intensity(pca_results, intensity, pca_comp, PREPRO_CUTOFF, ONPCA, ONINT, r=0.01, wdir=PRETRAIN)
        assert ftr_len0 == ftr_len
        # One row of 1004*1344 pixels per frame.
        mask2d = mask.reshape((num, 1004 * 1344))
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/mask_all.txt', mask)
        else:
            # Persist only frame `pidx`; idxii/idxjj delimit its rows within
            # the concatenated masked stream.
            mask0 = mask2d[pidx]
            idxii = int(mask2d[:pidx].sum())
            idxjj = int(mask2d[:(pidx + 1)].sum())
            assert idxjj - idxii == mask0.sum()
            print(mask0.shape, mask.sum(), 'saving mask')
            np.savetxt(f'{PRETRAIN}/mask{pidx}.txt', mask0)
        # df_norm.to_csv(f'{PRETRAIN}/df_norm.csv',index=False)
        # df_normt=df_norm[idxii:idxjj]
        # df_normt.to_csv(f'{PRETRAIN}/df_norm{pidx}.csv',index=False)
    # elif PREPRO_STREAM:
    #     print(f'=================LOADING df_norm =================')
    #     df_norm=pd.read_csv(f'{PRETRAIN}/df_norm.csv')
    if PREPRO_STREAM:
        # Encode the normalized table into a base-`base` integer stream.
        print(f'=================ENCODING Base={base} =================')
        stream = process_rebin(df_norm, base, dtype)
        if SAVE_ALL:
            np.savetxt(f'{PRETRAIN}/stream_b{base}.txt', stream)
        else:
            stream0 = stream[idxii:idxjj]
            np.savetxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt', stream0)
    elif PREPRO_HH:
        # HH stage without a fresh stream: reload from disk.
        print(f'=================LOADING STREAM =================')
        stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        if not PREDICT_ALL:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')
    if PREPRO_HH:
        # Exact counting only in this pipeline (EXACT must be 0 here).
        assert EXACT == 0
        topk = 20000
        print(f'=================DECODE {ftr_len} DIM =================')
        HH_pd = get_HH_pd(stream, base, ftr_len, dtype, EXACT, topk, r=16, d=1000000, c=None, device=None)
        HH_pd.to_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_UMAP:
        print(f'=================LOADING HH_pd==============')
        HH_pd = pd.read_csv(f'{PRETRAIN}/HH_pd_b{base}e{EXACT}.csv')
        print(HH_pd.head())
    if PREPRO_UMAP:
        # Project the top-20000 heavy hitters into UMAP space.
        print(f'=================GETTING UMAP =================')
        # # lb,ub=int(HH_pd['freq'][0]*lbr),int(HH_pd['freq'][0])
        # HH_pdc=HH_pd[HH_pd['freq']>lb]
        # # print(len(HH_pdc),len(HH_pd),HH_pd['freq'][0],'lb',lb,'HHratio',lbr)
        # if len(HH_pdc)>20000:
        HH_pdc = HH_pd[:20000]
        print(len(HH_pdc), len(HH_pd), HH_pd['freq'][0])
        print(f'=================LOADING HH_pd==============')
        umapT = get_umap_pd(HH_pdc, list(range(ftr_len)))
        # print(HH_pdc.keys())
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    elif PREPRO_KMEAN:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')
    if PREPRO_KMEAN:
        # Cluster the UMAP coordinates and persist both the model and the
        # labelled HH table.
        print(f'=================KMEAN CLUSTERING =================')
        kmap = get_kmean_lbl(HH_pdc, N_cluster, u1='u1', u2='u2')
        joblib.dump(kmap, f'{PRETRAIN}/kmap_k{N_cluster}e{EXACT}.sav')
        HH_pdc.to_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv', index=False)
    else:
        HH_pdc = pd.read_csv(f'{PRETRAIN}/HH_pdh_b{base}e{EXACT}.csv')
    if PREDICT_ALL:
        # Predict a cluster label for every pixel of every frame.
        print(f'=================PREDICTING ALL {num} LABEL==============')
        if not PREPRO_NORM:
            mask = np.loadtxt(f'{PRETRAIN}/mask_all.txt')
        if not PREPRO_HH:
            stream = np.loadtxt(f'{PRETRAIN}/stream_b{base}.txt')
        pred_k = get_pred_stream(stream, mask, HH_pdc, f'k{N_cluster}', val='HH', bg=0, color=0, sgn=1)
        pred_k = pred_k.reshape((num, 1004, 1344))
        print(f'=================SAVING PREDICTION of ALL {num} LABEL==============')
        np.savetxt(f'{PRETRAIN}/pred_k{N_cluster}e{EXACT}.txt', pred_k)
    else:
        # Predict for the single frame `pidx` only.
        print(f'=================PREDICTING id{pidx} LABEL==============')
        if not PREPRO_NORM:
            mask0 = np.loadtxt(f'{PRETRAIN}/mask{pidx}.txt')
        if not PREPRO_HH:
            stream0 = np.loadtxt(f'{PRETRAIN}/stream_b{base}{pidx}.txt')
        pred_k = get_pred_stream(stream0, mask0, HH_pdc, f'k{N_cluster}', val='HH', bg=0, color=0, sgn=1)
        pred_k = pred_k.reshape((1004, 1344))
        print(f'=================SAVING PREDICTION of id{pidx} LABEL==============')
        np.savetxt(f'{PRETRAIN}/pred_k{N_cluster}{pidx}_f{name}b{base}sm1c3sige{EXACT}.txt', pred_k)
    if UPLOAD_SCI:
        # SECURITY NOTE(review): credentials are redacted placeholders here;
        # use getpass or environment variables rather than literals.
        username = '******'
        password = '******'
        # password = getpass.getpass()
        sciserver_token = Authentication.login(username, password)
        CasJobs.uploadPandasDataFrameToTable(
            dataFrame=HH_pdc,
            tableName=f'b{base}sm{SMTH}f{name}sig3e{EXACT}_v1',
            context="MyDB")
import json
import time
import sys
from io import StringIO
import requests
import pandas
from SciServer import Authentication, Config

# Parallel lists of service accounts: userPasswords[i] is the password for
# userNames[i].
userNames = ['matlab', 'recount']
userPasswords = ['matlab', 'recount']

# Log in as each account and collect one Keystone token per account.
# zip() pairs each name with its password directly instead of the original
# index-based range(len(...)) loop; stray semicolons removed.
userTokens = []
for name, pwd in zip(userNames, userPasswords):
    Authentication.login(name, pwd)
    token = Authentication.getKeystoneToken()
    userTokens.append(token)
# Tail of an upload-dispatch function whose ``if`` branch lies before this
# chunk: directories are synced recursively; anything else is unreachable
# because ToUploadPath validated the path at argument-parse time.
elif os.path.isdir(str(payload)):
    recursiveDirSync(payload, target)
else:
    print('This is supposed to be unreachable code - toUploadPath failed in checking input path')
    exit()


if __name__ == '__main__':
    # CLI entry point: parse credentials and local/remote paths (each argument
    # type performs its own validation), log in, then sync to SciDrive.
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Upload data to SciDrive")
    # subparsers = parser.add_subparsers(help='commands')
    parser.add_argument('-u', '--username', type=Username, help='Specify username', default=Username.DEFAULT)
    parser.add_argument('-p', '--password', type=Password, help='Specify password', default=Password.DEFAULT)
    parser.add_argument('-l', '--localpath', type=ToUploadPath, help='Specify path to file or folder to upload', default=ToUploadPath.DEFAULT)
    parser.add_argument('-r', '--remotepath', type=SciDrivePath, help='Specify destination folderpath on SciDrive to upload files/folder to', default=SciDrivePath.DEFAULT)
    args = parser.parse_args()
    token1 = Authentication.login(str(args.username), str(args.password))
    uploadSync(args.localpath, args.remotepath)
def save_obj(obj, name):
    """Pickle ``obj`` to '<name>.pkl' using the highest protocol."""
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)


def load_obj(name):
    """Load and return the object pickled at '<name>.pkl'."""
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)


#You need to create a SciServer account
# SECURITY NOTE(review): login name and (redacted) password are hard-coded.
Authentication_loginName = 'Robing'
Authentication_loginPassword = '******'
token = Authentication.login(Authentication_loginName, Authentication_loginPassword)

#FIRST match query - perhaps input as separate text file later on?
#This webpage is good to check your SQL syntax if you're having problems: http://skyserver.sdss.org/dr13/en/tools/search/form/searchform.aspx
# SDSS spectroscopic objects with extinction-corrected psf magnitudes joined
# against WISE photometry.  (The triple-quoted string continues beyond this
# chunk.)
query = """
SELECT top 1000 sp.ra,sp.dec,sp.z,sp.class,sp.subclass, sp.psfmag_u-sp.extinction_u AS mag_u, sp.psfmag_g-sp.extinction_g AS mag_g, sp.psfmag_r-sp.extinction_r AS mag_r, sp.psfmag_i-sp.extinction_i AS mag_i, sp.psfmag_z-sp.extinction_z AS mag_z, w.w1mpro AS w1, w.w2mpro AS w2, w.w3mpro AS w3, w.w4mpro AS w4,
def do(self, user='******', password='******', search=1, path_to_model='YSE_App\\data_ingest\\RF_model.sav'): """ Predicts photometric redshifts from RA and DEC points in SDSS An outline of the algorithem is: first pull from SDSS u,g,r,i,z magnitudes from SDSS; should be able to handle a list/array of RA and DEC place u,g,r,i,z into a vector, append the derived information into the data array predict the information from the model return the predictions in the same order to the user inputs: Ra: list or array of len N, right ascensions of target galaxies in decimal degrees Dec: list or array of len N, declination of target galaxies in decimal degrees search: float, arcmin tolerance to search for the object in SDSS Catalogue path_to_model: str, filepath to saved model for prediction Returns: predictions: array of len N, photometric redshift of input galaxy """ nowdate = datetime.datetime.utcnow() - datetime.timedelta(1) from django.db.models import Q #HAS To Remain Here, I dunno why print('Entered the photo_z cron') #save time b/c the other cron jobs print a time for completion transients = (Transient.objects.filter(Q(host__photo_z__isnull=True))) #print('Number of test transients:', len(transients)) RA = [] #Needs to be list b/c don't know how many hosts are None DEC = [] outer_mask = [ ] #create an integer index mask that we will place values into because some hosts dont have a RA and DEC assigned for i, transient_obj in enumerate(transients): if transient_obj.host != None: RA.append(transient_obj.host.ra) DEC.append(transient_obj.host.dec) outer_mask.append(i) #provides integer index mask outer_mask = np.array(outer_mask) #make that an array N_outer = len(transients) #gives size of returned array Ra = np.array(RA) Dec = np.array(DEC) N = len(Ra) #gives size of query array Q = N // 1000 #decompose the length of transients needing classification if N % 1000 != 0: Q = Q + 1 #catch remainder and start a new batch total_job = [] #store all pandas job dataframes for j 
in range(Q): #iterate over batches if j == (Q - 1): Ra_batch = Ra[j * 1000:((j + 1) * 1000 + N % 1000)] #grab batch remainder Dec_batch = Dec[j * 1000:((j + 1) * 1000 + N % 1000)] else: Ra_batch = Ra[j * 1000:(j + 1) * 1000] #other wise grab batch of 1000 Dec_batch = Dec[j * 1000:(j + 1) * 1000] hold = [ ] #a list for holding the strings that I want to place into an sql query for val in range(len(Ra_batch)): string = '({},{},{}),|'.format(str(val), str(Ra[val]), str(Dec[val])) hold.append(string) #Now construct the full query sql = "CREATE TABLE #UPLOAD(|id INT PRIMARY KEY,|up_ra FLOAT,|up_dec FLOAT|)|INSERT INTO #UPLOAD| VALUES|" for data in hold: sql = sql + data #there is a comma that needs to be deleted from the last entry for syntax to work sql = sql[0:(len(sql) - 2)] + '|' #append the rest to it sql = sql + "SELECT|p.u,p.g,p.r,p.i,p.z|FROM #UPLOAD as U|OUTER APPLY dbo.fGetNearestObjEq((U.up_ra),(U.up_dec),{}) as N|LEFT JOIN Galaxy AS p ON N.objid=p.objid".format( str(search)) #change all | to new line: when we change to Unix system will need to change this new line sql = sql.replace('|', '\n') #login, change to some other credentials later Authentication.login('awe2', 'StandardPassword') job = CasJobs.executeQuery( sql, 'DR15', 'pandas') #this lines sends and retrieves the result print('Query {} of {} complete'.format(j + 1, Q)) job['u-g'] = job['u'].values - job['g'].values job['g-r'] = job['g'].values - job['r'].values job['r-i'] = job['r'].values - job['i'].values job['i-z'] = job['i'].values - job['z'].values job['u_over_z'] = job['u'].values / job['z'].values total_job.append(job) #print('left the query loop') query_result = pd.concat(total_job) #now feed to a RF model for prediction X = query_result.values #load the model, will need to change the path later model = pickle.load(open(path_to_model, 'rb')) #Need to deal with NANs now since many objects are outside the SDSS footprint, later models will learn to deal with this #ideas: need to retain a mask 
of where the nans are in the row mask = np.invert((query_result.isna().any(1).values )) #true was inside SDSS footprint #also will want this mask in indices so we can insert the predicted data correctly indices = [] for i, val in enumerate(mask): if val == True: indices.append(i) predictions = model.predict((X[mask, :])) #make nan array with size of what user asked for return_me = np.ones(N) * np.nan #now replace nan with the predictions in order return_me[indices] = predictions #something is wrong here!, line works inside a try statement but not outside. raises no error for some reason...? return_me_outer = np.ones(N_outer) * np.nan return_me_outer[outer_mask] = return_me #print('debug: made it here') print('time taken:', datetime.datetime.utcnow() - nowdate) print('uploading now') for t, pz in zip(transients, return_me): #print('1') host = t.host #print('2') host.photo_z = pz #print('3') host.save() #print('4') print('time taken with upload:', datetime.datetime.utcnow() - nowdate)