def tgas(flag=None):
    """
    Get path to the Gaia TGAS DR1 files, download if files not found

    Every file is compared against the digest listed for it in the ``MD5SUM.txt``
    that is fetched once into the data folder. A file that fails the comparison is
    downloaded again on its own, up to a fixed number of attempts. Files without
    an entry in ``MD5SUM.txt`` are accepted without verification.

    :param flag: 1 forces every file to be downloaded again; any other value
        (default ``None``) reuses files already on disk when they verify
    :type flag: int or None
    :return: List of file path
    :rtype: list
    :raises OSError: if a file still fails its MD5 check after all download attempts
    :History: 2017-Oct-13 - Written - Henry Leung (University of Toronto)
    """
    # Upper bound on downloads of a single file, so a stale MD5SUM.txt or a
    # persistently broken transfer cannot make this loop forever
    max_attempts = 3

    # Check if directory exists
    folderpath = os.path.join(gaia_env(), 'Gaia/gdr1/tgas_source/fits/')
    urlbase = 'http://cdn.gea.esac.esa.int/Gaia/gdr1/tgas_source/fits/'
    if not os.path.exists(folderpath):
        os.makedirs(folderpath)

    hash_filename = 'MD5SUM.txt'
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)

    # Rows are "<md5> <filename>"; atleast_2d keeps a one-line (or empty) file
    # iterable row by row, and the dict turns a missing entry into a plain None
    # instead of an empty array whose truth value is ambiguous
    hash_table = np.atleast_2d(np.loadtxt(full_hash_filename, dtype='str'))
    expected_md5 = {str(row[1]): str(row[0]) for row in hash_table if len(row) >= 2}

    fulllist = []
    for i in range(16):
        filename = f'TgasSource_000-000-0{i:0{2}d}.fits'
        fullfilename = os.path.join(folderpath, filename)
        urlstr = urlbase + filename
        # In some rare case, the hash cannot be found; None then means "do not verify"
        expected = expected_md5.get(filename)

        # Reuse the file on disk unless a re-download was explicitly requested
        if flag != 1 and os.path.isfile(fullfilename):
            checksum = md5_checksum(fullfilename)
            if expected is None or checksum == expected:
                print(fullfilename + ' was found!')
                fulllist.append(fullfilename)
                continue
            print(checksum)
            print(expected)
            print('File corruption detected, astroNN attempting to download again')

        # Download (or re-download) only this file until it verifies
        for attempt in range(1, max_attempts + 1):
            # progress bar
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=urlstr.split('/')[-1]) as t:
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
            if expected is None or md5_checksum(fullfilename) == expected:
                break
            if attempt < max_attempts:
                print('File corruption detected, astroNN attempting to download again')
        else:
            raise OSError(f'MD5 checksum of {fullfilename} still does not match after '
                          f'{max_attempts} download attempts')

        print(f'Downloaded Gaia DR1 TGAS ({i:d} of 15) file catalog successfully to {fullfilename}')
        fulllist.append(fullfilename)

    return fulllist
def test_checksum(self):
    """
    Check md5_checksum, sha1_checksum and sha256_checksum against known digests of
    the ``anderson_2017_dr14_parallax.npz`` file located under ``astroNN/data``.
    """
    import astroNN
    from astroNN.shared.downloader_tools import md5_checksum, sha1_checksum, sha256_checksum

    package_parent = os.path.dirname(astroNN.__path__[0])
    anderson2017_path = os.path.join(package_parent, 'astroNN', 'data', 'anderson_2017_dr14_parallax.npz')

    # Reference answers were hashed by Windows Get-FileHash, which reports upper-case
    # hex, so each one is lower-cased before being compared
    reference = (
        (md5_checksum, '9C714F5FE22BB7C4FF9EA32F3E859D73'),
        (sha1_checksum, '733C0227CF93DB0CD6106B5349402F251E7ED735'),
        (sha256_checksum, '36C265C907F440114D747DA21D2A014D32B5E442D541F183C0EE862F5865FD26'),
    )

    # Hash the file with every helper first, then run the assertions in order
    predictions = [hasher(anderson2017_path) for hasher, _ in reference]
    for predicted, (_, answer) in zip(predictions, reference):
        self.assertEqual(predicted, answer.lower())
def gaia_source(dr=None, flag=None):
    """
    NAME:
        gaia_source
    PURPOSE:
        download the gaia_source files; every file is compared against the digest
        listed for it in the MD5SUM.txt fetched once into the data folder, and a
        file that fails the comparison is downloaded again on its own (bounded
        number of attempts). Files without an entry in MD5SUM.txt are accepted
        without verification.
    INPUT:
        dr (int): Gaia DR, example dr=1
        flag (int): None or 0: normal, 1: force to re-download
    OUTPUT:
        list of file path
    RAISES:
        ValueError: if the resolved data release is not 1
        OSError: if a file still fails its MD5 check after all download attempts
    HISTORY:
        2017-Oct-13 - Written - Henry Leung (University of Toronto)
        2017-Nov-26 - Update - Henry Leung (University of Toronto)
    """
    dr = gaia_default_dr(dr=dr)
    if dr != 1:
        raise ValueError('gaia_source() only supports Gaia DR1 Gaia Source')

    # Upper bound on downloads of a single file, so a stale MD5SUM.txt or a
    # persistently broken transfer cannot make this loop forever
    max_attempts = 3

    # Check if directory exists
    folderpath = os.path.join(gaia_env(), 'Gaia/gdr1/gaia_source/fits/')
    urlbase = 'http://cdn.gea.esac.esa.int/Gaia/gdr1/gaia_source/fits/'
    if not os.path.exists(folderpath):
        os.makedirs(folderpath)

    hash_filename = 'MD5SUM.txt'
    full_hash_filename = os.path.join(folderpath, hash_filename)
    if not os.path.isfile(full_hash_filename):
        urllib.request.urlretrieve(urlbase + hash_filename, full_hash_filename)

    # Rows are "<md5> <filename>"; atleast_2d keeps a one-line (or empty) file
    # iterable row by row, and the dict turns a missing entry into a plain None
    # instead of an empty array whose truth value is ambiguous
    hash_table = np.atleast_2d(np.loadtxt(full_hash_filename, dtype='str'))
    expected_md5 = {str(row[1]): str(row[0]) for row in hash_table if len(row) >= 2}

    # Groups 000..019 hold 256 files each, group 020 holds the remaining 111
    files_per_group = [256] * 20 + [111]
    filenames = [f'GaiaSource_000-0{j:0{2}d}-{i:0{3}d}.fits'
                 for j, count in enumerate(files_per_group)
                 for i in range(count)]
    # Progress is reported with a zero-based index, so the total shown is the last index
    last_index = len(filenames) - 1

    fulllist = []
    for index, filename in enumerate(filenames):
        urlstr = urlbase + filename
        fullfilename = os.path.join(folderpath, filename)
        # In some rare case, the hash cannot be found; None then means "do not verify"
        expected = expected_md5.get(filename)

        # Reuse the file on disk unless a re-download was explicitly requested
        if flag != 1 and os.path.isfile(fullfilename):
            checksum = md5_checksum(fullfilename)
            if expected is None or checksum == expected:
                print(fullfilename + ' was found!')
                fulllist.append(fullfilename)
                continue
            print(checksum)
            print(expected)
            print('File corruption detected, astroNN attempting to download again')

        # Download (or re-download) only this file until it verifies
        for attempt in range(1, max_attempts + 1):
            # progress bar
            with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=urlstr.split('/')[-1]) as t:
                urllib.request.urlretrieve(urlstr, fullfilename, reporthook=t.update_to)
            if expected is None or md5_checksum(fullfilename) == expected:
                break
            if attempt < max_attempts:
                print('File corruption detected, astroNN attempting to download again')
        else:
            raise OSError(f'MD5 checksum of {fullfilename} still does not match after '
                          f'{max_attempts} download attempts')

        print(f'Downloaded Gaia DR{dr} Gaia Source ({index:d} of {last_index:d}) '
              f'file catalog successfully to {fullfilename}')
        fulllist.append(fullfilename)

    return fulllist