Example #1
    def _download_and_collect_entries(self,
                                      query_result,
                                      client=None,
                                      path=None,
                                      progress=False):
        if client is None:
            client = VSOClient()

        paths = client.get(query_result, path).wait(progress=progress)

        for (path, block) in zip(paths, query_result):
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)

            if os.path.isfile(path):
                entries = tables.entries_from_file(path, self.default_waveunit)
            elif os.path.isdir(path):
                entries = tables.entries_from_dir(path, self.default_waveunit)
            else:
                raise ValueError('The path is neither a file nor directory')

            for entry in entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start = qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
                yield entry
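
The generator above only yields entries; nothing is stored until the caller consumes it. A minimal usage sketch, assuming a sunpy Database instance `db` and reusing one VSOClient for both the query and the download (the query attributes and `db` are illustrative, not part of the snippet):

# Illustrative only: `db` is assumed to be a sunpy.database.Database instance.
from sunpy.net import vso

client = vso.VSOClient()
qr = client.query(vso.attrs.Time('2011-09-20 01:00:00', '2011-09-20 01:00:10'),
                  vso.attrs.Instrument('AIA'))
for entry in db._download_and_collect_entries(qr, client=client, progress=True):
    db.add(entry)   # each yielded entry becomes one database row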
Example #2
    def _download_and_collect_entries(self, query_result, client=None,
                                      path=None, progress=False):
        if client is None:
            client = VSOClient()
        for block in query_result:
            paths = client.get([block], path).wait(progress=progress)
            for path in paths:
                qr_entry = tables.DatabaseEntry._from_query_result_block(block)
                file_entries = list(
                    tables.entries_from_file(path, self.default_waveunit))
                for entry in file_entries:
                    entry.source = qr_entry.source
                    entry.provider = qr_entry.provider
                    entry.physobs = qr_entry.physobs
                    entry.fileid = qr_entry.fileid
                    entry.observation_time_start = \
                        qr_entry.observation_time_start
                    entry.observation_time_end = qr_entry.observation_time_end
                    entry.instrument = qr_entry.instrument
                    entry.size = qr_entry.size
                    entry.wavemin = qr_entry.wavemin
                    entry.wavemax = qr_entry.wavemax
                    entry.path = path
                    entry.download_time = datetime.utcnow()
                    yield entry
Example #3
    def _download_and_collect_entries(self, query_result, **kwargs):

        client = kwargs.pop('client', None)
        path = kwargs.pop('path', None)
        progress = kwargs.pop('progress', False)
        methods = kwargs.pop('methods', ('URL-FILE_Rice', 'URL-FILE'))

        if kwargs:
            k, v = kwargs.popitem()
            raise TypeError('unexpected keyword argument {0!r}'.format(k))

        if client is None:
            client = VSOClient()

        paths = client.get(query_result, path, methods).wait(progress=progress)

        for (path, block) in zip(paths, query_result):
            qr_entry = tables.DatabaseEntry._from_query_result_block(block)

            if os.path.isfile(path):
                entries = tables.entries_from_file(path, self.default_waveunit)
            elif os.path.isdir(path):
                entries = tables.entries_from_dir(path, self.default_waveunit)
            else:
                raise ValueError('The path is neither a file nor directory')

            for entry in entries:
                entry.source = qr_entry.source
                entry.provider = qr_entry.provider
                entry.physobs = qr_entry.physobs
                entry.fileid = qr_entry.fileid
                entry.observation_time_start = qr_entry.observation_time_start
                entry.observation_time_end = qr_entry.observation_time_end
                entry.instrument = qr_entry.instrument
                entry.size = qr_entry.size
                entry.wavemin = qr_entry.wavemin
                entry.wavemax = qr_entry.wavemax
                entry.path = path
                entry.download_time = datetime.utcnow()
                yield entry
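
This variant additionally accepts a `methods` keyword that is passed through to `VSOClient.get`, and it rejects any other keyword with a `TypeError`. A short sketch of a call with a custom method preference, reusing the hypothetical `db` and `qr` from the sketch above and assuming the usual VSO `{file}` path template:

# Illustrative only: prefer plain URL-FILE transfers over Rice-compressed ones.
entries = list(db._download_and_collect_entries(qr, path='/tmp/sunpy/{file}',
                                                methods=('URL-FILE',),
                                                progress=False))
db.add_many(entries)   # store all collected entries at once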
Example #4
    def download(self, *query, **kwargs):
        """download(*query, client=sunpy.net.vso.VSOClient(), path=None, progress=False)
        Search for data using the VSO interface (see
        :meth:`sunpy.net.vso.VSOClient.query`). If querying the VSO returns
        no data, no operation is performed; that is, no entry is added to the
        database and no file is downloaded. Otherwise, the retrieved search
        result is used to download every file that belongs to it. After that,
        all the gathered information (both from the VSO query result and from
        the downloaded FITS files) is added to the database such that each
        FITS header is represented by one database entry.

        """
        if not query:
            raise TypeError('at least one attribute required')
        client = kwargs.pop('client', None)
        path = kwargs.pop('path', None)
        progress = kwargs.pop('progress', False)
        if kwargs:
            k, v = kwargs.popitem()
            raise TypeError('unexpected keyword argument {0!r}'.format(k))
        if client is None:
            client = VSOClient()
        qr = client.query(*query)
        # don't do anything if querying the VSO results in no data
        if not qr:
            return
        entries = []
        for block in qr:
            paths = client.get([block], path).wait(progress=progress)
            for path in paths:
                qr_entry = tables.DatabaseEntry._from_query_result_block(block)
                file_entries = list(
                    tables.entries_from_file(path, self.default_waveunit))
                for entry in file_entries:
                    entry.source = qr_entry.source
                    entry.provider = qr_entry.provider
                    entry.physobs = qr_entry.physobs
                    entry.fileid = qr_entry.fileid
                    entry.observation_time_start =\
                        qr_entry.observation_time_start
                    entry.observation_time_end = qr_entry.observation_time_end
                    entry.instrument = qr_entry.instrument
                    entry.size = qr_entry.size
                    entry.wavemin = qr_entry.wavemin
                    entry.wavemax = qr_entry.wavemax
                    entry.path = path
                    entry.download_time = datetime.utcnow()
                entries.extend(file_entries)
        dump = serialize.dump_query(and_(*query))
        (dump_exists, ), = self.session.query(
            exists().where(tables.JSONDump.dump == tables.JSONDump(dump).dump))
        if dump_exists:
            # dump already exists in table jsondumps -> edit instead of add
            # update all entries with the fileid `entry.fileid`
            for entry in entries:
                old_entry = self.session.query(tables.DatabaseEntry).filter_by(
                    fileid=entry.fileid).first()
                if old_entry is not None:
                    attrs = [
                        'source', 'provider', 'physobs',
                        'observation_time_start', 'observation_time_end',
                        'instrument', 'size', 'wavemin', 'wavemax',
                        'download_time'
                    ]
                    kwargs = dict((k, getattr(entry, k)) for k in attrs)
                    cmd = commands.EditEntry(old_entry, **kwargs)
                    if self._enable_history:
                        self._command_manager.do(cmd)
                    else:
                        cmd()
        else:
            self.add_many(entries)
            # serialize the query and save the serialization in the database
            # for two reasons:
            #   1. to avoid unnecessary downloading in future calls of
            #      ``fetch``
            #   2. to know whether to add or to edit entries in future calls of
            #      ``download`` (this method)
            self.session.add(tables.JSONDump(dump))
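
For context, this download method would normally be called on a sunpy Database object with VSO attributes. A hypothetical call, in which the database URL, time range, and download path are made up for illustration:

# Illustrative only; the database URL, time range, and download path are placeholders.
from sunpy.database import Database
from sunpy.net import vso

db = Database('sqlite:///sunpydata.sqlite')
db.download(vso.attrs.Time('2012-08-05 00:00:00', '2012-08-05 00:00:05'),
            vso.attrs.Instrument('AIA'),
            path='/tmp/{file}', progress=True)
db.commit()   # persist the new entries and the serialized query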
Example #5
def find_sdo_files(directory, wavelength='', time_limits=None, cadence=12, download=None, double_check='Yes'):
    """***This function no longer work. It will be updated or removed soon.***

    Checks a directory for missing files from downloading AIA images and can check/download the 
    files from the missing time.

    ***This function may need to run several times. It depends on how well the files are downloaded***
    
    Parameters
    ----------
    directory : str
            A string of the path to the files to be checked.

    wavelength : str
            The wavelength of the files to check. Only used if download is 'Yes' or 'auto'. For HMI
            files this can be, e.g., 'los_magnetic_field', 'intensity', etc.
            Default: ''

    time_limits : list
            A list of two entries giving the start and end time of the observation, used to check
            whether any files were missed before the first and after the last file you have.
            Default: None

    cadence : int
            An integer number of seconds that should be the temporal separation of the files.
            Default: 12

    download : str
            Indicates whether missing files should be searched for and downloaded. If set to None,
            a prompt will ask; enter 'Yes' or 'No'. Setting it to 'auto' searches for the data
            automatically without user input.
            Default: None

    double_check : str
            After searching for more files to download, check again (without downloading) to see
            whether any are still missing, e.g. 'Yes' or 'No'. Can also be 'recursive', which keeps
            checking until all files are found in case there is a large time gap in the middle
            *** BEWARE INFINITIES ***.
            Default: 'Yes'
            
    Returns
    -------
    A list of the files that have no 'friend' within cadence seconds after them.
    """

    # this function no longer works at the moment
    print("This function no longer work. It will be updated or removed soon.")
    return

    files_list = list(os.listdir(directory))
    files = [ f for f in files_list if f.endswith('.fits')]
    files.sort()
    
    if files == []:
        empty_but_download = input('The folder provided does not appear to have any \'.fits\' files within it. \nDo you want to download within the time range given in the form \"%Y-%m-%d %H:%M:%S\" (this will only work for SDO/AIA files at the moment)? ')
        if empty_but_download == 'Yes':
            client = VSOClient()
            query_response = client.query_legacy(tstart=time_limits[0], tend=time_limits[1], instrument='AIA', wave=wavelength)
            n = len(query_response) - 1 #will be used to index the list 'query_response' when downloading
            #Download the first two from ROB to /tmp folder and wait for download to complete
            results = client.get(query_response[0:n], path=directory, site='rob')
            fs = results.wait()
            still_no_friends = find_sdo_files(directory, wavelength, time_limits=time_limits, download='Yes', double_check='No')
            return
        else:
            assert files != [], f'No .fits files in {directory}.' #make sure there are files in the first place

    no_friends = [] #files that do not have a friend within the cadence time after it
    t_of_no_friends = [] #time of files that have no friends
    t_end_of_no_friends = []

    for f in range(len(files)-1): #don't want to look at the last one as it will never have anything after it anyway
        time_0 = datetime.datetime.strptime(files[f][4:19], '%Y%m%d_%H%M%S')
        time_1 = datetime.datetime.strptime(files[f+1][4:19], '%Y%m%d_%H%M%S')
        if time_0 <= time_1 <= time_0 + timedelta(seconds=cadence): #if there is a file <= cadence seconds ahead, move on
            continue
        else: #if there is not, add this file to the no-friends list
            no_friends.append(files[f])
            t_of_no_friends.append(time_0)
            t_end_of_no_friends.append(time_1)

    if (download is None) and (len(t_of_no_friends) > 0):
        download = input('Would you like the times of the missing files checked (yea or nay)? ')

        if download in ['No', 'no', 'N', 'n', 'Nope', 'nope', 'Nay', 'nay']:
            download = 'No'
        elif download in ['Yes', 'yes', 'Y', 'y', 'Yip', 'yip', 'Yea', 'yea']:
            download = 'Yes'

    if len(t_of_no_friends) > 0:
        print('There are', len(t_of_no_friends), 'time intervals of missing files.')

    if (download == 'Yes'):

        client = VSOClient()

        if len(t_of_no_friends) > 0:
            start_times = [t.strftime("%Y-%m-%d %H:%M:%S") for t in t_of_no_friends]
            #the end of each gap is the start time of the next file that was found
            end_times = [t.strftime("%Y-%m-%d %H:%M:%S") for t in t_end_of_no_friends]
            for ts, te in zip(start_times, end_times):
                if files[0][0:3] == 'aia':
                    query_response = client.query_legacy(tstart=ts, tend=te, instrument='AIA', wave=wavelength)
                elif files[0][0:3] == 'hmi':
                    query_response = client.query_legacy(tstart=ts, tend=te, instrument='HMI', physobs=wavelength)
                n = len(query_response) - 1 #will be used to index the list 'query_response' when downloading

                #Download the results from the ROB mirror to the given directory and wait for the download to complete
                results = client.get(query_response[0:n], path=directory, site='rob')
                fs = results.wait()

        if time_limits is not None:
            time_first = datetime.datetime.strptime(files[0][4:19], '%Y%m%d_%H%M%S')
            time_last = datetime.datetime.strptime(files[-1][4:19], '%Y%m%d_%H%M%S')
        
            time_limits_first = datetime.datetime.strptime(time_limits[0], "%Y-%m-%d %H:%M:%S")
            time_limits_last = datetime.datetime.strptime(time_limits[1], "%Y-%m-%d %H:%M:%S")

            if time_first - time_limits_first >= timedelta(seconds=cadence): #if the first file starts at least one cadence after the given start time
                print('Checking start time-gap.')
                if files[0][0:3] == 'aia':
                    query_response = client.query_legacy(tstart=time_limits[0], tend=time_first.strftime("%Y-%m-%d %H:%M:%S"), instrument='AIA', wave=wavelength)
                elif files[0][0:3] == 'hmi':
                    query_response = client.query_legacy(tstart=time_limits[0], tend=time_first.strftime("%Y-%m-%d %H:%M:%S"), instrument='HMI', physobs=wavelength)
                n = len(query_response) - 1 #will be used to index the list 'query_response' when downloading
                #Download the results from the ROB mirror to the given directory and wait for the download to complete
                results = client.get(query_response[0:n], path=directory, site='rob')
                fs = results.wait()
            if time_limits_last - time_last >= timedelta(seconds=cadence): #if the given end time is at least one cadence after the last file's start time
                print('Checking end time-gap.')
                if files[0][0:3] == 'aia':
                    query_response = client.query_legacy(tstart=time_last.strftime("%Y-%m-%d %H:%M:%S"), tend=time_limits[1], instrument='AIA', wave=wavelength)
                elif files[0][0:3] == 'hmi':
                    query_response = client.query_legacy(tstart=time_last.strftime("%Y-%m-%d %H:%M:%S"), tend=time_limits[1], instrument='HMI', physobs=wavelength)
                n = len(query_response) - 1 #will be used to index the list 'query_response' when downloading
                #Download the results from the ROB mirror to the given directory and wait for the download to complete
                results = client.get(query_response[0:n], path=directory, site='rob')
                fs = results.wait()
            
    duplicates = glob.glob(directory + '*.*.fits') #removes files that downloaded twice
    for each_file_path in duplicates:
        os.remove(each_file_path)

    if no_friends == []: #if there aren't any files to check for then don't double-check or anything, just stop
        print('All files here!')
        return
    
    if double_check == 'No':
        print(f'Here are files without friends {cadence} seconds ahead of them from directory \n{directory}:')
        print(no_friends)
        print('Please wait a few minutes and try this function again; it sometimes depends on the servers.')
        return no_friends
    elif double_check == 'Yes': #double check to see if we have all the files
        still_no_friends = find_sdo_files(directory, wavelength, time_limits=time_limits, download='Yes', double_check='No')
    elif double_check == 'recursive':
        still_no_friends = find_sdo_files(directory, wavelength, time_limits=time_limits, download='Yes', double_check='recursive')
    return still_no_friends
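
Although the function is currently disabled (it returns right after the deprecation message), the intended call pattern follows from the docstring. A hypothetical invocation, with a made-up directory and time range:

# Hypothetical call; the directory and time limits are placeholders.
missing = find_sdo_files('/data/aia_171/', wavelength='171',
                         time_limits=['2017-09-10 15:00:00', '2017-09-10 16:00:00'],
                         cadence=12, download='Yes', double_check='No')
print(missing)   # files with no companion within `cadence` seconds after them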