示例#1
0
def test_urlQueryParser():
    # the query dict has no guaranteed key order, so either serialization
    # of the two parameters is an acceptable result
    acceptable = [
        'www.somepage.foo?foo=bar&page=1',
        'www.somepage.foo?page=1&foo=bar',
    ]
    parsed = anc.urlQueryParser('www.somepage.foo', {'foo': 'bar', 'page': 1})
    assert parsed in acceptable
示例#2
0
    def catch(self, sensor, osvtype='POE', start=None, stop=None):
        """
        check a server for files

        Parameters
        ----------
        sensor: str or list
            The S1 mission(s):
             - 'S1A'
             - 'S1B'
             - ['S1A', 'S1B']
        osvtype: {'POE', 'RES'}
            the type of orbit files required
        start: str
            the date to start searching for files in format YYYYmmddTHHMMSS
        stop: str
            the date to stop searching for files in format YYYYmmddTHHMMSS

        Returns
        -------
        list
            the URLs of the remote OSV files

        Raises
        ------
        RuntimeError
            if osvtype is not 'POE'/'RES' or sensor is not a supported value
        """
        # a dictionary for storing the url arguments
        query = {}

        if osvtype == 'POE':
            query['product_type'] = 'AUX_POEORB'
        elif osvtype == 'RES':
            query['product_type'] = 'AUX_RESORB'
        else:
            raise RuntimeError("osvtype must be either 'POE' or 'RES'")

        if sensor in ['S1A', 'S1B']:
            query['sentinel1__mission'] = sensor
        elif sorted(sensor) == ['S1A', 'S1B']:
            # both missions requested: no mission filter needed
            pass
        else:
            raise RuntimeError('unsupported input for parameter sensor')

        # the collection of files to be returned
        collection = []
        # set the defined date or the date of the first existing OSV file otherwise
        # NOTE: unlike the page-scraping implementation, no padding days are
        # applied here; the query filters on validity_start/validity_stop directly
        if start is not None:
            date_start = datetime.strptime(
                start, '%Y%m%dT%H%M%S').strftime('%Y-%m-%dT%H:%M:%S')
        else:
            date_start = '2014-07-31'
        # set the defined date or the current date otherwise
        if stop is not None:
            date_stop = datetime.strptime(
                stop, '%Y%m%dT%H%M%S').strftime('%Y-%m-%dT%H:%M:%S')
        else:
            date_stop = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')

        # append the time frame to the query dictionary
        query['validity_start__gte'] = date_start
        query['validity_stop__lte'] = date_stop
        print('searching for new {} files'.format(osvtype))
        # the parser percent-encodes ':' in the timestamps; restore it
        target = urlQueryParser(self.url, query).replace('%3A', ':')
        print(target)
        # follow the server-side pagination until no 'next' page is reported
        while target is not None:
            response = requests.get(target, timeout=self.timeout).json()
            remotes = [item['remote_url'] for item in response['results']]
            collection += remotes
            target = response['next']
        if osvtype == 'RES':
            # only keep RES files covering a period not covered by any POE file;
            # hoist the loop-invariant maxdate lookup out of the comprehension
            poe_max = self.maxdate('POE', 'stop')
            if poe_max is not None:
                collection = [
                    x for x in collection
                    if self.date(x, 'start') > poe_max
                ]
        return collection
示例#3
0
    def catch(self, sensor, osvtype='POE', start=None, stop=None):
        """
        check a server for files

        Parameters
        ----------
        sensor: str or list
            The S1 mission(s):
             - 'S1A'
             - 'S1B'
             - ['S1A', 'S1B']
        osvtype: {'POE', 'RES'}
            the type of orbit files required
        start: str
            the date to start searching for files
        stop: str
            the date to stop searching for files

        Returns
        -------
        list
            the URLs of the remote OSV files

        Raises
        ------
        RuntimeError
            if sensor is unsupported or the remote page cannot be read
        """
        address, outdir = self._typeEvaluate(osvtype)
        # a dictionary for storing the url arguments
        query = {'page': 1}
        # a list of pages to be searched; will be extended during url readout
        pages = [1]

        if sensor in ['S1A', 'S1B']:
            query['sentinel1__mission'] = sensor
        elif sorted(sensor) == ['S1A', 'S1B']:
            # both missions requested: no mission filter needed
            pass
        else:
            raise RuntimeError('unsupported input for parameter sensor')

        # the collection of files to be returned
        files = []
        # set the defined date or the date of the first existing OSV file otherwise
        # two days are added/subtracted from the defined start and stop dates since the
        # online query does only allow for searching the start time; hence, if e.g.
        # the start date is 2018-01-01T000000, the query would not return the corresponding
        # file, whose start date is 2017-12-31 (V20171231T225942_20180102T005942)
        if start is not None:
            date_start = datetime.strptime(start, '%Y%m%dT%H%M%S')
            date_start = (date_start - timedelta(days=2)).strftime('%Y-%m-%d')
        else:
            date_start = '2014-07-31'
        # set the defined date or the current date otherwise
        if stop is not None:
            date_stop = datetime.strptime(stop, '%Y%m%dT%H%M%S')
            date_stop = (date_stop + timedelta(days=2)).strftime('%Y-%m-%d')
        else:
            date_stop = time.strftime('%Y-%m-%d')

        # pattern for scanning urlopen response for links to OSV files
        pattern_url = 'http.*{}'.format(self.pattern)

        # append the time frame to the query dictionary
        query['validity_start'] = '{0}..{1}'.format(date_start, date_stop)
        print('searching for new {} files'.format(osvtype))
        # iterate through the url pages and look for files
        while len(pages) > 0:
            # parse the url
            subaddress = urlQueryParser(address, query)
            # read the remote content
            try:
                response = urlopen(subaddress, context=self.sslcontext).read().decode('utf-8')
                print(subaddress)
            except IOError as e:
                raise RuntimeError(e)
            if query['page'] == 1:
                # read all existing pages from the url return of the first page
                pages_str = re.findall('page=[0-9]+', response)
                pages = list(set([int(x.strip('page=')) for x in pages_str]))
            else:
                # delete the page from the list of pages yet to be searched
                del pages[pages.index(query['page'])]
            # list all osv files found on the page
            remotes = sorted(set(re.findall(pattern_url, response)))
            # do a more accurate filtering of the time stamps
            if start is not None:
                remotes = [x for x in remotes if self.date(x, 'stop') >= start]
            if stop is not None:
                remotes = [x for x in remotes if self.date(x, 'start') <= stop]
            # append files not yet present in the collection
            # (the former guard 'len(selection) >= 0' was a tautology, always true)
            files += [x for x in remotes if x not in files]
            # increment the url page
            query['page'] += 1
        # in case the type 'RES' is selected then only return those files covering
        # a time period not covered by any POE file
        if osvtype == 'RES':
            # hoist the loop-invariant maxdate lookup out of the comprehension
            # and guard against a missing POE collection, as the sibling
            # implementation in this file does
            poe_max = self.maxdate('POE', 'stop')
            if poe_max is not None:
                files = [x for x in files if self.date(x, 'start') > poe_max]
        return files
    def catch(self, osvtype='POE', start=None, stop=None):
        """
        check a server for files

        Parameters
        ----------
        osvtype: {'POE', 'RES'}
            the type of orbit files required
        start: str
            the date to start searching for files
        stop: str
            the date to stop searching for files

        Returns
        -------
        list
            the URLs of the remote OSV files

        Raises
        ------
        RuntimeError
            if the remote page cannot be read
        """
        address, outdir = self._typeEvaluate(osvtype)
        # a dictionary for storing the url arguments
        query = {'page': 1}
        # the collection of files to be returned
        files = []
        # set the defined date or the date of the first existing OSV file otherwise
        if start is not None:
            date_start = datetime.strptime(
                start, '%Y%m%dT%H%M%S').strftime('%Y-%m-%d')
        else:
            date_start = '2014-08-22'
        # set the defined date or the current date otherwise
        if stop is not None:
            date_stop = datetime.strptime(stop,
                                          '%Y%m%dT%H%M%S').strftime('%Y-%m-%d')
        else:
            date_stop = time.strftime('%Y-%m-%d')

        # pattern for scanning urlopen response for links to OSV files
        pattern_url = 'http.*{}'.format(self.pattern)

        # append the time frame to the query dictionary
        query['validity_start'] = '{0}..{1}'.format(date_start, date_stop)
        print('searching for new {} files'.format(osvtype))
        # iterate through the url pages and look for files
        while True:
            # parse the url
            subaddress = urlQueryParser(address, query)
            # read the remote content
            try:
                response = urlopen(
                    subaddress, context=self.sslcontext).read().decode('utf-8')
                print(subaddress)
            except IOError as e:
                raise RuntimeError(e)
            # list all osv files found on the page
            remotes = sorted(set(re.findall(pattern_url, response)))
            # do a more accurate filtering of the time stamps
            if start is not None:
                remotes = [x for x in remotes if self.date(x, 'stop') > start]
            # filter files already existing in the files collection
            selection = [x for x in remotes if x not in files]
            # stop the loop if no more files are found on the current url page
            if len(selection) == 0:
                break
            # append the found files to the collection and increment the url page
            files += selection
            query['page'] += 1
        # in case the type 'RES' is selected then only return those files covering
        # a time period not covered by any POE file
        if osvtype == 'RES':
            # hoist the loop-invariant maxdate lookup out of the comprehension
            # and guard against a missing POE collection, as the sibling
            # implementation in this file does
            poe_max = self.maxdate('POE', 'stop')
            if poe_max is not None:
                files = [x for x in files if self.date(x, 'stop') > poe_max]
        return files