Example #1
def testExtractDates_usingPattern():
    # Standard pattern
    s = Scraper('data/%Y/%m/%d/fits/swap/swap_00174_fd_%Y%m%d_%H%M%S.fts.gz')
    testURL = 'data/2014/05/14/fits/swap/swap_00174_fd_20140514_200135.fts.gz'
    timeURL = parse_time((2014, 5, 14, 20, 1, 35))
    assert s._extractDateURL(testURL) == timeURL
    # Pattern where the directory repeats only %Y, not the full date
    s = Scraper('data/%Y/fits/swap/swap_00174_fd_%Y%m%d_%H%M%S.fts.gz')
    testURL = 'data/2014/fits/swap/swap_00174_fd_20140514_200135.fts.gz'
    timeURL = parse_time((2014, 5, 14, 20, 1, 35))
    assert s._extractDateURL(testURL) == timeURL
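
These test functions are excerpted without their imports. A minimal, self-contained sketch of the same round trip, assuming the older sunpy layout where Scraper lives in sunpy.util.scraper and parse_time in sunpy.time:

from sunpy.util.scraper import Scraper
from sunpy.time import parse_time

# Build a Scraper from a strftime-style URL pattern and recover the
# timestamp embedded in a matching URL.
s = Scraper('data/%Y/%m/%d/fits/swap/swap_00174_fd_%Y%m%d_%H%M%S.fts.gz')
url = 'data/2014/05/14/fits/swap/swap_00174_fd_20140514_200135.fts.gz'
extracted = s._extractDateURL(url)
assert extracted == parse_time((2014, 5, 14, 20, 1, 35))
print(extracted.isot)  # 2014-05-14T20:01:35.000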
Example #2
def testExtractDates_notSeparators_andSimilar():
    s = Scraper('data/%Y/Jun%b%d_%H%M%S')
    testURL = 'data/2014/JunJun14_200135'
    timeURL = parse_time((2014, 6, 14, 20, 1, 35))
    assert s._extractDateURL(testURL) == timeURL
    testURL = 'data/2014/JunMay14_200135'
    timeURL = parse_time((2014, 5, 14, 20, 1, 35))
    assert s._extractDateURL(testURL) == timeURL
    # and the same test with the %b directive after the literal month name
    s = Scraper('data/%Y/%dJun%b_%H%M%S')
    testURL = 'data/2014/14JunJun_200135'
    timeURL = parse_time((2014, 6, 14, 20, 1, 35))
    assert s._extractDateURL(testURL) == timeURL
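
The point of this test is that a literal month name in the pattern must not be confused with the neighbouring %b directive. A quick sketch, with the same assumed imports as in the first sketch:

from sunpy.util.scraper import Scraper

# The literal 'Jun' in the pattern is fixed text; only %b is parsed, so a URL
# containing 'JunMay14' yields May, not June.
s = Scraper('data/%Y/Jun%b%d_%H%M%S')
print(s._extractDateURL('data/2014/JunMay14_200135'))  # 2014-05-14 20:01:35.000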
Example #3
File: eve.py Project: lamby/sunpy
 def _get_time_for_url(self, urls):
     eve = Scraper(BASEURL)
     times = list()
     for url in urls:
         t0 = eve._extractDateURL(url)
         # Hard-code a full-day range, since that is the norm for these files.
         times.append(TimeRange(t0, t0 + datetime.timedelta(days=1)))
     return times
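
BASEURL is defined elsewhere in eve.py; a hedged sketch of what this method produces for a single URL, using a hypothetical stand-in pattern:

import datetime
from sunpy.time import TimeRange
from sunpy.util.scraper import Scraper

BASEURL = 'data/%Y%m%d_EVE_L0CS_DIODES_1m.txt'  # hypothetical stand-in pattern
eve = Scraper(BASEURL)
t0 = eve._extractDateURL('data/20140514_EVE_L0CS_DIODES_1m.txt')
# Each file is assumed to cover a full day, matching the comment in the code.
print(TimeRange(t0, t0 + datetime.timedelta(days=1)))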
Example #4
 def _get_time_for_url(self, urls):
     freq = urls[0].split('/')[-1][0:3]  # extract the frequency label
     crawler = Scraper(BASEURL, freq=freq)
     times = list()
     for url in urls:
         t0 = crawler._extractDateURL(url)
         # Hard-code a full-day range, since that is the norm for these files.
         times.append(TimeRange(t0, t0 + TimeDelta(1*u.day)))
     return times
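
Example #4 differs from Example #3 mainly in building the day-long offset with astropy's TimeDelta rather than datetime.timedelta (it also pulls a frequency label from the first URL's filename). A sketch of the equivalence, assuming the usual astropy imports:

import datetime
import astropy.units as u
from astropy.time import Time, TimeDelta

t0 = Time('2014-05-14T20:01:35')
# Both offsets advance the start time by exactly one day.
assert t0 + TimeDelta(1 * u.day) == t0 + datetime.timedelta(days=1)

url = 'https://example.org/files/120_20140514.srs'  # hypothetical URL
print(url.split('/')[-1][0:3])  # '120' -- the frequency label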
Example #5
def testExtractDates_notSeparators():
    s = Scraper('data/%Y/%m/swap%m%d_%H%M%S')
    testURL = 'data/2014/05/swap0514_200135'
    timeURL = parse_time((2014, 5, 14, 20, 1, 35))
    assert s._extractDateURL(testURL) == timeURL
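
Here the %m%d directives run together with no separator; they still split correctly because each strftime directive has a fixed width. A minimal sketch, with the same assumed imports as in the first sketch:

from sunpy.util.scraper import Scraper

# '0514' is split into month '05' and day '14' purely by the directives'
# fixed widths; no separator is needed.
s = Scraper('data/%Y/%m/swap%m%d_%H%M%S')
print(s._extractDateURL('data/2014/05/swap0514_200135').isot)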
Example #6
    def _get_url_for_timerange(self, timerange):
        """
        Returns a list of urls corresponding to a given time-range.
        """
        result = list()
        # Validate the time range: SRS files have been generated daily since 1996
        cur_year = Time.now().datetime.year
        req_start_year = timerange.start.datetime.year
        req_end_year = timerange.end.datetime.year

        # Return early if the requested range lies entirely before MIN_YEAR
        # or entirely after the current year
        if req_start_year <= req_end_year < self.MIN_YEAR \
                or req_end_year >= req_start_year > cur_year:
            return result

        # No point in searching below the min or above max years
        start_year = max(req_start_year, self.MIN_YEAR)
        end_year = min(req_end_year, cur_year)

        # Search for tarballs for all years in the query
        tarball_timerange = TimeRange(f'{start_year}-01-01',
                                      f'{end_year}-12-31 23:59:59.999')
        tarball_urls = dict()
        tarball_scraper = Scraper(self.BASE_URL + '%Y/%Y_SRS.tar.gz')
        tarballs = tarball_scraper.filelist(tarball_timerange)
        max_tarball_year = None
        for tb_url in tarballs:
            date = tarball_scraper._extractDateURL(tb_url)
            year = date.to_datetime().year
            max_tarball_year = year
            tarball_urls[year] = tb_url
            log.debug('SRS tarball found for year %d', year)

        # Create a new time range for the times not covered by tarballs (assuming
        # each tarball covers a full year) and look for individual SRS files there.
        srs_urls = dict()
        min_file_year = max_tarball_year if max_tarball_year else start_year
        min_file_date = (datetime(max_tarball_year, 12, 31, 23, 59, 59)
                         if max_tarball_year else datetime(
                             start_year, 1, 1, 0, 0, 0))
        max_file_date = min(timerange.end.datetime, Time.now().datetime)
        if min_file_date < max_file_date:
            file_timerange = TimeRange(f'{min_file_year}-01-01', max_file_date)
            srsfile_scraper = Scraper(self.BASE_URL + '%Y/SRS/%Y%m%dSRS.txt')
            srsfiles = srsfile_scraper.filelist(file_timerange)
            for srs_url in srsfiles:
                date = srsfile_scraper._extractDateURL(srs_url)
                srs_urls[(date.datetime.year, date.datetime.month,
                          date.datetime.day)] = srs_url
                log.debug('SRS file found for date %s', date)

        # Iterate over all days in the range; if a day falls in a year covered by a
        # tarball, or has an individual SRS file, add it to the result with its extdict
        for day in timerange.get_dates():
            day_ymd = (int(day.strftime('%Y')), int(day.strftime('%m')),
                       int(day.strftime('%d')))
            extdict = {
                'year': day_ymd[0],
                'month': day_ymd[1],
                'day': day_ymd[2]
            }
            if self.MIN_YEAR <= day_ymd[0] <= cur_year:
                if day_ymd[0] in tarball_urls:
                    result.append((extdict, tarball_urls[day_ymd[0]]))
                elif day_ymd in srs_urls:
                    result.append((extdict, srs_urls[day_ymd]))

        return result
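
A hedged sketch of the Scraper calls this method is built on, using the NOAA SWPC warehouse URL that sunpy's SRSClient points at (treat the exact BASE_URL as an assumption; filelist requires network access):

from sunpy.time import TimeRange
from sunpy.util.scraper import Scraper

BASE_URL = 'ftp://ftp.swpc.noaa.gov/pub/warehouse/'  # assumed SRSClient base URL
tarball_scraper = Scraper(BASE_URL + '%Y/%Y_SRS.tar.gz')
timerange = TimeRange('2010-01-01', '2011-12-31 23:59:59.999')
# filelist() walks the remote tree and returns the URLs matching the pattern
# within the time range; _extractDateURL then recovers each tarball's year.
for url in tarball_scraper.filelist(timerange):
    print(url, tarball_scraper._extractDateURL(url).to_datetime().year)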