Exemplo n.º 1
0
    def _constraints_for_new_request(cls, config):
        """
        Build the constraints dict describing files new since the last check.

        @param config Dict of configuration parameters; reads 'new_data_check'
                      and the 'ds_params.*' url/pattern settings
        @retval dict with 'start_time', 'end_time', 'new_files',
                'bounding_box' and 'vars' keys
        @raises NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        old_list = list(
            old_list)  # NOTE that the internal tuples are also DotList objects

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new
        new_list = [x for x in curr_list if x not in old_list]

        # BUG FIX: the original indexed new_list[0] unconditionally and raised
        # a bare IndexError when nothing was new; signal "no new data"
        # explicitly, consistent with the sibling implementations in this file.
        if not new_list:
            raise NoNewDataWarning()

        ret['start_time'] = get_time_from_filename(new_list[0][0],
                                                   date_extraction_pattern,
                                                   date_pattern)
        # new_list[-1] is the idiomatic form of new_list[len(new_list) - 1]
        ret['end_time'] = get_time_from_filename(new_list[-1][0],
                                                 date_extraction_pattern,
                                                 date_pattern)

        ret['new_files'] = new_list
        ret['bounding_box'] = {}
        ret['vars'] = []

        return ret
Exemplo n.º 2
0
    def _constraints_for_new_request(cls, config):
        """
        Build the constraints dict describing files new since the last check.

        @param config Dict of configuration parameters; reads 'new_data_check'
                      and the 'ds_params.*' settings, writes 'set_new_data_check'
        @retval constraints dict with 'new_files', 'start_time', 'end_time',
                'bounding_box' and 'vars'
        @raises NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        old_list = list(old_list)  # NOTE that the internal tuples are also DotList objects

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new
        new_list = [x for x in curr_list if x not in old_list]

        # BUG FIX: 'len(new_list) is 0' compared identity, not equality; it only
        # worked because CPython interns small ints.  Use truthiness instead.
        if not new_list:
            raise NoNewDataWarning()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
        # new_list[-1] is the idiomatic last-element access
        ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        return ret
Exemplo n.º 3
0
    def _constraints_for_new_request(cls, config):
        """
        Build the constraints dictionary for files new since the last check.
        @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
        @retval constraints dictionary
        @raises NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new:
        # old_list comes in as a list of lists ([[]]) and curr_list as a list
        # of tuples ([()]); normalize both to sets of tuples so that
        # set.difference correctly yields entries present only in curr_list.
        curr_set = set(tuple(x) for x in curr_list)
        old_set = set(tuple(x) for x in old_list)

        new_list = list(curr_set.difference(old_set))

        # BUG FIX: 'len(new_list) is 0' compared identity, not equality;
        # use truthiness instead.
        if not new_list:
            raise NoNewDataWarning()

        # BUG FIX: a set difference has arbitrary iteration order, so
        # new_list[0] / new_list[-1] were NOT guaranteed to be the oldest /
        # newest files.  Derive the window from the extracted timestamps.
        new_times = [get_time_from_filename(f[0], date_extraction_pattern,
                                            date_pattern) for f in new_list]

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = min(new_times)
        ret['end_time'] = max(new_times)
        ret['bounding_box'] = {}
        ret['vars'] = []

        log.debug('constraints_for_new_request: {0}'.format(ret))

        return ret
Exemplo n.º 4
0
    def _constraints_for_new_request(cls, config):
        """
        Build the constraints dictionary for files new since the last check.

        @param config Dict of configuration parameters; reads 'new_data_check'
                      and the 'ds_params.*' settings, writes 'set_new_data_check'
        @retval constraints dictionary
        @raises NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []

        ret = {}
        base_url = get_safe(config,'ds_params.base_url')
        list_pattern = get_safe(config,'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new:
        # old_list comes in as a list of lists ([[]]) and curr_list as a list
        # of tuples ([()]); normalize both to sets of tuples so that
        # set.difference correctly yields entries present only in curr_list.
        curr_set = set(tuple(x) for x in curr_list)
        old_set = set(tuple(x) for x in old_list)

        new_list = list(curr_set.difference(old_set))

        # BUG FIX: 'len(new_list) is 0' compared identity, not equality;
        # use truthiness instead.
        if not new_list:
            raise NoNewDataWarning()

        # BUG FIX: a set difference has arbitrary iteration order, so
        # new_list[0] / new_list[-1] were NOT guaranteed to be the oldest /
        # newest files.  Derive the window from the extracted timestamps.
        new_times = [get_time_from_filename(f[0], date_extraction_pattern,
                                            date_pattern) for f in new_list]

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = min(new_times)
        ret['end_time'] = max(new_times)
        ret['bounding_box'] = {}
        ret['vars'] = []

        log.debug('constraints_for_new_request: {0}'.format(ret))

        return ret
Exemplo n.º 5
0
    def _constraints_for_historical_request(cls, config):
        """
        Select the files whose embedded timestamp falls inside the requested window.
        @param config dictionary of configuration parameters
        @retval dict with a single 'new_files' key listing the matching entries
        """
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        start_time = get_safe(config, 'constraints.start_time')
        end_time = get_safe(config, 'constraints.end_time')

        # Keep only the listing entries whose filename timestamp is within
        # the inclusive [start_time, end_time] window.
        matching = [
            info for info in list_file_info(base_url, list_pattern)
            if start_time
            <= get_time_from_filename(info[0], date_extraction_pattern,
                                      date_pattern)
            <= end_time
        ]

        return {'new_files': matching}
    def _constraints_for_new_request(cls, config):
        """
        Build the constraints dictionary for files new since the last check.
        @param config Dict of configuration parameters - may be used to generate the returned 'constraints' dict
        @retval constraints dictionary
        @raises NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, "new_data_check") or []

        ret = {}
        base_url = get_safe(config, "ds_params.base_url")
        list_pattern = get_safe(config, "ds_params.list_pattern")
        date_pattern = get_safe(config, "ds_params.date_pattern")
        date_extraction_pattern = get_safe(config, "ds_params.date_extraction_pattern")

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new:
        # old_list comes in as a list of lists ([[]]) and curr_list as a list
        # of tuples ([()]); normalize both to sets of tuples so that
        # set.difference correctly yields entries present only in curr_list.
        curr_set = set(tuple(x) for x in curr_list)
        old_set = set(tuple(x) for x in old_list)

        new_list = list(curr_set.difference(old_set))

        # BUG FIX: 'len(new_list) is 0' compared identity, not equality;
        # use truthiness instead.
        if not new_list:
            raise NoNewDataWarning()

        # BUG FIX: a set difference has arbitrary iteration order, so
        # new_list[0] / new_list[-1] were NOT guaranteed to be the oldest /
        # newest files.  Derive the window from the extracted timestamps.
        new_times = [get_time_from_filename(f[0], date_extraction_pattern,
                                            date_pattern) for f in new_list]

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config["set_new_data_check"] = curr_list

        # The new_list is the set of new files - these will be processed
        ret["new_files"] = new_list
        ret["start_time"] = min(new_times)
        ret["end_time"] = max(new_times)
        ret["bounding_box"] = {}
        ret["vars"] = []

        log.debug("constraints_for_new_request: {0}".format(ret))

        return ret
Exemplo n.º 7
0
    def test_get_time_from_filename(self, basename_mock, re_mock, mktime_mock):
        """Verify get_time_from_filename wires basename/regex/mktime results together."""
        basename_mock.return_value = 'test_data/ruv'
        retval = MagicMock()
        retval.groups.return_value = ('2012', '06', '06', '12', '00')
        re_mock.return_value = retval

        mktime_mock.return_value = 1338998400.0
        # BUG FIX: the extraction pattern is now a raw string; '\d' in a plain
        # string literal is an invalid escape sequence (SyntaxWarning, and an
        # error in future Python versions).  The string value is unchanged.
        self.assertEqual(get_time_from_filename(file_name='test_data/ruv/RDLm_SEAB_2012_06_06_1200.ruv',
                         date_extraction_pattern=r'RDLm_SEAB_([\d]{4})_([\d]{2})_([\d]{2})_([\d]{2})([\d]{2}).ruv',
                         date_pattern='%Y %m %d %H %M'), 1338998400.0)
Exemplo n.º 8
0
    def _constraints_for_new_request(cls, config):
        """
        Build the constraints dictionary for files new since the last check.

        @param config Dict of configuration parameters; reads 'new_data_check'
                      and the 'ds_params.*' settings, writes 'set_new_data_check'
        @retval constraints dictionary
        @raises NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []

        ret = {}
        base_url = get_safe(config,'ds_params.base_url')
        list_pattern = get_safe(config,'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)
        # Determine which files are new
        # NOTE(review): these look like leftover debug logs at warn level;
        # consider downgrading to debug -- left as-is to preserve behavior.
        log.warn('curr_list:{0}'.format(curr_list))
        log.warn('old_list:{0}'.format(old_list))

        new_list = [tuple(x) for x in curr_list if list(x) not in old_list]

        # BUG FIX: 'len(new_list) is 0' compared identity, not equality; it only
        # worked because CPython interns small ints.  Use truthiness instead.
        if not new_list:
            raise NoNewDataWarning()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0], date_extraction_pattern, date_pattern)
        ret['end_time'] = get_time_from_filename(new_list[-1][0], date_extraction_pattern, date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        log.warn('constraints_for_new_request: {0}'.format(ret))

        return ret
Exemplo n.º 9
0
    def _constraints_for_new_request(cls, config):
        """
        Assemble the constraints dict for files that appeared since the last check.
        @param config Dict of configuration parameters
        @retval dict with 'start_time', 'end_time', 'new_files', 'bounding_box' and 'vars'
        """
        # CBM: Fix this when the DotList crap is sorted out
        # NOTE that the internal tuples are also DotList objects
        previously_seen = list(get_safe(config, 'new_data_check') or [])

        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        available = list_file_info(base_url, list_pattern)
        fresh = [entry for entry in available if entry not in previously_seen]

        # Window endpoints come from the first and last new file names.
        first_time = get_time_from_filename(fresh[0][0], date_extraction_pattern, date_pattern)
        last_time = get_time_from_filename(fresh[len(fresh) - 1][0], date_extraction_pattern, date_pattern)

        return {
            'start_time': first_time,
            'end_time': last_time,
            'new_files': fresh,
            'bounding_box': {},
            'vars': [],
        }
Exemplo n.º 10
0
    def _constraints_for_new_request(cls, config):
        """
        Build the constraints dict describing files new since the last check.

        @param config Dict of configuration parameters; reads 'new_data_check'
                      and the 'ds_params.*' settings, writes 'set_new_data_check'
        @retval constraints dict with 'new_files', 'start_time', 'end_time',
                'bounding_box' and 'vars'
        @raises NoNewDataWarning when no new files are found
        """
        old_list = get_safe(config, 'new_data_check') or []
        # CBM: Fix this when the DotList crap is sorted out
        old_list = list(
            old_list)  # NOTE that the internal tuples are also DotList objects

        ret = {}
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')

        curr_list = list_file_info(base_url, list_pattern)

        # Determine which files are new
        new_list = [x for x in curr_list if x not in old_list]

        # BUG FIX: 'len(new_list) is 0' compared identity, not equality; it only
        # worked because CPython interns small ints.  Use truthiness instead.
        if not new_list:
            raise NoNewDataWarning()

        # The curr_list is the new new_data_check - used for the next "new data" evaluation
        config['set_new_data_check'] = curr_list

        # The new_list is the set of new files - these will be processed
        ret['new_files'] = new_list
        ret['start_time'] = get_time_from_filename(new_list[0][0],
                                                   date_extraction_pattern,
                                                   date_pattern)
        # new_list[-1] is the idiomatic form of new_list[len(new_list) - 1]
        ret['end_time'] = get_time_from_filename(new_list[-1][0],
                                                 date_extraction_pattern,
                                                 date_pattern)
        ret['bounding_box'] = {}
        ret['vars'] = []

        return ret
Exemplo n.º 11
0
    def _constraints_for_historical_request(cls, config):
        """Collect the files whose embedded timestamp lies within the configured window."""
        base_url = get_safe(config,'ds_params.base_url')
        list_pattern = get_safe(config,'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        window_start = get_safe(config, 'constraints.start_time')
        window_end = get_safe(config, 'constraints.end_time')

        selected = []
        for file_info in list_file_info(base_url, list_pattern):
            stamp = get_time_from_filename(file_info[0], date_extraction_pattern, date_pattern)
            # Inclusive on both ends of the window.
            if window_start <= stamp and stamp <= window_end:
                selected.append(file_info)

        return {'new_files':selected}
Exemplo n.º 12
0
    def _constraints_for_historical_request(cls, config):
        """Return the listing entries whose filename timestamp falls in the requested window."""
        date_extraction_pattern = get_safe(
            config, 'ds_params.date_extraction_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        base_url = get_safe(config, 'ds_params.base_url')

        lower = get_safe(config, 'constraints.start_time')
        upper = get_safe(config, 'constraints.end_time')

        def _stamp(entry):
            # Timestamp embedded in the file name (first tuple element).
            return get_time_from_filename(entry[0], date_extraction_pattern,
                                          date_pattern)

        hits = [entry for entry in list_file_info(base_url, list_pattern)
                if lower <= _stamp(entry) <= upper]

        return {'new_files': hits}
Exemplo n.º 13
0
    def _constraints_for_historical_request(cls, config):
        """
        Returns a list of new file names in the given directory
        @param config dictionary of configuration parameters
        @retval list of new file names
        """
        base_url = get_safe(config, 'ds_params.base_url')
        list_pattern = get_safe(config, 'ds_params.list_pattern')
        date_pattern = get_safe(config, 'ds_params.date_pattern')
        date_extraction_pattern = get_safe(config, 'ds_params.date_extraction_pattern')

        window_lo = get_safe(config, 'constraints.start_time')
        window_hi = get_safe(config, 'constraints.end_time')

        # One pass over the directory listing, keeping entries whose filename
        # timestamp is within the inclusive [window_lo, window_hi] window.
        in_window = [
            entry
            for entry in list_file_info(base_url, list_pattern)
            if window_lo <= get_time_from_filename(
                entry[0], date_extraction_pattern, date_pattern) <= window_hi
        ]

        return {'new_files': in_window}