示例#1
0
    def fetch_state(self, state):
        ''' Fetch and process the data for a single state.

        Every query configured for the state is requested with the helper
        matching its ``type``. The raw responses are then converted into
        records, either by the state's handler in ``self.extras`` (when one
        is registered) or by the generic attribute extractors, and finally
        combined by ``self._aggregate_state_results``.

        Returns a tuple of (fetched_results, parsed_data).
        If there's no query for the state: returns (None, {}).

        Raises ValueError when a query has an unsupported type; any
        exception raised while fetching is logged and re-raised.
        '''
        logging.debug("Fetching: %s", state)

        queries = self.sources.queries_for(state)
        if not queries:
            return None, {}

        results = []
        mapping = self.sources.mapping_for(state)
        for query in queries:
            # TODO: make a better mapping here
            try:
                query_type = query['type']
                if query_type in ('arcgis', 'json', 'ckan', 'soda'):
                    res = request_and_parse(query['url'], query['params'])
                elif query_type == 'csv':
                    res = request_csv(query['url'],
                                      query['params'],
                                      header=query.get('header', True),
                                      encoding=query.get('encoding'))
                elif query_type == 'html':
                    res = request(query['url'], query['params'])
                elif query_type == 'html:soup':
                    res = request_soup(query['url'], query['params'])
                elif query_type in ('pandas', 'xls', 'xlsx'):
                    res = request_pandas(query)
                else:
                    # Without this branch the previous query's response (or
                    # None) would be recorded as this query's result.
                    raise ValueError(
                        "Unsupported query type: {!r}".format(query_type))
                results.append(res)
            except Exception:
                # .get() so that a query without a 'url' cannot mask the
                # original error with a KeyError raised from the handler.
                logging.error("%s: Failed to fetch %s",
                              state, query.get('url'), exc_info=True)
                raise

        if state in self.extras:
            processed_results = self.extras[state](results, mapping)
        else:
            processed_results = []
            # results holds exactly one entry per query, in query order
            for query, result in zip(queries, results):
                if query.get('type') == 'arcgis':
                    partial = extract_arcgis_attributes(result, mapping, state)
                else:
                    # This is a guess; getting an unknown top level object
                    partial = extract_attributes(
                        result, query.get('data_path', []), mapping, state)
                processed_results.append(partial)

        data = self._aggregate_state_results(state, processed_results, mapping)
        return results, data
def handle_ca(res, mapping, queries):
    """Turn the 'CA' query responses into a flat list of cumulative records.

    Queries and responses are paired positionally (pairing stops at the
    shorter of the two). For each pair the response is extracted and mapped,
    loaded into a DataFrame via ``prep_df``, and its columns are replaced by
    their running totals.

    NOTE(review): presumably the source reports per-period increments, hence
    the cumulative sum -- confirm against the data source.

    Returns a list of dicts, one per DataFrame row, across all queries.
    """
    records = []
    for query, response in zip(queries, res):
        # extract_attributes also applies the mapping
        items = extract_attributes(response, query.data_path, mapping, 'CA')

        frame = prep_df(items, mapping)
        # Rows with a missing index sort first; TS is dropped before the
        # running total and restored from the index further down.
        frame = frame.sort_index(na_position='first')
        frame = frame.drop(columns=TS)
        frame = frame.cumsum()

        # Discard rows whose index is missing.
        has_index = frame.index.notna()
        frame = frame.loc[has_index]

        add_query_constants(frame, query)
        frame[TS] = frame.index
        records.extend(frame.to_dict(orient='records'))

    return records
def process_source_responses(source, results):
    """Convert a source's raw query responses into aggregated data.

    When ``source.extras`` is truthy it is called with all responses and the
    source mapping and its return value is used as-is. Otherwise each
    response is matched by position with ``source.queries`` and run through
    the extractor for that query's type.

    Returns whatever ``_aggregate_state_results`` produces for the source and
    the processed responses.
    """
    def extract(position, response):
        # Responses are matched to their query by position.
        query = source.queries[position]
        if query.type == 'arcgis':
            return extract_arcgis_attributes(response, source.mapping,
                                             source.name)
        # This is a guess; getting an unknown top level object
        return extract_attributes(response, query.data_path,
                                  source.mapping, source.name)

    if source.extras:
        per_query = source.extras(results, source.mapping)
    else:
        per_query = [extract(position, response)
                     for position, response in enumerate(results)]

    return _aggregate_state_results(source, per_query)