def _download_reference(url, destination):
    """Stream `url` into `destination` without ever exposing a partial file.

    The payload is first written to a sibling ``.part`` file and only
    renamed to `destination` once the whole transfer succeeded.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    # `collect_references_files` treats an existing file as "already
    # downloaded", so an interrupted transfer must never be visible under
    # the final name.
    partial_path = '{0}.part'.format(destination)
    response = requests.get(url, stream=True)
    try:
        # Do not store an HTTP error page as if it were the reference CSV.
        response.raise_for_status()
        with codecs.open(partial_path, 'w', encoding='utf8') as fd:
            for chunk in response.iter_content(chunk_size=1024):
                # latin-1 maps every byte to exactly one character, so
                # decoding chunk by chunk cannot split a character.
                fd.write(chunk.decode('latin-1'))  # TODO: detect?
        os.rename(partial_path, destination)
    finally:
        # Release the streamed connection and drop any leftover partial
        # file (it only still exists when the transfer failed).
        response.close()
        if os.path.exists(partial_path):
            os.remove(partial_path)


def collect_references_files():
    """Retrieve locally CSV files in use for dynamic territories' resources.

    Creates the references root folder when missing. When the
    `ACTIVATE_TERRITORIES` setting is enabled, every resource-based
    territory dataset class found in `TERRITORY_DATASETS` has its target
    resource downloaded into the references storage, unless a file with
    the same name is already there. Files are re-encoded from latin-1 to
    UTF-8 on the way.
    """
    references_root = references.root
    if not os.path.exists(references_root):
        os.makedirs(references_root)

    if current_app.config.get('ACTIVATE_TERRITORIES'):
        # TERRITORY_DATASETS is a dict of dicts and we want all values
        # that are resource based to collect related files.
        territory_classes = [
            territory_class
            for territory_dict in TERRITORY_DATASETS.values()
            for territory_class in territory_dict.values()
            if issubclass(territory_class, ResourceBasedTerritoryDataset)
        ]
        for territory_class in territory_classes:
            dataset = Dataset.objects.get(id=territory_class.dataset_id)
            # Compare ids through their string form; hoisted because it
            # does not change while scanning the dataset's resources.
            wanted_id = str(territory_class.resource_id)
            for resource in dataset.resources:
                if str(resource.id) != wanted_id:
                    continue
                # The local file keeps the last segment of the URL as name.
                filename = resource.url.split('/')[-1]
                reference_path = references.path(filename)
                log.info('Found reference: %s', reference_path)
                if os.path.exists(reference_path):
                    log.info('Reference already downloaded')
                    continue
                log.info('Downloading from: %s', resource.url)
                _download_reference(resource.url, reference_path)
    log.info('Done')
def compute_territory_dataset(territory, dataset, resource_id):
    """
    Dynamically generate a CSV file about the territory from a national one.

    The file targeted by the resource MUST be downloaded within the
    `references` folder with the original name prior to call that view.

    The GET parameters `territory_attr` and `csv_column` are used to
    determine which attribute MUST match the given column of the CSV.

    Responds with:
    - 404 when one of the GET parameters is missing or when the dataset
      has no resource identified by `resource_id`;
    - 400 when the territory has no `territory_attr` attribute or when
      `csv_column` is not a column of the reference CSV;
    - otherwise the filtered CSV, sent as an attachment.
    """
    args = multi_to_dict(request.args)
    if 'territory_attr' not in args or 'csv_column' not in args:
        return abort(404)
    if not hasattr(territory, args['territory_attr']):
        return abort(400)

    for resource in dataset.resources:
        # Accept the id either as-is or in its string form.
        # NOTE(review): presumably `resource.id` is not always the same
        # type as the id coming from the URL -- confirm against the route.
        if resource.id == resource_id or str(resource.id) == resource_id:
            break
    else:
        # Without this branch an unknown id would silently serve the last
        # resource of the dataset (or crash when there is none).
        return abort(404)

    resource_path = references.path(resource.url.split('/')[-1])
    match = getattr(territory, args['territory_attr']).encode('utf-8')
    column = args['csv_column']

    csvfile_out = StringIO.StringIO()
    with open(resource_path, 'rb') as csvfile_in:
        reader = csv.DictReader(csvfile_in, delimiter=str(';'))
        fieldnames = reader.fieldnames
        if not fieldnames or column not in fieldnames:
            # Unknown column (or headerless file): nothing can match, and
            # indexing the rows with it would raise a KeyError.
            return abort(400)
        writer = csv.DictWriter(csvfile_out, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            # NOTE(review): encoding the cell value mirrors the encoding
            # applied to `match`; confirm it behaves as intended for
            # non-ASCII cells on the targeted Python version.
            if row[column].encode('utf-8') == match:
                writer.writerow(row)

    csvfile_out.seek(0)  # Back to 0 otherwise the file is served empty.
    attachment_filename = '{territory_name}_{resource_name}.csv'.format(
        territory_name=territory.name,
        resource_name=resource.title.replace(' ', '_'))
    return send_file(csvfile_out, as_attachment=True,
                     attachment_filename=attachment_filename)
def _download_reference(url, destination):
    """Stream `url` into `destination` without ever exposing a partial file.

    The payload is first written to a sibling ``.part`` file and only
    renamed to `destination` once the whole transfer succeeded.

    Raises:
        requests.HTTPError: when the server answers with an error status.
    """
    # `collect_references_files` skips files that already exist, so an
    # interrupted transfer must never be visible under the final name.
    partial_path = '{0}.part'.format(destination)
    response = requests.get(url, stream=True)
    try:
        # Do not store an HTTP error page as if it were the reference CSV.
        response.raise_for_status()
        with codecs.open(partial_path, 'w', encoding='utf8') as fd:
            for chunk in response.iter_content(chunk_size=1024):
                # latin-1 maps every byte to exactly one character, so
                # decoding chunk by chunk cannot split a character.
                fd.write(chunk.decode('latin-1'))  # TODO: detect?
        os.rename(partial_path, destination)
    finally:
        # Release the streamed connection and drop any leftover partial
        # file (it only still exists when the transfer failed).
        response.close()
        if os.path.exists(partial_path):
            os.remove(partial_path)


def collect_references_files():
    """Retrieve locally CSV files in use for dynamic territories' resources.

    Creates the references root folder when missing. When the
    `ACTIVATE_TERRITORIES` setting is enabled, every resource-based
    territory dataset class found in `TERRITORY_DATASETS` has its target
    resource downloaded into the references storage, unless a file with
    the same name is already there. Territory classes whose target
    resource cannot be found in their dataset are skipped. Files are
    re-encoded from latin-1 to UTF-8 on the way.
    """
    references_root = references.root
    if not os.path.exists(references_root):
        os.makedirs(references_root)

    if current_app.config.get('ACTIVATE_TERRITORIES'):
        from udata.models import TERRITORY_DATASETS

        # NOTE(review): the values of TERRITORY_DATASETS are used directly
        # as classes here -- confirm this matches the mapping's shape.
        for territory_class in TERRITORY_DATASETS.values():
            if not issubclass(territory_class, ResourceBasedTerritoryDataset):
                continue
            dataset = Dataset.objects.get(id=territory_class.dataset_id)
            # Compare ids through their string form so that differing id
            # types on each side still match.
            wanted_id = str(territory_class.resource_id)
            for resource in dataset.resources:
                if str(resource.id) == wanted_id:
                    break
            else:
                # No matching resource: skip, rather than downloading
                # whichever resource the loop happened to end on.
                continue
            # The local file keeps the last segment of the URL as name.
            filename = resource.url.split('/')[-1]
            reference_path = references.path(filename)
            if os.path.exists(reference_path):
                continue
            _download_reference(resource.url, reference_path)
    log.info('Done')