def generate_joint_dataset_and_showcase(wfpfood_url, downloader, folder,
                                        countriesdata):
    """Generate a single joint dataset and showcase containing data for all countries.

    Args:
        wfpfood_url: Base URL of the WFP food prices service.
        downloader: Download helper passed through to joint_dataframe.
        folder: Local folder into which the combined CSV file is written.
        countriesdata: Per-country data used to build the joint dataframe.

    Returns:
        Tuple of (Dataset, Showcase), or (None, None) when the joint
        dataframe holds no usable data.
    """
    title = 'Global Food Prices Database (WFP)'
    logger.info('Creating joint dataset: %s' % title)
    slugified_name = 'wfp-food-prices'

    df = joint_dataframe(wfpfood_url, downloader, countriesdata)

    # A dataframe with at most one row carries no usable price data.
    if len(df) <= 1:
        logger.warning('Dataset "%s" is empty' % title)
        return None, None

    dataset = Dataset({'name': slugified_name, 'title': title})
    dataset.set_maintainer(
        "9957c0e9-cd38-40f1-900b-22c91276154b")  # Orest Dubay
    dataset.set_organization("3ecac442-7fed-448d-8f78-b385ef6f84e7")

    # Latest (year, month) pair: maximise year*100+month, then recover
    # the month of that maximum with % 100.
    maxmonth = (100 * df.mp_year + df.mp_month).max() % 100
    dataset.set_dataset_date("%04d-01-01" % df.mp_year.min(),
                             "%04d-%02d-15" % (df.mp_year.max(), maxmonth),
                             "%Y-%m-%d")
    dataset.set_expected_update_frequency("weekly")
    dataset.add_country_locations(sorted(df.adm0_name.unique()))
    dataset.add_tags(tags)

    file_csv = join(folder, "WFPVAM_FoodPrices.csv")
    df.to_csv(file_csv, index=False)
    resource = Resource({
        'name':
        title,
        'description':
        # Fixed typo: "Word" -> "World"
        "World Food Programme – Food Prices  Data Source: WFP Vulnerability Analysis and Mapping (VAM)."
    })
    resource.set_file_type('csv')  # set the file type to eg. csv
    resource.set_file_to_upload(file_csv)
    dataset.add_update_resource(resource)

    showcase = Showcase({
        'name':
        '%s-showcase' % slugified_name,
        'title':
        'Global Food Prices',
        'notes':
        "Interactive data visualisation of WFP's Food Market Prices dataset",
        'url':
        "https://data.humdata.org/organization/wfp#interactive-data",
        'image_url':
        "https://docs.humdata.org/wp-content/uploads/wfp_food_prices_data_viz.gif"
    })
    showcase.add_tags(tags)

    dataset.update_from_yaml()
    # The YAML-provided notes template contains a single %s placeholder.
    dataset['notes'] = dataset[
        'notes'] % 'Global Food Prices data from the World Food Programme covering'
    dataset.create_in_hdx()
    showcase.create_in_hdx()
    showcase.add_dataset(dataset)
    dataset.get_resource().create_datastore_from_yaml_schema(
        yaml_path="wfp_food_prices.yml", path=file_csv)
    logger.info('Finished joint dataset')

    return dataset, showcase
# Example 2
def make_hdx_entries(start_date, **params):
    """Create HDX datasets/showcases for UNOSAT products created or updated after start_date.

    Args:
        start_date: datetime; only products created/updated after this
            date are processed.
        **params: Connection keyword arguments forwarded to pymysql.connect.

    Raises:
        UNOSATError: If no database rows match, a row is empty, or a
            product's glide iso3 does not match its area iso3.
    """
    logger.info('Adding any datasets created or updated after %s' %
                start_date.date().isoformat())

    # Connect to the database
    connection = pymysql.connect(**params)
    try:
        with connection.cursor() as cursor:
            # Read all countries: map UNOSAT area id -> ISO3 country code.
            sql = "SELECT * FROM `area`"
            cursor.execute(sql)
            unosatCountryCodes = {
                row['id_area']: row['area_iso3'] for row in cursor
            }
            # Read all non-archived products with at least one download
            # link that were created or updated after start_date.
            sql = "SELECT * FROM `product` WHERE NOT (GDB_Link LIKE '' AND SHP_Link LIKE '') AND (product_archived IS FALSE) AND (product_created>%s or updated>%s)"
            cursor.execute(sql, (start_date, start_date))
            if not cursor.rowcount:
                raise UNOSATError('No db results found')
            batch = get_uuid()
            for unosatDBEntry in cursor:
                if not unosatDBEntry:
                    raise UNOSATError('Empty row in db!')
                productID = str(unosatDBEntry['id_product'])
                logger.info('Processing UNOSAT product %s' % productID)
                logger.debug(unosatDBEntry)
                id_area = unosatDBEntry['id_area']
                iso3 = unosatCountryCodes[id_area]
                product_glide = unosatDBEntry['product_glide']
                # First two characters encode the event type (e.g. FL, EQ)
                # -- presumably a glide hazard code; verify against data.
                typetag = product_glide[:2]
                product_description = unosatDBEntry['product_description']
                if '-' in product_glide:
                    # Dash-separated glide code: 4th field is the ISO3.
                    glideiso3 = product_glide.split('-')[3]
                    product_description = '**Glide code: %s**  %s' % (
                        product_glide, product_description)
                else:
                    # Undashed UNOSAT code: ISO3 occupies characters 10-12.
                    glideiso3 = product_glide[10:13]
                    product_description = '**UNOSAT code: %s**  %s' % (
                        product_glide, product_description)

                # Sanity check: the area table and the glide code must agree.
                if iso3 != glideiso3:
                    raise UNOSATError(
                        'UNOSAT id_area=%s, area_iso3=%s does not match glide iso3=%s'
                        % (id_area, iso3, glideiso3))

                # Dataset variables
                title = unosatDBEntry['product_title']
                slugified_name = slugify(title)
                # HDX dataset names are limited to 90 characters; trim
                # common verbose prefixes before truncating.
                if len(slugified_name) > 90:
                    slugified_name = slugified_name.replace(
                        'satellite-detected-', '')
                    slugified_name = slugified_name.replace(
                        'estimation-of-', '')
                    slugified_name = slugified_name.replace('geodata-of-',
                                                            '')[:90]
                event_type = standardEventTypesDict[typetag]
                tags = ['geodata']
                if event_type:
                    tags.append(event_type)

                dataset = Dataset({
                    'name': slugified_name,
                    'title': title,
                    'notes': product_description
                })
                dataset.set_maintainer('83fa9515-3ba4-4f1d-9860-f38b20f80442')
                dataset.add_country_location(iso3)
                dataset.add_tags(tags)
                dataset.set_expected_update_frequency('Never')
                dataset.set_dataset_date_from_datetime(
                    unosatDBEntry['product_created'])

                gdb_link = unosatDBEntry['GDB_Link']
                shp_link = unosatDBEntry['SHP_Link']

                # Resource names are the final path component of each URL.
                resources = [{
                    'name': gdb_link.split('/')[-1],
                    'format': 'zipped geodatabase',
                    'url': gdb_link,
                    'description': 'Zipped geodatabase',
                }, {
                    'name': shp_link.split('/')[-1],
                    'format': 'zipped shapefile',
                    'url': shp_link,
                    'description': 'Zipped shapefile',
                }]

                dataset.add_update_resources(resources)
                dataset.update_from_yaml()

                showcase = Showcase({
                    'name':
                    '%s-showcase' % slugified_name,
                    'title':
                    'Static PDF Map',
                    'notes':
                    'Static viewing map for printing.',
                    'url':
                    'https://unosat-maps.web.cern.ch/unosat-maps/%s/%s' %
                    (unosatDBEntry['product_folder'],
                     unosatDBEntry['product_url1']),
                    'image_url':
                    'https://unosat-maps.web.cern.ch/unosat-maps/%s/%s' %
                    (unosatDBEntry['product_folder'],
                     unosatDBEntry['product_img'])
                })
                showcase.add_tags(tags)

                dataset.create_in_hdx(remove_additional_resources=True,
                                      hxl_update=False,
                                      updated_by_script='UNOSAT',
                                      batch=batch)
                showcase.create_in_hdx()
                showcase.add_dataset(dataset)

                # Record each published product; the context manager closes
                # the file (the original's explicit f.close() was redundant).
                with open('publishlog.txt', 'a+') as f:
                    f.write('%s,%s\n' % (productID, dataset.get_hdx_url()))
    finally:
        connection.close()