示例#1
0
def test_load_from_name():
    """Test loading one harvest source by name and its force_all config.

    Fetches the full ``doi-open-data`` harvest source through
    ``get_full_harvest_source``, passes it to ``create_harvest_source`` and
    then checks the ``created`` / ``updated`` / ``error`` flags recorded in
    ``ckan.harvest_sources`` as well as the ``force_all`` entry of the
    package config.
    """

    ckan = RemoteCKAN(url='https://catalog.data.gov')
    ckan.set_destination(ckan_url='http://ckan:5000',
                         ckan_api_key='0602d7ed-1517-40a0-a92f-049d724962df')

    print('Getting harvest source ...')

    name = 'doi-open-data'
    full_hs = ckan.get_full_harvest_source(hs={'name': name})
    ckan.create_harvest_source(data=full_hs)

    # Look the result record up once instead of on every assertion.
    source = ckan.harvest_sources[name]

    # The source must be flagged as newly created, not updated, without error.
    assert 'created' in source
    assert source['created']
    assert 'updated' in source
    assert not source['updated']
    assert 'error' in source
    assert not source['error']

    print(source)

    # check the force_all config: it is stored as a JSON string and must
    # decode to a real boolean that is True.
    cfg_data = json.loads(source['ckan_package']['config'])
    assert isinstance(cfg_data['force_all'], bool)
    assert cfg_data['force_all']
def test_load_from_url():
    """ Test with some previous harvesters already saved.

        Use a pytest cassette so real requests are not required.
        We import 3 harvest sources (so they already exist)
        and then run this test with 6 sources. """

    ckan = RemoteCKAN(url='https://catalog.data.gov')
    # NOTE(review): the statement below is truncated in this copy of the
    # file: the call is never closed and the code that originally followed
    # it (up to the first email_list check) is missing. Restore it from the
    # upstream source before relying on this test.
    ckan.set_destination(ckan_url='http://*****:*****@fdic.gov\r\[email protected]'
    assert expected_email_list in [
        extra['value'] for extra in extras if extra['key'] == 'email_list'
    ]

    # The 'fcc-gov' organization must carry the expected 'email_list' extra.
    extras = ckan.organizations['fcc-gov'].get('extras', [])
    expected_email_list = '[email protected]\r\[email protected]'
    assert expected_email_list in [
        extra['value'] for extra in extras if extra['key'] == 'email_list'
    ]

    # NOTE(review): in the next assert the ", 1" is the assertion message,
    # not a comparison, so it only checks that ckan.groups is non-empty.
    assert len(ckan.groups), 1
    assert 'local' in ckan.groups
    assert ckan.groups['local']['display_name'] == 'Local Government'

    # NOTE(review): total, created, updated and errors are not assigned in
    # the visible part of this function; presumably they were set in the
    # missing section above.
    print(
        'Finished: {} harvest sources. {} Added, {} already exists, {} failed'.
        format(total, created, updated, errors))

    # NOTE(review): the docstring mentions 6 sources while these assertions
    # expect 4 created + 3 updated; confirm which figure is current.
    assert total == len(ckan.harvest_sources)
    assert created == 4
    assert updated == 3
    assert errors == 0
示例#3
0
parser.add_argument(
    "--wait_for_create",
    type=int,
    default=5,
    help="Wait this number of seconds between API calls to prevent timeout")

args = parser.parse_args()

# When no destination API key was supplied, fall back to the contents of the
# api.key file located two directories above this script.
if args.destination_api_key is None:
    api_key_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                '../../api.key')
    # The context manager closes the handle even if the read fails.
    with open(api_key_file) as key_file:
        api_key = key_file.read().rstrip()
    args.destination_api_key = api_key

ckan = RemoteCKAN(url=args.origin_url, user_agent=args.user_agent)
ckan.set_destination(ckan_url=args.destination_url,
                     ckan_api_key=args.destination_api_key)

# define the final list of sources to import
sources_to_import = []

if args.names is not None:
    # args.names is either the path of a file holding one source name per
    # line, or a comma-separated list of source names.
    if os.path.isfile(args.names):
        with open(args.names) as names_file:
            names = names_file.read().splitlines()
    else:
        names = args.names.split(',')

    # Drop the first `offset` names.
    if args.offset > 0:
        names = names[args.offset:]
示例#4
0
def import_groups(origin_url, user_agent, destination_url,
                  destination_api_key, groups='ALL', skip_groups=''):
    """Create groups at the destination CKAN and attach existing datasets.

    For every selected group the group is created through
    ``ckan.create_group``; then each dataset the origin lists for that group
    is looked up at the destination and, when found and not yet in the
    group, updated via the ``package_update`` API action to include it.

    Args:
        origin_url: URL passed to ``RemoteCKAN`` as the origin instance.
        user_agent: User agent passed to ``RemoteCKAN``.
        destination_url: Base URL of the destination instance; also used to
            build the ``package_update`` endpoint.
        destination_api_key: API key handed to ``set_destination``.
        groups: ``'ALL'`` to use ``ckan.get_group_list()``, otherwise a
            comma-separated string of group names.
        skip_groups: Comma-separated string of group names to skip.

    Returns:
        A dict with the keys ``groups_processed``, ``groups_skipped``,
        ``not_found`` (dicts with ``group`` and ``dataset_name``),
        ``already_in_group``, ``added_to_group`` and ``failed_to_add``.
    """
    ckan = RemoteCKAN(url=origin_url, user_agent=user_agent)
    ckan.set_destination(ckan_url=destination_url, ckan_api_key=destination_api_key)

    groups_processed = []
    groups_skipped = []
    not_found = []
    already_in_group = []
    added_to_group = []
    failed_to_add = []

    if groups == 'ALL':
        groups = ckan.get_group_list()
    else:
        groups = groups.split(',')

    # Both values are loop-invariant, so compute them once up front.
    names_to_skip = skip_groups.split(',')
    package_update_url = f'{destination_url}/api/3/action/package_update'

    for group in groups:
        print('Group Found {}'.format(group))

        if group in names_to_skip:
            print('Skipping group')
            groups_skipped.append(group)
            continue

        groups_processed.append(group)

        # create this group at destination
        ckan.create_group(group)

        # get all datasets from this group and (if exist) add dataset to this group
        for origin_package in ckan.get_datasets_in_group(group_name=group):
            name = origin_package['name']
            # if this dataset exists in the new CKAN instance we need to update to add this group
            dest_package = ckan.get_full_package(name_or_id=name, url=destination_url)
            if dest_package is None:
                print('Package not found {}'.format(name))
                not_found.append({'group': group, 'dataset_name': name})
                continue

            # check if the groups already exist at the destination package
            if group in [grp['name'] for grp in dest_package.get('groups', [])]:
                print('Group {} already exists for {}'.format(group, name))
                already_in_group.append(dest_package['name'])
                continue

            # update the dataset at the destination so it includes the group
            print(' ** Updating package {}'.format(name))

            # setdefault keeps this consistent with the .get() check above:
            # a package without a 'groups' key must not raise KeyError here.
            dest_package.setdefault('groups', []).append({'name': group})

            updated, status, error = ckan.request_ckan(
                url=package_update_url, method='POST', data=dest_package)
            if updated:
                added_to_group.append(dest_package['name'])
            else:
                failed_to_add.append(dest_package['name'])

            print(' ** Updated ** Status {} ** Error {} **'.format(status, error))

    if ckan.errors:
        print('*******\nWITH ERRORS\n*******')
        print('\n\t'.join(ckan.errors))

    print('Datasets not found: {}'.format(len(not_found)))
    for nf in not_found:
        print('\tDataset {} at group {}'.format(nf['dataset_name'], nf['group']))

    print('Final results:')
    ret = {
        "groups_processed": groups_processed,
        "groups_skipped": groups_skipped,
        "not_found": not_found,
        "already_in_group": already_in_group,
        "added_to_group": added_to_group,
        "failed_to_add": failed_to_add,
    }

    print(ret)
    return ret