Exemplo n.º 1
0
def create_to_registry(package_id):
    '''
    Create a new dataset in the registry from the values held in OG.

    The package is fetched through get_data_from_url() using the
    "open_gov_url" setting, overlaid with every field stored in
    Data//fieldsAdded.json, passed through the normalisation helpers and
    finally posted to the registry with the "package_create" action.

    Called by:
    * main()
    Calls:
    * get_data_from_url()
    * replace_branch_and_data_steward()
    * replace_regions()
    * default_resource_date_published()

    :param package_id: id of the package to copy
    :return: True if "package_create" succeeded, False if it raised
    '''
    with open("Data//fieldsAdded.json") as fields_file:
        extra_fields = json.load(fields_file)

    og_data = get_data_from_url(package_id, "open_gov_url")
    for field_name, field_value in extra_fields.items():
        og_data[field_name] = field_value

    # The helpers' return values are ignored; presumably they modify
    # og_data in place -- confirm against their definitions.
    replace_branch_and_data_steward(og_data)
    replace_regions(og_data)
    default_resource_date_published(og_data)

    registry = RemoteCKAN(os.getenv("registry_url"),
                          apikey=os.getenv("registry_api_key"))
    try:
        registry.call_action("package_create", data_dict=og_data)
    except Exception as error:
        print(error)
        return False
    return True
Exemplo n.º 2
0
    def __init__(self, data_transfer_config, logger):
        """Configure a CKAN API transfer from the given transfer description.

        Exactly one endpoint is expected to list 'croupier.nodes.CKAN_dataset'
        in its 'type_hierarchy'. If it is the source, the transfer is a
        'download'; if it is the target, an 'upload'. Otherwise the problem is
        logged and a bare Exception is raised.

        After the CKAN endpoint is chosen, a RemoteCKAN client is created from
        its 'endpoint' and 'credentials' entries, the dataset is looked up via
        self._find_dataset() when no 'package_id' is set, and 'site_read' is
        called; a ServerIncompatibleError from that call is logged only.
        """
        super().__init__(data_transfer_config, logger)

        source = data_transfer_config['from_source']
        self.from_infra = source['located_at']
        target = data_transfer_config['to_target']
        self.to_infra = target['located_at']

        ckan_node_type = 'croupier.nodes.CKAN_dataset'
        if ckan_node_type in self.from_infra['type_hierarchy']:
            self.direction = 'download'
            self.ckan_dataset = self.from_infra
            self.ckan_resource = source['resource']
        elif ckan_node_type in self.to_infra['type_hierarchy']:
            self.direction = 'upload'
            self.ckan_dataset = self.to_infra
            self.ckan_resource = target['resource']
        else:
            logger.error(
                'CKANAPI Data Transfer must have a "CKAN_dataset" as one of its endpoints'
            )
            raise Exception

        dataset_node = self.ckan_dataset
        self.dataset_info = dataset_node['dataset_info']
        self.endpoint = dataset_node['endpoint']
        self.apikey = dataset_node['credentials']['auth-header']
        self.api = RemoteCKAN(self.endpoint, apikey=self.apikey)

        # No package id configured yet: let the class resolve it.
        if not self.dataset_info['package_id']:
            self._find_dataset()

        # Connectivity probe; an incompatible server is reported, not raised.
        try:
            self.api.action.site_read()
        except ServerIncompatibleError:
            self.logger.error(
                'Could not connect to CKAN server, non valid endpoint')
Exemplo n.º 3
0
def test_post():
    """Manual check that a package can be posted to the registry.

    A fixed package is first read back with "package_show" and trimmed
    (random suffix on the title, 'id', 'revision_id' and 'aafc_subject'
    removed). NOTE: that trimmed copy is not what gets posted -- the payload
    sent to "package_create" is the content of Data//fromAlexis.json. Any
    error raised by the create call is ignored.
    """
    site = os.getenv("registry_url")
    registry_key = os.getenv("registry_api_key")

    package_id = "e328838f-3bfc-4d86-9cc5-23de0b549c91"
    response_text = query_with_get(site, "api/3/action/package_show",
                                   "?id=" + package_id, apikey=registry_key)
    package = json.loads(response_text)['result']
    package['title'] = package['title'] + str(randint(0, 99))
    for server_field in ('id', 'revision_id'):
        del package[server_field]
    package.pop('aafc_subject', None)

    with open("Data//fromAlexis.json") as payload_file:
        payload = json.load(payload_file)

    # do a post
    registry = RemoteCKAN(site, apikey=registry_key)
    try:
        registry.call_action("package_create", data_dict=payload)
    except Exception:
        pass
Exemplo n.º 4
0
 def __init__(self):
     """Create the CKAN action client and load the resources metadata.

     The CKAN URL, API key and list of current elections are read from
     app.config; the metadata comes from self._get_resources_metadata().
     """
     self.ua = 'waarismijnstemlokaal/1.0 (+https://waarismijnstemlokaal.nl/)'
     client = RemoteCKAN(
         app.config['CKAN_URL'],
         apikey=app.config['CKAN_API_KEY'],
         user_agent=self.ua,
     )
     # Only the action shortcut object is kept, not the client itself.
     self.ckanapi = client.action
     self.elections = app.config['CKAN_CURRENT_ELECTIONS']
     self.resources_metadata = self._get_resources_metadata()
Exemplo n.º 5
0
def get_n_post(package_id):
    """
    Fetch a package from the registry, adapt it and post it to OG.

    Called by main(); calls get_data_from_reg().

    The record is overlaid with the module-level ``to_replace`` values, gets
    a bilingual ``metadata_contact`` field built from its branch and data
    steward, loses every key listed in ``to_remove`` and is then sent to the
    site named by the "open_gov_registry_url" environment variable.

    :param package_id: id of the registry package to copy
    :return: True if the package was created or updated, False if both
        attempts raised
    """
    og_data = get_data_from_reg(package_id)

    # replace
    for field, value in to_replace.items():
        og_data[field] = value

    # metadata_contact is derived from branch and data steward
    # (necessary in sync_with_og script). The organization description is
    # expected to look like "<english branch>|<french branch>".
    steward_email = og_data["data_steward_email"].strip()
    branch_parts = og_data["organization"]["description"].split('|')
    branch_en = branch_parts[0].strip()
    branch_fr = branch_parts[1].strip()
    og_data["metadata_contact"] = {
        "fr":
        "Gouvernement du Canada; Agriculture et Agroalimentaire Canada, " +
        branch_fr + ", " + steward_email,
        "en":
        "Government of Canada; Agriculture and Agri-Food Canada, " +
        branch_en + ", " + steward_email,
    }

    # remove
    for field in to_remove:
        del og_data[field]

    og_client = RemoteCKAN(os.getenv("open_gov_registry_url"),
                           apikey=os.getenv("open_gov_registry_api_key"))

    # Creation is tried first. A failure may simply mean the package already
    # exists, so an update is attempted before giving up.
    try:
        og_client.call_action("package_create", data_dict=og_data)
    except Exception as create_error:
        print(create_error)
    else:
        return True

    try:
        og_client.call_action("package_update", data_dict=og_data)
    except Exception as update_error:
        print(update_error)
        return False
    return True
 def setUp(self):
     """Reset the portal so it holds a fresh copy of every test dataset.

     Each dataset in self.test_datasets is purged first (a missing dataset
     is not an error), then all of them are created again.
     """
     self.ckan_portal = RemoteCKAN(self.portal_url, apikey=self.apikey)
     portal = self.ckan_portal

     for fixture in self.test_datasets:
         try:
             portal.call_action('dataset_purge',
                                data_dict={'id': fixture['id']})
         except NotFound:
             pass

     for fixture in self.test_datasets:
         portal.call_action('package_create', data_dict=fixture)
Exemplo n.º 7
0
def export_to_ckan(arguments):
    """Export every data file in a directory, with its metadata, to CKAN.

    A data file ``<name>`` is paired with a metadata file
    ``<name>.meta.json`` in the same directory. For each metadata file whose
    data file exists, export_file() is called; a metadata file without its
    data file is deleted.

    :param arguments: mapping of command-line options. Keys read here:
        '--input' (directory), '--site', '--organization', '--api-key' and
        '--public'. Site, API key and organization are prompted for when
        they are empty.
    :raises StopIteration: if the chosen organization name is not among the
        organizations returned by CKAN.
    """
    meta_suffix = '.meta.json'

    input_dir = arguments['--input']
    input_dir = input_dir if path.isabs(input_dir) else path.abspath(input_dir)
    ckan_host = arguments['--site']
    organization = arguments['--organization']
    api_key = arguments['--api-key']
    public = arguments['--public']

    if not ckan_host:
        ckan_host = prompt({
            'type': 'input',
            'name': 'ckan_host',
            'message': 'Inform your CKAN API Site: '
        })['ckan_host']

    if not api_key:
        api_key = prompt({
            'type': 'password',
            'name': 'api_key',
            'message': 'Inform your CKAN API Key:'
        })['api_key']

    ckan = RemoteCKAN(ckan_host, apikey=api_key)
    organizations = ckan.call_action("organization_list", {"all_fields": True})

    if not organization:
        organization = prompt({
            'type': 'list',
            'name': 'organization',
            'message': 'Select the organization:',
            'choices': [o['name'] for o in organizations]
        })['organization']

    packages_cache = {}
    selected_org = next(
        filter(lambda o: o['name'] == organization, organizations))

    for meta_name in filter(lambda f: f.endswith(meta_suffix),
                            tqdm(os.listdir(input_dir))):
        # Strip the suffix from the END only. The previous
        # endswith("meta.json") + replace('.meta.json', '') pair disagreed:
        # a file such as "foometa.json" passed the filter, was left
        # unchanged by replace() and was then exported as its own data file.
        data_name = meta_name[:-len(meta_suffix)]
        original_file = path.join(input_dir, data_name)
        meta_file = path.join(input_dir, meta_name)

        if path.exists(original_file):

            with open(meta_file, 'r', encoding='latin-1') as mfp:
                meta_data = json.load(fp=mfp)

            export_file(ckan, original_file, meta_data, selected_org, public,
                        packages_cache)

        else:
            print(f"{original_file} not found. Deleting meta the data file.")
            os.remove(meta_file)
Exemplo n.º 8
0
def search(query, datatype=None, apikey=apikey):
    """Search CKAN for organisations, projects, datasets, resources or users.

    Arguments:
    query -- a single search term
             For 'resource' datatypes the term is matched against the
             resource name. Further terms can be appended with the syntax
             'query1 field2:query2', where the field is a valid resource field.
    datatype -- organisation, project, dataset, resource or user. When None
                the user is asked interactively.
    apikey -- CKAN API key (defaults to the module-level ``apikey``).
              Private datasets will only be shown to authorised API keys

    Returns a pandas DataFrame of matches (transposed when a row has more
    fields than there are rows), or None after printing a message when
    nothing was found or the datatype is not recognised.
    """
    if datatype is None:
        datatype = input(
            'Are you looking for an organisation, project, dataset, resource or user?\n\n'
        ).lower().strip()

    found = []
    if datatype in ('organisation', 'project'):
        found = RemoteCKAN(url, apikey).action.organization_autocomplete(q=query)
    elif datatype == 'dataset':
        packages = RemoteCKAN(
            url, apikey).action.package_search(q=query)['results']
        found = [{key: package[key] for key in ('name', 'title')}
                 for package in packages]
    elif datatype == 'resource':
        # The first term is matched against the resource name; any further
        # space-separated terms are passed through unchanged.
        terms = ('name:' + query).split(" ")
        matches = RemoteCKAN(
            url, apikey).action.resource_search(query=terms)['results']
        wanted = ('description', 'format', 'id', 'last_modified', 'name',
                  'package_id', 'revision_id')
        found = [{key: match[key] for key in wanted} for match in matches]
    elif datatype == 'user':
        found = RemoteCKAN(url, apikey).action.user_autocomplete(q=query)
    else:
        print(
            'Please try a different search query and type a valid data type. This can be an organisation, project, dataset, resource or user.\n'
        )

    if len(found) == 0:
        print('Cannot find %s for this search term' % datatype)
        return None

    # Few rows with many fields read better with fields as rows.
    more_fields_than_rows = len(found[0]) > len(found)
    frame = pd.DataFrame(found)
    return frame.T if more_fields_than_rows else frame
    def __call__(self):
        """Fetch metadata for every distinct id in self.entity_ids.

        The CKAN action named by self.action is called once per id. Ids that
        CKAN reports as not found are logged and left out of the result.

        Returns:
            dict mapping each found id to the value returned by the action.
        """
        endpoint = getattr(self.api.action, self.action)
        metadata = {}
        try:
            for item_id in set(self.entity_ids):
                try:
                    metadata[item_id] = endpoint(id=item_id)
                except errors.NotFound:
                    url = f"action={self.action} id={item_id}"
                    logging.error(f"CKAN API NotFound error: {url}")
        finally:
            # Close once, after all lookups. Closing inside the loop threw
            # the connection away after every item and was skipped entirely
            # when an unexpected exception escaped.
            RemoteCKAN.close(self.api)

        return metadata
Exemplo n.º 10
0
def get_data_from_url(package_id, url):
    """Return a package from the CKAN site configured in the environment.

    :param package_id: id of the package to show
    :param url: NAME of the environment variable that holds the site URL
        (not the URL itself)
    :return: the "package_show" result, or an empty list if the call raised
    """
    client = RemoteCKAN(os.getenv(url))
    try:
        return client.call_action("package_show",
                                  data_dict={"id": package_id})
    except Exception:
        # if no data exists yet, return empty
        return []
Exemplo n.º 11
0
    def __init__(self, url, apikey=None):
        """Open a connection to a CKAN site and verify that it is readable.

        Args:
            url: base URL of the CKAN site.
            apikey: optional API key used for every request.

        Raises:
            CkanReadError: if the 'site_read' call raises.
            CkanApiError: if 'site_read' returns a falsy value; the
                connection is closed first.
        """
        self.apikey = apikey
        self.remote = RemoteCKAN(url, apikey=self.apikey)
        try:
            res = self.call_action('site_read')
        except Exception:
            raise CkanReadError()

        # The key is a credential: record only whether one is in use, never
        # its value.
        logger.info('Open CKAN connection with api key: {}'.format(
            '<redacted>' if apikey else None))
        if not res:
            self.close()
            raise CkanApiError()
Exemplo n.º 12
0
def scidm_resources_download(token, package_id):
    """Return the name and URL of every resource in a scidm package.

    :param token: API key for https://scidm.nchc.org.tw
    :param package_id: id of the package to inspect
    :return: pandas DataFrame with one row per resource and the columns
        'name' and 'url'
    """
    scidm = RemoteCKAN('https://scidm.nchc.org.tw', apikey=token)
    package = scidm.call_action('package_show', {'id': package_id})

    # Walk through the resources contained in the package.
    name_url_pairs = [(resource['name'], resource['url'])
                      for resource in package['resources']]

    return pd.DataFrame({
        'name': [name for name, _ in name_url_pairs],
        'url': [link for _, link in name_url_pairs],
    })
Exemplo n.º 13
0
def show(name, datatype=None, apikey=apikey):
    """Look up one CKAN object and return it as a pandas Series.

    Arguments:
    name -- valid name or id of the CKAN object. For 'resource' datatypes
            this must be the 36 digit 'id'. For organisations/projects the
            special value 'all' lists every organisation.
    datatype -- organisation, project, dataset or resource. When None the
                user is asked interactively.
    apikey -- CKAN API key (defaults to the module-level ``apikey``).
              Private datasets will only be shown to authorised API keys

    Returns a pandas Series built from the API result, or None (after a
    message has been printed) when the lookup failed, returned nothing or
    the datatype is not recognised.
    """
    if datatype is None:
        datatype = input(
            'Is this an organisation, a project, a dataset or a resource?\n\n'
        ).lower().strip()

    result = {}  # stays empty when nothing could be retrieved
    if datatype in ('organisation', 'project'):
        if name == 'all':
            result = RemoteCKAN(url, apikey).action.organization_list()
        else:
            try:
                result = RemoteCKAN(url, apikey).action.organization_show(
                    id=name,
                    include_datasets=True,
                    include_groups=False,
                    include_tags=False,
                    include_followers=False,
                    include_users=False)
            except Exception:
                print(
                    'This is neither a valid organisation nor a valid project')
    elif datatype == 'dataset':
        try:
            result = RemoteCKAN(url, apikey).action.package_show(id=name)
        except Exception:
            print('This is not a valid dataset')
    elif datatype == 'resource':
        try:
            result = RemoteCKAN(url, apikey).action.resource_show(id=name)
        except Exception:
            print(
                'This is not a valid resource. Check that you are using the resource id, not the name, and try again.'
            )
    else:
        print(
            'Oops. Use a valid name and type a valid data type. This can be an organisation, project, dataset or resource.'
        )

    if len(result) > 0:
        return pd.Series(result)
    return None
Exemplo n.º 14
0
    def save(self, *args, **kwargs):
        """Mirror the organization to CKAN, then save it locally.

        An object without an id is created in CKAN; otherwise the CKAN
        organization identified by self.id_ckan is patched. In both cases
        self.id_ckan is set from the 'id' of the API response.

        The CKAN synchronisation is best effort: a failure is logged and the
        local save still takes place.
        """
        import logging

        from ckanapi import RemoteCKAN
        configuracao_sistema = ConfiguracaoSistema.objects.first()
        ckan = RemoteCKAN(configuracao_sistema.url_ckan,
                          apikey=configuracao_sistema.token_ckan)

        try:
            organization_fields = dict(name=self.slug,
                                       title=self.nome,
                                       description=self.descricao,
                                       image_url=self.url_logomarca)
            if not self.id:
                # Creating the organization in CKAN
                retorno = ckan.action.organization_create(
                    **organization_fields)
            else:
                # Updating the organization in CKAN
                retorno = ckan.action.organization_patch(
                    id=self.id_ckan, **organization_fields)

            self.id_ckan = retorno.get('id')
        except Exception:
            # Previously a bare `except: pass`, which hid every failure and
            # also swallowed KeyboardInterrupt/SystemExit.
            logging.getLogger(__name__).exception(
                'Could not synchronise organization with CKAN')

        super(Organizacao, self).save(*args, **kwargs)
Exemplo n.º 15
0
def add_groups(new_hri_docs, ckan_source="https://hri.dataportaali.com/data"):
    """Attach CKAN group records to documents based on their categories.

    For every document, each category from get_cat() is translated to a
    group name through cats_to_groups_map() and looked up among the groups
    listed by the CKAN site. Documents with at least one match get a
    'groups' key holding the matching group records; the others are left
    untouched. The documents are modified in place and nothing is returned.

    :param new_hri_docs: iterable of documents (dict-like) to annotate
    :param ckan_source: base URL of the CKAN site supplying the group list
    """
    category_to_group = cats_to_groups_map()

    client = RemoteCKAN(ckan_source, user_agent='ckanapiexample/1.0')
    groups_by_name = {
        group['name']: group
        for group in client.action.group_list(all_fields=True)
    }

    for doc in new_hri_docs:
        candidates = (groups_by_name.get(category_to_group.get(cat))
                      for cat in get_cat(doc))
        matched = [group for group in candidates if group]
        if matched:
            doc['groups'] = matched
Exemplo n.º 16
0
def package_search(url, organization):
    """Collect every package of an organization from a CKAN site.

    Results are requested in pages of 100 until a page comes back empty.

    :param url: base URL of the CKAN site
    :param organization: organization name used in the search query
    :return: dict with the keys 'api' ('package_search'), 'results' (list of
        packages) and 'count'. If an error occurs it is printed, 'count'
        stays 0 and 'results' holds whatever was collected before the error.
    """
    page_size = 100
    obj = {'api': 'package_search', 'count': 0, 'results': []}
    collected = obj['results']

    try:
        apiserver = RemoteCKAN(url)
        query = "organization:{}".format(organization)

        offset = 0
        while True:
            page = apiserver.action.package_search(q=query,
                                                   start=offset,
                                                   rows=page_size)
            if not page:
                break
            fetched = len(page['results'])
            if fetched <= 0:
                break
            collected.extend(page['results'])
            offset += fetched

        obj['count'] = len(collected)

    except Exception as e:
        print('package_search ERROR:', e)

    return obj
Exemplo n.º 17
0
def getMetadata(key, value, id):
    """Fetch a package from CKAN and hand it to writeMeta().

    :param key: base URL of the CKAN site (passed straight to RemoteCKAN)
    :param value: id or name of the package to show
    :param id: identifier forwarded to writeMeta() together with the package
    :return: None. Any error is reported with a short printed message.
    """
    try:
        ckan = RemoteCKAN(key, user_agent=ua)
        show = ckan.action.package_show(id=value)
        writeMeta(show, id, value)
    except Exception:
        # `except Exception` instead of a bare `except` so that Ctrl-C and
        # SystemExit are no longer swallowed.
        print('\n Terjadi kesalahan')
Exemplo n.º 18
0
    def _handle_messages(self, config, messages):
        """Push a batch of consumed messages to CKAN.

        For each message the topic's schema is compared with the cached one;
        on a change the resource fields are re-derived and cached. The
        message value is collected and submit_artefacts() is called. After
        the loop, if a resource is available, it is created in the datastore
        and all collected records are sent to it.

        Args:
            config: job configuration, used to look up the CKAN instance.
            messages: iterable of messages exposing ``topic``, ``schema``
                and ``value``; may be empty.

        NOTE(review): records from every message are sent with the topic
        and resource of the LAST message -- presumably a batch only ever
        holds one topic; confirm against the caller.
        """
        self.log.debug(f'{self.group_name} | reading {len(messages)} messages')
        ckan_instance = self._job_ckan(config=config)
        server_url = ckan_instance.definition.get('url')
        api_key = ckan_instance.definition.get('key')
        ckan_remote = RemoteCKAN(server_url, apikey=api_key)
        count = 0
        records = []
        topic = None
        # Must be bound even when `messages` is empty; otherwise the check
        # after the loop raises UnboundLocalError.
        resource = None
        for msg in messages:
            topic = msg.topic
            schema = msg.schema
            if schema != self._schemas.get(topic):
                self.log.info(f'{self._id} Schema change on {topic}')
                self._schemas[topic] = schema
                fields, definition_names = extract_fields_from_schema(schema)
                fields = prepare_fields_for_resource(fields, definition_names)
                self._topic_fields[topic] = fields
            else:
                self.log.debug('Schema unchanged.')
            records.append(msg.value)
            resource = self.submit_artefacts(topic, schema, ckan_remote)
            count += 1

        if resource:
            self._create_resource_in_datastore(resource, ckan_remote)
            self.send_data_to_datastore(self._topic_fields[topic], records,
                                        resource, ckan_remote)
        self.log.info(f'processed {count} {topic} docs')
Exemplo n.º 19
0
def main():
    """Print the portal's GeoGratis datasets that have no local record.

    The CKAN URL is read from the [ckan] section of geogratis.ini. The
    portal's package_search results are paged through 100 at a time; for
    each dataset the database is queried for a GeogratisRecord whose uuid
    equals the dataset name, and names without a match are printed.

    Any exception ends the scan; its text is written to stderr.
    """
    ini_config = ConfigParser()
    ini_config.read('geogratis.ini')
    remote_ckan_url = ini_config.get('ckan', 'ckan.url')

    factory = MetadataDatasetModelGeogratisFactory()

    # Create CKAN API connector to the portal
    ckan_portal = RemoteCKAN(
        remote_ckan_url, user_agent='converter/1.0 http://open.canada.ca/data')

    session = connect_to_database()
    page_size = 100
    last_id = 0

    try:
        while True:
            ckan_records = ckan_portal.action.package_search(
                q=
                'extras_collection:geogratis AND extras_org_title_at_publication:"Natural Resources Canada"',
                rows=page_size,
                start=last_id)
            # package_search returns a dict, which is truthy even when it
            # has no results. Testing the dict itself never ended the loop,
            # so stop on an empty page instead.
            results = ckan_records['results'] if ckan_records else []
            if not results:
                break
            for r in results:
                rp = session.query(GeogratisRecord).filter(
                    GeogratisRecord.uuid == r['name']).all()
                if not rp:
                    print(r['name'])
            last_id += page_size
    except Exception as e:
        # Format the exception itself: `.message` only exists on Python 2.
        stderr.write('{0}\n'.format(e))
Exemplo n.º 20
0
 def test_resource_upload_unicode_param(self):
     """A non-ASCII 'option' sent with a file upload is echoed back intact."""
     option_value = b't\xc3\xab\xc3\x9ft resource'.decode('utf-8')
     upload = {'upload': StringIO(NUMBER_THING_CSV)}
     with RemoteCKAN(TEST_CKAN) as ckan:
         response = ckan.call_action('test_upload',
                                     {'option': option_value},
                                     files=upload)
     self.assertEqual(response.get('option'), option_value)
    def ckan_api_staging(self):
        """Return a RemoteCKAN client for the staging site.

        The URL comes from the CKAN_URL environment variable and falls back
        to https://staging.basedosdados.org. The API key is the placeholder
        string "put_api_key" and no user agent is set.
        """
        staging_url = os.environ.get("CKAN_URL",
                                     "https://staging.basedosdados.org")
        return RemoteCKAN(staging_url, user_agent=None, apikey="put_api_key")
def get_ckan_record(record_id):
    """
    Retrieve a CKAN dataset record from a remote CKAN portal

    A record that does not exist is logged at info level. A connection error
    is logged and terminates the process via exit(code=500).

    :param record_id: Unique Identifier for the dataset - For Open Canada, these are always UUID's
    :return: The CKAN package, or an empty dict if the dataset could not be retrieved
    """

    remote_ckan_url = Config.get('ckan', 'remote_url')
    user_agent = Config.get('web', 'user_agent')
    with RemoteCKAN(remote_ckan_url, user_agent=user_agent) as ckan_instance:
        package_record = {}
        try:
            package_record = ckan_instance.action.package_show(id=record_id)

        except NotFound:
            # This is a new record!
            logger.info(
                'get_ckan_record(): Cannot find record {0}'.format(record_id))
        except requests.exceptions.ConnectionError as ce:
            # Format the exception itself: exceptions have no `.message`
            # attribute on Python 3, so reading it raised AttributeError
            # here and hid the real connection error.
            logger.error(
                'get_ckan_record(): Fatal connection error {0}'.format(ce))
            exit(code=500)

        return package_record
Exemplo n.º 23
0
    def portal_update_worker(self, source):
        """
        a process that accepts package ids on stdin which are passed to
        the package_show API on the remote CKAN instance and compared
        to the local version of the same package.  The local package is
        then created, updated, deleted or left unchanged.  This process
        outputs that action as a string 'created', 'updated', 'deleted'
        or 'unchanged'

        One id is read per stdin line and one result line is written to
        stdout per id. The loop ends at end of input or when flushing
        stdout raises IOError.

        :param source: address of the remote CKAN instance (the registry),
            passed unchanged to RemoteCKAN
        """
        registry = RemoteCKAN(source)
        portal = LocalCKAN()
        # Captured once, so every package in this run is compared against
        # the same instant.
        now = datetime.now()

        # iter(callable, sentinel): keep reading lines until readline()
        # returns '' (end of input).
        for package_id in iter(sys.stdin.readline, ''):
            try:
                data = registry.action.package_show(id=package_id.strip())
                # NOTE(review): this expects package_show to return a
                # mapping with a 'result' key -- confirm against the ckanapi
                # version in use.
                source_pkg = data['result']
            except NotAuthorized:
                # Only NotAuthorized is handled for the registry lookup;
                # any other error propagates.
                source_pkg = None

            # NOTE(review): source_pkg may be None here; presumably
            # _trim_package accepts None -- confirm.
            _trim_package(source_pkg)

            if source_pkg:
                # treat unpublished packages same as deleted packages
                if not source_pkg['portal_release_date'] or isodate(
                        source_pkg['portal_release_date'], None) > now:
                    source_pkg = None

            try:
                # don't pass user in context so deleted packages
                # raise NotAuthorized
                target_pkg = portal.call_action('package_show',
                    {'id':package_id.strip()}, {})
            except (NotFound, NotAuthorized):
                target_pkg = None

            _trim_package(target_pkg)

            # Decide the action from which side has the package.
            if target_pkg is None and source_pkg is None:
                result = 'unchanged'
            elif target_pkg is None:
                # CREATE
                portal.action.package_create(**source_pkg)
                result = 'created'
            elif source_pkg is None:
                # DELETE
                portal.action.package_delete(id=package_id.strip())
                result = 'deleted'
            elif source_pkg == target_pkg:
                result = 'unchanged'
            else:
                # UPDATE
                portal.action.package_update(**source_pkg)
                result = 'updated'

            sys.stdout.write(result + '\n')
            try:
                # Flush after every id so the result is not held back in a
                # buffer.
                sys.stdout.flush()
            except IOError:
                # stdout can no longer be written to: stop.
                break
def delete_ckan_record(package_id):
    """
    Remove a dataset and its associated resource from CKAN

    The record is looked up first; if it cannot be found a warning is logged
    and nothing is deleted. Otherwise the blob of the record's FIRST
    resource is deleted, then the package is deleted and purged. Any error
    during these steps is logged, not raised.

    :param package_id: id of the dataset to remove
    :return: Nothing
    """

    # First, verify and get the resource ID
    package_record = get_ckan_record(package_id)
    if len(package_record) == 0:
        # Logger.warn is a deprecated alias of warning().
        logger.warning("delete_ckan_record(): cannot find record ID {0}".format(
            package_id))
        return

    # Get rid of the resource
    remote_ckan_url = Config.get('ckan', 'remote_url')
    remote_ckan_api = Config.get('ckan', 'remote_api_key')
    user_agent = Config.get('web', 'user_agent')

    with RemoteCKAN(remote_ckan_url,
                    user_agent=user_agent,
                    apikey=remote_ckan_api) as ckan_instance:
        try:
            delete_blob(
                ckan_container, 'resources/{0}/{1}'.format(
                    package_record['resources'][0]['id'],
                    package_record['resources'][0]['name'].lower()))
            ckan_instance.action.package_delete(id=package_record['id'])
            ckan_instance.action.dataset_purge(id=package_record['id'])
            logger.info("Deleted expired CKAN record {0}".format(
                package_record['id']))
        except Exception as ex:
            # Format the exception itself: `ex.message` does not exist on
            # Python 3 and raised AttributeError inside this handler,
            # replacing the error that was meant to be logged.
            logger.error("delete_ckan_record(): {0}".format(ex))
Exemplo n.º 25
0
    def __init__(self, datasets_dir=None):
        """Create the CKAN client and choose where datasets are stored.

        Besides the ckan instance, an empty dict is initialized to register
        the paths of downloaded data in the following form
          {
            "packageX": {
               "resourceX1": "path to the file",
               ...
            }
          }

        Args:
            datasets_dir - the path of the local directory where datasets
                           (packages) should be saved. When not given, the
                           DATASETS environment variable is used; when that
                           is not set either, a 'datasets' directory next to
                           the current working directory is used.
        """
        if not datasets_dir:
            datasets_dir = os.environ.get('DATASETS')
            if datasets_dir is None:
                # Sibling of the current working directory.
                working_dir = Path().absolute()
                datasets_dir = str(working_dir.parent) + '/datasets'
        self.datasets_dir = datasets_dir

        self.ckan = RemoteCKAN(CvmClient.CVM_URL)
        self.local_files = {}
Exemplo n.º 26
0
class Action(ActionRuntime):
    """Data-sourcing action that calls a CKAN action for every input item.

    The result of the call is emitted with put(): a list result is emitted
    element by element, anything else as a single item.
    """

    category = "Data Sourcing"

    required_config = """
        url:     # URL for the CKAN endpoint
        action:  # CKAN action to perform
    """

    optional_config = """
        ignore_not_found: False     # Ignore collection not found errors
    """

    def on_start(self, config):
        """Create the CKAN client for the configured URL."""
        self.ckan = RemoteCKAN(config["url"])

    def on_input(self, item):
        """Run the configured CKAN action and emit what it returns.

        The input item itself is not used. Call arguments come from the
        "arguments" config entry (empty when absent). A NotFound error is
        re-raised unless "ignore_not_found" is set, in which case nothing
        is emitted.
        """
        action_name = self.config["action"]
        call_args = self.config.get("arguments", {})
        try:
            data = self.ckan.call_action(action_name, call_args)
        except errors.NotFound:
            if not self.config["ignore_not_found"]:
                raise
            return

        results = data if isinstance(data, list) else [data]
        for new_item in results:
            self.put(new_item)
Exemplo n.º 27
0
def get_ckan_record(record_id):
    """
    Retrieve a CKAN dataset record from a remote CKAN portal

    Problems are reported on the console with cprint: a missing record and
    other CKAN API errors in yellow, a connection error in red. A connection
    error also terminates the process via exit(code=500).

    :param record_id: Unique Identifier for the dataset - For Open Canada, these are always UUID's
    :return: The CKAN package, or an empty dict if the dataset could not be retrieved
    """

    remote_ckan_url = Config.get('ckan', 'remote_url')
    user_agent = Config.get('web', 'user_agent')
    with RemoteCKAN(remote_ckan_url, user_agent=user_agent) as ckan_instance:
        package_record = {}
        try:
            package_record = ckan_instance.action.package_show(id=record_id)

        except NotFound:
            # This is a new record!
            cprint('Record {0} does not exist'.format(record_id), 'yellow')
        except requests.exceptions.ConnectionError as ce:
            # Format the exception itself: exceptions have no `.message`
            # attribute on Python 3, so reading it raised AttributeError
            # inside the handler.
            cprint('get_ckan_record(): Fatal connection error {0}'.format(ce),
                   'red',
                   attrs=['blink'])
            exit(code=500)
        except CKANAPIError as ne:
            cprint('get_ckan_record(): Unexpected error {0}'.format(ne),
                   'yellow')

        return package_record
Exemplo n.º 28
0
def main():
    """Set up a GeoGratis conversion run (Python 2 code).

    Reads the CKAN URL from the [ckan] section of geogratis.ini, parses the
    command-line arguments with the module-level ``argparser``, creates the
    CKAN connector and the database session, and turns a non-empty
    ``--since`` value (YYYY-MM-DD) into ``scan_date``. If the date cannot
    be parsed, the error is logged, the session is closed and the process
    exits.

    NOTE(review): factory, now_str, ckan_portal, last_id, scan_date and
    setting are assigned but not used in the visible code -- the function
    appears to be cut short here; confirm against the full source.
    """
    ini_config = ConfigParser()
    ini_config.read('geogratis.ini')
    remote_ckan_url = ini_config.get('ckan', 'ckan.url')

    args = argparser.parse_args()
    factory = MetadataDatasetModelGeogratisFactory()

    # Timestamp of this run, formatted with a fixed ".000Z" suffix.
    now_str = datetime.now().strftime('%Y-%m-%dT%H:%M:%S.000Z')

    # Create CKAN API connector to the portal
    ckan_portal = RemoteCKAN(
        remote_ckan_url, user_agent='converter/1.0 http://open.canada.ca/data')

    # Potentially doing a VERY large ORM query. If we don't limit the read, then SQLAlchemy will try to pull
    # everything into memory. Therefore the query must be paged. Paging requires keeping track of the sequential
    # record ID's

    session = connect_to_database()
    last_id = 0
    scan_date = None
    setting = get_setting('last_conversion_run')
    if args.since != '':
        try:
            # strptime -> struct_time -> epoch seconds -> datetime
            scan_date = datetime.fromtimestamp(
                time.mktime(time.strptime(args.since, '%Y-%m-%d')))
        except ValueError:
            # The string did not match YYYY-MM-DD.
            logging.error("Incorrect since date format. Use YYYY-MM-DD")
            session.close()
            exit()
        except Exception, e:
            # Any other failure while converting the date.
            logging.error(e.message)
            session.close()
            exit()
Exemplo n.º 29
0
 def __init__(self, config):
     """Create the registry client and download the dataset schema.

     Args:
         config: object exposing ``api_url`` and ``api_key`` for the CKAN
             registry.

     The schema is fetched from the master branch of the depositar
     ckanext-data-depositario repository on GitHub and parsed as YAML.
     """
     self.c = config
     self.registry = RemoteCKAN(config.api_url, apikey=config.api_key)
     schema_url = ('https://raw.githubusercontent.com/depositar/'
                   'ckanext-data-depositario/master/ckanext/'
                   'data_depositario/schemas/dataset.yaml')
     with urllib.request.urlopen(schema_url) as url:
         # safe_load, not load: the document comes from the network, and
         # yaml.load without an explicit Loader can construct arbitrary
         # Python objects (and is rejected outright by PyYAML >= 6).
         self.schema = yaml.safe_load(url.read())
Exemplo n.º 30
0
def get_package_parameter(site, package_id, parameter, API_key=None):
    """Return a single field from a CKAN package's metadata.

    :param site: address of the CKAN site, passed to ``RemoteCKAN``.
    :param package_id: id given to the ``package_show`` action.
    :param parameter: key to read from the returned metadata.
    :param API_key: optional API key for the CKAN site.
    :returns: ``metadata[parameter]`` for the requested package.
    :raises RuntimeError: if the client cannot be created, the lookup fails
        or the metadata has no such key.
    """
    # Some package parameters you can fetch from the WPRDC with
    # this function are:
    # 'geographic_unit', 'owner_org', 'maintainer', 'data_steward_email',
    # 'relationships_as_object', 'access_level_comment',
    # 'frequency_publishing', 'maintainer_email', 'num_tags', 'id',
    # 'metadata_created', 'group', 'metadata_modified', 'author',
    # 'author_email', 'state', 'version', 'department', 'license_id',
    # 'type', 'resources', 'num_resources', 'data_steward_name', 'tags',
    # 'title', 'frequency_data_change', 'private', 'groups',
    # 'creator_user_id', 'relationships_as_subject', 'data_notes',
    # 'name', 'isopen', 'url', 'notes', 'license_title',
    # 'temporal_coverage', 'related_documents', 'license_url',
    # 'organization', 'revision_id'
    try:
        ckan = RemoteCKAN(site, apikey=API_key)
        metadata = ckan.action.package_show(id=package_id)
        desired_string = metadata[parameter]
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not turned into a RuntimeError.
        raise RuntimeError(
            "Unable to obtain package parameter '{}' for package with ID {}".
            format(parameter, package_id))

    return desired_string
Exemplo n.º 31
0
    def copy_datasets(self, remote, package_ids=None):
        """
        A process that accepts package ids on stdin which are passed to
        the package_show API on the source CKAN instance and compared
        to the target version of the same package.  The target package is
        then created, updated, deleted or left unchanged.  This process
        outputs that action as a string 'created', 'updated', 'deleted'
        or 'unchanged'.

        With ``options.push_apikey`` set (and ``options.fetch`` unset) the
        source is the local CKAN and the target is ``remote``; with
        ``options.fetch`` set the source is ``remote`` and the target is the
        local CKAN.

        :param remote: address of the remote CKAN, passed to ``RemoteCKAN``.
        :param package_ids: optional iterable of package ids; when it is
            empty or None the ids are read line by line from stdin.
        """
        if self.options.push_apikey and not self.options.fetch:
            registry = LocalCKAN()
            portal = RemoteCKAN(remote, apikey=self.options.push_apikey)
        elif self.options.fetch:
            registry = RemoteCKAN(remote)
            portal = LocalCKAN()
        else:
            # One parenthesised string prints the same on Python 2 and 3.
            print("exactly one of -f or -a options must be specified")
            return

        now = datetime.now()

        if not package_ids:
            # Read ids from stdin until EOF (readline returns '').
            package_ids = iter(sys.stdin.readline, '')

        for package_id in package_ids:
            package_id = package_id.strip()
            reason = None
            target_deleted = False
            try:
                source_pkg = registry.action.package_show(id=package_id)
            except NotAuthorized:
                source_pkg = None
            except (CKANAPIError, urllib2.URLError) as e:
                # Report the failure on stdout, then let it propagate.
                sys.stdout.write(json.dumps([package_id, 'source error',
                    unicode(e.args)]) + '\n')
                raise
            if source_pkg and source_pkg['state'] == 'deleted':
                source_pkg = None

            if source_pkg and source_pkg['type'] != 'dataset':
                # non-default dataset types ignored
                source_pkg = None

            _trim_package(source_pkg)

            if source_pkg and not self.options.mirror:
                # treat unpublished packages same as deleted packages
                if not source_pkg['portal_release_date']:
                    source_pkg = None
                    reason = 'release date not set'
                elif isodate(source_pkg['portal_release_date'], None) > now:
                    source_pkg = None
                    reason = 'release date in future'

            try:
                target_pkg = portal.call_action('package_show',
                    {'id':package_id})
            except (NotFound, NotAuthorized):
                target_pkg = None
            except (CKANAPIError, urllib2.URLError) as e:
                # Report the failure on stdout, then let it propagate.
                sys.stdout.write(json.dumps([package_id, 'target error',
                    unicode(e.args)]) + '\n')
                raise
Exemplo n.º 32
0
 def test_good_oldstyle(self):
     """organization_list works on a client that is closed explicitly."""
     client = RemoteCKAN(TEST_CKAN)
     organization_names = client.action.organization_list()
     self.assertEqual(organization_names, ['aa', 'bb', 'cc'])
     client.close()
Exemplo n.º 33
0
    def post(self, request, publisher_id):
        """Publish a publisher's organisations file to the CKAN registry.

        Creates the ``<publisher name>-organisations`` package in the
        registry; if creation fails, looks the package up and updates it
        instead. The registry id is then stored on the local ``Dataset`` and
        the publisher's ready-to-publish organisations are flagged as
        published.

        :param request: request whose ``data`` must contain ``source_url``
            and whose ``user.organisationuser`` supplies the registry API
            key.
        :param publisher_id: primary key of the ``Publisher``.
        :returns: ``Response`` with the serialized ``Dataset``.
        :raises exceptions.APIException: if ``source_url`` is missing, the
            user's organisation list cannot be fetched or is empty, or the
            existing package cannot be found after a failed create.
        """
        organisationuser = request.user.organisationuser
        iati_user_id = organisationuser.iati_user_id
        publisher = Publisher.objects.get(pk=publisher_id)

        source_url = request.data.get('source_url', None)

        if not source_url:
            raise exceptions.APIException(detail="no source_url provided")

        client = RemoteCKAN(
            settings.CKAN_URL, apikey=organisationuser.iati_api_key)

        # TODO: should this be the name? - 2017-02-20
        source_name = '{}-organisations'.format(publisher.name)

        # get all published organisations, except for the ones that are just
        # modified
        organisations = Organisation.objects.filter(
            ready_to_publish=True,
            publisher=publisher
        )

        try:
            org_list = client.call_action('organization_list_for_user', {})
        except Exception:
            raise exceptions.APIException(
                detail="Can't get organisation list for user {}".format(
                    iati_user_id
                ))

        if not org_list:
            # Without this guard an empty list raised a bare IndexError.
            raise exceptions.APIException(
                detail="This user has no organisations yet")

        primary_org_id = org_list[0]['id']

        def build_package(**extra):
            # Payload shared by package_create and package_update; built on
            # demand so date_updated and the count are current at call time.
            package = {
                "resources": [
                    {"url": source_url}
                ],
                "name": source_name,
                "filetype": "organisation",
                "date_updated": datetime.now().strftime('%Y-%m-%d %H:%M'),
                "organisation_count": organisations.count(),
                "title": source_name,
                "owner_org": primary_org_id,
                "url": source_url,
            }
            package.update(extra)
            return package

        try:
            # sync main datasets to IATI registry
            registry_dataset = client.call_action(
                'package_create', build_package())
        except Exception:
            # try to recover from case when the dataset already exists (just
            # update it instead)
            old_package = client.call_action('package_show', {
                "name_or_id": source_name,
            })

            if not old_package:
                raise exceptions.APIException(
                    detail="Failed publishing dataset"
                )

            registry_dataset = client.call_action(
                'package_update', build_package(id=old_package.get('id')))

        # Fetch the manually added Dataset for this publisher.
        # NOTE(review): the original comment said "create_or_update", but
        # this only fetches and raises if no such Dataset exists - confirm.
        dataset = Dataset.objects.get(
            filetype=2,
            publisher=publisher,
            added_manually=True,
        )

        dataset.iati_id = registry_dataset['id']
        dataset.name = source_name
        dataset.title = source_name
        dataset.source_url = source_url
        dataset.is_parsed = False
        dataset.save()

        #  update the affected organisations flags
        organisations.update(
            published=True, modified=False, ready_to_publish=True)

        # remove the old datasets from the registry
        # TODO: query the registry to remove a dataset - 2017-01-16
        # TODO: remove old datasets locally as well - 2017-01-16

        # TODO: Or just ask the user to remove the old datasets by hand?
        # - 2017-02-20

        # return Dataset object
        serializer = DatasetSerializer(dataset, context={'request': request})
        return Response(serializer.data)
Exemplo n.º 34
0
    def post(self, request):
        """Verify a registry API key and link the user to their publisher.

        Checks the submitted ``apiKey`` / ``userId`` against the CKAN
        registry, takes the first organisation the user belongs to as their
        primary organisation, creates or updates the matching ``Publisher``,
        adds the user to the publisher's organisation group and admin group,
        and stores the API key and user id on the user.

        :param request: request whose ``data`` must contain ``apiKey`` and
            ``userId``; ``request.user.organisationuser`` is the user that
            gets updated.
        :returns: ``Response`` with the serialized organisation user.
        :raises exceptions.ParseError: if ``apiKey`` or ``userId`` is missing.
        :raises exceptions.APIException: if a registry lookup fails, the user
            has no organisations, or the primary organisation's
            ``publisher_iati_id`` matches no local ``Organisation``.
        """
        # TODO: If verifying for the first time, OrganisationGroup and
        # OrganisationAdminGroup don't exist yet. - 2016-10-25

        user = request.user.organisationuser
        api_key = request.data.get('apiKey')
        user_id = request.data.get('userId')

        if not api_key or not user_id:
            raise exceptions.ParseError(
                detail="apiKey or userId not specified")

        client = RemoteCKAN(settings.CKAN_URL, apikey=api_key)

        try:
            # Only the success of the call matters; the result is unused.
            client.call_action('user_show', {
                "id": user_id,
                "include_datasets": True,
            })
        except Exception:
            raise exceptions.APIException(
                detail="user with id {} not found".format(user_id))

        try:
            org_list = client.call_action('organization_list_for_user', {})
        except Exception:
            raise exceptions.APIException(
                detail="Can't get organisation list for user {}".format(
                    user_id))

        if not org_list:
            raise exceptions.APIException(
                detail="This user has no organisations yet")

        primary_org_id = org_list[0]['id']

        # Adjacent literals instead of a backslash continuation, which put
        # the next line's indentation inside the message.
        org_show_error = (
            "Can't call organization_show for organization with id "
            "{}".format(primary_org_id))

        try:
            primary_org = client.call_action(
                'organization_show', {"id": primary_org_id})
        except Exception:
            raise exceptions.APIException(detail=org_show_error)

        if not primary_org:
            raise exceptions.APIException(detail=org_show_error)

        primary_org_iati_id = primary_org.get('publisher_iati_id')
        publisher_org = get_or_none(
            Organisation, organisation_identifier=primary_org_iati_id)

        if not publisher_org:
            raise exceptions.APIException(
                detail=(
                    "publisher_iati_id of {} not found in Organisation "
                    "standard, correct this in the IATI registry"
                ).format(primary_org_iati_id))

        # TODO: add organisation foreign key - 2016-10-25
        publisher, publisher_created = Publisher.objects.update_or_create(
            iati_id=primary_org_id,
            publisher_iati_id=primary_org_iati_id,
            defaults={
                "name": primary_org.get('name'),
                "display_name": primary_org.get('display_name'),
                "organisation": publisher_org,
            }
        )

        organisation_group, _ = OrganisationGroup.objects.get_or_create(
            publisher=publisher,
            defaults={
                "name": "{} Organisation Group".format(primary_org.get('name'))
            }
        )
        organisation_group.organisationuser_set.add(user)

        admin_group_defaults = {
            "name": "{} Organisation Admin Group".format(
                primary_org.get('name')
            ),
        }
        if publisher_created:
            # The user is passed as owner only when the publisher is new.
            admin_group_defaults["owner"] = user

        organisation_admin_group, _ = \
            OrganisationAdminGroup.objects.get_or_create(
                publisher=publisher,
                defaults=admin_group_defaults,
            )
        organisation_admin_group.organisationuser_set.add(user)

        user.iati_api_key = api_key
        user.iati_user_id = user_id
        user.save()

        serializer = OrganisationUserSerializer(
            user,
            context={
                'request': request,
            }
        )

        return Response(serializer.data)