Example #1
    def _load_one_csv_file(self, name):
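        # Load a single CSV file into the datastore, batched by organization;
        # returns 0 on success or 1 on the first error encountered.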
        path, csv_name = os.path.split(name)
        assert csv_name.endswith('.csv'), csv_name
        resource_name = csv_name[:-4]
        print resource_name
        chromo = get_chromo(resource_name)
        dataset_type = chromo['dataset_type']
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
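        # a primary key lets us 'upsert' (update or insert); without one the
        # datastore can only append rows with 'insert'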
        lc = LocalCKAN()

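        # csv_data_batch yields batches of records grouped by owning organization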
        for org_name, records in csv_data_batch(name, chromo):
            results = lc.action.package_search(q='type:%s organization:%s' %
                                               (dataset_type, org_name),
                                               rows=2)['results']
            if not results:
                print 'type:%s organization:%s not found!' % (dataset_type,
                                                              org_name)
                return 1
            if len(results) > 1:
                print 'type:%s organization:%s multiple found!' % (
                    dataset_type, org_name)
                return 1
            for r in results[0]['resources']:
                if r['name'] == resource_name:
                    break
            else:
                print 'type:%s organization:%s missing resource:%s' % (
                    dataset_type, org_name, resource_name)
                return 1

            print '-', org_name, len(records)
            lc.action.datastore_upsert(method=method,
                                       resource_id=r['id'],
                                       records=records)
        return 0
Example #2
def rebuild(command_name, csv_files=None, solr_url=None, strict=True):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)
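    # rebuild from scratch: clear the existing Solr index first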

    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
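            # per-file state: prev_org detects organization changes and
            # unmatched is threaded through successive _update_records calls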
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]

            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])
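            # geno is unused below; presumably the lookup validates the dataset type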

            for org_id, records in csv_data_batch(csv_file,
                                                  chromo,
                                                  strict=strict):
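                # escape each value so it is safe to send to Solr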
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records
                ]
                if org_id != prev_org:
                    # reset unmatched state when moving to a new organization
                    unmatched = None
                    prev_org = org_id
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print "    {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(records, org_detail, conn,
                                            resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(org_detail['id'], lc,
                                                     command_name):
                unmatched = _update_records(records, org_detail, conn,
                                            resource_name, unmatched)
                count += len(records)
            print org, count

    print "commit"
    conn.commit()
Example #3
def rebuild(command_name, csv_files=None, solr_url=None):
    """
    Implement rebuild command

    :param csv_files: sequence of paths to .csv files for input
    :type csv_files: sequence of str

    :return: Nothing
    :rtype: None
    """
    clear_index(command_name, solr_url, False)

    conn = solr_connection(command_name, solr_url)
    lc = LocalCKAN()
    if csv_files:
        for csv_file in csv_files:
            print csv_file + ':'
            prev_org = None
            unmatched = None
            firstpart, filename = os.path.split(csv_file)
            assert filename.endswith('.csv')
            resource_name = filename[:-4]

            chromo = get_chromo(resource_name)
            geno = get_geno(chromo['dataset_type'])

            for org_id, records in csv_data_batch(csv_file, chromo):
                records = [
                    dict((k, safe_for_solr(v)) for k, v in row_dict.items())
                    for row_dict in records
                ]
                if org_id != prev_org:
                    unmatched = None
                    prev_org = org_id
                try:
                    org_detail = lc.action.organization_show(id=org_id)
                except NotFound:
                    continue
                print "    {0:s} {1}".format(org_id, len(records))
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
    else:
        for org in lc.action.organization_list():
            count = 0
            org_detail = lc.action.organization_show(id=org)
            unmatched = None
            for resource_name, records in data_batch(
                    org_detail['id'], lc, command_name):
                unmatched = _update_records(
                    records, org_detail, conn, resource_name, unmatched)
                count += len(records)
            print org, count

    print "commit"
    conn.commit()
Example #4
    def _rebuild(self, csv_files=None, solr_url=None, strict=True):
        """
        Implement rebuild command

        :param csv_files: sequence of paths to .csv files for input
        :type csv_files: sequence of str

        :return: Nothing
        :rtype: None
        """
        self._clear_index(solr_url, False)

        conn = solr_connection('ati', solr_url)
        lc = LocalCKAN()
        if csv_files:
            for csv_file in csv_files:
                print csv_file + ':'
                firstpart, filename = os.path.split(csv_file)
                assert filename.endswith('.csv')
                resource_name = filename[:-4]

                chromo = get_chromo(resource_name)
                geno = get_geno(chromo['dataset_type'])
                assert geno.get('target_dataset') == TARGET_DATASET
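                # only load files that belong to this command's target dataset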

                for org_id, records in csv_data_batch(csv_file,
                                                      chromo,
                                                      strict=strict):
                    records = [
                        dict((k, safe_for_solr(v))
                             for k, v in row_dict.items())
                        for row_dict in records
                    ]
                    try:
                        org_detail = lc.action.organization_show(id=org_id)
                    except NotFound:
                        continue
                    print "    {0:s} {1}".format(org_id, len(records))
                    _update_records(records, org_detail, conn)
        else:
            for org_id in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org_id)
                for resource_name, records in data_batch(
                        org_detail['id'], lc, TARGET_DATASET):
                    _update_records(records, org_detail, conn)
                    count += len(records)
                print org_id, count

        print "commit"
        conn.commit()
Example #5
    def _load_one_csv_file(self, name):
        path, csv_name = os.path.split(name)
        assert csv_name.endswith('.csv'), csv_name
        resource_name = csv_name[:-4]
        print resource_name
        chromo = get_chromo(resource_name)
        dataset_type = chromo['dataset_type']
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
        lc = LocalCKAN()

        for org_name, records in csv_data_batch(name, chromo):
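            # include_private=True: private (unpublished) datasets must match too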
            results = lc.action.package_search(
                q='type:%s organization:%s' % (dataset_type, org_name),
                include_private=True,
                rows=2)['results']
            if not results:
                print 'type:%s organization:%s not found!' % (
                    dataset_type, org_name)
                return 1
            if len(results) > 1:
                print 'type:%s organization:%s multiple found!' % (
                    dataset_type, org_name)
                return 1
            for res in results[0]['resources']:
                if res['name'] == resource_name:
                    break
            else:
                print 'type:%s organization:%s missing resource:%s' % (
                    dataset_type, org_name, resource_name)
                return 1

            # '_text' datastore fields expect lists: split each
            # comma-separated CSV value into a real list
            list_fields = [
                f['datastore_id'] for f in chromo['fields']
                if f['datastore_type'] == '_text']
            if list_fields:
                for r in records:
                    for k in list_fields:
                        if not r[k]:
                            r[k] = []
                        else:
                            r[k] = r[k].split(',')

            print '-', org_name, len(records)
            lc.action.datastore_upsert(
                method=method,
                resource_id=res['id'],
                records=records)
        return 0
Example #6
    def _rebuild(self, csv_files=None, solr_url=None):
        """
        Implement rebuild command

        :param csv_files: sequence of paths to .csv files for input
        :type csv_files: sequence of str

        :return: Nothing
        :rtype: None
        """
        self._clear_index(solr_url, False)

        conn = solr_connection('ati', solr_url)
        lc = LocalCKAN()
        if csv_files:
            for csv_file in csv_files:
                print csv_file + ':'
                firstpart, filename = os.path.split(csv_file)
                assert filename.endswith('.csv')
                resource_name = filename[:-4]

                chromo = get_chromo(resource_name)
                geno = get_geno(chromo['dataset_type'])
                assert geno.get('target_dataset') == TARGET_DATASET

                for org_id, records in csv_data_batch(csv_file, chromo):
                    records = [
                        dict((k, safe_for_solr(v))
                             for k, v in row_dict.items())
                        for row_dict in records
                    ]
                    try:
                        org_detail = lc.action.organization_show(id=org_id)
                    except NotFound:
                        continue
                    print "    {0:s} {1}".format(org_id, len(records))
                    _update_records(records, org_detail, conn)
        else:
            for org_id in lc.action.organization_list():
                count = 0
                org_detail = lc.action.organization_show(id=org_id)
                for resource_name, records in data_batch(
                        org_detail['id'], lc, TARGET_DATASET):
                    _update_records(records, org_detail, conn)
                    count += len(records)
                print org_id, count

        print "commit"
        conn.commit()
Example #7
    def _load_one_csv_file(self, name):
        path, csv_name = os.path.split(name)
        assert csv_name.endswith('.csv'), csv_name
        resource_name = csv_name[:-4]
        print resource_name
        chromo = get_chromo(resource_name)
        dataset_type = chromo['dataset_type']
        method = 'upsert' if chromo.get('datastore_primary_key') else 'insert'
        lc = LocalCKAN()

        for org_name, records in csv_data_batch(name, chromo):
            results = lc.action.package_search(
                q='type:%s organization:%s' % (dataset_type, org_name),
                rows=2)['results']
            if not results:
                print 'type:%s organization:%s not found!' % (
                    dataset_type, org_name)
                return 1
            if len(results) > 1:
                print 'type:%s organization:%s multiple found!' % (
                    dataset_type, org_name)
                return 1
            for r in results[0]['resources']:
                if r['name'] == resource_name:
                    break
            else:
                print 'type:%s organization:%s missing resource:%s' % (
                    dataset_type, org_name, resource_name)
                return 1

            print '-', org_name, len(records)
            lc.action.datastore_upsert(
                method=method,
                resource_id=r['id'],
                records=records)
        return 0