예제 #1
0
        CKAN = CKAN_CLIENT(options.host, options.auth)
        ## UP = UPLOADER(CKAN, OUT, options.outdir,options.fromdate)
        UP = Uploader(CKAN, options.ckan_check, HandleClient, cred, OUT,
                      options.outdir, options.fromdate, options.host)
    if (options.identifier):
        list = [options.identifier]
        listtext = 'given by option -i (%d id\'s)' % len(list)
    elif (options.list):
        f = open(options.list, 'r')
        list = f.readlines()
        f.close()
        listtext = 'got from file %s (%d id\'s)' % (options.list, len(list))
    elif (options.community):
        ##UP.purge_group(options.community)
        UP.get_packages(options.community)
        ##HEW??? UP.get_group_list(options.community)
        print "--- Start get community list from CKAN---\n"
        list = UP.package_list.keys()
        ##clist = UP.get_packages(options.community).keys()
        ##print clist
        listtext = 'got from CKAN community %s, stored in file %s-id.list  (%d id\'s)' % (
            options.community, options.community, len(list))
        cf = open('%s-id.list' % options.community, 'w')
        cf.write("\n".join(list))
        cf.close()
        ##print UP.package_list.keys()
    else:
        print 'ERROR : one of the otptions -c COMMUNITY, -i IDENTIFIER or -l LIST must be given'
        sys.exit()
예제 #2
0
def process_delete(OUT, dir, options):
    """Delete datasets listed in ``<dir>/*.del`` from CKAN and the handle server.

    DISABLED: the function prints a warning and returns ``False`` before
    doing any work, because the implementation is not up to date. The guard
    is kept on purpose. Everything after it is unreachable and has only been
    repaired so that it is sound once the guard is removed.

    Each delete file is named ``<community>-<mdprefix>.del`` and holds one
    ``<subset><TAB><oai identifier>`` pair per non-empty line. While a file
    is processed it is renamed to ``<file>.crash-backup``; the backup is
    removed only when every line was handled without an exception.

    Args:
        OUT: object whose ``save_stats(request, subset, mode, stats)`` method
            receives the per-file counters.
        dir: directory that is searched for ``*.del`` files. The name
            shadows the builtin but is kept for keyword-argument callers.
        options: object providing ``iphost``, ``auth``, ``outdir`` and
            ``handle_check`` attributes.

    Returns:
        ``False`` -- always, as long as the guard is in place.
    """
    print("###JM# Don't use this function. It is not up to date.")
    return False

    # ------------------------------------------------------------------
    # Unreachable while the guard above is in place.
    # ------------------------------------------------------------------

    # Function-scope import so the handle client class is in scope here
    # without touching the file's import block.
    from b2handle.handleclient import EUDATHandleClient

    # create CKAN object
    CKAN = CKAN_CLIENT(options.iphost, options.auth)
    # NOTE(review): this three-argument Uploader call is presumably the
    # outdated part -- confirm against the current Uploader signature.
    UP = Uploader(CKAN, OUT, options.outdir)

    # create credentials and the handle client used for all PID operations
    try:
        cred = b2handle.clientcredentials.PIDClientCredentials.load_from_JSON(
            'credentials_11098')
    except Exception as err:
        # The format string has two placeholders; the original supplied
        # only one value, which raised TypeError instead of logging.
        logging.critical(
            "[CRITICAL] %s Could not create credentials from credstore %s" %
            (err, options.handle_check))
        # NOTE(review): `p` is not defined in this function; presumably a
        # module-level option parser -- confirm before enabling.
        p.print_help()
        sys.exit(-1)
    else:
        logging.debug(
            "Create handle client instance to add uuid to handle server")
        # The original only logged this step and never created `client`.
        client = EUDATHandleClient.instantiate_with_credentials(cred)

    for delete_file in glob.glob(dir + '/*.del'):
        community, mdprefix = os.path.splitext(
            os.path.basename(delete_file))[0].split('-')
        backup_file = delete_file + '.crash-backup'

        logging.info('\n## Deleting datasets from community "%s" ##' %
                     (community))

        # get packages from the group in CKAN:
        UP.get_packages(community)

        # read the delete file; `with` closes it on every path
        try:
            with open(delete_file, 'r') as f:
                file_content = f.read()
        except IOError:
            logging.critical("Cannot read data from '{0}'".format(delete_file))
            # Nothing was read and no backup exists, so there is nothing to
            # process or clean up for this file.
            continue

        # rename the file in a crash backup file:
        os.rename(delete_file, backup_file)

        results = {'count': 0, 'ecount': 0, 'tcount': 0, 'time': 0}
        # Stays None when the file holds no data lines (see save_stats below).
        subset = None
        deletestart = time.time()

        # Keep the crash backup if anything goes wrong, so that no deleted
        # metadata information is lost.
        try:
            logging.info('    |   | %-4s | %-50s | %-50s |\n    |%s|' %
                         ('#', 'oai identifier', 'CKAN identifier', "-" * 116))

            for line in file_content.split('\n'):
                # ignore empty lines:
                if not line:
                    continue

                results['tcount'] += 1
                subset, identifier = line.split('\t')

                # dataset name uniquely generated from oai identifier
                uid = uuid.uuid5(uuid.NAMESPACE_DNS,
                                 identifier.encode('ascii', 'replace'))
                ds = str(uid)

                # output:
                logging.info('    | d | %-4d | %-50s | %-50s |' %
                             (results['tcount'], identifier, ds))

                # check against handle server (the original used the
                # undefined names `ds_id` and `pidRecord` here)
                pid = "11098/eudat-jmd_" + ds
                pidRecord = {}
                pidRecord["CHECKSUM"] = client.get_value_from_handle(
                    pid, "CHECKSUM")

                if pidRecord["CHECKSUM"] is None:
                    logging.debug(
                        "        |-> Can not access pid %s to get checksum" %
                        (pid))
                    handlestatus = "new"
                else:
                    logging.debug("        |-> pid %s exists" % (pid))
                    handlestatus = "exist"

                # check against CKAN database
                ckanstatus = UP.check_dataset(ds, None)

                # known neither to the handle server nor to CKAN
                if handlestatus == "new" and ckanstatus == "new":
                    logging.info('        |-> %s' % ('No deletion required'))
                    continue

                if UP.delete(ds, ckanstatus) != 1:
                    logging.info('        |-> %s' % ('Deletion failed'))
                    results['ecount'] += 1
                    continue

                logging.info('        |-> %s' % ('Deletion was successful'))
                results['count'] += 1

                # delete handle (to keep the symmetry between handle and
                # B2FIND server)
                if handlestatus == "exist":
                    logging.info(
                        "        |-> Delete handle %s with checksum %s"
                        % (pid, pidRecord["CHECKSUM"]))
                    try:
                        client.delete_handle(pid)
                    except GenericHandleError as err:
                        logging.error('[ERROR] Unexpected Error: %s' % err)
                    except Exception as err:
                        # Best effort: a failed handle deletion must not
                        # abort the run, but the cause is now logged.
                        logging.error('[ERROR] Unexpected Error: %s' % err)
                else:
                    logging.info(
                        "        |-> No action (deletion) required for handle %s"
                        % pid)
        except Exception:
            logging.error('[ERROR] Unexpected Error')
            logging.error('You find the ids of the deleted metadata in "%s"' %
                          (backup_file))
            raise
        else:
            # all worked fine you can remove the crash-backup file:
            os.remove(backup_file)

        results['time'] = time.time() - deletestart

        # save stats; an empty delete file has no subset to report, and the
        # original raised NameError on the unbound `subset` in that case
        if subset is not None:
            OUT.save_stats(community + '-' + mdprefix, subset, 'd', results)
예제 #3
0
            sys.exit(-1)

        CKAN = CKAN_CLIENT(options.host,options.auth)
        ## UP = UPLOADER(CKAN, OUT, options.outdir,options.fromdate)
        UP = Uploader(CKAN,options.ckan_check,HandleClient,cred,OUT,options.outdir,options.fromdate,options.host)
    if (options.identifier):
             list = [ options.identifier ]
             listtext='given by option -i (%d id\'s)' % len(list) 
    elif (options.list):
             f = open(options.list,'r')
             list = f.readlines()
             f.close()
             listtext='got from file %s (%d id\'s)' % (options.list,len(list)) 
    elif (options.community):
             ##UP.purge_group(options.community)
             UP.get_packages(options.community)
             ##HEW??? UP.get_group_list(options.community)
             print "--- Start get community list from CKAN---\n"
             list = UP.package_list.keys()
             ##clist = UP.get_packages(options.community).keys()
             ##print clist
             listtext='got from CKAN community %s, stored in file %s-id.list  (%d id\'s)' % (options.community,options.community,len(list)) 
             cf = open('%s-id.list' % options.community,'w')
             cf.write("\n".join(list))
             cf.close()
             ##print UP.package_list.keys()
    else:
            print 'ERROR : one of the otptions -c COMMUNITY, -i IDENTIFIER or -l LIST must be given'
            sys.exit()

    ##HEW-Tprint '%s list ' % list
예제 #4
0
def process_delete(OUT, dir, options):
    """Delete datasets listed in ``<dir>/*.del`` from CKAN and the handle server.

    DISABLED: the function prints a warning and returns ``False`` before
    doing any work, because the implementation is not up to date. The guard
    is kept on purpose. Everything after it is unreachable and has only been
    repaired so that it is sound once the guard is removed.

    Each delete file is named ``<community>-<mdprefix>.del`` and holds one
    ``<subset><TAB><oai identifier>`` pair per non-empty line. While a file
    is processed it is renamed to ``<file>.crash-backup``; the backup is
    removed only when every line was handled without an exception.

    Args:
        OUT: object whose ``save_stats(request, subset, mode, stats)`` method
            receives the per-file counters.
        dir: directory that is searched for ``*.del`` files. The name
            shadows the builtin but is kept for keyword-argument callers.
        options: object providing ``iphost``, ``auth``, ``outdir`` and
            ``handle_check`` attributes.

    Returns:
        ``False`` -- always, as long as the guard is in place.
    """
    print("###JM# Don't use this function. It is not up to date.")
    return False

    # ------------------------------------------------------------------
    # Unreachable while the guard above is in place.
    # ------------------------------------------------------------------

    # Function-scope import so the handle client class is in scope here
    # without touching the file's import block.
    from b2handle.handleclient import EUDATHandleClient

    # create CKAN object
    CKAN = CKAN_CLIENT(options.iphost, options.auth)
    # NOTE(review): this three-argument Uploader call is presumably the
    # outdated part -- confirm against the current Uploader signature.
    UP = Uploader(CKAN, OUT, options.outdir)

    # create credentials and the handle client used for all PID operations
    try:
        cred = b2handle.clientcredentials.PIDClientCredentials.load_from_JSON(
            'credentials_11098')
    except Exception as err:
        # The format string has two placeholders; the original supplied
        # only one value, which raised TypeError instead of logging.
        logging.critical(
            "[CRITICAL] %s Could not create credentials from credstore %s" %
            (err, options.handle_check))
        # NOTE(review): `p` is not defined in this function; presumably a
        # module-level option parser -- confirm before enabling.
        p.print_help()
        sys.exit(-1)
    else:
        logging.debug(
            "Create handle client instance to add uuid to handle server")
        # The original only logged this step and never created `client`.
        client = EUDATHandleClient.instantiate_with_credentials(cred)

    for delete_file in glob.glob(dir + '/*.del'):
        community, mdprefix = os.path.splitext(
            os.path.basename(delete_file))[0].split('-')
        backup_file = delete_file + '.crash-backup'

        logging.info('\n## Deleting datasets from community "%s" ##' %
                     (community))

        # get packages from the group in CKAN:
        UP.get_packages(community)

        # read the delete file; `with` closes it on every path
        try:
            with open(delete_file, 'r') as f:
                file_content = f.read()
        except IOError:
            logging.critical("Cannot read data from '{0}'".format(delete_file))
            # Nothing was read and no backup exists, so there is nothing to
            # process or clean up for this file.
            continue

        # rename the file in a crash backup file:
        os.rename(delete_file, backup_file)

        results = {'count': 0, 'ecount': 0, 'tcount': 0, 'time': 0}
        # Stays None when the file holds no data lines (see save_stats below).
        subset = None
        deletestart = time.time()

        # Keep the crash backup if anything goes wrong, so that no deleted
        # metadata information is lost.
        try:
            logging.info('    |   | %-4s | %-50s | %-50s |\n    |%s|' %
                         ('#', 'oai identifier', 'CKAN identifier', "-" * 116))

            for line in file_content.split('\n'):
                # ignore empty lines:
                if not line:
                    continue

                results['tcount'] += 1
                subset, identifier = line.split('\t')

                # dataset name uniquely generated from oai identifier
                uid = uuid.uuid5(uuid.NAMESPACE_DNS,
                                 identifier.encode('ascii', 'replace'))
                ds = str(uid)

                # output:
                logging.info('    | d | %-4d | %-50s | %-50s |' %
                             (results['tcount'], identifier, ds))

                # check against handle server (the original used the
                # undefined names `ds_id` and `pidRecord` here)
                pid = "11098/eudat-jmd_" + ds
                pidRecord = {}
                pidRecord["CHECKSUM"] = client.get_value_from_handle(
                    pid, "CHECKSUM")

                if pidRecord["CHECKSUM"] is None:
                    logging.debug(
                        "        |-> Can not access pid %s to get checksum" %
                        (pid))
                    handlestatus = "new"
                else:
                    logging.debug("        |-> pid %s exists" % (pid))
                    handlestatus = "exist"

                # check against CKAN database
                ckanstatus = UP.check_dataset(ds, None)

                # known neither to the handle server nor to CKAN
                if handlestatus == "new" and ckanstatus == "new":
                    logging.info('        |-> %s' % ('No deletion required'))
                    continue

                if UP.delete(ds, ckanstatus) != 1:
                    logging.info('        |-> %s' % ('Deletion failed'))
                    results['ecount'] += 1
                    continue

                logging.info('        |-> %s' % ('Deletion was successful'))
                results['count'] += 1

                # delete handle (to keep the symmetry between handle and
                # B2FIND server)
                if handlestatus == "exist":
                    logging.info(
                        "        |-> Delete handle %s with checksum %s"
                        % (pid, pidRecord["CHECKSUM"]))
                    try:
                        client.delete_handle(pid)
                    except GenericHandleError as err:
                        logging.error('[ERROR] Unexpected Error: %s' % err)
                    except Exception as err:
                        # Best effort: a failed handle deletion must not
                        # abort the run, but the cause is now logged.
                        logging.error('[ERROR] Unexpected Error: %s' % err)
                else:
                    logging.info(
                        "        |-> No action (deletion) required for handle %s"
                        % pid)
        except Exception:
            logging.error('[ERROR] Unexpected Error')
            logging.error('You find the ids of the deleted metadata in "%s"' %
                          (backup_file))
            raise
        else:
            # all worked fine you can remove the crash-backup file:
            os.remove(backup_file)

        results['time'] = time.time() - deletestart

        # save stats; an empty delete file has no subset to report, and the
        # original raised NameError on the unbound `subset` in that case
        if subset is not None:
            OUT.save_stats(community + '-' + mdprefix, subset, 'd', results)