def task_run_core():
    """Run every requested rank method.

    When the "run" task option is empty it is filled with every name found
    in the rnkMETHOD table.  For each method name the entry ``<name>.cfg``
    is looked up in ``configuration``, the file is parsed, and the function
    named by the ``function`` setting of its ``rank_method`` section is
    fetched from this module's globals and called with the method name.

    Returns True once all methods have been handled.  A failure to open or
    parse a configuration file is reported on stderr and re-raised.
    """
    if not task_get_option("run"):
        task_set_option(
            "run",
            [name[0] for name in run_sql("SELECT name from rnkMETHOD")])

    for key in task_get_option("run"):
        task_sleep_now_if_required(can_stop_too=True)
        write_message("")
        filename = configuration.get(key + '.cfg', '')
        write_message("Getting configuration from file: %s" % filename,
                      verbose=9)
        config = ConfigParser.ConfigParser()
        try:
            # Use a context manager so the file handle is closed even when
            # parsing fails, instead of leaking it until garbage collection.
            with open(filename) as config_file:
                config.readfp(config_file)
        except StandardError:
            write_message("Cannot find configuration file: %s. "
                          "The rankmethod may also not be registered using "
                          "the BibRank Admin Interface." % filename,
                          sys.stderr)
            raise

        # Using the function variable to call the function related to the
        # rank method
        cfg_function = config.get("rank_method", "function")
        func_object = globals().get(cfg_function)
        if func_object:
            func_object(key)
        else:
            write_message("Cannot run method '%s', no function to call"
                          % key)

    return True
def task_submit_check_options():
    """Apply the default selection when no explicit selector was given.

    If none of the options 'all', 'collection', 'field', 'pattern',
    'matching' or 'recids' is present, the 'last' option is set to 1.
    Always returns True.
    """
    selectors = ('all', 'collection', 'field', 'pattern', 'matching',
                 'recids')
    if not any(task_has_option(selector) for selector in selectors):
        task_set_option('last', 1)
    return True
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Handle the task-specific CLI parameters of the bibexport task.

    Stores ``value`` under the "wjob" option for ``-w`` / ``--wjob`` and
    returns True; returns False for every other key.
    """
    if key not in ("-w", "--wjob"):
        return False
    task_set_option("wjob", value)
    return True
def parse_option(key, value, dummy, args):
    """Translate one command line option into task options.

    Raises StandardError when standalone arguments are present.  Boolean
    switches set their option to True; ``-c/--collections`` and
    ``-r/--recids`` accumulate the ids parsed from ``value`` into a set
    stored under the matching option.  Always returns True.
    """
    if args:
        # There should be no standalone arguments for any refextract job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    switches = (
        (('-a', '--new'), 'new'),
        (('-m', '--modified'), 'modified'),
        (('--rebuild',), 'rebuild'),
    )
    id_lists = (
        (('-c', '--collections'), 'collections'),
        (('-r', '--recids'), 'recids'),
    )

    for names, option in switches:
        if key in names:
            task_set_option(option, True)
            return True

    for names, option in id_lists:
        if key in names:
            ids = task_get_option(option)
            if not ids:
                # First occurrence: register the set, then fill it in place.
                ids = set()
                task_set_option(option, ids)
            ids.update(split_cli_ids_arg(value))
            return True

    return True
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Handle the ``documents`` and ``metadata`` CLI switches.

    ``-d/--documents`` stores "documents" under the 'documents' option and
    ``-m/--metadata`` stores "metadata" under the 'metadata' option.
    Returns True when the key was handled, False otherwise.
    """
    if key in ('-d', '--documents'):
        option = 'documents'
    elif key in ('-m', '--metadata'):
        option = 'metadata'
    else:
        return False
    # Each option is stored with its own name as the value.
    task_set_option(option, option)
    return True
def task_submit_check_options():
    """Enable the 'sessions' option when no other action was selected.

    'sessions' is set to True only if every one of the listed options is
    falsy.  Always returns True.
    """
    actions = ('logs', 'tempfiles', 'guests', 'bibxxx', 'documents',
               'cache', 'tasks', 'check-tables', 'optimise-tables')
    if not any(task_get_option(action) for action in actions):
        task_set_option('sessions', True)
    return True
def _task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Interpret one task-specific command line option.

    Handled keys:
      -i/--recid       append the integer record ID to the 'recids' option
      -c/--collection  append the collection name to the 'collections' option
      -k/--taxonomy    store the taxonomy name under the 'taxonomy' option
      -f/--force       set the 'force' option to True

    Returns True when the key was handled.  Returns False when the key is
    not one of the above, or when the supplied value fails validation (an
    error message is written to stderr in that case).
    """
    # Recid option
    if key in ("-i", "--recid"):
        try:
            value = int(value)
        except ValueError:
            bibtask.write_message("The value specified for --recid must be a "
                                  "valid integer, not '%s'." % value,
                                  stream=sys.stderr, verbose=0)
            # Stop here: the value cannot be a record ID, so there is no
            # point in looking it up.
            return False
        if not _recid_exists(value):
            bibtask.write_message("ERROR: '%s' is not a valid record ID."
                                  % value, stream=sys.stderr, verbose=0)
            return False
        recids = bibtask.task_get_option('recids')
        if recids is None:
            recids = []
        recids.append(value)
        bibtask.task_set_option('recids', recids)

    # Collection option
    elif key in ("-c", "--collection"):
        if not _collection_exists(value):
            bibtask.write_message("ERROR: '%s' is not a valid collection."
                                  % value, stream=sys.stderr, verbose=0)
            return False
        collections = bibtask.task_get_option("collections")
        collections = collections or []
        collections.append(value)
        bibtask.task_set_option("collections", collections)

    # Taxonomy option
    elif key in ("-k", "--taxonomy"):
        if not _ontology_exists(value):
            bibtask.write_message("ERROR: '%s' is not a valid taxonomy name."
                                  % value, stream=sys.stderr, verbose=0)
            return False
        bibtask.task_set_option("taxonomy", value)

    elif key in ("-f", "--force"):
        bibtask.task_set_option("force", True)

    else:
        return False

    return True
def cb_parse_option(key, value, opts, args):
    """Translate one command line option into task options.

    Raises StandardError when standalone arguments are present.  For
    ``-i/--id`` the ids parsed from ``value`` are added to the set stored
    under the "recids" option.  Always returns True.
    """
    if args:
        # There should be no standalone arguments
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key not in ("-i", "--id"):
        return True

    selected = task_get_option("recids")
    if not selected:
        # First occurrence: register the set, then fill it in place.
        selected = set()
        task_set_option("recids", selected)
    selected.update(split_cli_ids_arg(value))
    return True
def task_submit_check_options():
    """Enable the 'sessions' option when no action at all was selected.

    'sessions' is set to True only if every one of the listed options
    (including 'sessions' itself) is falsy.  Always returns True.
    """
    actions = ('logs', 'tempfiles', 'guests', 'bibxxx', 'documents',
               'cache', 'tasks', 'check-tables', 'sessions',
               'optimise-tables', 'bibedit-cache')
    nothing_selected = True
    for action in actions:
        if task_get_option(action):
            nothing_selected = False
            break
    if nothing_selected:
        task_set_option('sessions', True)
    return True
def cb_parse_option(key, value, opts, args):
    """Turn a single CLI option into task options.

    Standalone arguments are rejected with a StandardError.  The
    ``-i/--id`` option merges the ids parsed from ``value`` into the set
    kept under the 'recids' option.  The function always returns True.
    """
    if args:
        # There should be no standalone arguments
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    wants_ids = key in ('-i', '--id')
    if wants_ids:
        id_set = task_get_option('recids')
        if not id_set:
            # Nothing stored yet: create the set and register it first.
            id_set = set()
            task_set_option('recids', id_set)
        id_set.update(split_cli_ids_arg(value))
    return True
def task_check_options():
    """Validate the task options before the task is submitted.

    Checks, in order, that:
      * at least one record selector ('new', 'modified', 'recids',
        'collections' or 'reportnumbers') is set;
      * every ticket named in the 'tickets' option is an enabled plugin,
        or 'all-tickets' is set (all enabled plugins are then used);
      * a cataloging system (BIBCATALOG_SYSTEM) is defined.

    On success the 'tickets' option is replaced by the resolved plugins.
    Returns False, after writing a message to stderr, when a check fails;
    returns True otherwise.
    """
    record_selectors = ('new', 'modified', 'recids', 'collections',
                        'reportnumbers')
    if not any(task_get_option(selector) for selector in record_selectors):
        sys.stderr.write('Error: No records specified, you need'
                         ' to specify which records to run on\n')
        return False

    ticket_plugins = {}
    all_plugins, error_messages = load_ticket_plugins()

    if error_messages:
        # We got broken plugins. We alert only for now.
        sys.stderr.write("\n".join(error_messages) + "\n")

    if task_get_option('tickets'):
        # Tickets specified
        for ticket in task_get_option('tickets'):
            if ticket not in all_plugins.get_enabled_plugins():
                # Name the offending plugin in the message itself rather
                # than printing it separately on stdout.
                sys.stderr.write(
                    'Error: plugin %s is broken or does not exist\n'
                    % (ticket,))
                return False
            ticket_plugins[ticket] = all_plugins[ticket]
    elif task_get_option('all-tickets'):
        ticket_plugins = all_plugins.get_enabled_plugins()
    else:
        sys.stderr.write('Error: No tickets specified, you need'
                         ' to specify at least one ticket type to create\n')
        return False

    task_set_option('tickets', ticket_plugins)

    if not BIBCATALOG_SYSTEM:
        sys.stderr.write('Error: no cataloging system defined\n')
        return False

    res = BIBCATALOG_SYSTEM.check_system()
    if res:
        # NOTE(review): a failing system check is only reported; the
        # function still returns True. Confirm whether this should abort.
        sys.stderr.write('Error while checking cataloging system: %s\n'
                         % (res,))
    return True
def _task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Interpret one task-specific command line option.

    Handled keys:
      -i/--recid       append the integer record ID to the 'recids' option
      -c/--collection  append the collection name to the 'collections' option
      -k/--taxonomy    store the taxonomy name under the 'taxonomy' option
      -f/--force       set the 'force' option to True

    Returns True when the key was handled.  Returns False when the key is
    not one of the above, or when the supplied value fails validation (an
    error message is written to stderr in that case).
    """
    # Recid option
    if key in ("-i", "--recid"):
        try:
            value = int(value)
        except ValueError:
            bibtask.write_message("The value specified for --recid must be a "
                                  "valid integer, not '%s'." % value,
                                  stream=sys.stderr, verbose=0)
            # A non-integer can never be a record ID; reject it right away
            # instead of passing the raw string to the existence check.
            return False
        if not _recid_exists(value):
            bibtask.write_message(
                "ERROR: '%s' is not a valid record ID." % value,
                stream=sys.stderr, verbose=0)
            return False
        recids = bibtask.task_get_option('recids')
        if recids is None:
            recids = []
        recids.append(value)
        bibtask.task_set_option('recids', recids)

    # Collection option
    elif key in ("-c", "--collection"):
        if not _collection_exists(value):
            bibtask.write_message(
                "ERROR: '%s' is not a valid collection." % value,
                stream=sys.stderr, verbose=0)
            return False
        collections = bibtask.task_get_option("collections")
        collections = collections or []
        collections.append(value)
        bibtask.task_set_option("collections", collections)

    # Taxonomy option
    elif key in ("-k", "--taxonomy"):
        if not _ontology_exists(value):
            bibtask.write_message(
                "ERROR: '%s' is not a valid taxonomy name." % value,
                stream=sys.stderr, verbose=0)
            return False
        bibtask.task_set_option("taxonomy", value)

    elif key in ("-f", "--force"):
        bibtask.task_set_option("force", True)

    else:
        return False

    return True
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Handle the ``number`` and ``error`` CLI options.

    ``-n/--number`` stores ``value`` under the 'number' option and
    ``-e/--error`` sets the 'error' option to True.  Returns True when the
    key was handled, False when it is unknown.
    """
    if key in ('-n', '--number'):
        task_set_option('number', value)
    elif key in ('-e', '--error'):
        task_set_option('error', True)
    else:
        return False
    return True
def _dbdump_elaborate_submit_param(key, value, dummyopts, dummyargs):
    """Handle the ``number`` and ``output`` task submission parameters.

    ``-n/--number`` stores ``int(value)`` under 'number'; ``-o/--output``
    stores ``value`` under 'output' when it is an existing directory.
    Raises StandardError when the number is not an integer or the output
    is not a directory.  Returns True for a handled key, False otherwise.
    """
    if key in ('-n', '--number'):
        try:
            task_set_option('number', int(value))
        except ValueError:
            raise StandardError("ERROR: Number '%s' is not integer." % value)
        return True

    if key in ('-o', '--output'):
        if not os.path.isdir(value):
            raise StandardError("ERROR: Output '%s' is not a directory." %
                                value)
        task_set_option('output', value)
        return True

    return False
def task_submit_elaborate_specific_parameter(key, _value, _opts, _args):
    """Elaborate specific CLI parameters of oairepositoryupdater.

    Handled keys:
      -r/--report           set the "report" option to 1
      -d/--detailed-report  set the "report" option to 2
      -n/--no-process       set the "no_upload" option to 1
      --notimechange        set the "notimechange" option to 1

    Returns True when the key was handled, False when it is unknown.
    """
    if key in ("-r", "--report"):
        task_set_option("report", 1)
    # This must be part of the same chain as the check above; as a separate
    # ``if`` the report switch would fall through to the final ``else`` and
    # be rejected as unknown.
    elif key in ("-d", "--detailed-report"):
        task_set_option("report", 2)
    elif key in ("-n", "--no-process"):
        task_set_option("no_upload", 1)
    elif key in ("--notimechange",):
        task_set_option("notimechange", 1)
    else:
        return False
    return True
def task_submit_elaborate_specific_parameter(key, _value, _opts, _args):
    """Elaborate specific CLI parameters of oairepositoryupdater.

    Handled keys:
      -r/--report           set the "report" option to 1
      -d/--detailed-report  set the "report" option to 2
      -n/--no-process       set the "no_upload" option to 1
      --notimechange        set the "notimechange" option to 1

    Returns True when the key was handled, False when it is unknown.
    """
    if key in ("-r", "--report"):
        task_set_option("report", 1)
    # Keep all switches in one if/elif chain: with a second standalone
    # ``if`` here, -r/--report reached the trailing ``else`` and the
    # function returned False for a key it had just handled.
    elif key in ("-d", "--detailed-report"):
        task_set_option("report", 2)
    elif key in ("-n", "--no-process"):
        task_set_option("no_upload", 1)
    elif key in ("--notimechange", ):
        task_set_option("notimechange", 1)
    else:
        return False
    return True
def task_submit_esp(key, value, opts, args):
    """Store the task option that corresponds to the given CLI key.

    Handled keys:
      -r/--record      store ``value`` under 'record'
      -m/--mount       store ``value`` under 'mount'
      -d/--delete      store ``value`` under 'delete'
      -D/--delete-all  store ``value`` under 'delete_all'
      --PURGE          set 'purge' to True

    @returns: True, whether or not the key was recognised
    """
    if key in ('-r', '--record'):
        task_set_option('record', value)
    elif key in ('-m', '--mount'):
        task_set_option('mount', value)
    elif key in ('-d', '--delete'):
        task_set_option('delete', value)
    elif key in ('-D', '--delete-all'):
        task_set_option('delete_all', value)
    # One-element tuple (note the comma): without it the test is a substring
    # match against the string '--PURGE', which also accepts e.g. '-P'.
    elif key in ('--PURGE',):
        task_set_option('purge', True)
    return True
def task_submit_elaborate_specific_parameter(key, value, dummy_opts,
                                             dummy_args):
    """Interpret one task-specific command line option.

    Handled keys:
      -T/--tasklet        store ``value`` under the 'tasklet' option
      -a/--argument       parse ``value`` as ``param=value`` and add it to
                          the dict stored under the 'arguments' option
      -l/--list-tasklets  call ``cli_list_tasklets()``

    Returns True when the key was handled.  Returns False for an unknown
    key or for an ``--argument`` value that is not of the form
    ``param=value`` (an error is printed to stderr in that case).
    """
    if key in ('-T', '--tasklet'):
        task_set_option('tasklet', value)
        return True
    elif key in ('-a', '--argument'):
        arguments = task_get_option('arguments', {})
        try:
            # Unpacking a split result that lacks '=' raises ValueError.
            param, param_value = value.split('=', 1)
        except ValueError:
            print('ERROR: an argument must be in the form '
                  'param=value, not "%s"' % (value, ),
                  file=sys.stderr)
            return False
        arguments[param] = param_value
        task_set_option('arguments', arguments)
        return True
    elif key in ('-l', '--list-tasklets'):
        cli_list_tasklets()
        return True
    return False
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Handle the task-specific CLI switches.

    The key is first echoed through ``write_message``.  Each recognised
    switch sets the option of the same name to True:
    ``-o/--overdue-letters``, ``-b/--update-borrowers`` and
    ``-r/--update-requests``.  Returns True for a recognised key and
    False otherwise.
    """
    write_message(key)

    switches = (
        (('-o', '--overdue-letters'), 'overdue-letters'),
        (('-b', '--update-borrowers'), 'update-borrowers'),
        (('-r', '--update-requests'), 'update-requests'),
    )
    for names, option in switches:
        if key in names:
            task_set_option(option, True)
            return True
    return False
def main():
    """Start the tool.

    If the command line arguments are those of the 'manual' mode (a
    ``-v/--verb`` option is present), run a one-time harvest through
    ``getter.harvest`` and return.  Otherwise validate the ``-r/--repository``
    and ``--workflow`` arguments found in ``sys.argv`` and hand over to
    ``task_init`` to set up a BibSched task for automated harvesting.

    Problems with the arguments are reported through ``usage(1, ...)``.
    """
    # Let's try to parse the arguments as used in manual harvesting:
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "o:v:m:p:i:s:f:u:r:c:k:l:w:",
            ["output=",
             "verb=",
             "method=",
             "metadataPrefix=",
             "identifier=",
             "set=",
             "from=",
             "until=",
             "resumptionToken=",
             "certificate=",
             "key=",
             "user=",
             "password=",
             "workflow=",
             ])

        # So everything went smoothly: start harvesting in manual mode
        if len([opt for opt, opt_value in opts
                if opt in ['-v', '--verb']]) > 0:
            # verb parameter is given
            http_param_dict = {}
            method = "POST"
            output = ""
            user = None
            password = None
            cert_file = None
            key_file = None
            sets = []

            # get options and arguments
            for opt, opt_value in opts:
                if opt in ["-v", "--verb"]:
                    http_param_dict['verb'] = opt_value
                elif opt in ["-m", '--method']:
                    if opt_value == "GET" or opt_value == "POST":
                        method = opt_value
                elif opt in ["-p", "--metadataPrefix"]:
                    http_param_dict['metadataPrefix'] = opt_value
                elif opt in ["-i", "--identifier"]:
                    http_param_dict['identifier'] = opt_value
                elif opt in ["-s", "--set"]:
                    sets = opt_value.split()
                elif opt in ["-f", "--from"]:
                    http_param_dict['from'] = opt_value
                elif opt in ["-u", "--until"]:
                    http_param_dict['until'] = opt_value
                elif opt in ["-r", "--resumptionToken"]:
                    http_param_dict['resumptionToken'] = opt_value
                elif opt in ["-o", "--output"]:
                    output = opt_value
                elif opt in ["-c", "--certificate"]:
                    cert_file = opt_value
                elif opt in ["-k", "--key"]:
                    key_file = opt_value
                elif opt in ["-l", "--user"]:
                    user = opt_value
                elif opt in ["-w", "--password"]:
                    password = opt_value
                elif opt in ["-V", "--version"]:
                    print(__revision__)
                    sys.exit(0)
                else:
                    usage(1, "Option %s is not allowed" % opt)

            if len(args) > 0:
                base_url = args[-1]
                if not base_url.lower().startswith('http'):
                    base_url = 'http://' + base_url
                (addressing_scheme, network_location, path, dummy1,
                 dummy2, dummy3) = urllib.parse.urlparse(base_url)
                secure = (addressing_scheme == "https")

                if (cert_file and not key_file) or \
                        (key_file and not cert_file):
                    # Both are needed if one specified
                    usage(1, "You must specify both certificate and key files")

                if password and not user:
                    # User must be specified when password is given
                    usage(1, "You must specify a username")
                elif user and not password:
                    if not secure:
                        sys.stderr.write(
                            "*WARNING* Your password will be sent in clear!\n")
                    try:
                        password = getpass.getpass()
                    except KeyboardInterrupt as error:
                        sys.stderr.write("\n%s\n" % (error,))
                        sys.exit(0)

                getter.harvest(network_location, path, http_param_dict,
                               method, output, sets, secure, user, password,
                               cert_file, key_file)

                sys.stderr.write("Harvesting completed at: %s\n\n" %
                                 time.strftime("%Y-%m-%d %H:%M:%S --> ",
                                               time.localtime()))
                return
            else:
                usage(1, "You must specify the URL to harvest")
        else:
            # verb is not given. We will continue with periodic
            # harvesting. But first check if URL parameter is given:
            # if it is, then warn directly now
            identifier_given = len(
                [opt for opt, opt_value in opts
                 if opt in ['-i', '--identifier']]) > 0
            if (not identifier_given and len(args) > 1) or \
                    (len(args) == 1 and not args[0].isdigit()):
                usage(1, "You must specify the --verb parameter")
    except getopt.error:
        # So could it be that we are using different arguments? Try to
        # start the BibSched task (automated harvesting) and see if it
        # validates
        pass

    # BibSched mode - periodical harvesting
    # Note that the 'help' is common to both manual and automated
    # mode.
    num_of_critical_parameter = 0
    num_of_critical_parameterb = 0
    repositories = []

    for opt in sys.argv[1:]:
        # Compare whole arguments: ``opt in "--repository"`` is a substring
        # test and would also count values such as "r" or "repo".
        if opt in ("-r", "--repository"):
            num_of_critical_parameter += 1
        elif opt == "--workflow":
            num_of_critical_parameterb += 1
        if num_of_critical_parameter > 1 or num_of_critical_parameterb > 1:
            usage(1, "You can't specify twice -r or --workflow")

    if num_of_critical_parameter == 1:
        if "-r" in sys.argv:
            position = sys.argv.index("-r")
        else:
            position = sys.argv.index("--repository")
        # NOTE(review): raises IndexError when the switch is the last
        # argument on the command line.
        repositories = sys.argv[position + 1].split(",")
        if len(repositories) > 1 and \
                ("-i" in sys.argv or "--identifier" in sys.argv):
            usage(1, "It is impossible to harvest an identifier from several "
                     "repositories.")

    if num_of_critical_parameterb == 1:
        position = sys.argv.index("--workflow")
        workflows = sys.argv[position + 1].split(",")
        for workflow_candidate in workflows:
            if workflow_candidate not in registry_workflows:
                usage(1, "The workflow %s doesn't exist." % workflow_candidate)

    if num_of_critical_parameter == 1 and num_of_critical_parameterb == 0:
        # Only repositories given: each must exist and carry a known workflow.
        for name_repository in repositories:
            try:
                oaiharvest_instance = OaiHARVEST.get(
                    OaiHARVEST.name == name_repository).one()
                if oaiharvest_instance.workflows not in registry_workflows:
                    usage(1, "The repository %s doesn't have a valid workflow specified." % name_repository)
            except orm.exc.NoResultFound:
                usage(1, "The repository %s doesn't exist in our database." % name_repository)

    elif num_of_critical_parameter == 1 and num_of_critical_parameterb == 1:
        # Repositories and an explicit workflow: only existence is checked.
        for name_repository in repositories:
            try:
                OaiHARVEST.get(OaiHARVEST.name == name_repository).one()
            except orm.exc.NoResultFound:
                usage(1, "The repository %s doesn't exist in our database." % name_repository)

        print("A workflow has been specified, overriding the repository one.")

    task_set_option("repository", None)
    task_set_option("dates", None)
    task_set_option("workflow", None)
    task_set_option("identifiers", None)
    task_init(authorization_action='runoaiharvest',
              authorization_msg="oaiharvest Task Submission",
              description="""
Harvest records from OAI sources.
Manual vs automatic harvesting:
   - Manual harvesting retrieves records from the specified URL,
     with the specified OAI arguments. Harvested records are displayed
     on the standard output or saved to a file, but are not integrated
     into the repository. This mode is useful to 'play' with OAI
     repositories or to build special harvesting scripts.
   - Automatic harvesting relies on the settings defined in the OAI
     Harvest admin interface to periodically retrieve the repositories
     and sets to harvest. It also take care of harvesting only new or
     modified records. Records harvested using this mode are converted
     and integrated into the repository, according to the settings
     defined in the OAI Harvest admin interface.

Examples:
Manual (single-shot) harvesting mode:
   Save to /tmp/z.xml records from CDS added/modified between 2004-04-01
   and 2004-04-02, in MARCXML:
     $ oaiharvest -vListRecords -f2004-04-01 -u2004-04-02 -pmarcxml -o/tmp/z.xml http://cds.cern.ch/oai2d
Automatic (periodical) harvesting mode:
   Schedule daily harvesting of all repositories defined in OAIHarvest admin:
     $ oaiharvest -s 24h
   Schedule daily harvesting of repository 'arxiv', defined in OAIHarvest admin:
     $ oaiharvest -r arxiv -s 24h
   Harvest in 10 minutes from 'pubmed' repository records added/modified
   between 2005-05-05 and 2005-05-10:
     $ oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m
""",
              help_specific_usage='Manual single-shot harvesting mode:\n'
              ' -o, --output specify output file\n'
              ' -v, --verb OAI verb to be executed\n'
              ' -m, --method http method (default POST)\n'
              ' -p, --metadataPrefix metadata format\n'
              ' -i, --identifier OAI identifier\n'
              ' -s, --set OAI set(s). Whitespace-separated list\n'
              ' -r, --resumptionToken Resume previous harvest\n'
              ' -f, --from from date (datestamp)\n'
              ' -u, --until until date (datestamp)\n'
              ' -c, --certificate path to public certificate (in case of certificate-based harvesting)\n'
              ' -k, --key path to private key (in case of certificate-based harvesting)\n'
              ' -l, --user username (in case of password-protected harvesting)\n'
              ' -w, --password password (in case of password-protected harvesting)\n'
              'Deamon mode (periodical or one-shot harvesting mode):\n'
              ' -r, --repository="repo A"[,"repo B"] \t which repositories to harvest (default=all)\n'
              ' -d, --dates=yyyy-mm-dd:yyyy-mm-dd \t reharvest given dates only\n'
              ' -i, --identifier OAI identifier if wished to run in as a task.\n'
              ' --notify-email-to Receive notifications on given email on successful upload and/or finished harvest.\n'
              ' --workflow specify the workflow to execute.\n'
              ' --create-ticket-in Provide desired ticketing queue to create a ticket in it on upload and/or finished harvest.\n'
              ' Requires a configured ticketing system (BibCatalog).\n',
              specific_params=(
                  "r:i:d:W",
                  ["repository=", "identifier=", "dates=", "workflow=",
                   "notify-email-to=", "create-ticket-in="]),
              task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter,
              task_run_fnc=task_run_core)
def task_submit_elaborate_specific_parameter(key, value, opts, dummy):
    """Elaborate a specific parameter of CLI bibrank.

    Each recognised key updates one or more task options ("cmd", "run",
    "id", "collection", "quick", "flush", "maxmem", "modified",
    "last_updated", ...).  Returns True when the key was handled and False
    when it is unknown.

    Raises StandardError when --add is combined with the delete switch, or
    when the requested --maxmem is below ``base_process_size + 1000``.
    """
    if key in ("-a", "--add"):
        task_set_option("cmd", "add")
        # "-d" is the short delete switch handled further down, so it is
        # checked here alongside the previously tested spellings.
        if ("-x", "") in opts or ("-d", "") in opts or ("--del", "") in opts:
            raise StandardError("--add incompatible with --del")
    elif key in ("--run", "-w"):
        task_set_option("run", value.split(","))
    elif key in ("-r", "--repair"):
        task_set_option("cmd", "repair")
    elif key in ("-E", "--print-extcites"):
        try:
            task_set_option("print-extcites", int(value))
        except (TypeError, ValueError):
            # Missing or non-numeric value: default fallback value
            task_set_option("print-extcites", 10)
        task_set_option("cmd", "print-missing")
    elif key in ("-A", "--author-citations"):
        task_set_option("author-citations", "1")
    elif key in ("-d", "--del"):
        task_set_option("cmd", "del")
    elif key in ("-k", "--check"):
        task_set_option("cmd", "check")
    elif key in ("-S", "--stat"):
        task_set_option("cmd", "stat")
    elif key in ("-i", "--id"):
        task_set_option("id", task_get_option("id") + split_ranges(value))
        task_set_option("last_updated", "")
    elif key in ("-c", "--collection"):
        task_set_option("collection", value)
    elif key in ("-R", "--rebalance"):
        task_set_option("quick", "no")
    elif key in ("-f", "--flush"):
        task_set_option("flush", int(value))
    elif key in ("-M", "--maxmem"):
        task_set_option("maxmem", int(value))
        if task_get_option("maxmem") < base_process_size + 1000:
            raise StandardError(
                "Memory usage should be higher than %d kB" %
                (base_process_size + 1000))
    elif key in ("-m", "--modified"):
        # Expected value looks like: 2002-10-27 13:57:26
        task_set_option("modified", get_date_range(value))
        task_set_option("last_updated", "")
    elif key in ("-l", "--lastupdate"):
        task_set_option("last_updated", "last_updated")
    else:
        return False
    return True
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Map a cleanup CLI switch to the task option(s) it enables.

    Every individual switch sets the option of the same name to True.
    ``-a/--all`` sets 'logs', 'tempfiles', 'guests', 'bibxxx',
    'documents', 'cache' and 'tasks' to True.  Returns True when the key
    was handled, False when it is unknown.
    """
    single_actions = (
        (('-l', '--logs'), 'logs'),
        (('-p', '--tempfiles'), 'tempfiles'),
        (('-g', '--guests'), 'guests'),
        (('-b', '--bibxxx'), 'bibxxx'),
        (('-d', '--documents'), 'documents'),
        (('-c', '--cache'), 'cache'),
        (('-t', '--tasks'), 'tasks'),
        (('-k', '--check-tables'), 'check-tables'),
        (('-o', '--optimise-tables'), 'optimise-tables'),
    )
    for names, option in single_actions:
        if key in names:
            task_set_option(option, True)
            return True

    if key in ('-a', '--all'):
        for option in ('logs', 'tempfiles', 'guests', 'bibxxx',
                       'documents', 'cache', 'tasks'):
            task_set_option(option, True)
        return True

    return False
def task_submit_elaborate_specific_parameter(key, value, opts, dummy_args):
    """Interpret one task-specific command line option.

    Command switches store a command name under the 'cmd' option ('load',
    'dump', 'print', 'rebalance' or 'sort'); ``-M/--methods`` and
    ``-i/--id`` store ``value`` under 'methods' and 'recids'.

    Returns True when the key was handled, False when it is unknown.
    Raises StandardError when the load switch is combined with the dump
    switch, or the rebalance switch with the update-sorting switch.
    """
    # NOTE(review): the long dump switch was spelled '--dump-conf' in the
    # conflict check and '--dump_conf' in the key test, so the two could
    # never agree. Both spellings are accepted in both places until the
    # registered spelling is confirmed.
    dump_switches = ('-d', '--dump_conf', '--dump-conf')

    # Load configuration
    if key in ('-l', '--load-config'):
        task_set_option('cmd', 'load')
        if any((switch, '') in opts for switch in dump_switches):
            raise StandardError(".. conflicting options, please add only one")
    # Dump configuration
    elif key in dump_switches:
        task_set_option('cmd', 'dump')
    # Print sorting methods
    elif key in ('-p', '--print-sorting-methods'):
        task_set_option('cmd', 'print')
    # Rebalance
    elif key in ('-R', '--rebalance'):
        task_set_option('cmd', 'rebalance')
        if ('-S', '') in opts or ('--update-sorting', '') in opts:
            raise StandardError(".. conflicting options, please add only one")
    # Update sorting
    elif key in ('-S', '--update-sorting'):
        task_set_option('cmd', 'sort')
    # Define methods
    elif key in ('-M', '--methods'):
        task_set_option('methods', value)
    # Define records
    elif key in ('-i', '--id'):
        task_set_option('recids', value)
    else:
        return False
    return True
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Map an event-management CLI option to its task option.

    Options that carry a value store it as given, list options store
    ``value.split(',')`` and plain switches store True.  Returns True
    when the key was handled, False when it is unknown.
    """
    # (accepted keys, task option name) for options stored verbatim
    verbatim = (
        (("-n", "--new-event"), "create_event_with_id"),
        (("-r", "--remove-event"), "destroy_event_with_id"),
        (("-l", "--event-label"), "event_name"),
    )
    # options whose value is a comma-separated list
    comma_lists = (
        (("-a", "--args"), "column_headers"),
        (("-c", "--cache-events"), "cache_events"),
    )
    # boolean switches
    switches = (
        (("-S", "--show-events"), "list_events"),
        (("-d", "--dump-config"), "dump_config"),
        (("-e", "--load-config"), "load_config"),
    )

    for names, option in verbatim:
        if key in names:
            task_set_option(option, value)
            return True
    for names, option in comma_lists:
        if key in names:
            task_set_option(option, value.split(','))
            return True
    for names, option in switches:
        if key in names:
            task_set_option(option, True)
            return True
    return False
def task_submit_elaborate_specific_parameter(
        key, value, opts, args):  # pylint: disable-msg=W0613
    """
    Elaborate specific CLI parameters of BibReformat.

    @param key: a parameter key to check
    @param value: a value associated to parameter X{Key}
    @return: True for known X{Key} else False.  False is also returned
        when ``-o/--format`` names a format rejected by
        ``check_validity_input_formats``.
    """
    if key in ("-a", "--all"):
        task_set_option("all", 1)
    elif key in ("--no-missing", ):
        task_set_option("ignore_without", 1)
    elif key in ("-c", "--collection"):
        task_set_option("collection", value)
    elif key in ("-n", "--noprocess"):
        task_set_option("noprocess", 1)
    elif key in ("-f", "--field"):
        task_set_option("field", value)
    elif key in ("-p", "--pattern"):
        task_set_option("pattern", value)
    elif key in ("-m", "--matching"):
        task_set_option("matching", value)
    elif key in ("-o", "--format"):
        input_formats = value.split(',')
        # check the validity of the given output formats
        invalid_format = check_validity_input_formats(input_formats)
        if invalid_format:
            # The exception is raised and caught on purpose so that
            # register_exception is called while an exception is active.
            try:
                raise Exception('Invalid output format.')
            except Exception:  # pylint: disable-msg=W0703
                from invenio.ext.logging import register_exception
                register_exception(
                    prefix="The given output format '%s' is not available or "
                           "is invalid. Please try again" %
                           (invalid_format, ), alert_admin=True)
                # Return an explicit boolean, as documented, instead of None.
                return False
        else:  # every given format is available
            task_set_option("format", value)
    elif key in ("-i", "--id"):
        task_set_option("recids", value)
    else:
        return False
    return True
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Map a cleanup CLI switch to the task option(s) it enables.

    Every individual switch sets the option of the same name to True.
    ``-a/--all`` sets 'logs', 'tempfiles', 'guests', 'bibxxx',
    'documents', 'cache', 'tasks', 'sessions' and 'bibedit-cache' to
    True.  Returns True when the key was handled, False when it is
    unknown.
    """
    single_actions = (
        (('-l', '--logs'), 'logs'),
        (('-p', '--tempfiles'), 'tempfiles'),
        (('-g', '--guests'), 'guests'),
        (('-b', '--bibxxx'), 'bibxxx'),
        (('-d', '--documents'), 'documents'),
        (('-c', '--cache'), 'cache'),
        (('-t', '--tasks'), 'tasks'),
        (('-k', '--check-tables'), 'check-tables'),
        (('-o', '--optimise-tables'), 'optimise-tables'),
        (('-S', '--sessions'), 'sessions'),
        (('--bibedit-cache',), 'bibedit-cache'),
    )
    for names, option in single_actions:
        if key in names:
            task_set_option(option, True)
            return True

    if key in ('-a', '--all'):
        for option in ('logs', 'tempfiles', 'guests', 'bibxxx',
                       'documents', 'cache', 'tasks', 'sessions',
                       'bibedit-cache'):
            task_set_option(option, True)
        return True

    return False
def task_parse_options(key, value, opts, args):  # pylint: disable-msg=W0613
    """Translate one command line option into task options.

    Raises StandardError when standalone arguments are present.  Boolean
    switches set their option to True; every other recognised option adds
    to a set kept under the option of the same name.  Always returns True.
    """
    if args:
        # There should be no standalone arguments for any bibcatalog job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    def _stored_set(name):
        """Return the set stored under ``name``, registering it if absent."""
        current = task_get_option(name)
        if not current:
            current = set()
            task_set_option(name, current)
        return current

    if key in ('-a', '--new'):
        task_set_option('new', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
    elif key in ('-c', '--collections'):
        collection_recids = _stored_set('collections')
        for collection_name in value.split(","):
            collection_recids.update(get_collection_reclist(collection_name))
    elif key in ('-i', '--recids'):
        _stored_set('recids').update(split_ids(value))
    elif key in ('--tickets',):
        ticket_names = _stored_set('tickets')
        for item in value.split(','):
            ticket_names.add(item.strip())
    elif key in ('--all-tickets',):
        task_set_option('all-tickets', True)
    elif key in ('-q', '--query'):
        _stored_set('query').add(value)
    elif key in ('-r', '--reportnumbers'):
        _stored_set('reportnumbers').add(value)

    return True
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Validate, convert and store one command-line parameter.

    The value of selected parameters is converted first (integers for
    --passes/--height/--width/--number/--recid, a float ratio for --aspect,
    a JSON list for --positions) and the result is stored under the task
    option named in ``parameter_mapping``.

    ``opts`` and ``args`` belong to the bibtask callback signature and are
    not used here.

    Returns True if the parameter was stored.  Returns False if the key is
    unknown or its value failed validation (a message is written in the
    latter case).
    """
    ## A dictionary used for mapping CLI parameters to task_option keys
    parameter_mapping = {
        '-p': 'profile_name',
        '-i': 'input',
        '--input': 'input',
        '-o': 'output',
        '--output': 'output',
        '-m': 'mode',
        '--mode': 'mode',
        '--acodec': 'acodec',
        '--vcodec': 'vcodec',
        '--abitrate': 'abitrate',
        '--vbitrate': 'vbitrate',
        '--resolution': 'size',
        '--passes': 'passes',
        '--special': 'special',
        '--specialfirst': 'specialfirst',
        '--specialsecond': 'specialsecond',
        '--width': 'width',
        '--height': 'height',
        '--aspect': 'aspect',
        '--number': 'numberof',
        '--positions': 'positions',
        '-D': 'meta_dump',
        '-W': 'meta_input',
        '--dump': 'meta_dump',
        '--write': 'meta_input',
        '--newjobfolder': 'new_job_folder',
        '--oldjobfolder': 'old_job_folder',
        '--recid': 'recid',
        '--collection': 'collection',
        '--search': 'search'
    }

    ## PASSES ##
    ## Transform 'passes' to integer
    if key == '--passes':
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--passes\' must be an integer')
            return False

    ## HEIGHT, WIDTH ##
    if key in ('--height', '--width'):
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--height\' or \'--width\''
                          ' must be an integer')
            return False

    ## META MODE ##
    ## Only the known dumping formats are accepted
    if key in ('-D', '--dump'):
        if value not in ("ffprobe", "mediainfo", "pbcore"):
            write_message(
                "Unknown dumping format, must be 'ffprobe', 'mediainfo' or 'pbcore'"
            )
            return False

    # NOTE(review): '--substitute' has no entry in parameter_mapping, so it
    # still ends in `return False` below -- confirm whether a mapping entry
    # is missing.
    if key == '--substitute':
        value = True

    ## Transform the 'positions' parameter into a list
    if key == '--positions':
        try:
            parsed = json.loads(value)
        except ValueError:
            write_message('Value of \'--positions\' must be a json list')
            return False
        if not isinstance(parsed, list):
            write_message('Value of \'--positions\' must be a json list')
            return False
        value = parsed

    ## NUMBEROF ##
    ## Transform 'number' to integer
    # Exact comparison: `key in ('--number')` was a substring test on a plain
    # string and also matched keys such as '-n'.
    if key == '--number':
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--number\' must be an integer')
            return False

    ## ASPECT ##
    if key == '--aspect':
        try:
            xasp, yasp = str(value).split(':')
            value = float(xasp) / float(yasp)
        except (ValueError, ZeroDivisionError):
            # Wrong number of ':'-separated parts, non-numeric parts, or a
            # zero denominator.
            write_message('Value of \'--aspect\' must be in \'4:3\' format')
            return False

    ## RECID ##
    if key == '--recid':
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--recid\' must be an integer')
            return False

    ## GENERAL MAPPING ##
    ## For all general or other parameters just use the mapping dictionary
    if key in parameter_mapping:
        task_set_option(parameter_mapping[key], value)
        return True
    return False
def cb_parse_option(key, value, opts, args):
    """Handle one command-line option (bibtask option-parsing callback).

    Stores the option in the task options.  ``-c``/``--collections`` and
    ``-i``/``--id`` accumulate record ids in sets across repeated use.

    Raises StandardError for standalone arguments and for the retired
    options ``--inspire``, ``-r``/``--recids`` and ``-f``.
    Always returns True otherwise, including for unrecognised keys.
    """
    if args:
        # There should be no standalone arguments for any refextract job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--new'):
        task_set_option('new', True)
        task_set_option('no-overwrite', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
        task_set_option('no-overwrite', True)
    elif key == '--inspire':
        msg = """The --inspire option does not exist anymore.
Please set the config variable CFG_INSPIRE_SITE instead."""
        raise StandardError(msg)
    elif key in ('--kb-reports', ):
        task_set_option('kb-reports', value)
    elif key in ('--kb-journals', ):
        task_set_option('kb-journals', value)
    elif key in ('--kb-journals-re', ):
        task_set_option('kb-journals-re', value)
    elif key in ('--kb-authors', ):
        task_set_option('kb-authors', value)
    elif key in ('--kb-books', ):
        task_set_option('kb-books', value)
    elif key in ('--kb-conferences', ):
        task_set_option('kb-conferences', value)
    elif key in ('--create-ticket', ):
        task_set_option('create-ticket', True)
    elif key in ('--no-overwrite', ):
        task_set_option('no-overwrite', True)
    elif key in ('--arxiv', ):
        # One-element tuple: `key in ('--arxiv')` was a substring test on a
        # plain string and matched any fragment of '--arxiv'.
        task_set_option('arxiv', True)
    elif key in ('-c', '--collections'):
        collections = task_get_option('collections')
        if not collections:
            collections = set()
            task_set_option('collections', collections)
        for v in value.split(","):
            collections.update(perform_request_search(c=v))
    elif key in ('-i', '--id'):
        recids = task_get_option('recids')
        if not recids:
            recids = set()
            task_set_option('recids', recids)
        recids.update(split_ids(value))
    elif key in ('-r', '--recids'):
        msg = """The --recids has been renamed.
please use --id for specifying recids."""
        raise StandardError(msg)
    elif key == '-f':
        msg = """refextract is now used to run in daemon mode only.
If you would like to run reference extraction on a standalone PDF file,
please use "docextract file.pdf\""""
        raise StandardError(msg)

    return True
def task_run_core():
    """Run the task selected by the 'mode' task option.

    Reads every argument through ``_topt`` and delegates to the encoder,
    the frame extractor, the metadata tools, the batch engine or the
    directory-watching daemon.  Returns whatever the delegate returns, True
    after a metadata dump, and None when the mode is not recognised or when
    'meta' mode is given neither 'meta_dump' nor 'meta_input'.
    """
    mode = _topt('mode')

    # NOTE(review): this function reads the option keys 'profile',
    # 'new_job_dir' and 'old_job_dir'; an option parser visible in this
    # file stores 'profile_name', 'new_job_folder' and 'old_job_folder'.
    # Confirm the keys agree.

    #---------------#
    # Encoding Mode #
    #---------------#
    if mode == 'encode':
        return encode.encode_video(
            input_file=_topt('input'),
            output_file=_topt('output'),
            acodec=_topt('acodec'),
            vcodec=_topt('vcodec'),
            abitrate=_topt('abitrate'),
            vbitrate=_topt('vbitrate'),
            resolution=_topt('size'),
            passes=_topt('passes'),
            special=_topt('special'),
            specialfirst=_topt('specialfirst'),
            specialsecond=_topt('specialsecond'),
            width=_topt('width'),
            height=_topt('height'),
            aspect=_topt('aspect'),
            profile=_topt('profile'),
        )

    #-----------------#
    # Extraction Mode #
    #-----------------#
    if mode == 'extract':
        return extract.extract_frames(
            input_file=_topt('input'),
            output_file=_topt('output'),
            size=_topt('size'),
            positions=_topt('positions'),
            numberof=_topt('numberof'),
            width=_topt('width'),
            height=_topt('height'),
            aspect=_topt('aspect'),
            profile=_topt('profile'),
        )

    #---------------#
    # Metadata Mode #
    #---------------#
    if mode == 'meta':
        if _topt('meta_dump') is not None:
            metadata.dump_metadata(
                input_file=_topt('input'),
                output_file=_topt('output'),
                meta_type=_topt('meta_dump'),
            )
            return True
        if _topt('meta_input') is not None:
            # Anything that is not already a dict is treated as the name of
            # a file to decode; the decoded result replaces the option.
            if type(_topt('meta_input')) is not dict:
                decoded = metadata.json_decode_file(
                    filename=_topt('meta_input'))
                task_set_option('meta_input', decoded)
            return metadata.write_metadata(
                input_file=_topt('input'),
                output_file=_topt('output'),
                metadata=_topt('meta_input'),
            )
        return None

    #------------#
    # Batch Mode #
    #------------#
    if mode == 'batch':
        if _topt('collection'):
            return batch_engine.create_update_jobs_by_collection(
                batch_template_file=_topt('input'),
                collection=_topt('collection'),
                job_directory=_topt('new_job_dir',
                                    CFG_BIBENCODE_DAEMON_DIR_NEWJOBS))
        if _topt('search'):
            return batch_engine.create_update_jobs_by_search(
                pattern=_topt('search'),
                batch_template_file=_topt('input'),
                job_directory=_topt('new_job_dir',
                                    CFG_BIBENCODE_DAEMON_DIR_NEWJOBS))
        return batch_engine.process_batch_job(_topt('input'))

    #-------------#
    # Daemon Mode #
    #-------------#
    if mode == 'daemon':
        return daemon.watch_directory(
            _topt('new_job_dir', CFG_BIBENCODE_DAEMON_DIR_NEWJOBS),
            _topt('old_job_dir', CFG_BIBENCODE_DAEMON_DIR_OLDJOBS))

    return None
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Elaborate specific cli parameters for oaiharvest.

    Stores the parsed value of a recognised parameter in the task options.
    ``opts`` and ``args`` belong to the bibtask callback signature and are
    not used here.

    Returns True if ``key`` was recognised, False otherwise.
    Raises StandardError when the --dates value cannot be parsed or the
    --notify-email-to value is not a valid e-mail address.
    """
    if key in ("-r", "--repository"):
        task_set_option('repository', get_repository_names(value))
    elif key == "--workflow":
        # Exact comparison: `key in ("--workflow")` was a substring test on
        # a plain string and also matched keys such as "-w".
        # NOTE(review): workflow names go through get_repository_names();
        # confirm that helper is the intended parser for them.
        task_set_option('workflow', get_repository_names(value))
    elif key in ("-i", "--identifier"):
        task_set_option('identifiers', get_identifier_names(value))
    elif key in ("-d", "--dates"):
        task_set_option('dates', get_dates(value))
        # A value was supplied but nothing usable came back from get_dates().
        if value is not None and task_get_option("dates") is None:
            raise StandardError("Date format not valid.")
    elif key in ("--notify-email-to",):
        if email_valid_p(value):
            task_set_option('notify-email-to', value)
        else:
            raise StandardError("E-mail format not valid.")
    elif key in ("--create-ticket-in",):
        task_set_option('create-ticket-in', value)
    else:
        return False
    return True
def main():
    """Start the tool.

    If the command line arguments are those of the 'manual' mode, then
    starts a manual one-time harvesting. Else trigger a BibSched task
    for automated harvesting based on the OAIHarvest admin settings.

    In manual mode (a verb is given) the function harvests once and
    returns.  Otherwise it validates the -r/--repository and --workflow
    arguments found in sys.argv, resets the task options and hands over to
    task_init().
    """
    # Let's try to parse the arguments as used in manual harvesting:
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "o:v:m:p:i:s:f:u:r:c:k:l:w:", [
                "output=",
                "verb=",
                "method=",
                "metadataPrefix=",
                "identifier=",
                "set=",
                "from=",
                "until=",
                "resumptionToken=",
                "certificate=",
                "key=",
                "user=",
                "password=",
                "workflow=",
            ])

        # So everything went smoothly: start harvesting in manual mode
        if any(opt in ('-v', '--verb') for opt, dummy in opts):
            # verb parameter is given
            http_param_dict = {}
            method = "POST"
            output = ""
            user = None
            password = None
            cert_file = None
            key_file = None
            sets = []

            # get options and arguments
            for opt, opt_value in opts:
                if opt in ["-v", "--verb"]:
                    http_param_dict['verb'] = opt_value
                elif opt in ["-m", '--method']:
                    # Any other method is silently ignored (POST is kept).
                    if opt_value == "GET" or opt_value == "POST":
                        method = opt_value
                elif opt in ["-p", "--metadataPrefix"]:
                    http_param_dict['metadataPrefix'] = opt_value
                elif opt in ["-i", "--identifier"]:
                    http_param_dict['identifier'] = opt_value
                elif opt in ["-s", "--set"]:
                    sets = opt_value.split()
                elif opt in ["-f", "--from"]:
                    http_param_dict['from'] = opt_value
                elif opt in ["-u", "--until"]:
                    http_param_dict['until'] = opt_value
                elif opt in ["-r", "--resumptionToken"]:
                    http_param_dict['resumptionToken'] = opt_value
                elif opt in ["-o", "--output"]:
                    output = opt_value
                elif opt in ["-c", "--certificate"]:
                    cert_file = opt_value
                elif opt in ["-k", "--key"]:
                    key_file = opt_value
                elif opt in ["-l", "--user"]:
                    user = opt_value
                elif opt in ["-w", "--password"]:
                    password = opt_value
                elif opt in ["-V", "--version"]:
                    # NOTE(review): getopt above is not told about -V or
                    # --version, so this branch looks unreachable.
                    print(__revision__)
                    sys.exit(0)
                else:
                    usage(1, "Option %s is not allowed" % opt)

            if len(args) > 0:
                base_url = args[-1]
                if not base_url.lower().startswith('http'):
                    base_url = 'http://' + base_url
                (addressing_scheme, network_location, path,
                 dummy1, dummy2, dummy3) = urllib.parse.urlparse(base_url)
                secure = (addressing_scheme == "https")

                if (cert_file and not key_file) or \
                        (key_file and not cert_file):
                    # Both are needed if one specified
                    usage(1, "You must specify both certificate and key files")

                if password and not user:
                    # User must be specified when password is given
                    usage(1, "You must specify a username")
                elif user and not password:
                    if not secure:
                        sys.stderr.write(
                            "*WARNING* Your password will be sent in clear!\n")
                    try:
                        password = getpass.getpass()
                    except KeyboardInterrupt as error:
                        sys.stderr.write("\n%s\n" % (error, ))
                        sys.exit(0)

                getter.harvest(network_location, path, http_param_dict,
                               method, output, sets, secure, user, password,
                               cert_file, key_file)

                sys.stderr.write(
                    "Harvesting completed at: %s\n\n" %
                    time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
                return
            else:
                usage(1, "You must specify the URL to harvest")
        else:
            # verb is not given. We will continue with periodic
            # harvesting. But first check if URL parameter is given:
            # if it is, then warn directly now
            identifier_given = any(opt in ('-i', '--identifier')
                                   for opt, dummy in opts)
            if (not identifier_given and len(args) > 1) or \
                    (len(args) == 1 and not args[0].isdigit()):
                usage(1, "You must specify the --verb parameter")

    except getopt.error:
        # So could it be that we are using different arguments? Try to
        # start the BibSched task (automated harvesting) and see if it
        # validates
        pass

    # BibSched mode - periodical harvesting
    # Note that the 'help' is common to both manual and automated
    # mode.

    num_of_critical_parameter = 0
    num_of_critical_parameterb = 0
    repositories = []

    for opt in sys.argv[1:]:
        # Exact matches only: the former `opt in "--repository"` was a
        # substring test, so an argument value such as "repo" was counted
        # as a second -r and rejected.
        if opt in ("-r", "--repository"):
            num_of_critical_parameter += 1
        elif opt == "--workflow":
            num_of_critical_parameterb += 1
        if num_of_critical_parameter > 1 or num_of_critical_parameterb > 1:
            usage(1, "You can't specify twice -r or --workflow")

    if num_of_critical_parameter == 1:
        if "-r" in sys.argv:
            position = sys.argv.index("-r")
        else:
            position = sys.argv.index("--repository")
        repositories = sys.argv[position + 1].split(",")
        if len(repositories) > 1 and \
                ("-i" in sys.argv or "--identifier" in sys.argv):
            usage(
                1, "It is impossible to harvest an identifier from several "
                "repositories.")

    if num_of_critical_parameterb == 1:
        position = sys.argv.index("--workflow")
        workflows = sys.argv[position + 1].split(",")
        for workflow_candidate in workflows:
            if workflow_candidate not in registry_workflows:
                usage(1, "The workflow %s doesn't exist." % workflow_candidate)

    if num_of_critical_parameter == 1 and num_of_critical_parameterb == 0:
        # Repositories only: each one must exist and carry a known workflow.
        for name_repository in repositories:
            try:
                oaiharvest_instance = OaiHARVEST.get(
                    OaiHARVEST.name == name_repository).one()
                if oaiharvest_instance.workflows not in registry_workflows:
                    usage(
                        1,
                        "The repository %s doesn't have a valid workflow specified."
                        % name_repository)
            except orm.exc.NoResultFound:
                usage(
                    1, "The repository %s doesn't exist in our database." %
                    name_repository)

    elif num_of_critical_parameter == 1 and num_of_critical_parameterb == 1:
        # Repositories plus an explicit workflow: they only need to exist.
        for name_repository in repositories:
            try:
                OaiHARVEST.get(OaiHARVEST.name == name_repository).one()
            except orm.exc.NoResultFound:
                usage(
                    1, "The repository %s doesn't exist in our database." %
                    name_repository)

        print("A workflow has been specified, overriding the repository one.")

    task_set_option("repository", None)
    task_set_option("dates", None)
    task_set_option("workflow", None)
    task_set_option("identifiers", None)
    task_init(
        authorization_action='runoaiharvest',
        authorization_msg="oaiharvest Task Submission",
        description="""
Harvest records from OAI sources.
Manual vs automatic harvesting:
   - Manual harvesting retrieves records from the specified URL,
     with the specified OAI arguments. Harvested records are displayed
     on the standard output or saved to a file, but are not integrated
     into the repository. This mode is useful to 'play' with OAI
     repositories or to build special harvesting scripts.
   - Automatic harvesting relies on the settings defined in the OAI
     Harvest admin interface to periodically retrieve the repositories
     and sets to harvest. It also take care of harvesting only new or
     modified records. Records harvested using this mode are converted
     and integrated into the repository, according to the settings
     defined in the OAI Harvest admin interface.

Examples:
Manual (single-shot) harvesting mode:
   Save to /tmp/z.xml records from CDS added/modified between 2004-04-01
   and 2004-04-02, in MARCXML:
     $ oaiharvest -vListRecords -f2004-04-01 -u2004-04-02 -pmarcxml -o/tmp/z.xml http://cds.cern.ch/oai2d
Automatic (periodical) harvesting mode:
   Schedule daily harvesting of all repositories defined in OAIHarvest admin:
     $ oaiharvest -s 24h
   Schedule daily harvesting of repository 'arxiv', defined in OAIHarvest admin:
     $ oaiharvest -r arxiv -s 24h
   Harvest in 10 minutes from 'pubmed' repository records added/modified
   between 2005-05-05 and 2005-05-10:
     $ oaiharvest -r pubmed -d 2005-05-05:2005-05-10 -t 10m
""",
        help_specific_usage='Manual single-shot harvesting mode:\n'
        ' -o, --output specify output file\n'
        ' -v, --verb OAI verb to be executed\n'
        ' -m, --method http method (default POST)\n'
        ' -p, --metadataPrefix metadata format\n'
        ' -i, --identifier OAI identifier\n'
        ' -s, --set OAI set(s). Whitespace-separated list\n'
        ' -r, --resumptionToken Resume previous harvest\n'
        ' -f, --from from date (datestamp)\n'
        ' -u, --until until date (datestamp)\n'
        ' -c, --certificate path to public certificate (in case of certificate-based harvesting)\n'
        ' -k, --key path to private key (in case of certificate-based harvesting)\n'
        ' -l, --user username (in case of password-protected harvesting)\n'
        ' -w, --password password (in case of password-protected harvesting)\n'
        'Deamon mode (periodical or one-shot harvesting mode):\n'
        ' -r, --repository="repo A"[,"repo B"] \t which repositories to harvest (default=all)\n'
        ' -d, --dates=yyyy-mm-dd:yyyy-mm-dd \t reharvest given dates only\n'
        ' -i, --identifier OAI identifier if wished to run in as a task.\n'
        ' --notify-email-to Receive notifications on given email on successful upload and/or finished harvest.\n'
        ' --workflow specify the workflow to execute.\n'
        ' --create-ticket-in Provide desired ticketing queue to create a ticket in it on upload and/or finished harvest.\n'
        ' Requires a configured ticketing system (BibCatalog).\n',
        specific_params=("r:i:d:W", [
            "repository=", "identifier=", "dates=", "workflow=",
            "notify-email-to=", "create-ticket-in="
        ]),
        task_submit_elaborate_specific_parameter_fnc=
        task_submit_elaborate_specific_parameter,
        task_run_fnc=task_run_core)
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """Elaborate specific cli parameters for oaiharvest.

    Stores the parsed value of a recognised parameter in the task options.
    ``opts`` and ``args`` belong to the bibtask callback signature and are
    not used here.

    Returns True if ``key`` was recognised, False otherwise.
    Raises StandardError when the --dates value cannot be parsed or the
    --notify-email-to value is not a valid e-mail address.
    """
    if key in ("-r", "--repository"):
        task_set_option('repository', get_repository_names(value))
    elif key == "--workflow":
        # Exact comparison: `key in ("--workflow")` was a substring test on
        # a plain string and also matched keys such as "-w".
        # NOTE(review): workflow names go through get_repository_names();
        # confirm that helper is the intended parser for them.
        task_set_option('workflow', get_repository_names(value))
    elif key in ("-i", "--identifier"):
        task_set_option('identifiers', get_identifier_names(value))
    elif key in ("-d", "--dates"):
        task_set_option('dates', get_dates(value))
        # A value was supplied but nothing usable came back from get_dates().
        if value is not None and task_get_option("dates") is None:
            raise StandardError("Date format not valid.")
    elif key in ("--notify-email-to", ):
        if email_valid_p(value):
            task_set_option('notify-email-to', value)
        else:
            raise StandardError("E-mail format not valid.")
    elif key in ("--create-ticket-in", ):
        task_set_option('create-ticket-in', value)
    else:
        return False
    return True
def task_submit_check_options():
    """
    Act on the submitted options.

    NOTE: Depending on the parameters, either "BibSched mode" or plain
    straightforward execution mode is entered.

    The event-management, dump_config and load_config options are executed
    immediately and end the process with sys.exit(0).  Only "cache_events"
    continues in BibSched mode: it stores the event names to cache in the
    'keyevents' and 'customevents' task options and returns True.

    Returns False when no known option is present or when no valid event
    name was given, which means that the --help should be displayed.
    """
    if task_has_option("create_event_with_id"):
        print(
            webstat.create_customevent(task_get_option("create_event_with_id"),
                                       task_get_option("event_name", None),
                                       task_get_option("column_headers", [])))
        sys.exit(0)

    elif task_has_option("destroy_event_with_id"):
        print(
            webstat.destroy_customevent(
                task_get_option("destroy_event_with_id")))
        sys.exit(0)

    elif task_has_option("list_events"):
        events = webstat._get_customevents()
        if len(events) == 0:
            print("There are no custom events available.")
        else:
            print("Available custom events are:\n")
            print('\n'.join([
                x[0] + ": " +
                ("No descriptive name" if x[1] is None else str(x[1]))
                for x in events
            ]))
        sys.exit(0)

    elif task_has_option("cache_events"):
        events = task_get_option("cache_events")

        write_message(str(events), verbose=9)

        # Start from empty selections so that an empty first event name
        # ends in the "no events" path instead of an unbound local.
        keyevents_to_cache = []
        customevents_to_cache = []

        if events[0] == 'ALL':
            keyevents_to_cache = list(webstat.KEYEVENT_REPOSITORY.keys())
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif events[0] == 'KEYEVENTS':
            keyevents_to_cache = list(webstat.KEYEVENT_REPOSITORY.keys())

        elif events[0] == 'CUSTOMEVENTS':
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif events[0] != '':
            keyevents_to_cache = [
                x for x in webstat.KEYEVENT_REPOSITORY.keys() if x in events
            ]
            # Compare the event id (first column), not the whole row, with
            # the requested names.
            customevents_to_cache = [
                x[0] for x in webstat._get_customevents() if x[0] in events
            ]

        # Control so that we have valid event names
        if len(keyevents_to_cache + customevents_to_cache) == 0:
            # Oops, no events. Abort and display help.
            return False
        else:
            task_set_option("keyevents", keyevents_to_cache)
            task_set_option("customevents", customevents_to_cache)

        return True

    elif task_has_option("dump_config"):
        print("""\
[general]
visitors_box = True
search_box = True
record_box = True
bibsched_box = True
basket_box = True
apache_box = True
uptime_box = True

[webstat_custom_event_1]
name = baskets
param1 = action
param2 = basket
param3 = user

[apache_log_analyzer]
profile = nil
nb-histogram-items-to-print = 20
exclude-ip-list = ("137.138.249.162")
home-collection = "Atlantis Institute of Fictive Science"
search-interface-url = "/?"
detailed-record-url = "/%s/"
search-engine-url = "/search?"
search-engine-url-old-style = "/search.py?"
basket-url = "/yourbaskets/"
add-to-basket-url = "/yourbaskets/add"
display-basket-url = "/yourbaskets/display"
display-public-basket-url = "/yourbaskets/display_public"
alert-url = "/youralerts/"
display-your-alerts-url = "/youralerts/list"
display-your-searches-url = "/youralerts/display"
""" % CFG_SITE_RECORD)
        sys.exit(0)

    elif task_has_option("load_config"):
        try:
            from ConfigParser import ConfigParser
        except ImportError:
            # The module is named configparser on Python 3.
            from configparser import ConfigParser
        conf = ConfigParser()
        conf.read(CFG_WEBSTAT_CONFIG_PATH)
        for section in conf.sections():
            if section[:21] == "webstat_custom_event_":
                cols = []
                name = ""
                for option, value in conf.items(section):
                    if option == "name":
                        name = value
                    if option[:5] == "param":
                        # add the column name in its position
                        # NOTE(review): only the last character is read, so
                        # 'param10' and above are not positioned correctly.
                        index = int(option[-1]) - 1
                        while len(cols) <= index:
                            cols.append("")
                        cols[index] = value
                if name:
                    res = run_sql(
                        "SELECT COUNT(id) FROM staEVENT WHERE id = %s",
                        (name, ))
                    if res[0][0] == 0:
                        # name does not exist, create customevent
                        webstat.create_customevent(name, name, cols)
                    else:
                        # name already exists, update customevent
                        webstat.modify_customevent(name, cols=cols)
        sys.exit(0)

    else:
        # False means that the --help should be displayed
        return False
def _dbdump_elaborate_submit_param(key, value, dummyopts, dummyargs): """ Elaborate task submission parameter. See bibtask's task_submit_elaborate_specific_parameter_fnc for help. """ if key in ('-n', '--number'): try: task_set_option('number', int(value)) except ValueError: raise StandardError("ERROR: Number '%s' is not integer." % (value,)) elif key in ('-o', '--output'): if os.path.isdir(value): task_set_option('output', value) else: raise StandardError("ERROR: Output '%s' is not a directory." % \ (value,)) elif key in ('--params',): task_set_option('params', value) elif key in ('--compress',): if not CFG_PATH_GZIP or (CFG_PATH_GZIP and not os.path.exists(CFG_PATH_GZIP)): raise StandardError("ERROR: No valid gzip path is defined.") task_set_option('compress', True) elif key in ('-S', '--slave'): if value: task_set_option('slave', value) else: if not CFG_DATABASE_SLAVE: raise StandardError("ERROR: No slave defined.") task_set_option('slave', CFG_DATABASE_SLAVE) elif key in ('--dump-on-slave-helper', ): task_set_option('dump_on_slave_helper_mode', True) elif key in ('--ignore-tables',): try: re.compile(value) task_set_option("ignore_tables", value) except re.error: raise StandardError, "ERROR: Passed string: '%s' is not a valid regular expression." % value elif key in ('--disable-workers', ): task_set_option('disable_workers', True) else: return False return True
def cb_parse_option(key, value, opts, args):
    """Handle one command-line option (bibtask option-parsing callback).

    Stores the option in the task options.  ``-c``/``--collections`` and
    ``-r``/``--recids`` accumulate record ids in sets across repeated use.

    Raises StandardError if any standalone argument is present.
    Always returns True otherwise, including for unrecognised keys.
    """
    if args:
        # There should be no standalone arguments for any refextract job
        # This will catch args before the job is shipped to Bibsched
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--new'):
        task_set_option('new', True)
        task_set_option('no-overwrite', True)
    elif key in ('-m', '--modified'):
        task_set_option('modified', True)
        task_set_option('no-overwrite', True)
    elif key in ('-i', '--inspire', ):
        task_set_option('inspire', True)
    elif key in ('--kb-reports', ):
        task_set_option('kb-reports', value)
    elif key in ('--kb-journals', ):
        task_set_option('kb-journals', value)
    elif key in ('--kb-journals-re', ):
        task_set_option('kb-journals-re', value)
    elif key in ('--kb-authors', ):
        task_set_option('kb-authors', value)
    elif key in ('--kb-books', ):
        task_set_option('kb-books', value)
    elif key in ('--kb-conferences', ):
        task_set_option('kb-conferences', value)
    elif key in ('--create-ticket', ):
        task_set_option('create-ticket', True)
    elif key in ('--no-overwrite', ):
        task_set_option('no-overwrite', True)
    elif key in ('--arxiv', ):
        # One-element tuple: `key in ('--arxiv')` was a substring test on a
        # plain string and matched any fragment of '--arxiv'.
        task_set_option('arxiv', True)
    elif key in ('-c', '--collections'):
        collections = task_get_option('collections')
        if not collections:
            collections = set()
            task_set_option('collections', collections)
        for v in value.split(","):
            collections.update(perform_request_search(c=v))
    elif key in ('-r', '--recids'):
        recids = task_get_option('recids')
        if not recids:
            recids = set()
            task_set_option('recids', recids)
        recids.update(split_ids(value))

    return True
def task_run_core():
    """
    Main daemon task.

    Loads the plugins and rules, collects the record ids each rule applies
    to, and then walks over all those records batch by batch: rules whose
    check plugin is flagged "batch" receive all matching records of the
    batch at once, the remaining rules are run record by record.  Amended
    records are queued for upload (holding pen or replace), invalid records
    get a ticket, and finally the last-run time of every rule is updated.

    Returns True once the run has completed.
    """
    plugins = load_plugins()
    rules = load_rules(plugins)
    # The loaded plugins are also stored in the task options.
    task_set_option('plugins', plugins)
    recids_for_rules = get_recids_for_rules(rules)

    # Union of all record ids, and the rule names split by whether their
    # check plugin works on batches or on single records.
    all_recids = intbitset([])
    single_rules = set()
    batch_rules = set()
    for rule_name, rule_recids in recids_for_rules.iteritems():
        all_recids.union_update(rule_recids)
        if plugins[rules[rule_name]["check"]]["batch"]:
            batch_rules.add(rule_name)
        else:
            single_rules.add(rule_name)

    # Amended records waiting to be uploaded, by upload target.
    records_to_upload_holdingpen = []
    records_to_upload_replace = []
    for batch in iter_batches(all_recids, CFG_BATCH_SIZE):

        # First hand each batch rule the records of this batch it applies to
        for rule_name in batch_rules:
            rule = rules[rule_name]
            rule_recids = recids_for_rules[rule_name]
            task_sleep_now_if_required(can_stop_too=True)
            records = []
            for i, record_id, record in batch:
                if record_id in rule_recids:
                    records.append(record)
            if len(records):
                check_records(rule, records)

        # Then run them through normal rules
        for i, record_id, record in batch:
            # i is presumably the running position of the record within
            # all_recids -- TODO confirm against iter_batches
            progress_percent = int(float(i) / len(all_recids) * 100)
            task_update_progress("Processing record %s/%s (%i%%)." %
                                 (i, len(all_recids), progress_percent))
            write_message("Processing record %s" % record_id)

            for rule_name in single_rules:
                rule = rules[rule_name]
                rule_recids = recids_for_rules[rule_name]
                task_sleep_now_if_required(can_stop_too=True)
                if record_id in rule_recids:
                    check_record(rule, record)

            if record.amended:
                if record.holdingpen:
                    records_to_upload_holdingpen.append(record)
                else:
                    records_to_upload_replace.append(record)

            if not record.valid:
                submit_ticket(record, record_id)

        # Flush a queue as soon as it holds at least one batch worth of records
        if len(records_to_upload_holdingpen) >= CFG_BATCH_SIZE:
            upload_amendments(records_to_upload_holdingpen, True)
            records_to_upload_holdingpen = []
        if len(records_to_upload_replace) >= CFG_BATCH_SIZE:
            upload_amendments(records_to_upload_replace, False)
            records_to_upload_replace = []

    ## In case there are still some remaining amended records
    if records_to_upload_holdingpen:
        upload_amendments(records_to_upload_holdingpen, True)
    if records_to_upload_replace:
        upload_amendments(records_to_upload_replace, False)

    # Update the database with the last time the rules were run
    for rule in rules.keys():
        update_rule_last_run(rule)

    return True
def task_submit_check_options():
    """
    Act on the submitted options.

    NOTE: Depending on the parameters, either "BibSched mode" or plain
    straightforward execution mode is entered.

    The event-management, dump_config and load_config options are executed
    immediately and end the process with sys.exit(0).  Only "cache_events"
    continues in BibSched mode: it stores the event names to cache in the
    'keyevents' and 'customevents' task options and returns True.

    Returns False when no known option is present or when no valid event
    name was given, which means that the --help should be displayed.
    """
    if task_has_option("create_event_with_id"):
        print(webstat.create_customevent(task_get_option("create_event_with_id"),
                                         task_get_option("event_name", None),
                                         task_get_option("column_headers", [])))
        sys.exit(0)

    elif task_has_option("destroy_event_with_id"):
        print(webstat.destroy_customevent(task_get_option("destroy_event_with_id")))
        sys.exit(0)

    elif task_has_option("list_events"):
        events = webstat._get_customevents()
        if len(events) == 0:
            print("There are no custom events available.")
        else:
            print("Available custom events are:\n")
            print('\n'.join([x[0] + ": " +
                             ("No descriptive name" if x[1] is None else str(x[1]))
                             for x in events]))
        sys.exit(0)

    elif task_has_option("cache_events"):
        events = task_get_option("cache_events")

        write_message(str(events), verbose=9)

        # Start from empty selections so that an empty first event name
        # ends in the "no events" path instead of an unbound local.
        keyevents_to_cache = []
        customevents_to_cache = []

        if events[0] == 'ALL':
            keyevents_to_cache = list(webstat.KEYEVENT_REPOSITORY.keys())
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif events[0] == 'KEYEVENTS':
            keyevents_to_cache = list(webstat.KEYEVENT_REPOSITORY.keys())

        elif events[0] == 'CUSTOMEVENTS':
            customevents_to_cache = [x[0] for x in webstat._get_customevents()]

        elif events[0] != '':
            keyevents_to_cache = [x for x in webstat.KEYEVENT_REPOSITORY.keys()
                                  if x in events]
            # Compare the event id (first column), not the whole row, with
            # the requested names.
            customevents_to_cache = [x[0] for x in webstat._get_customevents()
                                     if x[0] in events]

        # Control so that we have valid event names
        if len(keyevents_to_cache + customevents_to_cache) == 0:
            # Oops, no events. Abort and display help.
            return False
        else:
            task_set_option("keyevents", keyevents_to_cache)
            task_set_option("customevents", customevents_to_cache)

        return True

    elif task_has_option("dump_config"):
        print("""\
[general]
visitors_box = True
search_box = True
record_box = True
bibsched_box = True
basket_box = True
apache_box = True
uptime_box = True

[webstat_custom_event_1]
name = baskets
param1 = action
param2 = basket
param3 = user

[apache_log_analyzer]
profile = nil
nb-histogram-items-to-print = 20
exclude-ip-list = ("137.138.249.162")
home-collection = "Atlantis Institute of Fictive Science"
search-interface-url = "/?"
detailed-record-url = "/%s/"
search-engine-url = "/search?"
search-engine-url-old-style = "/search.py?"
basket-url = "/yourbaskets/"
add-to-basket-url = "/yourbaskets/add"
display-basket-url = "/yourbaskets/display"
display-public-basket-url = "/yourbaskets/display_public"
alert-url = "/youralerts/"
display-your-alerts-url = "/youralerts/list"
display-your-searches-url = "/youralerts/display"
""" % CFG_SITE_RECORD)
        sys.exit(0)

    elif task_has_option("load_config"):
        try:
            from ConfigParser import ConfigParser
        except ImportError:
            # The module is named configparser on Python 3.
            from configparser import ConfigParser
        conf = ConfigParser()
        conf.read(CFG_WEBSTAT_CONFIG_PATH)
        for section in conf.sections():
            if section[:21] == "webstat_custom_event_":
                cols = []
                name = ""
                for option, value in conf.items(section):
                    if option == "name":
                        name = value
                    if option[:5] == "param":
                        # add the column name in its position
                        # NOTE(review): only the last character is read, so
                        # 'param10' and above are not positioned correctly.
                        index = int(option[-1]) - 1
                        while len(cols) <= index:
                            cols.append("")
                        cols[index] = value
                if name:
                    res = run_sql("SELECT COUNT(id) FROM staEVENT WHERE id = %s",
                                  (name, ))
                    if res[0][0] == 0:
                        # name does not exist, create customevent
                        webstat.create_customevent(name, name, cols)
                    else:
                        # name already exists, update customevent
                        webstat.modify_customevent(name, cols=cols)
        sys.exit(0)

    else:
        # False means that the --help should be displayed
        return False
def task_parse_options(key, val, *_):
    """Handle one command-line option (bibtask option-parsing callback).

    ``--all`` resets the last-run time of every listed rule; the other
    switches store their setting in the task options.  ``--dry-run``
    switches off both uploads and tickets.

    Returns True for a recognised key.
    Raises StandardError for any other key.
    """
    if key in ("--all", "-a"):
        for rule_name in val.split(","):
            reset_rule_last_run(rule_name)
        return True

    if key in ("--enable-rules", "-e"):
        enabled = set(val.split(","))
        task_set_option("enabled_rules", enabled)
        return True

    if key in ("--id", "-i"):
        record_ids = intbitset(split_cli_ids_arg(val))
        task_set_option("record_ids", record_ids)
        return True

    if key in ("--queue", "-q"):
        task_set_option("queue", val)
        return True

    if key in ("--no-tickets", "-t"):
        task_set_option("no_tickets", True)
        return True

    if key in ("--no-upload", "-b"):
        task_set_option("no_upload", True)
        return True

    if key in ("--dry-run", "-n"):
        # A dry run neither uploads amendments nor files tickets.
        task_set_option("no_upload", True)
        task_set_option("no_tickets", True)
        return True

    if key in ("--config", "-c"):
        task_set_option("config", val)
        return True

    raise StandardError("Error: Unrecognised argument '%s'." % key)
def task_submit_elaborate_specific_parameter(key, value, opts, args):
    """
    Given the string key it checks its meaning, eventually using the value.
    Usually it fills some key in the options dict.
    It must return True if it has elaborated the key, False, if it doesn't
    know that key.
    eg:
    if key in ('-n', '--number'):
        self.options['number'] = value
        return True
    return False

    Before being stored, the values of some options are validated and
    converted: '--passes', '--height', '--width', '--number' and '--recid'
    to int, '--positions' from a JSON list to a list, '--aspect' from
    'X:Y' to the float X / Y. '-D' / '--dump' only accepts 'ffprobe',
    'mediainfo' or 'pbcore'.

    @param key: the CLI option name
    @param value: the raw value given for the option
    @param opts: unused here
    @param args: unused here
    @return: True if the option was stored with task_set_option, False if
        the key is unknown or its value failed validation
    """
    ## A dictionary used for mapping CLI parameters to task_option keys
    parameter_mapping = {
        '-p': 'profile_name',
        '-i': 'input',
        '--input': 'input',
        '-o': 'output',
        '--output': 'output',
        '-m': 'mode',
        '--mode': 'mode',
        '--acodec': 'acodec',
        '--vcodec': 'vcodec',
        '--abitrate': 'abitrate',
        '--vbitrate': 'vbitrate',
        '--resolution': 'size',
        '--passes': 'passes',
        '--special': 'special',
        '--specialfirst': 'specialfirst',
        '--specialsecond': 'specialsecond',
        '--width': 'width',
        '--height': 'height',
        '--aspect': 'aspect',
        '--number': 'numberof',
        '--positions': 'positions',
        '-D': 'meta_dump',
        '-W': 'meta_input',
        '--dump': 'meta_dump',
        '--write': 'meta_input',
        '--newjobfolder': 'new_job_folder',
        '--oldjobfolder': 'old_job_folder',
        '--recid': 'recid',
        '--collection': 'collection',
        '--search': 'search'
    }

    ## PASSES ##
    ## Transform 'passes' to integer
    if key in ('--passes',):
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--passes\' must be an integer')
            return False

    ## HEIGHT, WIDTH ##
    if key in ('--height', '--width'):
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--height\' or \'--width\''
                          ' must be an integer')
            return False

    ## META MODE ##
    ## Only the known dumping formats are accepted
    if key in ('-D', '--dump'):
        if value not in ("ffprobe", "mediainfo", "pbcore"):
            write_message("Unknown dumping format, must be 'ffprobe', 'mediainfo' or 'pbcore'")
            return False

    # NOTE(review): '--substitute' has no entry in parameter_mapping, so the
    # value set here is never stored and the function still returns False
    # for that key -- confirm whether a mapping entry is missing.
    if key in ('--substitute',):
        value = True

    ## Transform the 'positions' parameter into a list
    if key in ('--positions',):
        try:
            parsed = json.loads(value)
        except ValueError:
            write_message(
                'Value of \'--positions\' must be a json list'
            )
            return False
        if not isinstance(parsed, list):
            write_message(
                'Value of \'--positions\' must be a json list'
            )
            return False
        value = parsed

    ## NUMBEROF ##
    ## Transform 'number' to integer
    # The trailing comma matters: without it the right-hand side is a plain
    # string and 'in' becomes a substring test (e.g. '-n' would match).
    if key in ('--number',):
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--number\' must be an integer')
            return False

    ## ASPECT ##
    if key in ('--aspect',):
        try:
            xasp, yasp = str(value).split(':')
            value = float(xasp) / float(yasp)
        except (ValueError, ZeroDivisionError):
            # ValueError: not exactly two ':'-separated numbers;
            # ZeroDivisionError: a zero denominator such as '4:0'.
            write_message('Value of \'--aspect\' must be in \'4:3\' format')
            return False

    ## RECID ##
    if key in ('--recid',):
        try:
            value = int(value)
        except ValueError:
            write_message('Value of \'--recid\' must be an integer')
            return False

    ## GENERAL MAPPING ##
    ## For all general or other parameters just use the mapping dictionary
    if key in parameter_mapping:
        task_set_option(parameter_mapping[key], value)
        return True
    return False
def task_submit_elaborate_specific_parameter(key, value, opts, args):  # pylint: disable-msg=W0613
    """
    Elaborate specific CLI parameters of BibReformat.

    @param key: a parameter key to check
    @param value: a value associated to parameter X{Key}
    @param opts: unused here
    @param args: unused here
    @return: True for known X{Key} else False. False is also returned when
        '-o' / '--format' is given a format that
        check_validity_input_formats reports as invalid.
    """
    if key in ("-a", "--all"):
        task_set_option("all", 1)
    elif key in ("--no-missing", ):
        task_set_option("ignore_without", 1)
    elif key in ("-c", "--collection"):
        task_set_option("collection", value)
    elif key in ("-n", "--noprocess"):
        task_set_option("noprocess", 1)
    elif key in ("-f", "--field"):
        task_set_option("field", value)
    elif key in ("-p", "--pattern"):
        task_set_option("pattern", value)
    elif key in ("-m", "--matching"):
        task_set_option("matching", value)
    elif key in ("-o", "--format"):
        input_formats = value.split(',')
        # check the validity of the given output formats
        invalid_format = check_validity_input_formats(input_formats)
        if invalid_format:
            # The exception is raised and caught on the spot so that
            # register_exception is called from inside an except block.
            try:
                raise Exception('Invalid output format.')
            except Exception:  # pylint: disable-msg=W0703
                from invenio.ext.logging import register_exception
                register_exception(
                    prefix="The given output format '%s' is not available or "
                           "is invalid. Please try again" % (invalid_format, ),
                    alert_admin=True)
                # Explicit False (instead of a bare return) keeps the
                # return type in line with the documented contract.
                return False
        else:
            # every given format is available
            task_set_option("format", value)
    elif key in ("-i", "--id"):
        task_set_option("recids", value)
    else:
        return False
    return True
def task_submit_check_options():
    """
    Checks the tasks arguments for validity

    The general checks (ffmpeg configuration, mode, input, output) run
    first, followed by the checks specific to the selected mode. A message
    is emitted for the first failing check.

    Side effects visible here: in encode mode 'passes' is set to 1 when it
    was not given; in meta mode a 'meta_input' that is not an existing path
    is replaced by its JSON-decoded value; in daemon mode the task param
    'task_specific_name' is set to 'daemon'; in batch mode the template
    file is decoded and printed.

    @return: True if every check went fine, False otherwise
    """
    #----------------#
    # General Checks #
    #----------------#

    ## FFMPEG CONFIGURATION ##
    ## The status of ffmpeg should be checked before a task is submitted
    ## There is a minimum configuration that ffmpeg must be compiled with
    ## See bibencode_utils and bibencode_config
    config = check_ffmpeg_configuration()
    if config:
        ## Prints missing configuration
        string = ''.join('\t' + item + '\n' for item in config)
        write_message(
            "FFmpeg options are missing. Please recompile and add:\n" + string)
        return False

    ## MODE ##
    ## Check if the mode is a valid
    mode = _topt('mode')
    if mode is None:
        write_message('You have to specify a mode using \'-m MODE\'')
        return False
    if mode not in CFG_BIBENCODE_VALID_MODES:
        write_message('%s is not a valid mode. Use one of %s'
                      % (mode, CFG_BIBENCODE_VALID_MODES))
        return False

    ## INPUT ##
    ## Check if the input file is given and if it exists
    ## You should always use an absolute path to the file
    if mode in ('encode', 'extract', 'meta', 'batch'):
        if _topt('input') is None:
            write_message('You must specify an input file using \'-i FILE\'')
            return False
        else:
            if not os.path.exists(_topt('input')):
                print(("The file %s does not exist" % _topt('input')))
                return False

    ## OUTPUT ##
    ## Check if the output file is given
    ## You should always use an absolute path to the file
    if mode in ('encode', 'extract', 'meta'):
        if _topt('output') is None:
            write_message('No output file is given. Please specify with'
                          ' \'-o NAME\'')
            return False

    #---------------#
    # Encoding Mode #
    #---------------#
    if mode == 'encode':

        ## PROFILE ##
        ## Check for a valid profile if this is given
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_encoding_profiles():
                write_message(
                    '%s not found in %s'
                    % (_topt('profile_name'), CFG_BIBENCODE_PROFILES_ENCODING))
                return False

        ## AUDIOCODEC ##
        ## Checks if the audiocodec is one of the predefined
        if _topt('acodec') is not None:
            if _topt('acodec') not in CFG_BIBENCODE_FFMPEG_VALID_ACODECS:
                write_message(
                    '%s is not a valid audiocodec.\nAvailable codecs: %s'
                    % (_topt('acodec'), CFG_BIBENCODE_FFMPEG_VALID_ACODECS))
                return False

        ## VIDEOCODEC ##
        ## Checks if the videocodec is one of the predefined
        if _topt('vcodec') is not None:
            if _topt('vcodec') not in CFG_BIBENCODE_FFMPEG_VALID_VCODECS:
                write_message(
                    '%s is not a valid videocodec.\nAvailable codecs: %s'
                    % (_topt('vcodec'), CFG_BIBENCODE_FFMPEG_VALID_VCODECS))
                return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG preset
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        ' \'WxH\' notation or one of these values:\n%s'
                        % (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES))
                    return False

        ## Check if both a size and vertical or horizontal resolution
        if (_topt('width') or _topt('height')) and _topt('size'):
            write_message('Options \'width\' and \'height\' can not be '
                          'combined with \'resolution\'')
            return False

        ## PASSES ##
        ## If a number of passes is given, it should be either 1 or 2.
        ## You could do an infinite number of passes with ffmpeg,
        ## But it will almost never make a difference above 2 passes.
        ## So, we currently only support 2 passes.
        if _topt('passes') is not None:
            if _topt('passes') not in (1, 2):
                write_message('The number of passes must be either 1 or 2')
                return False
        else:
            ## No number of passes given: default to a single pass
            task_set_option('passes', 1)

        ## BITRATE ##
        ## Check if the given bitrate is either 1000 sth. or 1000k sth.
        ## (placeholders: no validation is performed on the bitrates)
        if _topt('abitrate') is not None:
            pass
        if _topt('vbitrate') is not None:
            pass

    #-----------------#
    # Extraction Mode #
    #-----------------#
    elif mode == 'extract':

        ## PROFILE ##
        ## If a profile is given, check its validity
        if _topt('profile_name') is not None:
            if _topt('profile_name') not in get_extract_profiles():
                write_message(
                    '%s not found in %s'
                    % (_topt('profile_name'), CFG_BIBENCODE_PROFILES_EXTRACT))
                return False

        ## You cannot give both a number and specific positions
        ## !!! Think about allowing both -> First extract by number,
        ## !!! then additionally the specific positions
        if (((_topt('numberof') is not None)
             and (_topt('positions') is not None))
                or ((_topt('numberof') is None)
                    and (_topt('positions') is None))):
            write_message('Please specify either a number of frames to '
                          'take or specific positions')
            return False

        ## SIZE ##
        ## Checks if the size is either WxH or an FFMPEG specific value
        if _topt('size') is not None:
            if not CFG_BIBENCODE_FFMPEG_RE_VALID_SIZE.match(_topt('size')):
                if _topt('size') not in CFG_BIBENCODE_FFMPEG_VALID_SIZES:
                    ## Same wording as the encode mode message
                    write_message(
                        '%s is not a valid frame size.\nEither use the'
                        ' \'WxH\' notation or one of these values:\n%s'
                        % (_topt('size'), CFG_BIBENCODE_FFMPEG_VALID_SIZES))
                    return False

    #---------------#
    # Metadata Mode #
    #---------------#
    elif mode == 'meta':

        ## You have to give exactly one meta suboption
        if not _xor(_topt('meta_input'), _topt('meta_dump')):
            write_message("You can either dump or write metadata")
            return False

        ## METADATA INPUT ##
        if _topt('meta_input') is not None:
            ## Check if this is either a filename (that should exist)
            ## or if this a jsonic metadata notation
            if not os.path.exists(_topt('meta_input')):
                try:
                    metadict = json.loads(_topt('meta_input'))
                    task_set_option('meta_input', metadict)
                except ValueError:
                    write_message(
                        'The value %s of the \'--meta\' parameter is '
                        'neither a valid filename nor a jsonic dict'
                        % _topt('meta_input'))
                    return False

    #------------#
    # Batch Mode #
    #------------#
    elif mode == 'batch':
        if _topt('collection') and _topt('search'):
            write_message('You can either use \'search\' or \'collection\'')
            return False
        elif _topt('collection'):
            template = json_decode_file(_topt('input'))
            print('\n')
            print("#---------------------------------------------#")
            print("# YOU ARE ABOUT TO UPDATE A WHOLE COLLECTION  #")
            print("#---------------------------------------------#")
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')
        elif _topt('search'):
            template = json_decode_file(_topt('input'))
            message = "# YOU ARE ABOUT TO UPDATE RECORDS MATCHING '%s' #" % _topt(
                'search')
            print('\n')
            print(("#" + "-" * (len(message) - 2) + "#"))
            print(message)
            print(("#" + "-" * (len(message) - 2) + "#"))
            print('\n')
            print('The selected template file contains:')
            pprint(template)
            print('\n')

    #-------------#
    # Daemon Mode #
    #-------------#
    elif mode == 'daemon':
        task_set_task_param('task_specific_name', 'daemon')
        ## You can either give none or both folders, but not only one
        if _xor(_topt('new_job_folder'), _topt('old_job_folder')):
            write_message('When specifying folders for the daemon mode, you '
                          'have to specify both the folder for the new jobs '
                          'and the old ones')
            return False

    ## If every check went fine
    return True
def _task_submit_elaborate_specific_parameter(key, value, opts, args):
    """
    Given the string key it checks its meaning, eventually using the
    value. Usually, it fills some key in the options dict.
    It must return True if it has elaborated the key, False, if it
    doesn't know that key.

    @param key: the CLI option name
    @param value: the raw value given for the option; only used by
        '--record-ids' / '-i', where it is split on ',' into a list of
        strings
    @param opts: unused here
    @param args: unused here
    @return: True if the key was recognised and stored, False otherwise
    """
    if key in ("--update-personid",):
        bibtask.task_set_option("update_personid", True)
    elif key in ("--record-ids", '-i'):
        # Drop a single leading "=" only (presumably left over from
        # "-i=1,2" style invocations). Testing for "=" anywhere and then
        # cutting the first character would eat a digit of the first id.
        if value.startswith("="):
            value = value[1:]
        value = value.split(",")
        bibtask.task_set_option("record_ids", value)
    elif key in ("--all-records",):
        bibtask.task_set_option("all_records", True)
    elif key in ("--disambiguate",):
        bibtask.task_set_option("disambiguate", True)
    elif key in ("--merge",):
        bibtask.task_set_option("merge", True)
    elif key in ("--from-scratch",):
        bibtask.task_set_option("from_scratch", True)
    else:
        return False

    return True