def parse_option(key, value, dummy, args):
    """Record one refextract command line option in the task options.

    The switches -a/--new, -m/--modified and --rebuild are stored as True
    under 'new', 'modified' and 'rebuild'.  The switches -c/--collections
    and -r/--recids add the result of split_cli_ids_arg(value) to a set
    kept under 'collections' or 'recids'; the set is created and stored
    first when the option is still empty.

    Raises StandardError when ``args`` is non-empty.  Otherwise always
    returns True, including for keys that are not handled here.
    """
    # Standalone arguments are never valid for a refextract job; rejecting
    # them here catches them before the job is shipped to Bibsched.
    if args:
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    flag_switches = (
        (('-a', '--new'), 'new'),
        (('-m', '--modified'), 'modified'),
        (('--rebuild',), 'rebuild'),
    )
    for switches, option_name in flag_switches:
        if key in switches:
            task_set_option(option_name, True)
            return True

    id_switches = (
        (('-c', '--collections'), 'collections'),
        (('-r', '--recids'), 'recids'),
    )
    for switches, option_name in id_switches:
        if key in switches:
            stored_ids = task_get_option(option_name)
            if not stored_ids:
                # First occurrence of the switch: register a fresh set so
                # later occurrences keep accumulating into the same object.
                stored_ids = set()
                task_set_option(option_name, stored_ids)
            stored_ids.update(split_cli_ids_arg(value))
            return True

    return True
示例#2
0
def parse_option(key, value, dummy, args):
    """Store a single refextract command line option.

    Flag switches (-a/--new, -m/--modified, --rebuild) set the matching
    task option to True.  Id switches (-c/--collections, -r/--recids)
    merge split_cli_ids_arg(value) into the set held by the matching task
    option, creating and storing that set when the option is empty.

    Raises StandardError if any standalone argument is present in
    ``args``; returns True in every other case.
    """
    if args:
        # There should be no standalone arguments for any refextract job;
        # failing here catches them before the job is shipped to Bibsched.
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--new'):
        task_set_option('new', True)
        return True
    if key in ('-m', '--modified'):
        task_set_option('modified', True)
        return True
    if key == '--rebuild':
        task_set_option('rebuild', True)
        return True

    # Remaining switches accumulate ids under a named option.
    target_option = None
    if key in ('-c', '--collections'):
        target_option = 'collections'
    elif key in ('-r', '--recids'):
        target_option = 'recids'

    if target_option is not None:
        accumulated = task_get_option(target_option)
        if not accumulated:
            accumulated = set()
            task_set_option(target_option, accumulated)
        accumulated.update(split_cli_ids_arg(value))

    return True
示例#3
0
def task_parse_options(key, val, *_):
    """ Must be defined for bibtask to create a task

    Stores the option selected by ``key`` with task_set_option:
      * --all/-a and --enable-rules/-e keep the comma-separated ``val`` as
        a set under 'reset_rules' / 'enabled_rules';
      * --id/-i keeps intbitset(split_cli_ids_arg(val)) under 'record_ids';
      * --queue/-q, --ticket-creation-policy/-p and --config/-c keep
        ``val`` unchanged;
      * --no-tickets/-t, --no-upload/-b and --notimechange set their flag
        to True, and --dry-run/-n sets both 'no_upload' and 'no_tickets'.

    Raises StandardError for any other key; returns True otherwise.
    """
    name_set_switches = (
        (("--all", "-a"), "reset_rules"),
        (("--enable-rules", "-e"), "enabled_rules"),
    )
    for switches, option_name in name_set_switches:
        if key in switches:
            task_set_option(option_name, set(val.split(",")))
            return True

    if key in ("--id", "-i"):
        task_set_option("record_ids", intbitset(split_cli_ids_arg(val)))
        return True

    verbatim_switches = (
        (("--queue", "-q"), "queue"),
        (("--ticket-creation-policy", "-p"), "ticket_creation_policy"),
        (("--config", "-c"), "config"),
    )
    for switches, option_name in verbatim_switches:
        if key in switches:
            task_set_option(option_name, val)
            return True

    # Each switch turns on one or more boolean options, in the listed order.
    flag_switches = (
        (("--no-tickets", "-t"), ("no_tickets",)),
        (("--no-upload", "-b"), ("no_upload",)),
        (("--dry-run", "-n"), ("no_upload", "no_tickets")),
        (("--notimechange", ), ("notimechange",)),
    )
    for switches, option_names in flag_switches:
        if key in switches:
            for option_name in option_names:
                task_set_option(option_name, True)
            return True

    raise StandardError("Error: Unrecognised argument '%s'." % key)
示例#4
0
def cb_parse_option(key, value, opts, args):
    """Handle one command line option of the task.

    Only -i/--id is acted upon: the result of split_cli_ids_arg(value) is
    merged into the set stored under the 'recids' task option, which is
    created and stored first when the option is still empty.

    Raises StandardError when standalone arguments are present in
    ``args``; returns True in every other case.
    """
    # There should be no standalone arguments
    if args:
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key not in ('-i', '--id'):
        return True

    collected = task_get_option('recids')
    if not collected:
        collected = set()
        task_set_option('recids', collected)
    collected.update(split_cli_ids_arg(value))
    return True
示例#5
0
def cb_parse_option(key, value, opts, args):
    """Parse a single command line option.

    For -i/--id the ids returned by split_cli_ids_arg(value) are added to
    the set kept in the 'recids' task option; an empty option is replaced
    by a freshly stored set beforehand.  Other keys are ignored.

    Raises StandardError if ``args`` holds any standalone argument and
    returns True otherwise.
    """
    if args:
        # There should be no standalone arguments
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    selects_ids = key in ('-i', '--id')
    if selects_ids:
        id_set = task_get_option('recids')
        if not id_set:
            id_set = set()
            task_set_option('recids', id_set)
        id_set.update(split_cli_ids_arg(value))

    return True
示例#6
0
def parse_option(key, value, opts, args):
    """
    Elaborate task submission parameter.

    -a/--all stores True under the 'all' task parameter.  -i/--id stores
    the 'recids' task parameter (an empty set when it had no value yet)
    and then adds the result of split_cli_ids_arg(value) to that set.

    Raises StandardError when ``args`` contains standalone arguments;
    returns True in every other case.
    """
    # There should be no standalone arguments
    if args:
        raise StandardError("Error: Unrecognised argument '%s'." % args[0])

    if key in ('-a', '--all'):
        task_set_task_param('all', True)
    elif key in ('-i', '--id'):
        selected = task_get_task_param('recids') or set()
        # The parameter is (re)stored on every call, before the new ids
        # are merged into the same set object.
        task_set_task_param('recids', selected)
        selected.update(split_cli_ids_arg(value))

    return True
示例#7
0
def task_run_core():
    """Run the task with the arguments stored in the BibSched task queue.

    This is the entry point BibSched invokes via the daemon call.  For
    every format named in the comma-separated 'format' option (default
    'HB,RECJSON') the record ids selected by the task options are gathered
    and passed to bibreformat_task.

    Always returns True.
    """
    requested_formats = task_get_option('format', 'HB,RECJSON')
    query_fields = ('collection', 'field', 'pattern', 'matching')

    for fmt in requested_formats.split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        selected = intbitset()

        if task_has_option("all"):
            selected += all_records()

        if task_has_option("last"):
            selected += outdated_caches(fmt, last_updated)

        # Ids reported by missing_caches are selected unless the
        # 'ignore_without' option is present; they are also passed on
        # separately to bibreformat_task.
        if not task_has_option('ignore_without'):
            without_fmt = missing_caches(fmt)
            selected += without_fmt
        else:
            without_fmt = intbitset()

        # Ids given explicitly through the 'recids' option.
        selected += split_cli_ids_arg(task_get_option('recids', ''))

        # Ids matched by the search options; unset options default to ''.
        query_params = dict((name, task_get_option(name, ''))
                            for name in query_fields)
        selected += query_records(query_params)

        run_processing = not task_has_option('noprocess')
        bibreformat_task(fmt, selected, without_fmt, run_processing)

    return True
示例#8
0
def task_run_core():
    """Execute the task using arguments fetched from the BibSched queue.

    BibSched invokes this function via the daemon call.  Each format in
    the comma-separated 'format' option (default 'HB,RECJSON') is handled
    in turn: the ids chosen by the task options are collected into one
    intbitset and forwarded to bibreformat_task.

    Always returns True.
    """
    for fmt in task_get_option('format', 'HB,RECJSON').split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        # Collect the id sources in order, then merge them below.
        id_sources = []

        if task_has_option("all"):
            id_sources.append(all_records())

        if task_has_option("last"):
            id_sources.append(outdated_caches(fmt, last_updated))

        if task_has_option('ignore_without'):
            without_fmt = intbitset()
        else:
            without_fmt = missing_caches(fmt)
            id_sources.append(without_fmt)

        id_sources.append(split_cli_ids_arg(task_get_option('recids', '')))

        query_params = {}
        for option_name in ('collection', 'field', 'pattern', 'matching'):
            query_params[option_name] = task_get_option(option_name, '')
        id_sources.append(query_records(query_params))

        recids = intbitset()
        for ids in id_sources:
            recids += ids

        process_records = not task_has_option('noprocess')
        bibreformat_task(fmt, recids, without_fmt, process_records)

    return True
示例#9
0
def task_parse_options(key, val, *_):
    """ Must be defined for bibtask to create a task

    Handles the option selected by ``key``:
      * --all/-a calls reset_rule_last_run for every comma-separated name
        in ``val``;
      * --enable-rules/-e stores the comma-separated ``val`` as a set
        under 'enabled_rules';
      * --id/-i stores intbitset(split_cli_ids_arg(val)) under
        'record_ids';
      * --queue/-q and --config/-c store ``val`` unchanged;
      * --no-tickets/-t and --no-upload/-b set their flag to True, and
        --dry-run/-n sets both 'no_upload' and 'no_tickets'.

    Raises StandardError for any other key; returns True otherwise.
    """
    if key in ("--all", "-a"):
        for rule_name in val.split(","):
            reset_rule_last_run(rule_name)
        return True

    if key in ("--enable-rules", "-e"):
        task_set_option("enabled_rules", set(val.split(",")))
        return True

    if key in ("--id", "-i"):
        task_set_option("record_ids", intbitset(split_cli_ids_arg(val)))
        return True

    verbatim_switches = (
        (("--queue", "-q"), "queue"),
        (("--config", "-c"), "config"),
    )
    for switches, option_name in verbatim_switches:
        if key in switches:
            task_set_option(option_name, val)
            return True

    # Each switch turns on one or more boolean options, in the listed order.
    flag_switches = (
        (("--no-tickets", "-t"), ("no_tickets",)),
        (("--no-upload", "-b"), ("no_upload",)),
        (("--dry-run", "-n"), ("no_upload", "no_tickets")),
    )
    for switches, option_names in flag_switches:
        if key in switches:
            for option_name in option_names:
                task_set_option(option_name, True)
            return True

    raise StandardError("Error: Unrecognised argument '%s'." % key)
示例#10
0
def task_run_core():
    """Run the task with the arguments stored in the BibSched task queue.

    This is what BibSched invokes via the daemon call.  For every format
    in the comma-separated 'format' option ('HB' when the option is
    empty), the SQL statements, the search parameters and the explicit
    record ids are prepared from the task options and passed to
    bibreformat_task.

    Always returns True.
    """
    # Default to 'HB' if no format option was given.
    fmts = task_get_option('format')
    if not fmts:
        fmts = 'HB'

    for fmt in fmts.split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        last_updated_text = last_updated.strftime('%Y-%m-%d %H:%M:%S')

        # SQL commands handed to bibreformat_task for this format.  The
        # "missing" statement keeps two %s placeholders for later use.
        # NOTE(review): fmt is interpolated directly into the SQL text;
        # confirm it can only ever hold trusted format codes.
        sql = {
            "all" : """SELECT br.id FROM bibrec AS br, bibfmt AS bf
                       WHERE bf.id_bibrec = br.id AND bf.format = '%s'""" % fmt,
            "last": """SELECT br.id FROM bibrec AS br
                       INNER JOIN bibfmt AS bf ON bf.id_bibrec = br.id
                       WHERE br.modification_date >= '%(last_updated)s'
                       AND bf.format='%(format)s'
                       AND bf.last_updated < br.modification_date""" \
                            % {'format': fmt,
                               'last_updated': last_updated_text},
            "missing"  : """SELECT br.id
                            FROM bibrec as br
                            LEFT JOIN bibfmt as bf
                            ON bf.id_bibrec = br.id AND bf.format ='%s'
                            WHERE bf.id_bibrec IS NULL
                            AND br.id BETWEEN %%s AND %%s
                         """ % fmt,
        }

        # Only the statements whose option is present are queued.
        sql_queries = [sql[name] for name in ("all", "last")
                       if task_has_option(name)]

        # Search parameters; options that are absent become ''.
        cds_query = {}
        for name in ('collection', 'field', 'pattern', 'matching'):
            if task_has_option(name):
                cds_query[name] = task_get_option(name)
            else:
                cds_query[name] = ""

        recids = []
        if task_has_option("recids"):
            recids = list(split_cli_ids_arg(task_get_option('recids')))

        bibreformat_task(fmt,
                         sql,
                         sql_queries,
                         cds_query,
                         task_has_option('without'),
                         not task_has_option('noprocess'),
                         recids)
    return True
示例#11
0
 def test_complex(self):
     # Ranges (including the one-element "1-1") mixed with single ids, in
     # no particular order, must produce the plain set of all ids.
     parsed = split_cli_ids_arg("1-1,7,10-11,4")
     expected = set([1, 4, 7, 10, 11])
     self.assertEqual(parsed, expected)
示例#12
0
 def test_multiple(self):
     # A comma-separated list of single ids yields exactly those ids.
     parsed = split_cli_ids_arg("1,5,7")
     expected = set([1, 5, 7])
     self.assertEqual(parsed, expected)
示例#13
0
 def test_range(self):
     # "1-5" is an inclusive range: both end points are part of the result.
     parsed = split_cli_ids_arg("1-5")
     expected = set([1, 2, 3, 4, 5])
     self.assertEqual(parsed, expected)
示例#14
0
 def test_one(self):
     # A single id yields a one-element set.
     parsed = split_cli_ids_arg("1")
     expected = set([1])
     self.assertEqual(parsed, expected)
示例#15
0
                    recid = get_record_from_doi(doi)
                except APSHarvesterSearchError, e:
                    write_message("Error while getting recid from %s: %s" %
                                  (doi, str(e)))
                    continue
                if not recid:
                    # Record not found on the system, we harvest from APS
                    write_message("No recid found, we get record from APS")
                    recid = None
                final_record_list.append(APSRecord(recid, doi))

        if len(recids) > 0:
            write_message("Parsing record IDs...")

            # We are doing rec ids
            recids = split_cli_ids_arg(recids)
            for recid in recids:
                final_record_list.append(APSRecord(recid))

        if query:
            write_message("Performing a search query...")

            # We are doing a search query, rg=0 allows the return of all results.
            result = perform_request_search(p=query,
                                            cc=CFG_APSHARVEST_SEARCH_COLLECTION,
                                            of='id',
                                            rg=0,
                                            wl=0)
            for recid in result:
                final_record_list.append(APSRecord(recid))
示例#16
0
                    recid = get_record_from_doi(doi)
                except APSHarvesterSearchError, e:
                    write_message("Error while getting recid from %s: %s" %
                                  (doi, str(e)))
                    continue
                if not recid:
                    # Record not found on the system, we harvest from APS
                    write_message("No recid found, we get record from APS")
                    recid = None
                final_record_list.append(APSRecord(recid, doi))

        if len(parameters.get("recids")) > 0:
            write_message("Parsing record IDs...")

            # We are doing rec ids
            recids = split_cli_ids_arg(parameters.get("recids"))
            for recid in recids:
                final_record_list.append(APSRecord(recid))

        if parameters.get("query"):
            write_message("Performing a search query...")

            # We are doing a search query, rg=0 allows the return of all results.
            result = perform_request_search(p=parameters.get("query"),
                                            cc=CFG_APSHARVEST_SEARCH_COLLECTION,
                                            of='id',
                                            rg=0,
                                            wl=0)
            for recid in result:
                final_record_list.append(APSRecord(recid))
示例#17
0
def task_run_core():
    """Execute the task using arguments fetched from the BibSched queue.

    BibSched invokes this function via the daemon call.  Each format in
    the comma-separated 'format' option ('HB' when the option is empty)
    is handled in turn: the SQL statements, the search parameters and the
    explicit record ids are built from the task options and forwarded to
    bibreformat_task.

    Always returns True.
    """
    ## initialize parameters ('HB' is the default when no format is given)
    fmts = task_get_option('format') or 'HB'
    search_options = ('collection', 'field', 'pattern', 'matching')

    for fmt in fmts.split(','):
        last_updated = fetch_last_updated(fmt)
        write_message("last stored run date is %s" % last_updated)

        ### sql commands to be executed during the script run
        # NOTE(review): fmt is interpolated directly into the SQL text;
        # confirm it can only ever hold trusted format codes.
        all_sql = """SELECT br.id FROM bibrec AS br, bibfmt AS bf
                       WHERE bf.id_bibrec = br.id AND bf.format = '%s'""" % fmt
        last_sql = """SELECT br.id FROM bibrec AS br
                       INNER JOIN bibfmt AS bf ON bf.id_bibrec = br.id
                       WHERE br.modification_date >= '%(last_updated)s'
                       AND bf.format='%(format)s'
                       AND bf.last_updated < br.modification_date""" % {
            'format': fmt,
            'last_updated': last_updated.strftime('%Y-%m-%d %H:%M:%S'),
        }
        # The doubled %% leaves two %s placeholders in the final statement.
        missing_sql = """SELECT br.id
                            FROM bibrec as br
                            LEFT JOIN bibfmt as bf
                            ON bf.id_bibrec = br.id AND bf.format ='%s'
                            WHERE bf.id_bibrec IS NULL
                            AND br.id BETWEEN %%s AND %%s
                         """ % fmt
        sql = {
            "all": all_sql,
            "last": last_sql,
            "missing": missing_sql,
        }

        # Queue only the statements whose option is present.
        sql_queries = []
        for statement_name in ("all", "last"):
            if task_has_option(statement_name):
                sql_queries.append(sql[statement_name])

        # Search parameters; options that are absent become ''.
        cds_query = {}
        for option_name in search_options:
            option_value = ""
            if task_has_option(option_name):
                option_value = task_get_option(option_name)
            cds_query[option_name] = option_value

        if not task_has_option("recids"):
            recids = []
        else:
            recids = list(split_cli_ids_arg(task_get_option('recids')))

        only_without = task_has_option('without')
        run_processing = not task_has_option('noprocess')
        bibreformat_task(fmt, sql, sql_queries, cds_query,
                         only_without, run_processing, recids)
    return True
示例#18
0
                    recid = get_record_from_doi(doi)
                except APSHarvesterSearchError, e:
                    write_message("Error while getting recid from %s: %s" %
                                  (doi, str(e)))
                    continue
                if not recid:
                    # Record not found on the system, we harvest from APS
                    write_message("No recid found, we get record from APS")
                    recid = None
                final_record_list.append(APSRecord(recid, doi))

        if len(recids) > 0:
            write_message("Parsing record IDs...")

            # We are doing rec ids
            recids = split_cli_ids_arg(recids)
            for recid in recids:
                final_record_list.append(APSRecord(recid))

        if query:
            write_message("Performing a search query...")

            # We are doing a search query, rg=0 allows the return of all results.
            result = perform_request_search(
                p=query,
                cc=CFG_APSHARVEST_SEARCH_COLLECTION,
                of='id',
                rg=0,
                wl=0)
            for recid in result:
                final_record_list.append(APSRecord(recid))