def test_get_modified_records(self):
    """Check the number of records reported as modified for several
    look-back windows (8 days, 2 days, and right now)."""
    # (days_back, expected_count) pairs, asserted in order.
    expectations = [(8, 8), (2, 3), (0, 0)]
    for days_back, expected in expectations:
        cutoff = datetime.datetime.now() - datetime.timedelta(days_back)
        self.assertEqual(len(get_modified_records_since(cutoff)), expected)
def test_get_modified_records(self):
    """Check record counts returned for several look-back windows."""
    def modified_count(days):
        # Count records modified within the last `days` days.
        since = datetime.datetime.now() - datetime.timedelta(days)
        return len(get_modified_records_since(since))

    self.assertEqual(modified_count(8), 8)
    self.assertEqual(modified_count(2), 3)
    self.assertEqual(modified_count(0), 0)
def get_recids_for_rules(rules):
    """
    Generates the final list of record IDs to load.

    @param rules: dict of rules {rule_name: rule_dict}
    @type rules: dict of rules

    @return: dict {rule_name: intbitset of record IDs}
    """
    # Explicit record IDs from the task command line override any
    # last-run-based selection below.
    override_record_ids = task_get_option("record_ids")
    recids = {}
    for rule_name, rule in rules.iteritems():
        if "filter_pattern" in rule or "filter_collection" in rule:
            # Restrict the rule to records matching its search filter.
            query = rule.get("filter_pattern", '')
            if "filter_collection" in rule:
                collections = rule["filter_collection"].split()
            else:
                collections = None
            write_message("Performing given search query: '%s'" % query)
            if collections:
                result = perform_request_search(
                    p=query,
                    of='intbitset',
                    wl=rule.get('filter_limit', 0),
                    f=rule.get('filter_field', None),
                    c=collections
                )
            else:
                result = search_pattern(
                    p=query,
                    wl=rule.get('filter_limit', 0),
                    f=rule.get('filter_field', None),
                )
        else:
            # No filter: start from the set of all possible record IDs.
            result = intbitset(trailing_bits=True)

        if override_record_ids is not None:
            result.intersection_update(override_record_ids)
        else:
            # Only consider records modified since the rule last ran.
            last_run = get_rule_lastrun(rule_name)
            modified_recids = get_modified_records_since(last_run)
            # Idiomatic membership test ("x not in y" instead of "not x in y").
            if "consider_deleted_records" not in rule:
                modified_recids -= search_unit_in_bibxxx(p='DELETED',
                                                         f='980__%',
                                                         type='e')
                # NOTE(review): CERN-specific DUMMY filtering assumed to be
                # nested under the deleted-records check — confirm upstream.
                if CFG_CERN_SITE:
                    modified_recids -= search_unit_in_bibxxx(p='DUMMY',
                                                             f='980__%',
                                                             type='e')
            result.intersection_update(modified_recids)
        recids[rule_name] = result
    return recids
def get_recids_for_rules(rules):
    """
    Generates the final list of record IDs to load.

    @param rules: dict of rules {rule_name: rule_dict}
    @type rules: dict of rules

    @return: dict {rule_name: intbitset of record IDs}
    """
    # Explicit record IDs from the task command line override any
    # last-run-based selection below.
    override_record_ids = task_get_option("record_ids")
    recids = {}
    for rule_name, rule in rules.iteritems():
        if "filter_pattern" in rule:
            # Restrict the rule to records matching its search pattern.
            query = rule["filter_pattern"]
            if "filter_collection" in rule:
                collections = rule["filter_collection"].split()
            else:
                collections = None
            write_message("Performing given search query: '%s'" % query)
            if collections:
                result = perform_request_search(
                    p=query,
                    of='intbitset',
                    wl=rule.get('filter_limit', 0),
                    f=rule.get('filter_field', None),
                    c=collections
                )
            else:
                result = search_pattern(
                    p=query,
                    wl=rule.get('filter_limit', 0),
                    f=rule.get('filter_field', None),
                )
        else:
            # No filter: start from the set of all possible record IDs.
            result = intbitset(trailing_bits=True)

        if override_record_ids is not None:
            result.intersection_update(override_record_ids)
        else:
            # Only consider records modified since the rule last ran.
            last_run = get_rule_lastrun(rule_name)
            modified_recids = get_modified_records_since(last_run)
            # Idiomatic membership test ("x not in y" instead of "not x in y").
            if "consider_deleted_records" not in rule:
                modified_recids -= search_unit_in_bibxxx(p='DELETED',
                                                         f='980__%',
                                                         type='e')
                # NOTE(review): CERN-specific DUMMY filtering assumed to be
                # nested under the deleted-records check — confirm upstream.
                if CFG_CERN_SITE:
                    modified_recids -= search_unit_in_bibxxx(p='DUMMY',
                                                             f='980__%',
                                                             type='e')
            result.intersection_update(modified_recids)
        recids[rule_name] = result
    return recids
def _get_recids_foreach_ontology(recids=None, collections=None, taxonomy=None):
    """Returns an array containing hash objects containing the
    collection, its corresponding ontology and the records belonging to
    the given collection."""
    rec_onts = []

    # User specified record IDs: a single entry, no collection attached.
    if recids:
        return [{
            'ontology': taxonomy,
            'collection': None,
            'recIDs': recids,
        }]

    # User specified collections: one entry per non-empty collection.
    if collections:
        for collection in collections:
            records = get_collection_reclist(collection)
            if records:
                rec_onts.append({
                    'ontology': taxonomy,
                    'collection': collection,
                    'recIDs': records,
                })
        return rec_onts

    # Use rules found in collection_clsMETHOD.
    result = run_sql(
        "SELECT clsMETHOD.name, clsMETHOD.last_updated, "
        "collection.name FROM clsMETHOD JOIN collection_clsMETHOD ON "
        "clsMETHOD.id=id_clsMETHOD JOIN collection ON "
        "id_collection=collection.id")

    for ontology, date_last_run, collection in result:
        records = get_collection_reclist(collection)
        if not records:
            # Guard clause: nothing to analyse in this collection.
            bibtask.write_message(
                "ERROR: Collection '%s' doesn't contain any record. "
                "Cannot analyse keywords." % (collection,),
                stream=sys.stderr, verbose=0)
            continue

        # Decide which records count as "modified" for this run.
        if not date_last_run:
            bibtask.write_message(
                "INFO: Collection %s has not been previously "
                "analyzed." % collection, stream=sys.stderr, verbose=3)
            modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
        elif bibtask.task_get_option('force'):
            bibtask.write_message(
                "INFO: Analysis is forced for collection %s." % collection,
                stream=sys.stderr, verbose=3)
            modified_records = intbitset(run_sql("SELECT id FROM bibrec"))
        else:
            modified_records = bibtask.get_modified_records_since(
                date_last_run)

        records &= modified_records
        if records:
            rec_onts.append({
                'ontology': ontology,
                'collection': collection,
                'recIDs': records,
            })
        else:
            bibtask.write_message(
                "WARNING: All records from collection '%s' have "
                "already been analyzed for keywords with ontology '%s' "
                "on %s." % (collection, ontology, date_last_run),
                stream=sys.stderr, verbose=2)

    return rec_onts
def _get_recids_foreach_ontology(recids=None, collections=None, taxonomy=None):
    """Returns an array containing hash objects containing the
    collection, its corresponding ontology and the records belonging to
    the given collection."""
    mappings = []

    # User specified record IDs.
    if recids:
        mappings.append({'ontology': taxonomy,
                         'collection': None,
                         'recIDs': recids})
        return mappings

    # User specified collections.
    if collections:
        for coll_name in collections:
            coll_records = get_collection_reclist(coll_name)
            if coll_records:
                mappings.append({'ontology': taxonomy,
                                 'collection': coll_name,
                                 'recIDs': coll_records})
        return mappings

    # Use rules found in collection_clsMETHOD.
    rows = run_sql("SELECT clsMETHOD.name, clsMETHOD.last_updated, "
                   "collection.name FROM clsMETHOD JOIN collection_clsMETHOD ON "
                   "clsMETHOD.id=id_clsMETHOD JOIN collection ON "
                   "id_collection=collection.id")

    for ontology, last_run, coll_name in rows:
        coll_records = get_collection_reclist(coll_name)
        if coll_records:
            # Never analysed, or forced: consider every record modified.
            full_rescan = (not last_run
                           or bibtask.task_get_option('force'))
            if full_rescan:
                if not last_run:
                    bibtask.write_message(
                        "INFO: Collection %s has not been previously "
                        "analyzed." % coll_name,
                        stream=sys.stderr, verbose=3)
                else:
                    bibtask.write_message(
                        "INFO: Analysis is forced for collection %s."
                        % coll_name,
                        stream=sys.stderr, verbose=3)
                changed = intbitset(run_sql("SELECT id FROM bibrec"))
            else:
                changed = bibtask.get_modified_records_since(last_run)

            coll_records &= changed
            if coll_records:
                mappings.append({'ontology': ontology,
                                 'collection': coll_name,
                                 'recIDs': coll_records})
            else:
                bibtask.write_message(
                    "WARNING: All records from collection '%s' have "
                    "already been analyzed for keywords with ontology '%s' "
                    "on %s." % (coll_name, ontology, last_run),
                    stream=sys.stderr, verbose=2)
        else:
            bibtask.write_message(
                "ERROR: Collection '%s' doesn't contain any record. "
                "Cannot analyse keywords." % (coll_name,),
                stream=sys.stderr, verbose=0)

    return mappings