# NOTE: assumes AIL-style module-level globals from the surrounding codebase:
# r_serv_metadata is a Redis connection and item_basic is the item helper module.

def save_item_correlation(self, subtype, obj_id, item_id, item_date):
    self.update_correlation_daterange(subtype, obj_id, item_date)

    # global set
    r_serv_metadata.sadd('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), item_id)

    # daily
    r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, 1)

    # all type (redis-py 2.x argument order: key, member, increment)
    r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, 1)

    ## object_metadata
    # item
    r_serv_metadata.sadd('item_{}_{}:{}'.format(self.correlation_name, subtype, item_id), obj_id)

    # domain: propagate the correlation to the item's source domain
    if item_basic.is_crawled(item_id):
        domain = item_basic.get_item_domain(item_id)
        self.save_domain_correlation(domain, subtype, obj_id)
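# Usage sketch (assumption, not from the source): a hypothetical 'cryptocurrency'
# correlation with subtype 'bitcoin'. Given the format strings above, one call
#
#   correlation = Correlation('cryptocurrency')   # hypothetical constructor
#   correlation.save_item_correlation('bitcoin', '<btc_address>',
#                                     '<item_id>', '20200101')
#
# would write to keys such as:
#   set_cryptocurrency_bitcoin:<btc_address>   (SADD <item_id>)
#   cryptocurrency:bitcoin:20200101            (HINCRBY <btc_address> 1)
#   cryptocurrency_all:bitcoin                 (ZINCRBY <btc_address> 1)
#   item_cryptocurrency_bitcoin:<item_id>      (SADD <btc_address>)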
def add_obj_tag(object_type, object_id, tag, obj_date=None):
    if object_type == "item":  # TODO: # FIXME: # REVIEW: rename me
        if obj_date is None:
            raise ValueError("obj_date is None")

        # add tag
        r_serv_metadata.sadd('tag:{}'.format(object_id), tag)
        r_serv_tags.sadd('{}:{}'.format(tag, obj_date), object_id)

        # add domain tag, except for the two submission-source tags
        if (item_basic.is_crawled(object_id)
                and tag != 'infoleak:submission="crawler"'
                and tag != 'infoleak:submission="manual"'):
            domain = item_basic.get_item_domain(object_id)
            add_tag("domain", tag, domain)
    else:
        r_serv_metadata.sadd('tag:{}'.format(object_id), tag)
        r_serv_tags.sadd('{}:{}'.format(object_type, tag), object_id)
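# Usage sketch (hypothetical values): tagging a crawled item also propagates the
# tag to the item's domain, unless the tag is one of the two submission-source
# tags filtered above.
#
#   add_obj_tag('item', 'crawled/2020/01/01/example.onion/<uuid>',
#               'infoleak:automatic-detection="credential"',
#               obj_date='20200101')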
def get_item_domain(item_id):
    return item_basic.get_item_domain(item_id)
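# Thin module-level convenience wrapper around item_basic, e.g. (hypothetical
# item_id):
#   get_item_domain('crawled/2020/01/01/example.onion/<uuid>')  # -> item's domain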