def bulk(collection=None):
    """Process every access log file under LOG_DIR and register its accesses.

    Each file not yet recorded in the processed-files collection is read line
    by line; parsed accesses are registered either in COUNTER-compliant mode
    (de-duplicating double clicks via a timed lock) or in plain SciELO mode.

    :param collection: optional collection name forwarded to AccessChecker
                       and to the Local ratchet queue.
    """
    _logger.info('Running as bulk')

    if COUNTER_COMPLIANT:
        # Timed lock set used to discard COUNTER "double click" accesses.
        ts = TimedSet(expired=checkdatelock)

    ac = AccessChecker(collection)
    proc_coll = get_proc_collection()
    # NOTE(review): return value is never used below; kept because the getter
    # may create/prepare the robots collection as a side effect — confirm
    # before removing.
    proc_robots_coll = get_proc_robots_collection()

    # Local import keeps the module header untouched; stdlib only.
    import glob

    # Replaces os.popen("ls %s/*") — no shell subprocess, safe with spaces in
    # paths; sorted() preserves ls's alphabetical ordering.
    for logfile in sorted(glob.glob(os.path.join(LOG_DIR, '*'))):
        # Skip files already registered as processed.
        if proc_coll.find({'file_name': logfile}).count() > 0:
            _logger.debug('File already processed %s' % logfile)
            continue

        # Record the new file in the processed-files collection.
        _logger.info("Processing: %s" % logfile)
        proc_coll.insert({'file_name': logfile})

        rq = Local(MONGO_URI, collection)
        with open(logfile, 'rb') as f:
            for log_file_line, raw_line in enumerate(f, 1):
                _logger.debug("Reading line {0} from file {1}".format(
                    str(log_file_line), logfile))

                parsed_line = ac.parsed_access(raw_line)
                if not parsed_line:
                    continue

                if COUNTER_COMPLIANT:
                    # COUNTER mode: PDF accesses lock for 30s, others for 10s.
                    locktime = 30 if parsed_line['access_type'] == "PDF" else 10
                    try:
                        lockid = '_'.join([parsed_line['ip'],
                                           parsed_line['code'],
                                           parsed_line['script']])
                        ts.add(lockid, parsed_line['iso_datetime'], locktime)
                        register_access(rq, parsed_line)
                    except ValueError:
                        # Lock still active (double click): drop this access.
                        continue
                else:
                    # SciELO mode: register every parsed access.
                    register_access(rq, parsed_line)

        rq.send(slp=SLEEP)
        del rq
def test_expiration_custom_timeout(self):
    """A custom 30s lock must hold the key until more than 30s have passed.

    Fixes two defects in the original test:

    * ``assertTrue(ts._items, {...})`` treated the expected dict as the
      failure *message*, so the assertions passed for any non-empty dict —
      replaced with ``assertEqual`` to actually compare contents.
    * The still-locked probe used the malformed timestamp
      ``'2013-05-29T00:01:031'`` ("031" seconds), so the expected
      ``ValueError`` fired on parsing rather than on the lock; the intended
      value, 30 seconds after the add, is ``'2013-05-29T00:01:31'``.
    """
    ts = TimedSet(expired=checkdatelock)
    ts.add('art1', '2013-05-29T00:01:01', 30)
    self.assertEqual(ts._items, {'art1': '29/May/2013:00:01:01'})

    with self.assertRaises(ValueError):
        # 30 seconds later the 30s lock has not yet expired.
        ts.add('art1', '2013-05-29T00:01:31')
    # Failed add must leave the stored entry unchanged.
    self.assertEqual(ts._items, {'art1': '29/May/2013:00:01:01'})

    # 31 seconds after the first add the lock has expired; the key is renewed.
    ts.add('art1', '2013-05-29T00:01:32')
    self.assertEqual(ts._items, {'art1': '29/May/2013:00:01:32'})