def main(args=None, stdin=sys.stdin):
    """Import Apache log lines from `stdin` and write them to the database.

    `args` is the command-line argument list; when it is None,
    ``sys.argv[1:]`` is used.  After option parsing, the first
    positional argument is handed to `RequestTracker` as the DB
    connection string (``parser.error`` is called when it is missing).

    `stdin` may be any iterable of lines.  Lines are fed to
    ``RequestTracker.import_apache_line`` in batches governed by the
    ``batch`` option; after each batch ``write_pending`` is called.  A
    line that raises `ValueError` is reported on stdout and skipped.
    """
    import gc

    if args is None:
        args = sys.argv[1:]
    options, args = parser.parse_args(args)
    if len(args) < 1:
        parser.error('You must give a DB_CONNECTION string')
    insert_count = int(options.batch)
    request_tracker = RequestTracker(args[0], options.table_prefix)

    def writer(i=None, total=0):
        # Progress callback handed to write_pending().  Presumably `i`
        # is the index of the item being written -- confirm against
        # RequestTracker.write_pending.  `total` is accepted but unused.
        if i and not i % 1000:
            sys.stdout.write('.')
            sys.stdout.flush()
        if i is None:
            sys.stdout.write('write...')
            sys.stdout.flush()

    # Create the iterator once so every batch resumes where the previous
    # one stopped.  Calling enumerate(stdin) afresh on each pass restarts
    # from the first line for re-iterable inputs such as a list, which
    # would loop forever on input longer than one batch.
    lines = iter(stdin)
    done = False
    while not done:
        done = True
        # NOTE: the break fires once index exceeds insert_count, so a
        # full batch holds insert_count + 2 lines.
        for index, line in enumerate(lines):
            try:
                request_tracker.import_apache_line(
                    line,
                    default_host=options.host,
                    default_scheme=options.scheme)
            except ValueError as e:
                # Report the bad line and keep going.
                sys.stdout.write(str(e) + '\n')
            if not index % 1000:
                sys.stdout.write('.')
                sys.stdout.flush()
            if index > insert_count:
                # Batch is full: flush it, then come back for more.
                done = False
                break
        sys.stdout.write('writing db...\n')
        sys.stdout.flush()
        request_tracker.write_pending(writer)
        if not done:
            sys.stdout.write('\ncontinuing...')
            sys.stdout.flush()
            # Collect garbage between batches.
            gc.collect()
class StatusWatcher(object):
    """Middleware that tracks requests"""

    def __init__(self, app, db, table_prefix='',
                 serialize_time=120, serialize_requests=100,
                 _synchronous=False):
        """This wraps the `app` and saves data about each request.

        Data is stored in `vaineye.model.RequestTracker`, instantiated
        with the `db` SQLAlchemy connection string.

        Periodically data is written to the database (every
        `serialize_time` seconds, or `serialize_requests` requests,
        whichever comes first).  This writing happens in a background
        thread.

        For debugging purposes you can set `_synchronous` to True to
        have requests written out every request without spawning a
        thread.
        """
        self.app = app
        self.request_tracker = RequestTracker(db, table_prefix=table_prefix)
        self.serialize_time = serialize_time
        self.serialize_requests = serialize_requests
        self._synchronous = _synchronous
        # Held while a write is in progress; see write_pending().
        self.write_pending_lock = threading.Lock()
        self.last_written = time.time()
        # Requests seen since the last successful write.
        self.request_count = 0
        if not _synchronous:
            # Flush whatever is still pending when the interpreter exits.
            atexit.register(self.write_pending)

    def write_pending(self):
        """Write all pending requests.

        Returns immediately, without writing, when another caller
        already holds the write lock.  After a successful write the
        time and request counters that trigger the next write are
        reset.
        """
        if not self.write_pending_lock.acquire(False):
            # Someone else is currently serializing
            return
        try:
            self.request_tracker.write_pending()
            self.last_written = time.time()
            # Reset the counter that __call__ increments.  (This used to
            # assign to a misspelled `request_counts` attribute, so the
            # real counter was never reset.)
            self.request_count = 0
        finally:
            self.write_pending_lock.release()

    def write_in_thread(self):
        """Write all pending requests, in a background thread"""
        t = threading.Thread(target=self.write_pending)
        t.start()

    def __call__(self, environ, start_response):
        """WSGI interface.

        Counts the request, starts a background write when either the
        request-count or elapsed-time threshold has been exceeded (not
        in synchronous mode), and wraps `start_response` so the request
        is recorded when the wrapped application calls it.
        """
        self.request_count += 1
        if not self._synchronous and (
                self.request_count > self.serialize_requests
                or time.time() - self.last_written > self.serialize_time):
            self.write_in_thread()
        start_time = time.time()

        def repl_start_response(status, headers, exc_info=None):
            # end_time is taken when the application calls start_response.
            end_time = time.time()
            if 'HTTP_X_REAL_IP' in environ:
                # key for remote ip when nginx is used
                environ['REMOTE_ADDR'] = environ['HTTP_X_REAL_IP']
            self.request_tracker.add_request(
                environ=environ,
                start_time=start_time,
                end_time=end_time,
                status=status,
                response_headers=headers)
            if self._synchronous:
                self.write_pending()
            return start_response(status, headers, exc_info)

        return self.app(environ, repl_start_response)