示例#1
0
文件: replay.py 项目: Havvy/madcow
    def handle(self, *args, **kwargs):
        """Replay links found in IRC log files into the Link table.

        Each positional argument is a path.  Directories are expanded with
        ``self.iterlogs``; anything else is treated as a single log file.
        Only files whose basename matches ``_logfile_re`` are processed.

        NOTE(review): this excerpt is truncated -- the handlers belonging to
        the outer ``try`` and whatever consumes ``status`` are not visible.
        """
        status = os.EX_OK
        log = get_logger('replay', stream=sys.stderr)
        try:
            if args:
                # presumably stops the model from auto-stamping timestamps so
                # the explicit created/modified values below stick -- confirm
                # against disable_auto_time
                self.disable_auto_time(log, Link)
                # Newest existing link; its timestamp is the cut-off below
                # which log records are skipped.
                links = Link.objects.order_by('-created')
                last_link = links[0].created if links else None

                # Collect (date, channel, path) tuples for every matching file.
                logfiles = []
                for arg in args:
                    if os.path.isdir(arg):
                        it = self.iterlogs(arg)
                    else:
                        it = [arg]
                    for logfile in it:
                        basename = os.path.basename(logfile)
                        match = _logfile_re.search(basename)
                        if match is not None:
                            groups = match.groups()
                            # groups[0] is used as the channel name (without
                            # '#'); the remaining groups are the integer
                            # arguments to datetime.date.
                            logfiles.append((datetime.date(*imap(int, groups[1:])), '#' + groups[0], logfile))

                if logfiles:
                    # Tuple sort: by date first, then channel, then path.
                    logfiles.sort()
                    for created_date, channel, logfile in logfiles:
                        log.info('reading %s [%s, %s]', logfile, channel, created_date)
                        with open(logfile, 'rb') as fp:
                            for line in fp:
                                line = line.rstrip('\r\n')
                                match = _logline_re.search(line)
                                if match is not None:
                                    groups = match.groups()
                                    # The nick comes from whichever of the two
                                    # alternative groups matched.
                                    nick = groups[3] or groups[4]
                                    # Ignore lines whose nick is 'madcow'
                                    # (case-insensitive).
                                    if nick.lower() != 'madcow':
                                        # groups[:3] are the integer arguments
                                        # to datetime.time.
                                        created_time = datetime.time(*imap(int, groups[:3]))
                                        created = datetime.datetime.combine(created_date, created_time)
                                        if last_link is not None and created <= last_link:
                                            log.warn('Skipping log record %s@%s: in the past', nick, created)
                                        else:
                                            # groups[5] is the text searched
                                            # for URLs.
                                            for url in _url_re.findall(groups[5]):
                                                log.info('LINK: %s by %s @ %s', url, nick, created)
                                                try:
                                                    link = Link.objects.add_link(url, nick, channel, 'irc',
                                                                                created=created, modified=created)
                                                    log.info('Processed link to: %r', link)
                                                # Both rejections are logged and
                                                # the loop moves on to the next
                                                # URL.
                                                except OldMeme, old:
                                                    log.warn('link was an old meme: %s' % url)
                                                except BlackListError, exc:
                                                    log.warn('link is blacklisted: %s' % url)
示例#2
0
        def wrapper_func(*args, **kwargs):
            """Call ``wrapped_func`` with a logger supplied, logging any trapped error.

            Keyword arguments consumed here (not forwarded):
              logger     -- logger to use; when absent or None one is built
                            with ``get_logger(*logger_args, **logger_kwargs)``.
              log_stream -- only consumed when a logger has to be built; used
                            as the 'stream' setting unless
                            ``default_logger_kwargs`` already defines one.

            When ``method`` is true the logger is stored on ``args[0].log``;
            otherwise it is prepended to the positional arguments.

            Returns whatever ``wrapped_func`` returns.  On ``TrapError`` the
            formatted traceback is logged line by line and ``reraise`` is
            called with the trapped arguments.
            """
            logger = kwargs.pop('logger', None)
            if logger is None:
                # Copy so the shared defaults are never mutated.
                logger_kwargs = dict(default_logger_kwargs)
                logger_kwargs.setdefault('stream', kwargs.pop('log_stream', None))
                logger = get_logger(*logger_args, **logger_kwargs)

            try:
                # presumably TrapErrors turns exceptions raised inside the
                # block into TrapError -- confirm against its definition
                with TrapErrors():
                    if method:
                        args[0].log = logger
                    else:
                        args = (logger,) + args
                    return wrapped_func(*args, **kwargs)

            # "except ... as" works on Python 2.6+ and Python 3 alike.
            except TrapError as exc:
                # __name__ is the portable spelling of the Python-2-only func_name.
                logger.error('Unhandled exception in %s', wrapped_func.__name__)
                # exc.args is unpacked straight into format_exception, so it
                # must carry the (type, value, traceback) arguments it expects.
                for line in traceback.format_exception(*exc.args):
                    logger.error(text.chomp(line))
                reraise(*exc.args)
示例#3
0
        def wrapper_func(*args, **kwargs):
            """Invoke ``wrapped_func`` with a logger and log any trapped failure.

            Keyword arguments consumed here (not forwarded):
              logger     -- logger to use; when absent or None one is built
                            with ``get_logger(*logger_args, **logger_kwargs)``.
              log_stream -- only consumed when a logger has to be built; used
                            as the 'stream' setting unless
                            ``default_logger_kwargs`` already defines one.

            When ``method`` is true the logger is stored on ``args[0].log``;
            otherwise it is prepended to the positional arguments.

            Returns whatever ``wrapped_func`` returns.  On ``TrapError`` the
            formatted traceback is logged line by line and ``reraise`` is
            called with the trapped arguments.
            """
            logger = kwargs.pop('logger', None)
            if logger is None:
                # Work on a copy so the shared defaults stay untouched.
                logger_kwargs = dict(default_logger_kwargs)
                logger_kwargs.setdefault('stream',
                                         kwargs.pop('log_stream', None))
                logger = get_logger(*logger_args, **logger_kwargs)

            try:
                # presumably TrapErrors turns exceptions raised inside the
                # block into TrapError -- confirm against its definition
                with TrapErrors():
                    if method:
                        args[0].log = logger
                    else:
                        args = (logger, ) + args
                    return wrapped_func(*args, **kwargs)

            # "except ... as" works on Python 2.6+ and Python 3 alike.
            except TrapError as exc:
                # __name__ is the portable spelling of the Python-2-only func_name.
                logger.error('Unhandled exception in %s',
                             wrapped_func.__name__)
                # exc.args is unpacked straight into format_exception, so it
                # must carry the (type, value, traceback) arguments it expects.
                for line in traceback.format_exception(*exc.args):
                    logger.error(text.chomp(line))
                reraise(*exc.args)
示例#4
0
    def handle(self, *args, **kwargs):
        """Replay links found in IRC log files into the Link table.

        Each positional argument is a path.  Directories are expanded with
        ``self.iterlogs``; anything else is treated as a single log file.
        Only files whose basename matches ``_logfile_re`` are processed.

        NOTE(review): this excerpt is truncated -- the handlers belonging to
        the outer ``try`` and whatever consumes ``status`` are not visible.
        """
        status = os.EX_OK
        log = get_logger('replay', stream=sys.stderr)
        try:
            if args:
                # presumably stops the model from auto-stamping timestamps so
                # the explicit created/modified values below stick -- confirm
                # against disable_auto_time
                self.disable_auto_time(log, Link)
                # Newest existing link; its timestamp is the cut-off below
                # which log records are skipped.
                links = Link.objects.order_by('-created')
                last_link = links[0].created if links else None

                # Collect (date, channel, path) tuples for every matching file.
                logfiles = []
                for arg in args:
                    if os.path.isdir(arg):
                        it = self.iterlogs(arg)
                    else:
                        it = [arg]
                    for logfile in it:
                        basename = os.path.basename(logfile)
                        match = _logfile_re.search(basename)
                        if match is not None:
                            groups = match.groups()
                            # groups[0] is used as the channel name (without
                            # '#'); the remaining groups are the integer
                            # arguments to datetime.date.
                            logfiles.append(
                                (datetime.date(*imap(int, groups[1:])),
                                 '#' + groups[0], logfile))

                if logfiles:
                    # Tuple sort: by date first, then channel, then path.
                    logfiles.sort()
                    for created_date, channel, logfile in logfiles:
                        log.info('reading %s [%s, %s]', logfile, channel,
                                 created_date)
                        with open(logfile, 'rb') as fp:
                            for line in fp:
                                line = line.rstrip('\r\n')
                                match = _logline_re.search(line)
                                if match is not None:
                                    groups = match.groups()
                                    # The nick comes from whichever of the two
                                    # alternative groups matched.
                                    nick = groups[3] or groups[4]
                                    # Ignore lines whose nick is 'madcow'
                                    # (case-insensitive).
                                    if nick.lower() != 'madcow':
                                        # groups[:3] are the integer arguments
                                        # to datetime.time.
                                        created_time = datetime.time(
                                            *imap(int, groups[:3]))
                                        created = datetime.datetime.combine(
                                            created_date, created_time)
                                        if last_link is not None and created <= last_link:
                                            log.warn(
                                                'Skipping log record %s@%s: in the past',
                                                nick, created)
                                        else:
                                            # groups[5] is the text searched
                                            # for URLs.
                                            for url in _url_re.findall(
                                                    groups[5]):
                                                log.info(
                                                    'LINK: %s by %s @ %s', url,
                                                    nick, created)
                                                try:
                                                    link = Link.objects.add_link(
                                                        url,
                                                        nick,
                                                        channel,
                                                        'irc',
                                                        created=created,
                                                        modified=created)
                                                    log.info(
                                                        'Processed link to: %r',
                                                        link)
                                                # Both rejections are logged and
                                                # the loop moves on to the next
                                                # URL.
                                                except OldMeme, old:
                                                    log.warn(
                                                        'link was an old meme: %s'
                                                        % url)
                                                except BlackListError, exc:
                                                    log.warn(
                                                        'link is blacklisted: %s'
                                                        % url)
示例#5
0
class Command(BaseCommand):
    # Management command that fetches each URL given on the command line and
    # runs one named scanner module against the response, logging the result.
    #
    # NOTE: deliberately documented with comments rather than a class
    # docstring -- ``help = __doc__`` below is evaluated in the class body, so
    # a class docstring would replace the module docstring as the help text.

    help = __doc__
    args = '<url> [url ...]'

    # Command-specific options, followed by the options every command inherits.
    option_list = (
        make_option(
            '-s',
            '--scanner',
            metavar='<name>',
            help='scanner to test'),
        make_option(
            '-a',
            '--user-agent',
            metavar='<agent>',
            default=settings.SCANNER_USER_AGENT,
            help='use specified user agent or preset (default: %default)'),
        make_option(
            '-t',
            '--timeout',
            metavar='<seconds>',
            type='int',
            default=settings.SCANNER_TIMEOUT,
            help='network timeout for HTTP request (default: %default)'),
        make_option(
            '-m',
            '--max-read',
            metavar='<bytes>',
            type='int',
            default=settings.SCANNER_MAX_READ,
            help='maximum read size (default: %default)'),
        make_option(
            '-i',
            '--ipython',
            dest='do_ipython',
            default=False,
            action='store_true',
            help='open an ipython shell after link is processed'),
    ) + BaseCommand.option_list

    def handle(self, *urls, **kwargs):
        """Scan every URL in ``urls`` with the scanner named in the options.

        Options read from ``kwargs``: ``scanner`` (required), ``user_agent``,
        ``timeout``, ``max_read`` (each falling back to the matching
        ``settings.SCANNER_*`` value when missing or None) and ``do_ipython``.

        Raises CommandError when no URL is given, no scanner is named, the
        scanner module cannot be imported, or the module has no ``scanner``
        attribute.  Failures while processing an individual URL are logged
        and do not stop the remaining URLs from being processed.
        """
        if not urls:
            raise CommandError('No URLs specified')

        scanner = kwargs.pop('scanner', None)
        if scanner is None:
            raise CommandError('Must specify a scanner to use')

        # A non-empty fromlist makes __import__ return the leaf module instead
        # of the top-level package.
        try:
            module = __import__('gruntle.memebot.scanner.' + scanner,
                                globals(), locals(), ['scanner'])
        except ImportError as exc:
            raise CommandError("Couldn't import %s: %s" % (scanner, exc))

        try:
            handler = module.scanner
        except AttributeError:
            raise CommandError('No scanner is configured there')

        # Fall back to the project settings when the caller supplied no value
        # (e.g. when the command is invoked without going through optparse).
        user_agent = kwargs.pop('user_agent', None)
        if user_agent is None:
            user_agent = settings.SCANNER_USER_AGENT

        timeout = kwargs.pop('timeout', None)
        if timeout is None:
            timeout = settings.SCANNER_TIMEOUT

        max_read = kwargs.pop('max_read', None)
        if max_read is None:
            max_read = settings.SCANNER_MAX_READ

        do_ipython = kwargs.pop('do_ipython', False)

        browser = Browser(user_agent=user_agent,
                          timeout=timeout,
                          max_read=max_read)
        log = get_logger('scantest', append=True, stream=sys.stdout)
        for url in urls:
            try:
                # presumably TrapErrors turns exceptions raised inside the
                # block into TrapError -- confirm against its definition
                with TrapErrors():
                    response = browser.open(url, follow_meta_redirect=True)
                    if not response.is_valid:
                        raise ValueError('Response invalid')

                    result = handler.scan(response, log, browser)

                    log.info('Success: %r', result)

            # "except ... as" works on Python 2.6+ and Python 3 alike.
            except TrapError as exc:
                # exc.args is handed to the logger as exc_info so the original
                # traceback is included in the log record.
                log.error('Problem parsing %r', url, exc_info=exc.args)

            # The shell opens after every URL, whether or not the scan succeeded.
            if do_ipython:
                ipython()