Пример #1
0
def main():
    """Log in to reddit and run the queue-checking loop until interrupted.

    Startup (login plus ``initialize``) is retried, with a short pause
    between attempts, until it succeeds.  The main loop then checks every
    queue except reports on each run, and all queues (reports included)
    on every tenth run.  Exceptions derived from ``Exception`` are logged
    and the loop continues; ``KeyboardInterrupt`` is re-raised.

    Side effects: binds the module-level ``r`` to the ``praw.Reddit``
    object created during startup.
    """
    global r
    logging.config.fileConfig(path_to_cfg)
    # NOTE(review): `re` is presumably the re2 wrapper here (the stdlib
    # module has no set_fallback_notification) -- confirm at the import.
    re.set_fallback_notification(re.FALLBACK_EXCEPTION)

    # which queues to check and the function to call
    queue_funcs = {'report': 'get_reports',
                   'spam': 'get_mod_queue',
                   'submission': 'get_new',
                   'comment': 'get_comments'}

    # seconds to wait after a failed startup attempt
    startup_retry_delay = 5

    while True:
        try:
            r = praw.Reddit(user_agent=cfg_file.get('reddit', 'user_agent'))
            logging.info('Logging in as {0}'
                         .format(cfg_file.get('reddit', 'username')))
            r.login(cfg_file.get('reddit', 'username'),
                    cfg_file.get('reddit', 'password'))
            sr_dict, cond_dict = initialize(queue_funcs.keys())
            break
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))
            # wait before retrying so a persistent failure does not turn
            # into a tight loop of login attempts and log lines
            sleep(startup_retry_delay)

    run_counter = 0
    while True:
        run_counter += 1
        try:
            # only check reports every 10 runs
            # sleep afterwards in case ^C is needed
            if run_counter % 10 == 0:
                check_queues(queue_funcs, sr_dict, cond_dict)

                Condition.clear_standard_cache()
                if process_messages():
                    sr_dict, cond_dict = initialize(queue_funcs.keys(),
                                                    reload_mod_subs=False)
                logging.info('Sleeping ({0})'.format(datetime.now()))
                sleep(5)
                run_counter = 0
            else:
                check_queues({q: queue_funcs[q]
                              for q in queue_funcs
                              if q != 'report'},
                             sr_dict, cond_dict)
                if process_messages():
                    sr_dict, cond_dict = initialize(queue_funcs.keys(),
                                                    reload_mod_subs=False)
        except (praw.errors.ModeratorRequired,
                praw.errors.ModeratorOrScopeRequired,
                HTTPError) as e:
            # HTTP errors other than 403 are ignored here
            if not isinstance(e, HTTPError) or e.response.status_code == 403:
                logging.info('Re-initializing due to {0}'.format(e))
                # an exception raised inside this handler is not seen by
                # the sibling except clauses and would end the loop, so the
                # re-initialization gets its own guard
                try:
                    sr_dict, cond_dict = initialize(queue_funcs.keys())
                except Exception as init_error:
                    logging.error('ERROR: {0}'.format(init_error))
                    session.rollback()
        except KeyboardInterrupt:
            raise
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))
            session.rollback()
Пример #2
0
Requirements:
 - `treelib`: `pip install treelib`

"""
#    Copyright (C) 2016 by
#    Rion Brattig Correia <*****@*****.**>
#    Ian B. Wood <*****@*****.**>
#    All rights reserved.
#    MIT license.
from treelib import Tree
try:
    import re2 as re
except ImportError:
    import re
else:
    re.set_fallback_notification(re.FALLBACK_WARNING)
from nltk.tokenize import PunktSentenceTokenizer, TweetTokenizer, sent_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Module authors, one "Name <email>" entry per line.
__author__ = '\n'.join((
    'Rion Brattig Correia <*****@*****.**>',
    'Ian B. Wood <*****@*****.**>',
))

# Names exported by ``from <module> import *``.
__all__ = ['TermDictionaryParser']
#


class Match(object):
    """
    """
    def __init__(self, id=None, tokens=tuple, si=None, wi=None, ti=None):
        self.id = id
Пример #3
0
import eventlet
urllib2 = eventlet.import_patched('urllib2')
eventlet.monkey_patch()

unionpool = eventlet.GreenPool(10)
"""


try:
	import re2 as re
except ImportError:
	import re
	print "Using standard library regexes"
else:
	print "Using Google RE2 regexes (fallback to standard library)"
	re.set_fallback_notification(re.FALLBACK_WARNING)


import sys, os, shutil, types, time, urllib2
from Cheetah.Template import Template
from glob import glob
from model import *
from sqlobject.dberrors import DuplicateEntryError
from BeautifulSoup import BeautifulSoup

# Default text encoding name; how it is applied is outside this excerpt.
# The UTF-8 alternative is kept commented out below.
defaultencoding = 'iso-8859-1'
#defaultencoding = 'utf-8'
# URL prefix on m.assetbar.com; presumably prepended to page paths --
# confirm at the use sites.
prefix = "http://m.assetbar.com/achewood/"

# added the explicit match for question marks to keep the urlopen() in process_monthly() from 404ing
# maybe the fine men and women at 'asset bar' changed their url scheme?
Пример #4
0
import it.
"""
from timeit import Timer
import simplejson

import re2
import re
try:
    import regex
except ImportError:
    regex = None

import os
import gzip

# Select re2's FALLBACK_EXCEPTION notification mode; presumably this makes
# re2 raise instead of quietly falling back to the stdlib `re` -- confirm
# against the re2 wrapper's documentation.
re2.set_fallback_notification(re2.FALLBACK_EXCEPTION)

# Work from the directory containing this script; dirname() can be ''
# (e.g. when the script is started by bare filename), hence the '.' fallback.
os.chdir(os.path.dirname(__file__) or '.')

# Registry of tests; the setup template below looks entries up by key.
tests = {}

# Setup-code template: the %r placeholder is filled with a key into `tests`.
# Presumably handed to timeit.Timer as its setup statement -- confirm at use.
setup_code = """\
import re2
import re
from __main__ import tests, current_re
test = tests[%r]
"""

# One-element list that the setup code imports from __main__; presumably a
# mutable slot for the regex module currently being timed -- confirm at use.
current_re = [None]

Пример #5
0
import it.
"""
from timeit import Timer
import simplejson

import re2
import re
try:
    import regex
except ImportError:
    regex = None

import os
import gzip

# Select re2's FALLBACK_EXCEPTION notification mode; presumably this makes
# re2 raise instead of quietly falling back to the stdlib `re` -- confirm
# against the re2 wrapper's documentation.
re2.set_fallback_notification(re2.FALLBACK_EXCEPTION)

# Work from the directory containing this script; dirname() can be ''
# (e.g. when the script is started by bare filename), hence the '.' fallback.
os.chdir(os.path.dirname(__file__) or '.')

# Registry of tests; the setup template below looks entries up by key.
tests = {}

# Setup-code template: the %r placeholder is filled with a key into `tests`.
# Presumably handed to timeit.Timer as its setup statement -- confirm at use.
setup_code = """\
import re2
import re
from __main__ import tests, current_re
test = tests[%r]
"""

# One-element list that the setup code imports from __main__; presumably a
# mutable slot for the regex module currently being timed -- confirm at use.
current_re = [None]

Пример #6
0
def main():
    """Log in to reddit and run the queue-checking loop until interrupted.

    Startup (login, loading the enabled subreddits and their conditions)
    is retried, with a short pause between attempts, until it succeeds.
    Each pass of the main loop then refreshes the subreddit list, reloads
    all conditions when ``Condition.update_standards()`` reports a change,
    checks the report queue once ``reports_check_period`` has elapsed,
    checks the remaining queues, and applies updates returned by
    ``process_messages()``.  Exceptions derived from ``Exception`` are
    logged and the loop continues; ``KeyboardInterrupt`` is re-raised.

    Side effects: binds the module-level ``r`` to the ``praw.Reddit``
    object created during startup.
    """
    # imported locally: the rest of this function only relies on `time()`
    from time import sleep

    global r
    logging.config.fileConfig(path_to_cfg)
    # NOTE(review): `re` is presumably the re2 wrapper here (the stdlib
    # module has no set_fallback_notification) -- confirm at the import.
    re.set_fallback_notification(re.FALLBACK_EXCEPTION)

    # which queues to check and the function to call
    queue_funcs = {'report': 'get_reports',
                   'spam': 'get_mod_queue',
                   'submission': 'get_new',
                   'comment': 'get_comments'}

    # seconds to wait after a failed startup attempt
    startup_retry_delay = 5

    while True:
        try:
            r = praw.Reddit(user_agent=cfg_file.get('reddit', 'user_agent'))
            logging.info('Logging in as {0}'
                         .format(cfg_file.get('reddit', 'username')))
            r.login(cfg_file.get('reddit', 'username'),
                    cfg_file.get('reddit', 'password'))
            sr_dict = get_enabled_subreddits()
            Condition.update_standards()
            cond_dict = load_all_conditions(sr_dict, queue_funcs.keys())
            break
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))
            # wait before retrying so a persistent failure does not turn
            # into a tight loop of login attempts and log lines
            sleep(startup_retry_delay)

    reports_mins = int(cfg_file.get('reddit', 'reports_check_period_mins'))
    reports_check_period = timedelta(minutes=reports_mins)
    last_reports_check = time()

    while True:
        try:
            sr_dict = get_enabled_subreddits(reload_mod_subs=False)

            # if the standard conditions have changed, reinit all conditions
            if Condition.update_standards():
                logging.info('Updating standard conditions from database')
                cond_dict = load_all_conditions(sr_dict, queue_funcs.keys())

            # check reports if past checking period
            if elapsed_since(last_reports_check) > reports_check_period:
                last_reports_check = time()
                check_queues({'report': queue_funcs['report']},
                             sr_dict, cond_dict)

            check_queues({q: queue_funcs[q]
                          for q in queue_funcs
                          if q != 'report'},
                         sr_dict, cond_dict)

            updated_srs = process_messages()
            if updated_srs:
                # an updated subreddit missing from sr_dict triggers a
                # reload with reload_mod_subs=True
                if any(sr not in sr_dict for sr in updated_srs):
                    sr_dict = get_enabled_subreddits(reload_mod_subs=True)
                else:
                    sr_dict = get_enabled_subreddits(reload_mod_subs=False)
                for sr in updated_srs:
                    update_conditions_for_sr(cond_dict,
                                             queue_funcs.keys(),
                                             sr_dict[sr])
        except (praw.errors.ModeratorRequired,
                praw.errors.ModeratorOrScopeRequired,
                HTTPError) as e:
            # HTTP errors other than 403 are ignored here
            if not isinstance(e, HTTPError) or e.response.status_code == 403:
                logging.info('Re-initializing due to {0}'.format(e))
                # an exception raised inside this handler is not seen by
                # the sibling except clauses and would end the loop, so the
                # re-initialization gets its own guard
                try:
                    sr_dict = get_enabled_subreddits()
                except Exception as init_error:
                    logging.error('ERROR: {0}'.format(init_error))
                    session.rollback()
        except KeyboardInterrupt:
            raise
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))
            session.rollback()
Пример #7
0
def main():
    """Log in to reddit and run the queue-checking loop until interrupted.

    Startup (login, loading the enabled subreddits and their conditions)
    is retried, with a short pause between attempts, until it succeeds.
    Each pass of the main loop then refreshes the subreddit list, reloads
    all conditions when ``Condition.update_standards()`` reports a change,
    checks the report queue once ``reports_check_period`` has elapsed,
    checks the remaining queues, and applies updates returned by
    ``process_messages()``.  Exceptions derived from ``Exception`` are
    logged and the loop continues; ``KeyboardInterrupt`` is re-raised.

    Side effects: binds the module-level ``r`` to the ``praw.Reddit``
    object created during startup.
    """
    # imported locally: the rest of this function only relies on `time()`
    from time import sleep

    global r
    logging.config.fileConfig(path_to_cfg)
    # NOTE(review): `re` is presumably the re2 wrapper here (the stdlib
    # module has no set_fallback_notification) -- confirm at the import.
    re.set_fallback_notification(re.FALLBACK_EXCEPTION)

    # which queues to check and the function to call
    queue_funcs = {
        'report': 'get_reports',
        'spam': 'get_mod_queue',
        'submission': 'get_new',
        'comment': 'get_comments'
    }

    # seconds to wait after a failed startup attempt
    startup_retry_delay = 5

    while True:
        try:
            r = praw.Reddit(user_agent=cfg_file.get('reddit', 'user_agent'))
            logging.info('Logging in as {0}'.format(
                cfg_file.get('reddit', 'username')))
            r.login(cfg_file.get('reddit', 'username'),
                    cfg_file.get('reddit', 'password'))
            sr_dict = get_enabled_subreddits()
            Condition.update_standards()
            cond_dict = load_all_conditions(sr_dict, queue_funcs.keys())
            break
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))
            # wait before retrying so a persistent failure does not turn
            # into a tight loop of login attempts and log lines
            sleep(startup_retry_delay)

    reports_mins = int(cfg_file.get('reddit', 'reports_check_period_mins'))
    reports_check_period = timedelta(minutes=reports_mins)
    last_reports_check = time()

    while True:
        try:
            sr_dict = get_enabled_subreddits(reload_mod_subs=False)

            # if the standard conditions have changed, reinit all conditions
            if Condition.update_standards():
                logging.info('Updating standard conditions from database')
                cond_dict = load_all_conditions(sr_dict, queue_funcs.keys())

            # check reports if past checking period
            if elapsed_since(last_reports_check) > reports_check_period:
                last_reports_check = time()
                check_queues({'report': queue_funcs['report']}, sr_dict,
                             cond_dict)

            check_queues(
                {q: queue_funcs[q]
                 for q in queue_funcs if q != 'report'}, sr_dict, cond_dict)

            updated_srs = process_messages()
            if updated_srs:
                # an updated subreddit missing from sr_dict triggers a
                # reload with reload_mod_subs=True
                if any(sr not in sr_dict for sr in updated_srs):
                    sr_dict = get_enabled_subreddits(reload_mod_subs=True)
                else:
                    sr_dict = get_enabled_subreddits(reload_mod_subs=False)
                for sr in updated_srs:
                    update_conditions_for_sr(cond_dict, queue_funcs.keys(),
                                             sr_dict[sr])
        except (praw.errors.ModeratorRequired,
                praw.errors.ModeratorOrScopeRequired, HTTPError) as e:
            # HTTP errors other than 403 are ignored here
            if not isinstance(e, HTTPError) or e.response.status_code == 403:
                logging.info('Re-initializing due to {0}'.format(e))
                # an exception raised inside this handler is not seen by
                # the sibling except clauses and would end the loop, so the
                # re-initialization gets its own guard
                try:
                    sr_dict = get_enabled_subreddits()
                except Exception as init_error:
                    logging.error('ERROR: {0}'.format(init_error))
                    session.rollback()
        except KeyboardInterrupt:
            raise
        except Exception as e:
            logging.error('ERROR: {0}'.format(e))
            session.rollback()