Example #1
    def fetch_all(cls, filter_expr=None, filter_value=None, sort=None):
        # TODO: Make this use fewer resources.  Read options online and then use profiler to test each one.

        assert (filter_expr is None)==(filter_value is None)
        import settings, helpers

        def get_query():
            query = cls.all()
            if filter_expr is not None:
                assert filter_value is not None
                query = query.filter(filter_expr, filter_value)
            if sort is not None:
                query.order(sort)
            return query

        items = get_query().fetch(EXPECTED_UPPER_BOUND)
        
        if len(items) >= EXPECTED_UPPER_BOUND:
            if settings.DEBUG:
                assert False, "Upper bound is apparently not big enough."
            else:
                helpers.log("ERROR: Upper bound is apparently not big enough.")
                items = list( get_query() )

        if cls.default_sort_key_fn is not None:
            items.sort(key=cls.default_sort_key_fn)

        return tuple(items)
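
A minimal usage sketch, assuming the method above is exposed as a @classmethod on an App Engine db.Model subclass; the Idea model, its filter expression, and the values are illustrative, not from the original project:

# Fetch every Idea attached to one question, ordered by creation time.
ideas = Idea.fetch_all(filter_expr="question_code =", filter_value="abc123", sort="created")

# Fetch everything, relying on default_sort_key_fn for the final ordering.
all_ideas = Idea.fetch_all()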
Example #2
    def set_up_environment (self, settings, ontodir):
        """
        Sets up the XDG_*_HOME variables and makes sure the directories exist.

        Settings should be a dict mapping schema names to dicts that hold the
        settings that should be changed in those schemas. The content dicts
        should map key->value, where key is a key name and value is a suitable
        GLib.Variant instance.
        """

        helpers.log ("[Conf] Setting test environment...")

        for var, directory in TEST_ENV_DIRS.iteritems ():
            helpers.log ("export %s=%s" %(var, directory))
            self.__recreate_directory (directory)
            os.environ [var] = directory

        for directory in EXTRA_DIRS:
            self.__recreate_directory (directory)

        if ontodir:
            helpers.log ("export %s=%s" % ("TRACKER_DB_ONTOLOGIES_DIR", ontodir))
            os.environ ["TRACKER_DB_ONTOLOGIES_DIR"] = ontodir

        for var, value in TEST_ENV_VARS.iteritems ():
            helpers.log ("export %s=%s" %(var, value))
            os.environ [var] = value

        # Previous loop should have set DCONF_PROFILE to the test location
        if settings is not None:
            self._apply_settings(settings)

        helpers.log ("[Conf] environment ready")
Example #3
def extract_sold_by(product_page_lxml):
	"""Extracts who the product is sold by given a product page html in
	unicode"""
	sold_by = u'Not Sold by Amazon'
	
	try:
		results = product_page_lxml.cssselect('div.buying')
		for search_result in results:
			search_result = search_result.text_content()
			if 'Fulfilled by Amazon' in search_result:
				sold_by = u'Fulfilled by Amazon'
				break
			elif 'Ships from and sold by Amazon.com' in search_result:
				sold_by = u'Sold by Amazon'
				break
			elif 'Ships from and sold by Amazon Digital Services' in \
			search_result:
				sold_by = u'Sold by Amazon'
				break
			else:
				continue
	except Exception:
		helpers.log('Sold by error on page')
		sold_by = 'Error'
	
	return sold_by
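
A usage sketch, assuming the product page HTML has already been fetched; only lxml is required and the file name is a placeholder:

import lxml.html

with open("product_page.html") as f:
    page = lxml.html.fromstring(f.read())

print(extract_sold_by(page))  # e.g. u'Fulfilled by Amazon'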
Example #4
    def send_statistics():
        if GlobalVars.metasmoke_down:
            log('warning', "Metasmoke is down, not sending statistics")
            return

        GlobalVars.posts_scan_stats_lock.acquire()
        if GlobalVars.post_scan_time != 0:
            posts_per_second = GlobalVars.num_posts_scanned / GlobalVars.post_scan_time
            payload = {'key': GlobalVars.metasmoke_key,
                       'statistic': {'posts_scanned': GlobalVars.num_posts_scanned, 'api_quota': GlobalVars.apiquota,
                                     'post_scan_rate': posts_per_second}}
        else:
            payload = {'key': GlobalVars.metasmoke_key,
                       'statistic': {'posts_scanned': GlobalVars.num_posts_scanned, 'api_quota': GlobalVars.apiquota}}

        GlobalVars.post_scan_time = 0
        GlobalVars.num_posts_scanned = 0
        GlobalVars.posts_scan_stats_lock.release()

        headers = {'Content-type': 'application/json'}

        if GlobalVars.metasmoke_host is not None:
            log('info', 'Sent statistics to metasmoke: ', payload['statistic'])
            Metasmoke.post("/statistics.json",
                           data=json.dumps(payload), headers=headers)
Example #5
    def _check_batch(saved):
        if time.time() < DeletionWatcher.next_request_time:
            time.sleep(DeletionWatcher.next_request_time - time.time())

        for site, posts in saved.items():
            ids = ";".join(post_id for post_id in posts if not DeletionWatcher._ignore((post_id, site)))
            uri = "https://api.stackexchange.com/2.2/posts/{}".format(ids)
            params = {
                'site': site,
                'key': 'IAkbitmze4B8KpacUfLqkw(('
            }
            res = requests.get(uri, params=params)
            json = res.json()

            if "items" not in json:
                log('warning',
                    'DeletionWatcher API request received no items in response (code {})'.format(res.status_code))
                log('warning', res.text)
                return

            if 'backoff' in json:
                DeletionWatcher.next_request_time = time.time() + json['backoff']

            for post in json['items']:
                if time.time() - post["creation_date"] < 7200:
                    yield to_protocol_relative(post["link"]).replace("/q/", "/questions/")
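
A sketch of how the generator might be consumed; saved maps a site domain to the post IDs being watched, and the IDs below are made up:

saved = {"stackoverflow.com": ["61234567", "61234568"]}
for link in DeletionWatcher._check_batch(saved):
    # protocol-relative question links for posts the API still returns
    # and that are less than two hours old
    print(link)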
Example #6
 def getRandomIdeas(questionObj, ideas, size=5):
     numIdeas = len(ideas) if ideas else 0
     if numIdeas >= size:
         return random.sample(ideas, size)
     else:
         helpers.log("WARNING: Cannot return {0} random ideas since only {1} ideas available".format(size, numIdeas))
         return []
Example #7
 def getPerson(question=None, nickname=None):
     person = None
     
     # check if person id stored in session
     # if so use to retrieve logged in user 
     session = gaesessions.get_current_session()
     person_id = session.pop("new_person_id") if session.has_key("new_person_id") else None
     if person_id:
         person = Person.get_by_id(person_id)
         # check if person id stored in session corresponds to inputs
         if not person:
             person = None
             helpers.log("WARNING: Person not found by id {0}".format(person_id))
         elif question and question != person.question:
             person = None
         elif nickname and nickname != person.nickname:
             person = None
     
     if not person:
         user = users.get_current_user()
         if question:
             if question.nicknameAuthentication:
                 # if no nickname provided, check session
                 if not nickname:
                     questionSessionValues = session.get(question.code)
                     nickname = questionSessionValues["nickname"] if questionSessionValues else None
                 if nickname:
                     person = Person.all().filter("question =", question).filter("nickname =", nickname).get()
             elif user is not None:
                 person = Person.all().filter("question =", question).filter("user =", user).get()
         elif user is not None:
             person = Person.all().filter("question =", None).filter("user =", user).get()
     return person
Example #8
def _send_update(from_person, to_person, *updates):
    from model import Student, Teacher
    from client_id_utils import timestamp_for_client_id
#    from settings import CHANNEL_LIMIT_PER_STUDENT
    from helpers import log

    assert isinstance(to_person, (Student,Teacher)), repr(to_person)

    import datetime
    timestamp = datetime.datetime.now()
    updates_list = list(updates)
    updates_list[0]['timestamp'] = timestamp.strftime('%B %d, %Y %H:%M:%S')
    updates_json = json.dumps(updates_list)

    # Sort and dedupe client_ids by timestamp, descending
    client_ids = set(to_person.client_ids)
    key_fn = lambda client_id:timestamp_for_client_id(client_id)
    client_ids = sorted(client_ids, key=key_fn, reverse=True)
    
#    if CHANNEL_LIMIT_PER_STUDENT is not None and len(client_ids) > CHANNEL_LIMIT_PER_STUDENT and isinstance(to_person, Student):
#        client_ids = client_ids[:CHANNEL_LIMIT_PER_STUDENT]
#        log( "=> WARNING: Found %d client IDs for %r but only using %d"%(len(to_person.client_ids), to_person, CHANNEL_LIMIT_PER_STUDENT) )

    if len(client_ids)==0:
        log("=> MESSAGE NOT SENT. No current client IDs for {0}.".format(to_person))
    
    for client_id in client_ids:
        log( "client ID : %r : sent %s"%(client_id," + ".join(u["type"] for u in updates)) )
        channel.send_message(client_id, updates_json)
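
An illustrative call, with teacher and student standing in for the from/to Person instances; the update dicts and their extra keys are assumptions, but each update needs at least a "type" field and the first one must be a dict so the timestamp can be attached:

_send_update(teacher, student,
             {"type": "new_activity", "activity_code": "abc123"},
             {"type": "refresh_wall"})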
Example #9
def main():
    while True:
        data = read(API_URL)
        result = parse(data)
        log(repr(result))
        save_to_db(result)
        sleep(TIMEOUT)
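
The loop presumes a few module-level helpers and constants; read appears in Example #30, and the rest is a minimal sketch of assumed names, not the original project's code:

from time import sleep

API_URL = "https://example.com/api/data.json"   # polled endpoint (placeholder)
TIMEOUT = 60                                    # seconds between polls

def parse(data):
    # reduce the raw JSON to the subset worth persisting
    return data

def save_to_db(result):
    pass  # e.g. insert into a local database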
Example #10
    def send_stats_on_post(title, link, reasons, body, username, user_link, why, owner_rep,
                           post_score, up_vote_count, down_vote_count):
        if GlobalVars.metasmoke_host is None:
            log('info', "Metasmoke location not defined, not reporting")
            return

        metasmoke_key = GlobalVars.metasmoke_key

        try:
            if len(why) > 1024:
                why = why[:512] + '...' + why[-512:]

            post = {'title': title, 'link': link, 'reasons': reasons,
                    'body': body, 'username': username, 'user_link': user_link,
                    'why': why, 'user_reputation': owner_rep, 'score': post_score,
                    'upvote_count': up_vote_count, 'downvote_count': down_vote_count}

            # Remove None values (if they somehow manage to get through)
            post = dict((k, v) for k, v in post.items() if v)

            payload = {'post': post, 'key': metasmoke_key}
            headers = {'Content-type': 'application/json'}
            requests.post(GlobalVars.metasmoke_host + "/posts.json", data=json.dumps(payload), headers=headers)
        except Exception as e:
            log('error', e)
Example #11
    def _openFile(self):
        fileName = QtGui.QFileDialog.getOpenFileName(self, "OpenImage", "src", "Bitmaps (*.bmp)")

        if fileName == "":
            h.warn("No file selected.")
            return
        h.log("Loaded!")
        #self.pixmapItem = QtGui.QPixmap(fileName)
        #item = QtGui.QGraphicsPixmapItem(self.pixmapItem)
        #self.scene.addItem(item)

        item = QtGui.QStandardItem(os.path.basename(str(fileName)))
        item.imageFileName = str(fileName)
        orgImg = Image.open(str(fileName), mode='r').convert()
        item.image = MImage.pil_to_array(orgImg.convert('L'))
        item.pixmap = QtGui.QPixmap(fileName)

        #item.setCheckable(True)

        self.model.appendRow(item)
        self.ui.imageListView.setModel(self.model)


        self._checkButtons()
Example #12
    def reload():
        commit = git_commit_info()
        censored_committer_names = GlobalVars.censored_committer_names
        if md5(commit['author'][0].encode('utf-8')).hexdigest() in censored_committer_names:
            commit['author'] = censored_committer_names[md5(commit['author'][0].encode('utf-8')).hexdigest()]
        GlobalVars.commit = commit

        GlobalVars.commit_with_author = "`{}` ({}: {})".format(
            commit['id'],
            commit['author'][0] if type(commit['author']) in {list, tuple} else commit['author'],
            commit['message'])

        GlobalVars.on_master = "HEAD detached" not in git_status()
        GlobalVars.s = "[ {} ] SmokeDetector started at [rev {}]({}/commit/{}) (running on {}, Python {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit['id'], GlobalVars.location, platform.python_version())
        GlobalVars.s_reverted = \
            "[ {} ] SmokeDetector started in [reverted mode](" \
            "https://charcoal-se.org/smokey/SmokeDetector-Statuses#reverted-mode) " \
            "at [rev {}]({}/commit/{}) (running on {})".format(
                GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
                GlobalVars.commit['id'], GlobalVars.location)
        GlobalVars.s_norestart = "[ {} ] Blacklists reloaded at [rev {}]({}/commit/{}) (running on {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit['id'], GlobalVars.location)
        GlobalVars.s_norestart2 = "[ {} ] FindSpam module reloaded at [rev {}]({}/commit/{}) (running on {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit['id'], GlobalVars.location)
        GlobalVars.standby_message = \
            "[ {} ] SmokeDetector started in [standby mode](" \
            "https://charcoal-se.org/smokey/SmokeDetector-Statuses#standby-mode) " \
            "at [rev {}]({}/commit/{}) (running on {})".format(
                GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
                GlobalVars.commit['id'], GlobalVars.location)
        log('debug', "GlobalVars loaded")
Example #13
 def send(self, dstip, data):
     debug2('UDP: sending to %r port %d\n' % dstip)
     try:
         self.sock.sendto(data, dstip)
     except socket.error, e:
         log('UDP send to %r port %d: %s\n' % (dstip[0], dstip[1], e))
         return
Example #14
    def set_up_environment(self, gsettings, ontodir):
        """
        Sets up the XDG_*_HOME variables and makes sure the directories exist.

        gsettings is a list of triplets (schema, key, value) that will be set/unset via GSettings.
        """

        assert not gsettings or type(gsettings) is list

        helpers.log("[Conf] Setting test environment...")

        for var, directory in TEST_ENV_DIRS.iteritems():
            helpers.log("export %s=%s" % (var, directory))
            self.__recreate_directory(directory)
            os.environ[var] = directory

        for directory in EXTRA_DIRS:
            self.__recreate_directory(directory)

        if ontodir:
            helpers.log("export %s=%s" % ("TRACKER_DB_ONTOLOGIES_DIR", ontodir))
            os.environ["TRACKER_DB_ONTOLOGIES_DIR"] = ontodir

        for var, value in TEST_ENV_VARS.iteritems():
            helpers.log("export %s=%s" % (var, value))
            os.environ[var] = value

        # Previous loop should have set DCONF_PROFILE to the test location
        if gsettings:
            self.dconf = DConfClient()
            self.dconf.reset()
            for (schema, key, value) in gsettings:
                self.dconf.write(schema, key, value)

        helpers.log("[Conf] environment ready")
Example #15
def begin_crawl():

    # explode out all of our category `start_urls` into subcategories
    with open(settings.start_file, "r") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue  # skip blank and commented out lines

            page, html = make_request(line)
            count = 0

            # look for subcategory links on this page
            subcategories = page.findAll("div", "bxc-grid__image")  # downward arrow graphics
            subcategories.extend(page.findAll("li", "sub-categories__list__item"))  # carousel hover menu
            sidebar = page.find("div", "browseBox")
            if sidebar:
                subcategories.extend(sidebar.findAll("li"))  # left sidebar

            for subcategory in subcategories:
                link = subcategory.find("a")
                if not link:
                    continue
                link = link["href"]
                count += 1
                enqueue_url(link)

            log("Found {} subcategories on {}".format(count, line))
Example #16
    def reload():
        GlobalVars.commit = commit = git_commit_info()

        GlobalVars.commit_with_author = "`{}` ({}: {})".format(
            commit.id, commit.author, commit.message)

        GlobalVars.on_branch = git_ref()
        GlobalVars.s = "[ {} ] SmokeDetector started at [rev {}]({}/commit/{}) (running on {}, Python {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit.id, GlobalVars.location, platform.python_version())
        GlobalVars.s_reverted = \
            "[ {} ] SmokeDetector started in [reverted mode](" \
            "https://charcoal-se.org/smokey/SmokeDetector-Statuses#reverted-mode) " \
            "at [rev {}]({}/commit/{}) (running on {})".format(
                GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
                GlobalVars.commit.id, GlobalVars.location)
        GlobalVars.s_norestart = "[ {} ] Blacklists reloaded at [rev {}]({}/commit/{}) (running on {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit.id, GlobalVars.location)
        GlobalVars.s_norestart2 = "[ {} ] FindSpam module reloaded at [rev {}]({}/commit/{}) (running on {})".format(
            GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
            GlobalVars.commit.id, GlobalVars.location)
        GlobalVars.standby_message = \
            "[ {} ] SmokeDetector started in [standby mode](" \
            "https://charcoal-se.org/smokey/SmokeDetector-Statuses#standby-mode) " \
            "at [rev {}]({}/commit/{}) (running on {})".format(
                GlobalVars.chatmessage_prefix, GlobalVars.commit_with_author, GlobalVars.bot_repository,
                GlobalVars.commit.id, GlobalVars.location)
        log('debug', "GlobalVars loaded")
Example #17
def handle_slice_sampler_exception(exception, starting_point, proposal_measure, opt_compwise=False):
    '''
    Handles slice sampler exceptions. If the slice sampler shrank to zero, it is restarted
    a few times with the same starting point. If that fails, or if the exception was a
    different one, the given exception is re-raised.
    Args:
        exception: the exception that occurred
        starting_point: the starting point that was used
        proposal_measure: the proposal measure that was used
        opt_compwise: how to set the compwise option
    Returns:
        the output of the slice sampler
    Raises:
        Exception: the first argument
    '''
    if exception.message == "Slice sampler shrank to zero!":
        log("Slice sampler shrank to zero! Action: trying to restart " + str(NUMBER_OF_RESTARTS)
            + " times with same starting point")
        restarts_left = NUMBER_OF_RESTARTS
        while restarts_left > 0:
            try:
                return slice_sample(starting_point, proposal_measure, compwise=opt_compwise)
            except Exception as e:
                log("Restart failed. " + str(restarts_left) + " restarts left. Exception was: " + e.message)
                restarts_left = restarts_left - 1
        # if we leave the while loop we will raise the exception we got
    raise exception
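
A sketch of the call pattern this helper is written for; slice_sample, the starting point, and the proposal measure are assumed to come from the surrounding module:

try:
    sample = slice_sample(start, proposal_measure, compwise=False)
except Exception as e:
    # retries up to NUMBER_OF_RESTARTS times, then re-raises
    sample = handle_slice_sampler_exception(e, start, proposal_measure, opt_compwise=False)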
Example #18
    def __init__(self, progress, total, message, newline=True):
        if 'linux' in sys.platform:
            message = "{message:<20}".format(message=message)
        elif 'darwin' in sys.platform:
            message = """  \033[38;5;204m{message:<20}\033[0m """.format(message=message)

        import time
        time.sleep(0.01)

        progress += 1
        percentage = progress * 10 // total  # number of filled segments in a 10-segment bar
        percentage_left = 10 - percentage

        bar = '['
        bar += percentage * log('*', colour="white", ret=True)
        bar += percentage_left * log('*', colour="black", ret=True)
        bar += ']'
        bar += ' {}'.format(progress)

        string = ''
        if progress != total:
            if progress == 1 and newline:
                string = '\n\r {message} {bar}'.format(message=message, bar=bar)
            else:
                string = '\r {message} {bar}'.format(message=message, bar=bar)
        else:
            string = '\r {message} {bar}'.format(message=message, bar=bar)

        sys.stdout.write(string)
        sys.stdout.flush()
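
An illustrative driver loop; the class name ProgressBar is an assumption, since the snippet only shows its __init__, and the bar is redrawn each time an instance is constructed:

total = 10
for i in range(total):
    ProgressBar(i, total, "Processing")  # redraws the 10-segment bar for step i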
Example #19
def sample_hyperparameters_gp(mcmc_iters, noiseless, input_points, func_values, cov_func, noise, amp2, ls):
    '''
    Samples hyper parameters for Gaussian processes.
    Args:
        mcmc_iters: the number of hyper-parameter samples required
        noiseless: the modeled function is noiseless
        input_points: all the points that have been evaluated so far
        func_values: the corresponding observed function values
        cov_func: the covariance function the Gaussian process uses
        noise: a starting value for the noise
        amp2: a starting value for the amplitude
        ls: an array of starting values for the length scales (size has to be the dimension of the input points)
    Returns:
        a list of hyper-parameter tuples
        the tuples are of the form (mean, noise, amplitude, [length-scales])
    '''
    mean = np.mean(func_values)
    hyper_samples = []
    # sample hyper parameters
    for i in xrange(0, mcmc_iters):
        if noiseless:
            noise = 1e-3
            [mean, amp2] = _sample_mean_amp_noise(input_points, func_values, cov_func, np.array([mean, amp2]), ls)
        else:
            [mean, amp2, noise] = _sample_mean_amp_noise(input_points, func_values, cov_func, np.array([mean, amp2, noise]), ls)
        ls = _sample_ls(input_points, func_values, cov_func, ls, mean, amp2, noise)
        #This is the order as expected
        #log("mean: " + str(mean) + ", noise: " + str(noise) + " amp: " + str(amp2) + ", ls: " + str(ls))
        hyper_samples.append((mean, noise, amp2, ls))

    samples = []
    for i in xrange(0, mcmc_iters - 1, mcmc_iters / 10):
        samples.append(hyper_samples[i])
        log("mean: " + str(hyper_samples[i][0]) + ", noise: " + str(hyper_samples[i][1]) + " amp: " + str(hyper_samples[i][2]) + ", ls: " + str(hyper_samples[i][3]))
    return samples
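
A hypothetical call; X, y and cov_func are placeholders for the evaluated points, their observed values and whatever kernel the module provides:

import numpy as np

X = np.random.rand(20, 3)        # 20 evaluated points in 3 dimensions (illustrative)
y = np.sin(X).sum(axis=1)        # their observed function values (illustrative)

hyper_samples = sample_hyperparameters_gp(
    mcmc_iters=100, noiseless=False,
    input_points=X, func_values=y, cov_func=cov_func,
    noise=1e-3, amp2=1.0, ls=np.ones(X.shape[1]))

for mean, noise, amp2, ls in hyper_samples:
    print(mean, noise, amp2, ls)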
Example #20
    def determine_if_autoflagged(post_url):
        """
        Given the URL for a post, determine whether or not it has been autoflagged.
        """
        payload = {
            'key': GlobalVars.metasmoke_key,
            'filter': 'GKNJKLILHNFMJLFKINGJJHJOLGFHJF',  # id and autoflagged
            'urls': post_url
        }
        try:
            response = Metasmoke.get("/api/v2.0/posts/urls", params=payload).json()
        except Exception as e:
            log('error', e)
            return False, []

        if len(response["items"]) > 0 and response["items"][0]["autoflagged"]:
            # get flagger names
            id = str(response["items"][0]["id"])
            payload = {'key': GlobalVars.metasmoke_key}

            flags = Metasmoke.get("/api/v2.0/posts/" + id + "/flags", params=payload).json()

            if len(flags["items"]) > 0:
                return True, [user["username"] for user in flags["items"][0]["autoflagged"]["users"]]

        return False, []
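
A usage sketch, assuming the method is exposed on the same Metasmoke class its own Metasmoke.get calls reference; the URL is made up:

flagged, users = Metasmoke.determine_if_autoflagged("//stackoverflow.com/questions/12345678")
if flagged:
    log('info', "Autoflagged by: " + ", ".join(users))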
Example #21
    def subscribe(self, post_url, callback=None, pickle=True, timeout=None):
        post_id, post_site, post_type = fetch_post_id_and_site_from_url(post_url)

        if post_site not in GlobalVars.site_id_dict:
            log("warning", "unknown site {} when subscribing to {}".format(post_site, post_url))
            return

        if post_type == "answer":
            question_id = datahandling.get_post_site_id_link((post_id, post_site, post_type))

            if question_id is None:
                return
        else:
            question_id = post_id

        site_id = GlobalVars.site_id_dict[post_site]
        action = "{}-question-{}".format(site_id, question_id)
        max_time = (time.time() + timeout) if timeout else None

        if action not in self.posts:
            self.posts[action] = (post_id, post_site, post_type, post_url, [(callback, max_time)] if callback else [])
            try:
                self.socket.send(action)
            except websocket.WebSocketException:
                log('error', 'DeletionWatcher failed on sending {}'.format(action))
        elif callback:
            _, _, _, _, callbacks = self.posts[action]
            callbacks.append((callback, max_time))
        else:
            return

        if pickle:
            Tasks.do(self._save)
Example #22
    def determine_if_autoflagged(post_url):
        """
        Given the URL for a post, determine whether or not it has been autoflagged.
        """
        payload = {
            'key': GlobalVars.metasmoke_key,
            'filter': 'GFGJGHFMHGOLMMJMJJJGHIGOMKFKKILF',  # id and autoflagged
            'urls': post_url
        }
        try:
            response = Metasmoke.get("/api/v2.0/posts/urls", params=payload).json()
        except Exception as e:
            log('error', e)
            return False, []

        # The first report of a URL is the only one that will be autoflagged. MS responses to the
        # /posts/urls endpoint have the oldest report last.
        first_report_index = len(response["items"]) - 1
        if first_report_index > -1 and response["items"][first_report_index]["autoflagged"]:
            # get flagger names
            id = str(response["items"][first_report_index]["id"])
            payload = {'key': GlobalVars.metasmoke_key}

            flags = Metasmoke.get("/api/v2.0/posts/" + id + "/flags", params=payload).json()

            if len(flags["items"]) > 0:
                return True, [user["username"] for user in flags["items"][0]["autoflagged"]["users"]]

        return False, []
Example #23
def setup_websocket(attempt, max_attempts):
    try:
        ws = websocket.create_connection("wss://qa.sockets.stackexchange.com/")
        ws.send("155-questions-active")
        return ws
    except websocket.WebSocketException:
        log('warning', 'WS failed to create websocket connection. Attempt {} of {}.'.format(attempt, max_attempts))
        return None
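
A sketch of the retry loop this helper is meant to sit in:

max_attempts = 5
ws = None
for attempt in range(1, max_attempts + 1):
    ws = setup_websocket(attempt, max_attempts)
    if ws:
        break
if ws is None:
    log('error', 'Failed to create a websocket connection after {} attempts.'.format(max_attempts))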
Example #24
def log_exception(exctype, value, tb):
    now = datetime.utcnow()
    tr = '\n'.join((traceback.format_tb(tb)))
    exception_only = ''.join(traceback.format_exception_only(exctype, value)).strip()
    logged_msg = "{exception}\n{now} UTC\n{row}\n\n".format(exception=exception_only, now=now, row=tr)
    log('error', logged_msg)
    with open("errorLogs.txt", "a") as f:
        f.write(logged_msg)
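
A likely wiring, though the snippet itself does not show it: install the function as the global exception hook so uncaught exceptions are logged and appended to errorLogs.txt.

import sys
sys.excepthook = log_exception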
Example #25
 def reset (self):
     profile = os.environ ["DCONF_PROFILE"]
     assert profile == "trackertest"
     # XDG_CONFIG_HOME is useless
     dconf_db = os.path.join (os.environ ["HOME"], ".config", "dconf", profile)
     if os.path.exists (dconf_db):
         log ("[Conf] Removing dconf-profile: " + dconf_db)
         os.remove (dconf_db)
Example #26
 def tracker_store_restart_with_new_ontologies (self, ontodir):
     self.store.stop ()
     if ontodir:
         helpers.log ("[Conf] Setting %s - %s" % ("TRACKER_DB_ONTOLOGIES_DIR", ontodir))
         os.environ ["TRACKER_DB_ONTOLOGIES_DIR"] = ontodir
     try:
         self.store.start ()
     except dbus.DBusException, e:
         raise UnableToBootException ("Unable to boot the store \n(" + str(e) + ")")
Example #27
    def send_message(self, text, length_check=True):
        if "no-chat" in sys.argv:
            log('info', "Blocked message to {0} due to no-chat setting: {1}".format(self.name, text))
            return

        if "charcoal-hq-only" not in sys.argv or int(self.id) == 11540:
            return rooms.Room.send_message(self, text, length_check)
        else:
            log('info', "Blocked message to {0} due to charcoal-hq-only setting: {1}".format(self.name, text))
Example #28
 def callback(self):
     log('--no callback defined-- %r\n' % self)
     (r, w, x) = select.select(self.socks, [], [], 0)
     for s in r:
         v = s.recv(4096)
         if not v:
             log('--closed-- %r\n' % self)
             self.socks = []
             self.ok = False
Example #29
def check_if_spam_json(json_data):
    try:
        post = Post(json_data=json_data)
    except PostParseError as err:
        log('error', 'Parse error {0} when parsing json_data {1!r}'.format(
            err, json_data))
        return False, '', ''
    is_spam, reason, why = check_if_spam(post)
    return is_spam, reason, why
Example #30
def read(url):
    try:
        response = urlopen(url)
        content = response.read()
        return json.loads(content.decode('utf8'))
    except URLError as e:
        log("Could not read data!", str(e))

    return {}
Example #31
 def __init__(self, name, t):
     if name not in ('master', 'slave'):
         helpers.log("HA controller must either be 'master' or 'slave'")
     self.t = t
     self._name = name
     self.rest = HaBsnRestClient(name, t)
Example #32
    def make_api_call_for_site(self, site):
        if site not in self.queue:
            return

        self.queue_modify_lock.acquire()
        new_posts = self.queue.pop(site)
        store_bodyfetcher_queue()
        self.queue_modify_lock.release()

        new_post_ids = [int(k) for k in new_posts.keys()]

        if GlobalVars.flovis is not None:
            for post_id in new_post_ids:
                GlobalVars.flovis.stage('bodyfetcher/api_request', site,
                                        post_id, {
                                            'site': site,
                                            'posts': list(new_posts.keys())
                                        })

        self.queue_timing_modify_lock.acquire()
        post_add_times = [v for k, v in new_posts.items()]
        pop_time = datetime.utcnow()

        for add_time in post_add_times:
            try:
                seconds_in_queue = (pop_time - add_time).total_seconds()
                if site in self.queue_timings:
                    self.queue_timings[site].append(seconds_in_queue)
                else:
                    self.queue_timings[site] = [seconds_in_queue]
            except KeyError:  # XXX: Any other possible exception?
                continue  # Skip to next item if we've got invalid data or missing values.

        store_queue_timings()

        self.queue_timing_modify_lock.release()
        self.max_ids_modify_lock.acquire()

        if site in self.previous_max_ids and max(new_post_ids) > self.previous_max_ids[site]:
            previous_max_id = self.previous_max_ids[site]
            intermediate_posts = range(previous_max_id + 1, max(new_post_ids))

            # We don't want to go over the 100-post API cutoff, so take the last
            # (100-len(new_post_ids)) from intermediate_posts

            intermediate_posts = intermediate_posts[(100 - len(new_post_ids)):]

            # new_post_ids could contain edited posts, so merge it back in
            combined = chain(intermediate_posts, new_post_ids)

            # Could be duplicates, so uniquify
            posts = list(set(combined))
        else:
            posts = new_post_ids

        try:
            if max(new_post_ids) > self.previous_max_ids[site]:
                self.previous_max_ids[site] = max(new_post_ids)
                store_bodyfetcher_max_ids()
        except KeyError:
            self.previous_max_ids[site] = max(new_post_ids)
            store_bodyfetcher_max_ids()

        self.max_ids_modify_lock.release()

        log('debug', "New IDs / Hybrid Intermediate IDs for {}:".format(site))
        if len(new_post_ids) > 30:
            log(
                'debug', "{} +{} more".format(
                    sorted(new_post_ids)[:30],
                    len(new_post_ids) - 30))
        else:
            log('debug', sorted(new_post_ids))
        if len(new_post_ids) == len(posts):
            log('debug', "[ *Identical* ]")
        elif len(posts) > 30:
            log('debug',
                "{} +{} more".format(sorted(posts)[:30],
                                     len(posts) - 30))
        else:
            log('debug', sorted(posts))

        question_modifier = ""
        pagesize_modifier = {}

        if site == "stackoverflow.com":
            # Not all SO questions are shown in the realtime feed. We now
            # fetch all recently modified SO questions to work around that.
            if self.last_activity_date != 0:
                pagesize = "50"
            else:
                pagesize = "25"

            pagesize_modifier = {
                'pagesize': pagesize,
                'min': str(self.last_activity_date)
            }
        else:
            question_modifier = "/{0}".format(";".join(
                [str(post) for post in posts]))

        url = "https://api.stackexchange.com/2.2/questions{}".format(
            question_modifier)
        params = {
            'filter':
            '!*xq08dCDNr)PlxxXfaN8ntivx(BPlY_8XASyXLX-J7F-)VK*Q3KTJVkvp*',
            'key': 'IAkbitmze4B8KpacUfLqkw((',
            'site': site
        }
        params.update(pagesize_modifier)

        # wait to make sure API has/updates post data
        time.sleep(3)

        GlobalVars.api_request_lock.acquire()
        # Respect backoff, if we were given one
        if GlobalVars.api_backoff_time > time.time():
            time.sleep(GlobalVars.api_backoff_time - time.time() + 2)
        try:
            time_request_made = datetime.now().strftime('%H:%M:%S')
            response = requests.get(url, params=params, timeout=20).json()
        except (requests.exceptions.Timeout, requests.ConnectionError,
                Exception):
            # Any failure in the request being made (timeout or otherwise) should be added back to
            # the queue.
            self.queue_modify_lock.acquire()
            if site in self.queue:
                self.queue[site].update(new_posts)
            else:
                self.queue[site] = new_posts
            self.queue_modify_lock.release()
            GlobalVars.api_request_lock.release()
            return

        self.api_data_lock.acquire()
        add_or_update_api_data(site)
        self.api_data_lock.release()

        message_hq = ""
        if "quota_remaining" in response:
            if response["quota_remaining"] - GlobalVars.apiquota >= 5000 and GlobalVars.apiquota >= 0:
                tell_rooms_with(
                    "debug",
                    "API quota rolled over with {0} requests remaining. "
                    "Current quota: {1}.".format(GlobalVars.apiquota,
                                                 response["quota_remaining"]))

                sorted_calls_per_site = sorted(
                    GlobalVars.api_calls_per_site.items(),
                    key=itemgetter(1),
                    reverse=True)
                api_quota_used_per_site = ""
                for site_name, quota_used in sorted_calls_per_site:
                    sanitized_site_name = site_name.replace('.com', '').replace('.stackexchange', '')
                    api_quota_used_per_site += sanitized_site_name + ": {0}\n".format(str(quota_used))
                api_quota_used_per_site = api_quota_used_per_site.strip()

                tell_rooms_with("debug", api_quota_used_per_site)
                clear_api_data()
            if response["quota_remaining"] == 0:
                tell_rooms_with(
                    "debug", "API reports no quota left!  May be a glitch.")
                tell_rooms_with("debug",
                                str(response))  # No code format for now?
            if GlobalVars.apiquota == -1:
                tell_rooms_with(
                    "debug", "Restart: API quota is {quota}.".format(
                        quota=response["quota_remaining"]))
            GlobalVars.apiquota = response["quota_remaining"]
        else:
            message_hq = "The quota_remaining property was not in the API response."

        if "error_message" in response:
            message_hq += " Error: {} at {} UTC.".format(
                response["error_message"], time_request_made)
            if "error_id" in response and response["error_id"] == 502:
                # Add a backoff of 10 + 2 seconds as a default
                if GlobalVars.api_backoff_time < time.time() + 12:
                    GlobalVars.api_backoff_time = time.time() + 12
            message_hq += " Backing off on requests for the next 12 seconds."
            message_hq += " Previous URL: `{}`".format(url)

        if "backoff" in response:
            if GlobalVars.api_backoff_time < time.time() + response["backoff"]:
                GlobalVars.api_backoff_time = time.time() + response["backoff"]

        GlobalVars.api_request_lock.release()

        if len(message_hq) > 0 and "site is required" not in message_hq:
            tell_rooms_with("debug", message_hq.strip())

        if "items" not in response:
            return

        if site == "stackoverflow.com":
            items = response["items"]
            if len(items) > 0 and "last_activity_date" in items[0]:
                self.last_activity_date = items[0]["last_activity_date"]

        num_scanned = 0
        start_time = time.time()

        for post in response["items"]:
            pnb = copy.deepcopy(post)
            if 'body' in pnb:
                pnb['body'] = 'Present, but truncated'
            if 'answers' in pnb:
                del pnb['answers']

            if "title" not in post or "body" not in post:
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage(
                        'bodyfetcher/api_response/no_content', site,
                        post['question_id'], pnb)
                continue

            post['site'] = site
            try:
                post['edited'] = (post['creation_date'] != post['last_edit_date'])
            except KeyError:
                post['edited'] = False  # last_edit_date not present = not edited

            try:
                post_ = Post(api_response=post)
            except PostParseError as err:
                log('error',
                    'Error {0} when parsing post: {1!r}'.format(err, post_))
                if GlobalVars.flovis is not None and 'question_id' in post:
                    GlobalVars.flovis.stage('bodyfetcher/api_response/error',
                                            site, post['question_id'], pnb)
                continue

            num_scanned += 1

            is_spam, reason, why = check_if_spam(post_)

            if is_spam:
                try:
                    if GlobalVars.flovis is not None and 'question_id' in post:
                        GlobalVars.flovis.stage(
                            'bodyfetcher/api_response/spam', site,
                            post['question_id'], {
                                'post': pnb,
                                'check_if_spam': [is_spam, reason, why]
                            })
                    handle_spam(post=post_, reasons=reason, why=why)
                except Exception as e:
                    log('error', "Exception in handle_spam:", e)
            elif GlobalVars.flovis is not None and 'question_id' in post:
                GlobalVars.flovis.stage(
                    'bodyfetcher/api_response/not_spam', site,
                    post['question_id'], {
                        'post': pnb,
                        'check_if_spam': [is_spam, reason, why]
                    })

            try:
                if "answers" not in post:
                    pass
                else:
                    for answer in post["answers"]:
                        anb = copy.deepcopy(answer)
                        if 'body' in anb:
                            anb['body'] = 'Present, but truncated'

                        num_scanned += 1
                        answer["IsAnswer"] = True  # Necesssary for Post object
                        answer[
                            "title"] = ""  # Necessary for proper Post object creation
                        answer[
                            "site"] = site  # Necessary for proper Post object creation
                        try:
                            answer['edited'] = (answer['creation_date'] != answer['last_edit_date'])
                        except KeyError:
                            answer['edited'] = False  # last_edit_date not present = not edited
                        answer_ = Post(api_response=answer, parent=post_)

                        is_spam, reason, why = check_if_spam(answer_)
                        if is_spam:
                            try:
                                if GlobalVars.flovis is not None and 'answer_id' in answer:
                                    GlobalVars.flovis.stage(
                                        'bodyfetcher/api_response/spam', site,
                                        answer['answer_id'], {
                                            'post': anb,
                                            'check_if_spam':
                                            [is_spam, reason, why]
                                        })
                                handle_spam(answer_, reasons=reason, why=why)
                            except Exception as e:
                                log('error', "Exception in handle_spam:", e)
                        elif GlobalVars.flovis is not None and 'answer_id' in answer:
                            GlobalVars.flovis.stage(
                                'bodyfetcher/api_response/not_spam', site,
                                answer['answer_id'], {
                                    'post': anb,
                                    'check_if_spam': [is_spam, reason, why]
                                })

            except Exception as e:
                log('error', "Exception handling answers:", e)

        end_time = time.time()
        GlobalVars.posts_scan_stats_lock.acquire()
        GlobalVars.num_posts_scanned += num_scanned
        GlobalVars.post_scan_time += end_time - start_time
        GlobalVars.posts_scan_stats_lock.release()
        return