Exemplo n.º 1
0
    def _handle_page(self, title, text):
        """Clean the raw text of one page and pass it on to ``handle_page``.

        Every call increments ``self.n_pages``. Pages rejected by
        ``self.filter_page_raw(title, text)`` are dropped without further
        work. For the remaining pages only the text before the first ``==``
        is kept, the instance's regex patterns are applied in a fixed order,
        and the stripped result is handed to ``self.handle_page``.
        """
        self.n_pages += 1
        if not self.filter_page_raw(title, text):
            return

        # Keep only what precedes the first '==' marker.
        if '==' in text:
            text = text[:text.index('==')]

        def second_group(match):
            # groups(0) yields 0 for any group that did not participate.
            return match.groups(0)[1]

        entity_chars = {
            'mdash': '-', 'ndash': '-', 'quot': '"',
            'amp': '&', 'lt': '<', 'gt': '>',
        }

        def decode_entity(match):
            # An entity name missing from the table raises KeyError.
            return entity_chars[match.groups()[0]]

        # Order matters: each step works on the output of the previous one.
        cleanup_steps = (
            (self.braces_pattern, ''),
            (self.wikiref_pattern, second_group),
            (self.wikiref2_pattern, ''),
            (self.extraref_pattern, second_group),
            (self.extraref2_pattern, ''),
            (self.xml_tag_pattern, ''),
            (self.paren_fixup_pattern, '('),
            (self.space_fixup_pattern, ' '),
            (self.entity_fixup_pattern, decode_entity),
        )
        for pattern, replacement in cleanup_steps:
            text = pattern.sub(replacement, text)

        self.handle_page(title, text.strip())
def process_gh_project_names(text):
    """Break a project name into space-separated pieces.

    A name containing ``-`` has every hyphen replaced by a space. Any other
    name gets a space inserted before each character for which
    ``str.islower()`` is false (uppercase letters, but also digits and
    punctuation). A leading uppercase character is lowercased first, so it
    does not produce a leading space.

    Args:
        text: The project name.

    Returns:
        The transformed name, or an empty string when ``text`` is empty.
    """
    if not text:
        # Indexing text[0] below would raise IndexError on empty input.
        return ''
    if '-' in text:
        return text.replace('-', ' ')

    first = text[0]
    if first.isupper():
        first = first.lower()
    return ''.join(
        ch if ch.islower() else ' ' + ch for ch in first + text[1:])
def Tokenize(text):
    """Split ``text`` on whitespace, keeping known multi-word entries together.

    Entries from the module-level ``category``, ``sub_category``, ``brand``
    and ``article`` collections that consist of more than one word are
    registered with an ``MWETokenizer`` before tokenizing.

    Args:
        text: The string to tokenize.

    Returns:
        A list of tokens in which every ``_`` has been replaced by a space.
    """
    tokenizer = MWETokenizer(category.all())
    for vocabulary in (category, sub_category, brand, article):
        for word in vocabulary:
            # The old test `if word.find(' ')` was wrong: find() returns -1
            # (truthy) when there is no space and 0 (falsy) for a leading
            # space. Check the actual word count instead.
            parts = word.split()
            if len(parts) > 1:
                tokenizer.add_mwe(parts)

    # Presumably the tokenizer joins matched expressions with "_" (NLTK's
    # default separator) - confirm. Underscores in the input are replaced too.
    return [
        token.replace("_", " ") for token in tokenizer.tokenize(text.split())
    ]
Exemplo n.º 4
0
def build_tsquery( text ):
    """Build an ``&``-joined query string from free text.

    The text is first passed through the ``gazetteer.gaz_plainText2``
    database function via ``scalar`` and split on whitespace. A trailing
    ``*`` on any term is rewritten to ``:*``.
    """
    plain_text = scalar('select gazetteer.gaz_plainText2(:text)', text=text)
    terms = [re.sub(r'\*$', ':*', word) for word in plain_text.split()]
    return ' & '.join(terms)
Exemplo n.º 5
0
def preprocess_search_query(text):
    """Turn free text into an ``&``-joined list of prefix-match terms.

    Every non-word character is treated as a separator. Each remaining
    token gets ``:*`` appended and the tokens are joined with ``' & '``.

    Args:
        text: The raw search string.

    Returns:
        The query string, or an empty string when ``text`` has no word
        characters.
    """
    # flags must be a keyword: the fourth positional parameter of re.sub is
    # `count`, so passing re.UNICODE (32) there capped the substitution at
    # the first 32 non-word characters.
    cleaned = re.sub(r'\W', ' ', text, flags=re.UNICODE)
    # split() already yields whitespace-free tokens; no strip() is needed.
    return ' & '.join('%s:*' % token for token in cleaned.split())
Exemplo n.º 6
0
def build_tsquery(text):
    """Return ``text`` as query terms joined by ``' & '``.

    ``scalar`` runs the ``gazetteer.gaz_plainText2`` database function on
    the text. The result is split on whitespace, and any term ending in
    ``*`` has that final ``*`` replaced by ``:*``.
    """
    plain_text = scalar('select gazetteer.gaz_plainText2(:text)', text=text)

    def as_term(word):
        # Only a `*` at the very end of the word is rewritten.
        return re.sub(r'\*$', ':*', word)

    return ' & '.join(as_term(word) for word in plain_text.split())
Exemplo n.º 7
0
def handle_app_mention(event_data):
    """Process an app-mention event and grant points to the mentioned user.

    The first user mentioned in the message (other than ``SLACKBOT_USERID``)
    is the recipient. The number of points is the first whole-number token
    in the message text, defaulting to 1; more than 10 is rejected. The
    outcome is always reported by a direct message: to the recipient when
    points were granted, otherwise to the user who wrote the mention.

    Args:
        event_data: Event payload; ``event_data["event"]`` must contain at
            least ``"user"``, and ``"channel"``/``"ts"``/``"event_ts"`` when
            points are granted.

    Returns:
        None.
    """
    message = event_data["event"]

    # Get the user who initiated the @mention
    source_user = slack_client.users_info(user=message["user"])
    source_id = source_user["user"]["id"]

    # Default variable values
    msg, mentioned_user = "", None
    notify_user = source_id

    # Iterate through message body looking for the destination user. Use the first match.
    try:
        elements = message["blocks"][0]["elements"][0]["elements"]
    except (KeyError, IndexError):
        # Payload without the expected block layout: treat it like a message
        # with no user mention instead of crashing the handler.
        elements = []
    for element in elements:
        if element["type"] == "user" and element["user_id"] != SLACKBOT_USERID:
            mentioned_user = slack_client.users_info(user=element["user_id"])
            break

    # If there was a user mention, handle the points.
    if mentioned_user is not None:
        mentioned_id = mentioned_user["user"]["id"]
        if source_id == mentioned_id:
            msg = "Hey <@" + source_id + "> - nice try but you can only give points to others"
        else:
            # Try to get the number of points, default to 1.
            # `text` may be absent; isdecimal() (unlike isdigit()) only
            # accepts characters that int() can actually parse.
            text = message.get('text') or ""
            points_array = [int(s) for s in text.split() if s.isdecimal()]
            points = points_array[0] if points_array else 1

            source_user_email = source_user["user"]["profile"]["email"]
            mentioned_user_email = mentioned_user["user"]["profile"]["email"]
            original_message = slack_client.chat_getPermalink(
                channel=message["channel"], message_ts=message["ts"])
            # Try to grant points
            if points > 10:
                msg = """Hey <@{source}> :wave: I couldn't do <{permalink}|this>. 
                
You can give a maximum of 10 points at a time! You should try giving <@{mentioned}> some points again (but only up to 10, remember?)!""".format(
                    source=source_id,
                    permalink=original_message["permalink"],
                    mentioned=mentioned_id)
            else:
                error = try_grant_points(source_user_email,
                                         mentioned_user_email, points)
                if error is None:
                    msg = """Hey <@{mentioned}> :wave: <{permalink}|you got {points} points> from <@{source}>!

    ⚡ Check out the leaderboard <https://snitch-leaderboard.herokuapp.com/|here>! ⚡

    ⚡ And get to snitching! ⚡""".format(
                        mentioned=mentioned_id,
                        points=str(points),
                        source=source_id,
                        permalink=original_message["permalink"])
                    notify_user = mentioned_id
                    # React to the slack post now, for some sense of transparency.
                    # Reactions are best-effort: failures are logged, never raised,
                    # but KeyboardInterrupt/SystemExit are no longer swallowed.
                    try:
                        slack_client.reactions_add(
                            channel=message["channel"],
                            timestamp=message["event_ts"],
                            name="thumbsup")
                    except Exception:
                        print(
                            "well that's too bad I couldn't react... try again with another emoji"
                        )
                        try:
                            slack_client.reactions_add(
                                channel=message["channel"],
                                timestamp=message["event_ts"],
                                name="white_check_mark")
                        except Exception:
                            print("something went really really wrong")
                # If we couldn't grant points, let people know
                else:
                    # str() keeps the concatenation safe if `error` is not a
                    # plain string.
                    msg = "Hey <@" + source_id + "> - I couldn't give <@" + mentioned_id + "> points from you, here's what the computer told me: " + str(error)
    # If there was no user mention, handle the error.
    else:
        msg = "Hey <@" + source_id + "> something funny just happened... can you try granting that point again?"

    print("Sending %s the following alert: %s" % (notify_user, msg))
    slack_client.chat_postMessage(channel=notify_user, text=msg)
Exemplo n.º 8
0
def preprocess_search_query(text):
    """Convert a raw search string into prefix-match terms joined by ``&``.

    Non-word characters act as separators. Every resulting token is
    suffixed with ``:*`` and the tokens are combined with ``' & '``.

    Args:
        text: The raw search string.

    Returns:
        The combined query string; empty when ``text`` contains no word
        characters.
    """
    # Pass the flag by keyword. Positionally, re.UNICODE landed in re.sub's
    # `count` parameter and limited the replacement to 32 occurrences.
    words = re.sub(r'\W', ' ', text, flags=re.UNICODE).split()
    # Tokens produced by split() carry no surrounding whitespace.
    return ' & '.join(['%s:*' % word for word in words])
Exemplo n.º 9
0
def build_tsquery(text):
    """Assemble a query string of terms joined by ``" & "``.

    The input is passed to the ``gazetteer.gaz_plainText2`` database
    function through ``scalar``, then split on whitespace. Terms that end
    with ``*`` have that last ``*`` rewritten to ``:*``.
    """
    plain_text = scalar("select gazetteer.gaz_plainText2(:text)", text=text)
    terms = []
    for word in plain_text.split():
        terms.append(re.sub(r"\*$", ":*", word))
    return " & ".join(terms)