Example #1
 def test_qualified_re_split(self):
     self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
     self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
     self.assertEqual(re.split("(:)", ":a:b::c", 2),
                      ['', ':', 'a', ':', 'b::c'])
     self.assertEqual(re.split("(:*)", ":a:b::c", 2),
                      ['', ':', 'a', ':', 'b::c'])
Example #3
    def toNumber(self, lang, s):
        if lang == "en":
            try:
                if s[-1] in ["a", "b"]:
                    amud = s[-1]
                    daf = int(s[:-1])
                else:
                    amud = "a"
                    daf = int(s)
            except ValueError:
                raise InputError(u"Couldn't parse Talmud reference: {}".format(s))

            if self.length and daf > self.length:
                #todo: Catch this above and put the book name on it.  Probably change Exception type.
                raise InputError(u"{} exceeds max of {} dafs.".format(daf, self.length))

            indx = daf * 2
            if amud == "a":
                indx -= 1
            return indx
        elif lang == "he":
            num = re.split(r"[.:,\s]", s)[0]
            daf = decode_hebrew_numeral(num) * 2
            if s[-1] == ":" or (
                    s[-1] == u"\u05d1"    #bet
                        and
                    ((len(s) > 2 and s[-2] in ", ")  # simple bet
                     or (len(s) > 4 and s[-3] == u'\u05e2')  # ayin"bet
                     or (len(s) > 5 and s[-4] == u"\u05e2")  # ayin''bet
                    )
            ):
                return daf  # amud B
            return daf - 1
Example #4
def add_to_whitelist(logins):
    logins = re.split(r'[\s@,]+', logins.strip(' \t@'))

    added = []
    already = []
    denied = []
    not_found = []
    for login in logins:
        try:
            if users.add_to_whitelist(login):
                added.append(login)
            else:
                already.append(login)
        except SubscribeError:
            denied.append(login)
        except UserNotFound:
            not_found.append(login)
        except AlreadySubscribed:
            already.append(login)

    return xmpp_template('wl_updated',
                         added=added,
                         already=already,
                         denied=denied,
                         not_found=not_found)
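Note: a quick illustration, with a made-up login string, of what the strip-then-split at the top of this handler produces:

    import re

    # leading "@" and blanks are stripped first, then logins are split on
    # any run of whitespace, "@" or commas
    print(re.split(r'[\s@,]+', '@alice, @bob carol'.strip(' \t@')))
    # ['alice', 'bob', 'carol']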
Example #5
File: parse.py  Project: trs225/koch
    def pipe(self, key, value):
        doc = value
        for i, elm in enumerate(doc.content_html.elements):
            build_blobs(elm, doc, [i])

        for blob in doc.blobs:
            tokenized = nltk.word_tokenize(blob.text)
            pos_tagged = ((t, convert_pos(p))
                          for t, p in nltk.pos_tag(tokenized))
            pos_filtered = ((t, p) for t, p in pos_tagged
                            if p in self.pos_tags)
            lemmatized = (self.wordnet.lemmatize(t, convert_pos(p))
                          for t, p in pos_filtered)
            normalized = (t.lower() for s in lemmatized
                          for t in re2.split(r"\W+", s) if t)
            enumerated = ((i, t) for i, t in enumerate(normalized)
                          if not t.isdigit())
            filtered = ((i, t) for i, t in enumerated
                        if t not in self.stopwords)
            for index, text in filtered:
                blob.words.add(index=index, text=text)

        if not self.debug:
            doc.ClearField("raw_html")
            doc.ClearField("parsed_html")
            doc.ClearField("content_html")

        yield key, doc
Example #6
def add_post():
    text = env.request.args('text', '').strip()

    tags = env.request.args('tags', '').strip(' \t*,;')
    if isinstance(tags, str):
        tags = tags.decode('utf-8')
    tags = [t.replace(u"\xa0", " ") for t in re.split(r'\s*[,;*]\s*', tags)]

    private = bool(env.request.args('private'))

    m = re.search(r'^\s*(?P<to>(?:@[a-z0-9_-]+[,\s]*)+)', text)
    to = parse_logins(m.group('to')) if m else []

    files = _files([])

    sess = Session()
    sess['clear_post_input'] = True
    sess.save()

    try:
        id = posts.add_post(text, tags=tags, to=to, private=private, files=files)
    except PostTextError:
        return render('/post-error.html')

    return Response(redirect='%s://%s.%s/%s' % \
                             (env.request.protocol,
                              env.user.login, settings.domain, id))
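Note: a minimal sketch, with made-up input, of how the tag string is tokenised by the split above:

    import re

    # tags may be separated by commas, semicolons or asterisks; surrounding
    # whitespace is folded into the delimiter
    print(re.split(r'\s*[,;*]\s*', 'music; photos * afra'))
    # ['music', 'photos', 'afra']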
Example #7
def findRelations(sentence, relex):
    ''' iterate through words in a sentence and extract all relations '''
    relations = set()
    for word in filter(None, re.split('[ ,.]', sentence)):
        if word in relex:
            relations.add(word)
    return relations
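Note: the filter(None, ...) wrapper matters because splitting on single delimiters yields empty strings wherever two delimiters are adjacent; a quick check with a made-up sentence:

    import re

    print(re.split('[ ,.]', 'Paris, France. Berlin'))
    # ['Paris', '', 'France', '', 'Berlin']
    print(list(filter(None, re.split('[ ,.]', 'Paris, France. Berlin'))))
    # ['Paris', 'France', 'Berlin']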
Example #8
def calculateManaPerms(manaCost):
    """Calculate the possible mana permutations of cards.

    Used for hybrid cards when comparing mana costs.
    Input: Mana cost of card as a string (e.g. {4}{U/G}{R/G})
    Output: Array of unique mana cost combinations (e.g. [4UR, 4UG, 4GR, 4GG])
    """
    manacost_permutes = []
    manacost_statics = []
    totalmanaoptions = []
    manacost_split = re.split(r"\{(.*?)\}+?", manaCost)

    for manaitem in manacost_split:
        if manaitem != '':
            if "/" in manaitem:
                manacost_permutes.append(manaitem.replace("/", ""))
            else:
                manacost_statics.append(manaitem)

    if manacost_permutes == []:
        totalmanaoptions.append("".join(manacost_statics))
    else:
        for x in product(*manacost_permutes):
            v = "".join(manacost_statics) + "".join(x)
            totalmanaoptions.append(''.join(sorted(v)))

    return dedupe(totalmanaoptions)
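Note: for reference, this is what the brace-splitting pattern yields on the docstring's example cost (standard re only; dedupe and product come from the surrounding module):

    import re

    # the lazy group captures each brace-delimited symbol; the empty strings
    # between adjacent "}{" pairs are skipped by the loop above
    print(re.split(r"\{(.*?)\}+?", "{4}{U/G}{R/G}"))
    # ['', '4', '', 'U/G', '', 'R/G', '']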
Example #9
def add_post():
    text = env.request.args('text', '').strip()

    tags = env.request.args('tags', '').strip(' \t*,;')
    if isinstance(tags, str):
        tags = tags.decode('utf-8')
    tags = [t.replace(u"\xa0", " ") for t in re.split(r'\s*[,;*]\s*', tags)]

    private = bool(env.request.args('private'))

    m = re.search(r'^\s*(?P<to>(?:@[a-z0-9_-]+[,\s]*)+)', text)
    to = parse_logins(m.group('to')) if m else []

    files = _files([])

    try:
        id = posts.add_post(text,
                            tags=tags,
                            to=to,
                            private=private,
                            files=files)
    except PostTextError:
        return render('/post-error.html')

    return Response(redirect='%s://%s.%s/%s' % \
                             (env.request.protocol,
                              env.user.login, settings.domain, id))
Example #10
def findRelations(sentence, relex):
	''' iterate through words in a sentence and extract all relations '''
	relations = set()
	for word in filter(None, re.split('[ ,.]', sentence)):
		if word in relex:
			relations.add(word)
	return relations
Example #11
    def toNumber(self, lang, s):
        if lang == "en":
            try:
                if s[-1] in ["a", "b"]:
                    amud = s[-1]
                    daf = int(s[:-1])
                else:
                    amud = "a"
                    daf = int(s)
            except ValueError:
                raise InputError(
                    u"Couldn't parse Talmud reference: {}".format(s))

            if self.length and daf > self.length:
                #todo: Catch this above and put the book name on it.  Probably change Exception type.
                raise InputError(u"{} exceeds max of {} dafs.".format(
                    daf, self.length))

            indx = daf * 2
            if amud == "a":
                indx -= 1
            return indx
        elif lang == "he":
            num = re.split(r"[.:,\s]", s)[0]
            daf = decode_hebrew_numeral(num) * 2
            if s[-1] == ":" or (
                    s[-1] == u"\u05d1"  #bet
                    and ((len(s) > 2 and s[-2] in ", ")  # simple bet
                         or (len(s) > 4 and s[-3] == u'\u05e2')  # ayin"bet
                         or (len(s) > 5 and s[-4] == u"\u05e2")  # ayin''bet
                         )):
                return daf  # amud B
            return daf - 1
Example #12
File: eval.py  Project: trs225/koch
def get_word_counts(string):
  out = {}
  for word in re2.split(r"\W", string):
    if word in out:
      out[word] += 1
    else:
      out[word] = 1
  return out
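Note: re2.split behaves like the standard library's re.split here; splitting on r"\W" (rather than r"\W+") produces empty strings for punctuation runs, which end up counted under the empty-string key:

    import re
    from collections import Counter

    print(re.split(r"\W", "foo, bar foo!"))
    # ['foo', '', 'bar', 'foo', '']
    print(Counter(re.split(r"\W", "foo, bar foo!")))
    # Counter({'foo': 2, '': 2, 'bar': 1})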
Example #13
def isReference(sentence, authors):
	''' checks if a sentence is a reference - use a hashed author database to check each word'''
	score = 0
	for word in re.split('[ ,;]', sentence):
		if word in authors:
			score += 1
		if score > 3:
			return True
	return False
Example #14
def isReference(sentence, authors):
    ''' checks if a sentence is a reference - use a hashed author database to check each word'''
    score = 0
    for word in re.split('[ ,;]', sentence):
        if word in authors:
            score += 1
        if score > 3:
            return True
    return False
Example #15
def diff_ratio(str1, str2):
    if not isinstance(str1, unicode):
        str1 = str1.decode('utf-8')
    str1 = ' '.join(re.split(r'[\s\.]+', str1)).lower()
    if not isinstance(str2, unicode):
        str2 = str2.decode('utf-8')
    str2 = ' '.join(re.split(r'[\s\.]+', str2)).lower()

    d = distance(str1, str2)
    if d <= 1:
        return True

    if settings.edit_distance > 0 and d > settings.edit_distance:
        return False

    r = ratio(str1, str2)
    if r < settings.edit_ratio:
        return False

    return True
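Note: the normalisation step collapses runs of dots and whitespace into single spaces before the edit-distance comparison; a small illustration with a made-up string:

    import re

    print(' '.join(re.split(r'[\s\.]+', u'J. R. R.  Tolkien')).lower())
    # j r r tolkien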
Example #16
def diff_ratio(str1, str2):
    if not isinstance(str1, unicode):
        str1 = str1.decode("utf-8")
    str1 = " ".join(re.split(r"[\s\.]+", str1)).lower()
    if not isinstance(str2, unicode):
        str2 = str2.decode("utf-8")
    str2 = " ".join(re.split(r"[\s\.]+", str2)).lower()

    d = distance(str1, str2)
    if d <= 1:
        return True

    if settings.edit_distance > 0 and d > settings.edit_distance:
        return False

    r = ratio(str1, str2)
    if r < settings.edit_ratio:
        return False

    return True
Example #17
 def test_re_split(self):
     self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
     self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
     self.assertEqual(re.split("(:*)", ":a:b::c"),
                      ['', ':', 'a', ':', 'b', '::', 'c'])
     self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
     self.assertEqual(re.split("(:)*", ":a:b::c"),
                      ['', ':', 'a', ':', 'b', ':', 'c'])
     self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                      ['', ':', 'a', ':b::', 'c'])
     self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                      ['', None, ':', 'a', None, ':', '', 'b', None, '',
                       None, '::', 'c'])
     self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                      ['', 'a', '', '', 'c'])
Example #18
 def test_re_split(self):
     self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
     self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
     self.assertEqual(re.split("(:*)", ":a:b::c"),
                      ['', ':', 'a', ':', 'b', '::', 'c'])
     self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
     self.assertEqual(re.split("(:)*", ":a:b::c"),
                      ['', ':', 'a', ':', 'b', ':', 'c'])
     self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                      ['', ':', 'a', ':b::', 'c'])
     self.assertEqual(re.split("(b)|(:+)", ":a:b::c"), [
         '', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'
     ])
     self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                      ['', 'a', '', '', 'c'])
Example #19
    def save(post):
        text = env.request.args('text', '').strip()

        tags = env.request.args('tags', '').strip(' \t*,;')
        if isinstance(tags, str):
            tags = tags.decode('utf-8')
        tags = [t.replace(u"\xa0", " ") for t in re.split(r'\s*[,;*]\s*', tags)]

        private = bool(env.request.args('private'))

        posts.edit_post(post, text=text, tags=tags, private=private, files=files)

        return Response(redirect='%s://%s.%s/%s' % \
                                 (env.request.protocol,
                                  env.user.login, settings.domain, post.id))
Example #20
def parse_tags(tags):
    if tags:
        tags = tags.strip(" \r\n\t*")
        if isinstance(tags, str):
            tags = tags.decode("utf-8")
        # tags = re.findall(r'[^\s*]+', tags)
        tags = filter(
            None, [t.replace(u"\xa0", " ").strip()[:64] for t in uniqify(re.split(r"(?<!\\)[\*,]", tags)[:10])]
        )
        if not tags:
            tags = None

    else:
        tags = []
    return map(lambda t: re.sub(r"\\,", ",", t), tags)
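Note: the negative lookbehind is what lets a tag contain a literal comma: escaped commas survive the split and are unescaped by the final re.sub. A sketch with a made-up tag string:

    import re

    tags = re.split(r"(?<!\\)[\*,]", "foo, bar\\,baz * qux")
    print(tags)  # ['foo', ' bar\\,baz ', ' qux']
    print([re.sub(r"\\,", ",", t.strip()) for t in tags])
    # ['foo', 'bar,baz', 'qux']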
Example #21
File: blog.py  Project: radjah/point-www
    def save(post):
        text = env.request.args('text', '').strip()

        tags = env.request.args('tags', '').strip(' \t*,;')
        if isinstance(tags, str):
            tags = tags.decode('utf-8')
        tags = [t.replace(u"\xa0", " ") for t in re.split(r'\s*[,;*]\s*', tags)]

        private = bool(env.request.args('private'))

        posts.edit_post(post, text=text, tags=tags, private=private, files=files)

        return Response(redirect='%s://%s.%s/%s' % \
                                 (env.request.protocol,
                                  env.user.login, settings.domain, post.id))
Example #22
def parse_tags(tags):
    if tags:
        tags = tags.strip(' \r\n\t*')
        if isinstance(tags, str):
            tags = tags.decode('utf-8')
        #tags = re.findall(r'[^\s*]+', tags)
        tags = filter(None,
                [t.replace(u"\xa0", " ").strip()[:64] for t in \
                        uniqify(re.split(r'(?<!\\)[\*,]', tags)[:10])])
        if not tags:
            tags = None

    else:
        tags = []
    return map(lambda t: re.sub(r'\\,', ',', t), tags)
Example #23
def parse_macro(macro):
    opts = {}
    vb_vars = {}
    result = {}
    cleaned = ""
    strings = set()
    iocs = []
    macro = normalize_code(macro)

    enc_func_name, enc_type = find_enc_function(macro)
    if not enc_func_name:
        enc_func_name, enc_type = r"xor\w+", "xor"

    decrypt_func = DECRYPTORS.get(enc_type)

    opts = {
        "enc_func_name": enc_func_name,
        "decrypt_func": decrypt_func,
        "vb_vars": vb_vars
    }

    for line in macro.splitlines():
        line = line.strip()
        if line.startswith("'"):
            continue

        substituted = handle_techniques(line, **opts)
        # Look for variable assignments
        split = [
            part for part in re.split(r"^(\w+)\s*=\s*", line, maxsplit=1)[1:]
            if part
        ]

        # Basic variable data find/replace.
        if len(split) == 2:
            name, val = split
            vb_vars[name] = substituted

        # Walk the deobfuscated macro and check for any IOCs
        for string in substituted.splitlines():
            ioc = extract_iocs(string)
            if ioc:
                iocs.append(ioc)

    # Dedup IOCs
    result = sorted(set(iocs), key=lambda p: p[0])

    return result
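Note: the anchored, captured prefix is a compact way to peel apart assignment lines; for anything that is not an assignment the [1:] slice comes back empty. A quick check with made-up macro lines:

    import re

    print(re.split(r"^(\w+)\s*=\s*", 'foo = Chr(101) & "vil"', maxsplit=1)[1:])
    # ['foo', 'Chr(101) & "vil"']
    print(re.split(r"^(\w+)\s*=\s*", "MsgBox x", maxsplit=1)[1:])
    # []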
Example #24
def del_from_blacklist(logins):
    logins = re.split(r'[\s@,]+', logins.strip(' \t@'))

    deleted = []
    not_deleted = []
    not_found = []

    for login in logins:
        try:
            if users.del_from_blacklist(login):
                deleted.append(login)
            else:
                not_deleted.append(login)
        except UserNotFound:
            not_found.append(login)

    return xmpp_template('bl_updated', deleted=deleted,
                                       not_deleted=not_deleted,
                                       not_found=not_found)
Example #25
def add_to_blacklist(logins):
    logins = re.split(r'[\s@,]+', logins.strip(' \t@'))

    added = []
    already = []
    not_found = []

    for login in logins:
        try:
            if users.add_to_blacklist(login):
                added.append(login)
            else:
                already.append(login)
        except SubscribeError:
            pass
        except UserNotFound:
            not_found.append(login)

    return xmpp_template('bl_updated', added=added, already=already,
                                       not_found=not_found)
Example #26
def del_from_blacklist(logins):
    logins = re.split(r'[\s@,]+', logins.strip(' \t@'))

    deleted = []
    not_deleted = []
    not_found = []

    for login in logins:
        try:
            if users.del_from_blacklist(login):
                deleted.append(login)
            else:
                not_deleted.append(login)
        except UserNotFound:
            not_found.append(login)

    return xmpp_template('bl_updated',
                         deleted=deleted,
                         not_deleted=not_deleted,
                         not_found=not_found)
Example #27
def parse_macro(macro):
    opts = {}
    vb_vars = {}
    result = {}
    cleaned = ""
    strings = set()
    iocs = []
    macro = normalize_code(macro)

    enc_func_name, enc_type = find_enc_function(macro)
    if not enc_func_name:
        enc_func_name, enc_type = r"xor\w+", "xor"

    decrypt_func = DECRYPTORS.get(enc_type)

    opts = {"enc_func_name": enc_func_name, "decrypt_func": decrypt_func, "vb_vars": vb_vars}

    for line in macro.splitlines():
        line = line.strip()
        if line.startswith("'"):
            continue

        substituted = handle_techniques(line, **opts)
        # Look for variable assignments
        split = [part for part in re.split(r"^(\w+)\s*=\s*", line, maxsplit=1)[1:] if part]

        # Basic variable data find/replace.
        if len(split) == 2:
            name, val = split
            vb_vars[name] = substituted

        # Walk the deobfuscated macro and check for any IOCs
        for string in substituted.splitlines():
            ioc = extract_iocs(string)
            if ioc:
                iocs.append(ioc)

    # Dedup IOCs
    result = sorted(set(iocs), key=lambda p: p[0])

    return result
Example #28
 def _get_shortcut_url_map(self, pat, lines, shortcut_size):
     shortcut_url_map = {}
     secondary_lines = []
     total_rules = 0
     total_comments = 0
     total_shortcuts = 0
     for line in lines:
          line = line.strip()
         if line[0] == '!':
             total_comments += 1
             continue
         total_rules += 1
         url = re2.split(r'\$+', line)[0]
         searches = pat.findall(url)
         flag = 0
         if searches:
             total_shortcuts += 1
         else:
             secondary_lines.append(line)
             continue
         min_count = -1
         for s in searches:
             for i in xrange(len(s) - shortcut_size+1):
                 cur_s = s[i:i+shortcut_size]
                 if cur_s not in shortcut_url_map:
                     shortcut_url_map[cur_s] = [line]
                     flag = 1
                     break
                 if (min_count == -1 or
                         len(shortcut_url_map[cur_s]) < min_count):
                     min_count = len(shortcut_url_map[cur_s])
                     min_s = cur_s
             if flag == 1:
                 break
         if flag == 0:
             shortcut_url_map[min_s].append(line)
     if self.print_maps:
         self._print_statistics_of_map(
             shortcut_size, total_rules, total_comments,
             total_shortcuts, len(secondary_lines), shortcut_url_map)
     return shortcut_url_map, secondary_lines
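Note: re2.split matches re.split's behaviour for this pattern; everything from the first "$" onward (the Adblock filter options) is discarded before shortcut extraction. A made-up filter line for illustration:

    import re

    print(re.split(r'\$+', '||ads.example.com^$third-party,script')[0])
    # ||ads.example.com^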
Example #29
    def _generate_login(self):
        name = unidecode(self.get_info('name')).lower()
        if not name:
            name = re.sub(r'^\w+:/+', '', self._url.lower())

        name = re.sub('^\W+|\W+$', '', name)

        words = re.split(r'\W+', name)
        name = ''
        br = False
        for w in words[:]:
            if not name:
                _name = w
            else:
                _name = "%s-%s" % (name, w)
            if len(_name) <= 16:
                name = _name
            else:
                name = _name[:16]
                br = True
                break

        if br:
            try:
                ri = name.rindex('-')
            except ValueError:
                ri = 16
            if ri > 6:
                name = name[:ri]

        i = 0

        while True:
            login = '******' % (name, i or '')
            try:
                User('login', login)
            except UserNotFound:
                return login
            i += 1
Example #31
def GetValues(string):
    parts = (s for s in re2.split(r"\(.*\)", string) if s)
    return set(Normalize(s) for part in parts for s in part.split(','))
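Note: a small illustration of the split (standard re behaves the same as re2 for this pattern); the greedy ".*" drops everything from the first "(" to the last ")" before the comma split. Normalize is project-specific:

    import re

    print(re.split(r"\(.*\)", "alpha, beta (see note), gamma"))
    # ['alpha, beta ', ', gamma']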
Example #32
def nl2br(environ, text):
    return ''.join(['<p>%s</p>' % escape(s) for s in re.split(_nl_re, text)])
Example #33
def dispatch_message(message, raw_message, channel):
    """For a message, figure out how to handle it and return the text to reply with.

    INPUT: message = Message string
    INPUT: channel = TRUE if message came from a main channel, FALSE if came from PM
    OUTPUT: (optional: list of) tuple of (reply_message, pm_override)
    OUTPUT: pm_override is TRUE if the reply should go through PM regardless

    If they give us "<string> extend", assume that it's "<cardname extend>".
    If they give us "<string>*", assume that it's "<cardname*>"
    """
    logging.debug("Dispatching message: {} (Raw text {})".format(message, raw_message))
    if message == "help":
        return (help(), True)
    elif message == "helpsearch":
        return (helpsearch(), True)
    elif message.startswith("url "):
        return (url(message[4:]), False)
    elif message.startswith("printsets"):
        c.execute('SELECT DISTINCT(name), code, releaseDate FROM sets ORDER BY ' + ('releaseDate' if message.endswith("inorder") else 'name') + ' ASC')
        message_out = ""
        for name, code, date in [(x[0], x[1], x[2]) for x in c.fetchall()]:
            message_out += name + " (" + code + ")" + " [" + date + "]" + "\n"
        return (message_out, True)
    elif message == "random":
        cards = cardSearch(c, ['en:' + random.choice(allCardNames)])
        if not cards:
            return ("No cards found :(", False)
        return (printCard(c, cards[0], quick=False, slackChannel=channel), False)
    elif message.endswith("extend"):
        cards = cardSearch(c, ['en:' + message[:-6].rstrip()])
        if not cards:
            return ("", False)
        return (printCard(c, cards[0], extend=2, quick=False), True)
    elif message.endswith("*"):
        cards = cardSearch(c, ['n:' + message[:-1]])
        if not cards:
            return ("", False)
        if len(cards) > 20:
            return ("Too many cards to print! ({} > 20). Please narrow search".format(len(cards)), False)
        if channel:
            # If we've asked for some cards in a channel
            if len(cards) == 1:
                # One card is fine, show them
                return (printCard(c, cards[0], quick=False, slackChannel=channel), False)
            elif len(cards) <= 5:
                # 2 - 5 cards is fine, but only show name and mana cost
                return ("\n".join([printCard(c, card, quick=True, slackChannel=channel) for card in cards]), False)
            else:
                # > 5 is only showing name and mana cost and forced to PM
                return [("{} results sent to PM".format(len(cards)), False), ("\n".join([printCard(c, card, quick=True, slackChannel=channel) for card in cards]), True)]
        else:
            return ("\n".join([printCard(c, card, quick=False, slackChannel=channel) for card in cards] + ["{} result/s".format(len(cards))]), False)
    elif raw_message.startswith("!s ") or raw_message.startswith("!qs "):
        logging.debug("Advanced Search!")
        quick = False
        if message == "qs":
            quick = True
            card_name = raw_message[4:].lower()
        else:
            card_name = raw_message[3:].lower()
        logging.debug("Searching for {}".format(card_name))
        output = []
        try:
            parsed_data = super_total.parseString(card_name)
            logging.debug("Parsed it as: {}".format(parsed_data))
        except (ParseException, ParseFatalException) as e:
            return ("Unable to parse search terms\n{}".format(e), False)

        last_was_s = False
        for idx, x in enumerate(parsed_data.asList()):
            if x in ["and", "or", "not"]:
                output.append(x)
                last_was_s = False
            elif x == "(":
                if last_was_s:
                    output.append("AND")
                output.append("(")
                last_was_s = False
            elif x == ")":
                output.append(")")
                last_was_s = False
            else:
                if last_was_s:
                    output.append("AND")
                output.append(x)
                last_was_s = True
        logging.debug("Advanced search final terms: {}".format(output))
        cards = cardSearch(c, output)
        if not cards:
            return ("No cards found", False)
        if len(cards) > 20:
            return ("Too many cards to print! ({} > 20). Please narrow search".format(len(cards)), False)
        if channel:
            # If we've asked for some cards in a channel
            # If they're quick, <= 10 is fine
            if quick and len(cards) <= 10:
                return ("\n".join([printCard(c, card, quick=quick, slackChannel=channel) for card in cards]), False)
            if len(cards) <= 5:
                # 1 - 5 cards is fine
                return ("\n".join([printCard(c, card, quick=quick, slackChannel=channel) for card in cards]), False)
            else:
                # > 5 is only showing name and mana cost and forced to PM
                return [("{} results sent to PM".format(len(cards)), False), ("\n".join([printCard(c, card, quick=True, slackChannel=channel) for card in cards]), True)]
        else:
            return ("\n".join([printCard(c, card, quick=quick, slackChannel=channel) for card in cards] + ["{} result/s".format(len(cards))]), False)
    elif raw_message.startswith("!r ") or rule_regexp.match(raw_message):
        logging.debug("Rules query!")
        if message == "r":
            message = raw_message[3:]
        return (ruleSearch(all_rules, message), False)
    else:
        logging.debug("Trying to figure out card name")
        logging.debug("Maybe we get extremely lucky")
        if message in allCardNames:
            logging.debug("We do!")
            cards = cardSearch(c, ['en:' + message])
            return (printCard(c, cards[0], quick=False, slackChannel=channel), False)
        logging.debug("We don't")
        # Handle !card1 !card2
        # Handle !card1&!card2
        # Handle Blah !card1 blah !card2
        # Don't forget if it's a PM we'll have stripped the possible initial ! so let's
        # use the raw message
        # TODO: Do it backwards, so longest matches are better
        command_list = bot_command_regex.findall(raw_message)
        logging.debug("Command list: {}".format(command_list))
        cards_found = []
        for card in command_list:
            if card in allCardNames:
                logging.debug("Bailing early due to exact match")
                cards_found.append('en:"%s"' % card)
                continue
            card_tokens = re.split(' |&', raw_message[raw_message.find(card):])
            logging.debug("Tokenising: {}".format(card_tokens))
            backup = []
            real = False
            for i in xrange(1, len(card_tokens) + 1):
                card_name = " ".join(card_tokens[:i])
                if card_tokens[i - 1].startswith("!"):
                    break
                if not backup:
                    backup.extend([x for x in allCardNames if difflib.SequenceMatcher(None, x.split(", ")[0].lower(), card_name.lower()).ratio() >= 0.8])
                real = difflib.get_close_matches(card_name, allCardNames, cutoff=0.8)
                if len(real):
                    cards_found.append('en:"%s"' % real[0])
                    real = True
                    break
            if not real:
                if backup:
                    cards_found.append('en:"%s"' % backup[0])
        logging.debug("Finally, the cards: {}".format(cards_found))
        if cards_found:
            terms = list(intersperse("OR", cards_found))
            logging.debug("Searching for {}".format(terms))
            cards = cardSearch(c, terms)
            logging.debug("Found {} cards".format(len(cards)))
            if len(cards) > 20:
                return ("Too many cards to print! ({} > 20). Please narrow search".format(len(cards)), False)
            if len(cards) <= 5:
                return ("\n".join([printCard(c, card, quick=False, slackChannel=channel) for card in cards]), False)
            else:
                return [("{} results sent to PM".format(len(cards)), False), ("\n".join([printCard(c, card, quick=False, slackChannel=channel) for card in cards]), True)]
        else:
            logging.debug("I didn't understand the command")
            return ("", False)
Example #34
File: afrab0t.py  Project: Akendo/afrab0t
	def do_command(self, e, cmd, nick, target, reply, dm):
		c = self.connection

		emoticontable = {
				':)': '☺',
# Some lines commented out due to lack of widespread font support
#				':D': '😃',
#				'^^': '😄',
#				'^_^':'😄',
#				':|': '😑',
				':(': '☹',
#				':/': '😕',
#				':\\':'😕',
#				'-.-':'😒',
#				':P' :'😛',
#				';P' :'😜',
#				'xP' :'😝',
#				';)' :'😉',
#				':?' :'😖',
#				'>:(':'😠',
#				'D:' :'😦',
#				':o' :'😯',
#				':O' :'😮',
#				'B)' :'😎'
				}
		for emoticon, uchar in emoticontable.items():
			if re.findall('(^|\W)'+re.escape(emoticon)+'(\W|$)', cmd) and random() < 0.333:
				reply('Did you mean {} (U+{:x}) with “{}”?'.format(uchar, ord(uchar), emoticon))
				break

		def replyopen():
			if self.lastopen:
				reply('Space was last marked {} by {} on {}.'.format(*self.lastopen))
			else:
				reply("I don't know when was the last time the space was open.")
		if cmd.startswith('open'):
			if '?' in cmd or '‽' in cmd:
				if cmd.count('?') >= 5:
					self.sendchan('afrabot: open?')
					return
				replyopen()
			else:
				if cmd.count('!') > 5:
					reply('u mad bro?')
					return
				self.set_open(True, nick)
			return
		if cmd.startswith('closed'):
			if '?' in cmd or '‽' in cmd:
				replyopen()
			else:
				if cmd.count('!') > 5:
					reply('u mad bro?')
					return
				dm('Please remember to follow the shutdown protocol.')
				self.set_open(False, nick)
			return
		if re.match('^ *genug +pleniert[.!]{,5}$', cmd) or re.match('^plenum[?!‽.]{,5}$', cmd):
			cs = self.chaossternchen
			if 'genug' in cmd:
				self.chaossternchen = []
				reply('Plenum beendet.')
			else:
				reply('Aye! So far, there are {} Chaos-☆'.format(len(cs)) + ('.' if len(cs) == 0 else ':'))
			for entry in enumerate(cs):
				reply('Chaos-☆ {}: {}'.format(*entry))
			return
		csmatch = re.match('^ *(delete|remove) +chaos-?([☆★☼☀*]|sternchen) *([0-9]+)[.!]{,5}$', cmd)
		if csmatch:
			try:
				num = int(csmatch.group(3))
				del self.chaossternchen[num]
				reply('Chaos-☆ {} deleted.'.format(num))
			except:
				reply('wut?')
			return
		if re.match('^help[?!‽.]*$', cmd):
			helptext = """open|closed? - query whether space is open
open|closed - set space open/closed
chaos*: [foobar] - add plenum topic
delete chaos* [num] - delete plenum topic number [n]
shutdown - list things to do when closing the space
plenum - list plenum topics
... and many more, doc urgently needed. Please submit PRs on github: https://github.com/afra/afrab0t
"""
			for line in helptext.splitlines():
				reply(line)
			return
		if re.match('^shutdown[?‽]*$', cmd):
			helptext = """* Fenster schließen (Beim rechten Fenster muss ein Hebel unten am Fenster betätigt werden. Bitte stellt sicher, dass beide Fenster dicht geschlossen sind.)
* Tische aufräumen und bei Bedarf kurz abwischen
* Geschirr spülen
* Kühlschrank auffüllen
* Heizung auf eine angemessene Stufe stellen (Winter: 2-3)
* Lampen, Computer, Boxen, Beamer, Kochplatte, Ofen, *Wasserkocher*, Laser abschalten
* Gucken, ob ralisi noch Geschirr abwäscht
* Müll mit runter nehmen
* Raum-, Aufgangs- und Haustür verschließen
"""
			for line in helptext.splitlines():
				reply(line)
			return
		if cmd == 'ponies?':
			reply('yes please!')
			return
		if re.match('^ *tell +afrab[o0]t +', cmd):
			reply('what is your problem?')
			return
		if cmd.rstrip('?') in ('where', 'location', 'wo'):
			reply('AfRA e.V. is located at Herzbergstr. 55, 10365 Berlin, 2.HH/Aufgang B, 3. floor on the '
					'left (Rm 3.08). Public transport: Tram M8, 21, 37 & Bus 256, N56, N50 → Herzbergstr./Siegfriedstr. '
					'Door closed? Try +49-176-29769254 !')
			return
		if cmd.rstrip('?') in ('tel', 'telefon', 'telephone', 'phone', 'handy', 'fon'):
			reply("Locked out? Wanna know what's up at AfRA? Try +49-176-29769254 !")
			return
		if cmd.rstrip('?!.') in ('cats', 'katzen', 'kittens', 'kätzchen'):
			try:
				submissions = self.reddit.get_subreddit('cats').get_hot(limit=50)
				index, item = next((i,s) for i,s in enumerate(submissions) if s.url not in self.catpiccache and not s.stickied and not s.is_self)
				self.catpiccache.append(item.url)
				if index != 5:
					reply('Got some cats for you: '+item.url)
				else:
					reply("Gee, you really like those cat things, don't you? You know, I could use some love, too: https://github.com/afra/afrab0t")
			except StopIteration:
				reply('The intertubes are empty.')
			return
		if cmd.rstrip('?!.') == 'catspam':
			def catspam():
				try:
					submissions = self.reddit.get_subreddit('cats').get_hot(limit=32)
					for s in submissions:
						if s.url not in self.nickcatpiccache[nick] and s.url not in self.catpiccache and not s.stickied and not s.is_self:
							self.nickcatpiccache[nick].append(s.url)
							dm(s.url)
							time.sleep(3)
				except Exception as e:
					log('Catspam problem:', e)
					reply('The intertubes are empty.')
			thr = Thread(target=catspam)
			thr.start()
			return
		if cmd.rstrip('?!.') in ('answer', 'antworte', 'antwort'):
			reply('42')
			return
		# ETA handling
		if cmd.rstrip('?') in ('etas', 'who', 'da'):
			with self.db as db:
				db.execute("DELETE FROM etas WHERE timestamp < DATETIME('now', '-1 day')")
			etas = ', '.join(nick+': '+eta for nick,eta in db.execute("SELECT nick, eta FROM etas").fetchall())
			if etas:
				reply('Current ETAs: '+etas)
			else:
				reply('No ETAs have been announced yet.')
			return
		# key handling
		keycmd = re.match('key ([\w]+) to ([\w]+)( *: *.*)?', cmd)
		if keycmd:
			with self.db as db:
				keystate, = db.execute("SELECT keystate FROM keylog ORDER BY timestamp DESC LIMIT 1").fetchone()
				keystatelist = keystate.split(', ')
				fromnick, tonick, comment = keycmd.groups()
				if not fromnick in keystatelist:
				reply('According to my information, as of now {} does not have a key. Current key '
						'holders are {}.'.format(fromnick, keystate))
					return
				keystatelist[keystatelist.index(fromnick)] = tonick
				keystate = ', '.join(keystatelist)
				db.execute("INSERT INTO keylog VALUES (DATETIME('now'),?,?,?,?)", (fromnick, tonick, keystate, comment))
				self.sendchan('Key transfer: {}→{}. Current key holders: {}'.format(fromnick, tonick, keystate))
			return
		if cmd.rstrip('?') == 'progress':
			t = datetime.datetime.now().time()
			p = 0
			if t.hour > 6 and t.hour < 18:
				p = ((t.hour-6)*3600+t.minute*60+t.second)/(3600*11)
			foo = round(67*p)
			bar = '='*foo
			space = ' '*(67-foo)
			reply('['+bar+'>'+space+'] ({:.2f}%)'.format(p*100))
			return
		if cmd.startswith('keystate '):
			keystate = re.split('[,;/: ]*', cmd)[1:]
			self.db.execute("INSERT INTO keylog VALUES (DATETIME('now'),'','',?,'')", (', '.join(keystate),))
			self.sendchan('Key status set. Current key holders: {}'.format(', '.join(keystate)))
			return
		keylog = re.match('keylog *([0-9]*)', cmd)
		if keylog:
			num = max(50, int(keylog.group(1) or 8))
			dm('The latest {} key log entries:'.format(num))
			loglines = self.db.execute("SELECT * FROM keylog ORDER BY timestamp DESC LIMIT ?", (num,))
			for timestamp, fromnick, tonick, keystate, comment in reversed(loglines):
				dm('{}: {}→{}; Key holders {}; Comment: "{}"'.format(
						timestamp, fromnick, tonick, keystate, comment))
			dm('EOL')
			return
		if cmd.startswith("f**k you"):
			reply('F*****g is entirely unnecessary: I can reproduce via copy-and-paste!')
			return
		if cmd.startswith("geh kacken"):
			reply('Command "kacken" not implemented. You are welcome to submit a pull request on github at https://github.com/afra/afrab0t')
			return
		# fall-through
		c.notice(nick, 'I don\'t know what you mean with "{}"'.format(cmd))
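Note: one caveat on the keystate parser above: the pattern '[,;/: ]*' can match the empty string, and re.split's handling of zero-width matches changed in Python 3.7, where the same call splits between every character. A '+' quantifier keeps the intended behaviour on all versions (a sketch, not the project's code):

    import re

    cmd = 'keystate alice, bob; carol'
    print(re.split(r'[,;/: ]+', cmd)[1:])
    # ['alice', 'bob', 'carol']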
Example #35
def get_data(files, expect_labels=True, tokenize=False, verbose=False, files_already_opened=False):
    """
    load text from files, returning an instance of the Doc class
    doc.frag is the first frag, and each points to the next
    """
    
    if type(files) == type(''): files = [files]
    frag_list = None
    word_index = 0
    frag_index = 0
    curr_words = []
    lower_words, non_abbrs = sbd_util.Counter(), sbd_util.Counter()

    for file in files:
        sys.stderr.write('reading [%s]\n' %file)

        #fh = open(file)
        if files_already_opened:
            fh = file
        else:
            fh = open(file)

        for line in fh:

            ## deal with blank lines
            if (not line.strip()) and frag_list:
                if not curr_words: frag.ends_seg = True
                else:
                    frag = Frag(' '.join(curr_words))
                    frag.ends_seg = True
                    if expect_labels: frag.label = True
                    prev.next = frag
                    if tokenize:
                        tokens = word_tokenize.tokenize(frag.orig)
                    else:
                        # mirror the later branches: fall back to the raw text
                        tokens = frag.orig
                    frag.tokenized = tokens
                    frag_index += 1
                    prev = frag
                    curr_words = []

            for word in line.split():
                curr_words.append(word)

                if is_sbd_hyp(word):
                #if True: # hypothesize all words
                    frag = Frag(' '.join(curr_words))
                    if not frag_list: frag_list = frag
                    else: prev.next = frag
                    
                    ## get label; tokenize
                    if expect_labels: frag.label = int('<S>' in word)
                    if tokenize:
                        tokens = word_tokenize.tokenize(frag.orig)
                        # BJD possible hack, but pretty sure this is needed
                        tmp_tokens = tokens.split()
                        tokens = ' '.join(tmp_tokens[:-1] + re.split(r'([.?!]+["\')\]]*)$', tmp_tokens[-1]))
                    else: tokens = frag.orig
                    tokens = re.sub('(<A>)|(<E>)|(<S>)', '', tokens)
                    frag.tokenized = tokens
                    
                    frag_index += 1
                    prev = frag
                    curr_words = []

                word_index += 1

        if files_already_opened:
            pass
        else:
            fh.close()
        #fh.close()

        ## last frag
        frag = Frag(' '.join(curr_words))
        if not frag_list: frag_list = frag
        else: prev.next = frag
        if expect_labels: frag.label = int('<S>' in word)
        if tokenize:
            tokens = word_tokenize.tokenize(frag.orig)
        else: tokens = frag.orig
        tokens = re.sub('(<A>)|(<E>)|(<S>)', '', tokens)
        frag.tokenized = tokens
        frag.ends_seg = True
        frag_index += 1

    if verbose: sys.stderr.write(' words [%d] sbd hyps [%d]\n' %(word_index, frag_index))

    ## create a Doc object to hold all this information
    doc = Doc(frag_list)
    return doc
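Note: the tokenisation above relies on this split to detach trailing sentence punctuation (plus any closing quotes or brackets) from the last token; a quick check:

    import re

    print(re.split(r'([.?!]+["\')\]]*)$', 'Mr.'))      # ['Mr', '.', '']
    print(re.split(r'([.?!]+["\')\]]*)$', 'done.")'))  # ['done', '.")', '']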
Example #36
    def _parse_response(self, response):
        if response == b"":
            logging.info("[SPAM ASSASSIN] Empty response")
            return None

        match = divider_pattern.match(response)
        if not match:
            logging.error("[SPAM ASSASSIN] Response error:")
            logging.error(response)
            return None

        first_line = match.group(1)
        headers = match.group(2)
        body = response[match.end(0) :]

        # Checking response is good
        match = first_line_pattern.match(first_line)
        if not match:
            logging.error("[SPAM ASSASSIN] invalid response:")
            logging.error(first_line)
            return None

        report_list = [
            s.strip() for s in body.decode("utf-8", errors="ignore").strip().split("\n")
        ]
        linebreak_num = report_list.index([s for s in report_list if "---" in s][0])
        tablelists = [s for s in report_list[linebreak_num + 1 :]]

        self.report_fulltext = "\n".join(report_list)

        # join line when current one is only wrap of previous
        tablelists_temp = []
        if tablelists:
            for _, tablelist in enumerate(tablelists):
                if len(tablelist) > 1:
                    if (tablelist[0].isnumeric() or tablelist[0] == "-") and (
                        tablelist[1].isnumeric() or tablelist[1] == "."
                    ):
                        tablelists_temp.append(tablelist)
                    else:
                        if tablelists_temp:
                            tablelists_temp[-1] += " " + tablelist
        tablelists = tablelists_temp

        # create final json
        self.report_json = dict()
        for tablelist in tablelists:
            wordlist = re.split(r"\s+", tablelist)
            try:
                self.report_json[wordlist[1]] = {
                    "partscore": float(wordlist[0]),
                    "description": " ".join(wordlist[1:]),
                }
            except ValueError:
                LOG.w("Cannot parse %s %s", wordlist[0], wordlist)

        headers = (
            headers.decode("utf-8")
            .replace(" ", "")
            .replace(":", ";")
            .replace("/", ";")
            .split(";")
        )
        self.score = float(headers[2])
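Note: each table row of the SpamAssassin report has the shape "score RULE_NAME description ...", so splitting on runs of whitespace yields the score and rule name directly; a made-up row for illustration:

    import re

    wordlist = re.split(r"\s+", "0.5 SOME_RULE Example rule description")
    print(float(wordlist[0]), wordlist[1])
    # 0.5 SOME_RULE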
Example #37
 def __init__(self, input_date):
     self.input = input_date
     self.min = None
     self.max = None
     self.compare = None
     # possible values:
     # 1. absolute date
     # 2. relative date
     # 3. range (relative or absolute)
     # for absolute:
     # parse
     # create max and min
     # for relative:
     # create a timedelta
     # subtract that from now
     # no need for max and min, subtraction is precise
     # for range:
     # create a DateRange for each
     # select max and min from both to create largest possible range
     # first let's handle the range
     if ".." in self.input:
         self.input = self.input.split("..")
         if len(self.input) != 2:
             raise CommandError("Date ranges must have 2 dates.")
         # if the date is a manual range, convert to a DateRange
         self.max = []
         self.min = []
         for date in self.input:
             date = DateRange(date)
             self.max.append(date.max)
             self.min.append(date.min)
         # max and min are now both lists of possible dates
         # pick max and min to yield the biggest date
         # max: None is always Now
      # min: None is always The Beginning of Time
         # for 2 absolute dates this is easy, just pick biggest diff
         # for 2 relative dates, pick both of whichever is not None
         # for 1:1, pick not None of relative then ->largest of absolute
         # filter None from lists
         self.max = [i for i in self.max if i]
         self.min = [i for i in self.min if i]
         # special case for 2 relative dates - both will only have max
         if len(self.max) == 2 and len(self.min) == 0:
             self.min = min(self.max)
             self.max = max(self.max)
             return
         diffs = []
         for i, minimum in enumerate(self.min):
             for j, maximum in enumerate(self.max):
                 diffs.append(
                     {
                         'i': i,
                         'j': j,
                         'diff': self.min[i].diff(self.max[j]).in_seconds(),
                     }
                 )
         diffs = max(diffs, key=lambda x: x['diff'])
         self.max = self.max[diffs['j']]
         self.min = self.min[diffs['i']]
         # do other stuff
         return
     # strip the comparison
     match = re.match(r"([>=<]{1,2})(.*)", self.input)
     if match:
         self.compare = match.group(1)
         self.input = match.group(2)
     if self.date_is_absolute():
         # the date is absolute
         # minimise the date
         minimum = pd.datetime(*self.date.lower_strict()[:6])
         minimum = minimum.set(hour=0, minute=0, second=0)
         # maximise the date
         maximum = pd.datetime(*self.date.upper_strict()[:6])
         maximum = maximum.set(hour=23, minute=59, second=59)
         if self.compare == "<":
             self.max = minimum
         elif self.compare == "<=":
             self.max = maximum
         elif self.compare == ">":
             self.min = maximum
         elif self.compare == ">=":
             self.min = minimum
         elif self.compare in ["=", None]:
             # = means between maximum and minimum
             self.min = minimum
             self.max = maximum
         else:
             raise CommandError(
                 "Unknown operator in absolute date "
                 "comparison ({}).".format(self.compare)
             )
     elif re.match(r"([0-9]+[A-Za-z])+$", self.input):
         # the date is relative
         sel = [i for i in re.split(r"([0-9]+)", self.input) if i]
         # sel is now a number-letter-repeat list
         # convert list to dict via pairwise
         sel = DateRange.reverse_pairwise(sel)
         # convert all numbers to int
         sel = dict([a, int(x)] for a, x in sel.items())
         self.date = pd.now()
         # check time units
         for key in sel:
             if key not in 'smhdwMy':
                 raise CommandError(
                     "'{}' isn't a valid unit of time in a relative date. "
                     "Valid units are s, m, h, d, w, M, and y.".format(key)
                 )
         self.date = pd.now().subtract(
             years=sel.get('y', 0),
             months=sel.get('M', 0),
             weeks=sel.get('w', 0),
             days=sel.get('d', 0),
             hours=sel.get('h', 0),
             minutes=sel.get('m', 0),
             seconds=sel.get('s', 0),
         )
         if self.compare in ["<", "<="]:
             self.min = self.date
         elif self.compare in [">", ">=", None]:
             self.max = self.date
         elif self.compare == "=":
             self.max = self.date
             self.min = self.date
         else:
             raise CommandError(
                 "Unknown operator in relative date "
                 "comparison ({}).".format(self.compare)
             )
     else:
         raise CommandError(
             "'{}' isn't a valid absolute or relative date "
             "type.".format(self.input)
         )
Example #38
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    return [int(text) if text.isdigit() else text.lower()
            for text
            in re.split(_nsre, s)]
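Note: a typical use, assuming the function above is in scope; because the digit runs are captured, numeric chunks compare as integers and text chunks case-insensitively:

    print(sorted(['item10', 'item2', 'Item1'], key=natural_sort_key))
    # ['Item1', 'item2', 'item10']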