def match(self):
    """Match the message against the template and extract field values.

    The message is split with scanTokens (when lexerType equals
    LEXER_TOKENS) or with scanNgrams (otherwise). An empty string in the
    template content marks a field; every other template entry must equal
    the URL-quoted value of the scanned item at the same position.

    Returns the list of field values on a match, or None when the number
    of scanned items differs from the template or a fixed entry differs.
    """
    use_token_lexer = self.lexerType == LEXER_TOKENS
    scanned = (
        scanTokens(self.msg, self.ws)
        if use_token_lexer
        else scanNgrams(self.msg)
    )

    expected = self.template.content
    if len(scanned) != len(expected):
        # A different item count can never line up with the template.
        return None

    captured = []
    for slot, entry in zip(expected, scanned):
        value = entry[1]  # the second element of a scanned item is its text
        if slot == "":
            # Empty template slot: a variable field, keep the raw value.
            captured.append(value)
        elif urllib.quote(value) != slot:
            # Fixed slot that does not agree with the message.
            return None
    return captured
def match(self):
    """Match the message against the template and extract field values.

    An empty string in the template content marks a field; every other
    template entry must equal the URL-quoted value of the scanned item at
    the same position.

    Returns the list of field values on a match, or None when the number
    of scanned items differs from the template or a fixed entry differs.
    """
    # NOTE: per the original author's observation, lexerType arrives here
    # as a boolean instead of the LEXER_TOKENS constant, so comparing with
    # LEXER_TOKENS always selected the n-gram scanner and then failed on
    # the item-count check. The comparison against False is the
    # workaround; `==` is kept on purpose so that 0 also picks scanTokens.
    scanned = (
        scanTokens(self.msg, self.ws)
        if self.lexerType == False
        else scanNgrams(self.msg)
    )

    expected = self.template.content
    if len(expected) != len(scanned):
        # A different item count can never line up with the template.
        return None

    captured = []
    mismatch = False
    for slot, entry in zip(expected, scanned):
        value = entry[1]  # the second element of a scanned item is its text
        if slot == "":
            # Empty template slot: a variable field, keep the raw value.
            captured.append(value)
            continue
        if urllib.quote(value) != slot:
            mismatch = True
            break
    return None if mismatch else captured
def match(self):
    """Extract field values and measure how far the message is from the template.

    The message is split with scanTokens (when lexerType equals
    LEXER_TOKENS) or with scanNgrams (otherwise). Scanned items are paired
    positionally with the template content; pairing stops at the shorter
    of the two, and no equality check is made on fixed entries.

    Returns a (fields, d) tuple: the values found at the template's empty
    slots, and the result of distance() between the raw message and the
    concatenated template content.
    """
    if self.lexerType == LEXER_TOKENS:
        scanned = scanTokens(self.msg, self.ws)
    else:
        scanned = scanNgrams(self.msg)

    captured = []
    for slot, entry in zip(self.template.content, scanned):
        value = entry[1]  # the second element of a scanned item is its text
        if slot == "":
            # Empty template slot: a variable field, keep the raw value.
            captured.append(value)

    # Compare the raw message with the template flattened to one string.
    flattened = "".join(self.template.content)
    return captured, distance(self.msg, flattened)
def match(self):
    """Match the message against the template and extract field values.

    The message is split with scanTokens (when lexerType equals
    LEXER_TOKENS) or with scanNgrams (otherwise). An empty string in the
    template content marks a field; every other template entry must equal
    the URL-quoted value of the scanned item at the same position.

    Returns the list of field values on a match, or None when the number
    of scanned items differs from the template or a fixed entry differs.
    """
    if self.lexerType == LEXER_TOKENS:
        scanned = scanTokens(self.msg, self.ws)
    else:
        scanned = scanNgrams(self.msg)

    expected = self.template.content
    if len(scanned) != len(expected):
        # A different item count can never line up with the template.
        return None

    captured = []
    for slot, entry in zip(expected, scanned):
        value = entry[1]  # the second element of a scanned item is its text
        if slot == "":
            # Empty template slot: a variable field, keep the raw value.
            captured.append(value)
            continue
        if urllib.quote(value) != slot:
            break
    else:
        # Loop ran to completion: every fixed slot agreed with the message.
        return captured
    return None
def getTokensForMsg(self, msgIndex):
    """Scan the stored message at msgIndex and return the scanner's result.

    When self.ngram is 0 the message goes through scanTokens together
    with self.whitespace; for any other value it goes through scanNgrams.
    """
    token_mode = self.ngram == 0
    message = self.messages[msgIndex]
    if token_mode:
        return scanTokens(message, self.whitespace)
    return scanNgrams(message)