示例#1
0
文件: lens.py 项目: sucof/pulsar
 def match(self):
     """Match the message against the template and collect field values.

     The message is tokenized with scanTokens when lexerType equals
     LEXER_TOKENS and with scanNgrams otherwise. An empty-string entry
     in the template content marks a field; any other entry must equal
     the URL-quoted value (element 1) of the message token at the same
     position.

     Returns the list of field values in template order, or None when
     the token counts differ or a non-field entry does not match.
     """
     # NOTE(review): urllib.quote is the Python 2 spelling; on Python 3
     # the function lives at urllib.parse.quote -- confirm target version.
     if self.lexerType == LEXER_TOKENS:
         msgTokens = scanTokens(self.msg, self.ws)
     else:
         msgTokens = scanNgrams(self.msg)

     expected = self.template.content
     if len(msgTokens) != len(expected):
         # a differing token count can never match
         return None

     captured = []
     for expectedTok, entry in zip(expected, msgTokens):
         value = entry[1]
         if expectedTok == "":
             # field slot: keep the raw message value
             captured.append(value)
         elif urllib.quote(value) != expectedTok:
             # fixed part of the template disagrees with the message
             return None
     return captured
示例#2
0
文件: lens.py 项目: Logan-lu/pulsar
 def match(self):
     """Match the message against the template and collect field values.

     An empty-string entry in the template content marks a field; any
     other entry must equal the URL-quoted value (element 1) of the
     message token at the same position.

     Returns the list of field values in template order, or None when
     the token counts differ or a non-field entry does not match.
     """
     # Per the original author's note, lexerType arrives here as a boolean
     # rather than the LEXER_TOKENS constant, so comparing against
     # LEXER_TOKENS always selected n-grams and then failed on the token
     # count check. The comparison against False is the workaround.
     # NOTE(review): the root cause of the boolean value is not visible here.
     if self.lexerType == False:
         msgTokens = scanTokens(self.msg, self.ws)
     else:
         msgTokens = scanNgrams(self.msg)

     if len(msgTokens) != len(self.template.content):
         # a differing token count can never match
         return None

     captured = []
     for expectedTok, entry in zip(self.template.content, msgTokens):
         value = entry[1]
         if expectedTok == "":
             # field slot: keep the raw message value
             captured.append(value)
             continue
         if urllib.quote(value) != expectedTok:
             # fixed part of the template disagrees with the message
             return None
     return captured
示例#3
0
文件: lens.py 项目: sucof/pulsar
 def match(self):
     """Extract field values and measure how far the message is from the template.

     The message is tokenized with scanTokens when lexerType equals
     LEXER_TOKENS and with scanNgrams otherwise. Template entries and
     message tokens are paired positionally (pairing stops at the
     shorter of the two); for every empty-string template entry the
     value (element 1) of the paired token is collected as a field.

     Returns a tuple (fields, d) where d is distance() between the raw
     message and the concatenated template content.
     """
     if self.lexerType == LEXER_TOKENS:
         msgTokens = scanTokens(self.msg, self.ws)
     else:
         msgTokens = scanNgrams(self.msg)

     fields = []
     for slot, entry in zip(self.template.content, msgTokens):
         value = entry[1]
         if slot != "":
             # fixed template part, nothing to capture
             continue
         fields.append(value)

     # the template is compared as one flat string against the raw message
     templateText = ''.join(self.template.content)
     return fields, distance(self.msg, templateText)
示例#4
0
文件: lens.py 项目: hhao020/pulsar-1
 def match(self):
     """Return the captured field values and the message/template distance.

     Tokenization uses scanTokens when lexerType equals LEXER_TOKENS,
     scanNgrams otherwise. Each empty-string entry in the template
     content is a field whose value is element 1 of the message token
     at the same position; pairing stops at the shorter sequence.

     Returns:
         (fields, d): the list of field values and the result of
         distance() applied to the raw message and the joined template
         content.
     """
     useTokens = self.lexerType == LEXER_TOKENS
     if useTokens:
         scanned = scanTokens(self.msg, self.ws)
     else:
         scanned = scanNgrams(self.msg)

     pairs = zip(self.template.content, scanned)
     captured = []
     for templatePart, token in pairs:
         tokenValue = token[1]
         if templatePart == "":
             captured.append(tokenValue)

     # distance is computed on whole strings, not on the token pairs
     joinedTemplate = ''.join(self.template.content)
     d = distance(self.msg, joinedTemplate)
     return captured, d
示例#5
0
文件: lens.py 项目: hhao020/pulsar-1
 def match(self):
     """Check the message against the template, returning its field values.

     Tokenization uses scanTokens when lexerType equals LEXER_TOKENS,
     scanNgrams otherwise. Template entries equal to the empty string
     are fields; every other entry has to equal the URL-quoted value
     (element 1) of the message token at the same index.

     Returns:
         The field values as a list, or None if the number of tokens
         differs from the template length or any fixed entry differs.
     """
     # NOTE(review): urllib.quote is the Python 2 spelling; on Python 3
     # the function lives at urllib.parse.quote -- confirm target version.
     if self.lexerType == LEXER_TOKENS:
         scanned = scanTokens(self.msg, self.ws)
     else:
         scanned = scanNgrams(self.msg)

     if len(scanned) != len(self.template.content):
         # token count mismatch rules out a match immediately
         return None

     values = []
     for templatePart, token in zip(self.template.content, scanned):
         tokenValue = token[1]
         isField = templatePart == ""
         if isField:
             values.append(tokenValue)
         elif urllib.quote(tokenValue) != templatePart:
             return None
     return values
示例#6
0
文件: data.py 项目: hhao020/pulsar-1
 def getTokensForMsg(self, msgIndex):
     """Tokenize the stored message at position msgIndex.

     When self.ngram is 0 the message is passed to scanTokens together
     with self.whitespace; for any other value it is passed to
     scanNgrams. The scanner's result is returned unchanged.
     """
     if self.ngram == 0:
         # token mode
         return scanTokens(self.messages[msgIndex], self.whitespace)
     # n-gram mode
     return scanNgrams(self.messages[msgIndex])
示例#7
0
文件: data.py 项目: hgascon/pulsar
 def getTokensForMsg(self, msgIndex):
     """Return the scanner output for the message stored at msgIndex.

     scanTokens (with self.whitespace) is used when self.ngram equals 0;
     scanNgrams is used for every other value of self.ngram.
     """
     return (
         scanTokens(self.messages[msgIndex], self.whitespace)
         if self.ngram == 0
         else scanNgrams(self.messages[msgIndex])
     )