Exemplo n.º 1
0
class NonT_W(NonT):
    """Word non-terminal: matches a password chunk against word dictionaries.

    Two ``IntDAWG`` dictionaries (``words.dawg`` and ``pinyin.dawg``) are
    loaded once, when the class body executes.  An instance lower-cases the
    given chunk and looks it up in both, allowing the l33t substitutions in
    ``l33t_replaces``.  On a single, purely alphabetic match it sets
    ``prod`` (matched word), ``sym`` (symbol built by ``get_nont_class``),
    ``L`` (a ``NonT_L`` for the match and the original chunk) and ``prob``.
    In every other case the class-level defaults stay in effect and ``L`` is
    never assigned.
    """

    # Defaults used when the chunk does not match any dictionary entry.
    sym = 'W'
    prod = ''
    prob = 0.0

    english_dawg = IntDAWG().load(GRAMMAR_PATH + 'words.dawg')
    chinese_dawg = IntDAWG().load(GRAMMAR_PATH + 'pinyin.dawg')
    # Normaliser for the probability computed in __init__.
    total_f = english_dawg[u"__total__"] + chinese_dawg[u'__total__']
    # Substitutions tolerated when searching for similar keys.
    l33t_replaces = DAWG.compile_replaces({
        '3': 'e',
        '4': 'a',
        '@': 'a',
        '$': 's',
        '0': 'o',
        '1': 'i',
        'z': 's'
    })

    def __init__(self, word):
        """Look ``word`` (the password chunk to analyse) up in the dictionaries.

        Returns early, leaving the class defaults untouched, when nothing
        matches, when the dictionaries disagree on the matched key, or when
        the matched key is not purely alphabetic.
        """
        lowered = word.lower()
        hits = []
        for trie in (self.english_dawg, self.chinese_dawg):
            # Keys equal to the chunk once the l33t replacements are allowed;
            # only the first returned key is kept for each dictionary.
            similar = trie.similar_keys(lowered, self.l33t_replaces)
            if similar:
                hits.append((trie, similar[0]))
        if not hits:
            return

        # Collapse duplicates: several dictionaries may return the same key.
        distinct = list({key for _, key in hits})
        if len(distinct) > 1 or not distinct[0].isalpha():
            return

        match = distinct[0]
        # Add up the stored value of the key in every dictionary that hit.
        freq = sum(trie[match] for trie, _ in hits)

        self.prod = match
        self.sym = 'W%s' % get_nont_class('W', match)
        # NonT_L receives both the matched word and the original chunk.
        self.L = NonT_L(match, word)
        self.prob = self.L.prob * float(freq) / self.total_f

    def parse_tree(self):
        """Return a ``ParseTree`` holding this rule followed by ``self.L``'s."""
        tree = ParseTree()
        tree.add_rule((self.sym, self.prod))
        tree.extend_rules(self.L.parse_tree())
        return tree

    def rule_set(self):
        """Return a ``RuleSet`` containing only this word rule.

        The rules of ``self.L`` are not merged into the result here.
        """
        rules = RuleSet()
        rules.add_rule(self.sym, self.prod)
        return rules

    def __str__(self):
        fields = (self.sym, self.prod, self.L, self.prob)
        return '%s: %s<%s> (%g)' % fields
Exemplo n.º 2
0
class NonT_W(NonT):
    """Word non-terminal: matches a password chunk against word dictionaries.

    Three ``IntDAWG`` dictionaries are loaded once, when the class body
    executes.  An instance lower-cases the given chunk and looks it up in
    each of them, allowing the l33t substitutions in ``l33t_replaces``.  On
    a single, purely alphabetic match it sets ``prod``, ``sym``, ``L`` and
    ``prob``; otherwise the class-level defaults stay in effect and ``L`` is
    never assigned.
    """

    sym, prod, prob = 'W', '', 0.0
    thisdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # NOTE(review): os.path.dirname() returns a path without a trailing
    # separator, so these resolve to "<thisdir>dictionary1.1.dawg" etc.
    # Confirm that this is where the dictionaries really live.
    word_dawg = IntDAWG().load('{}dictionary1.1.dawg'.format(thisdir))
    # NOTE(review): first-name and last-name dictionaries load the same file,
    # so its counts enter ``total_f`` (and every match) twice -- confirm.
    fname_dawg = IntDAWG().load('{}eng_dict.dawg'.format(thisdir))
    lname_dawg = IntDAWG().load('{}eng_dict.dawg'.format(thisdir))
    # Normaliser for the probability computed in __init__.
    total_f = word_dawg[u'__total__'] + \
        fname_dawg[u'__total__'] + \
        lname_dawg[u'__total__']

    # Substitutions tolerated when searching for similar keys.
    l33t_replaces = DAWG.compile_replaces({
        '3': 'e',
        '4': 'a',
        '@': 'a',
        '$': 's',
        '0': 'o',
        '1': 'i',
        'z': 's'
    })

    def __init__(self, word):
        """Look ``word`` (a password chunk) up in the dictionaries.

        Returns early, leaving the class defaults untouched, when nothing
        matches, when the dictionaries disagree on the matched key, or when
        the matched key is not purely alphabetic.
        """
        w = unicode(word.lower())
        dawg = []
        for d in [self.word_dawg, self.fname_dawg, self.lname_dawg]:
            # Only the first similar key of each dictionary is kept.
            k = d.similar_keys(w, self.l33t_replaces)
            if k:
                dawg.append((d, k[0]))
        if not dawg:
            return

        # Collapse duplicates: several dictionaries may return the same key.
        v = list(set([d[1] for d in dawg]))
        if len(v) > 1 or not v[0].isalpha():
            return
        v = v[0]
        # Add up the stored value of the key in every dictionary that hit.
        f = sum([d[0][v] for d in dawg])
        self.prod = v
        self.sym = 'W%s' % get_nont_class('W', v)
        # self.L has to exist before ``prob`` is computed; parse_tree(),
        # rule_set() and __str__() read it as well.  Without this assignment
        # every successful match raised AttributeError.
        self.L = NonT_L(v, word)
        self.prob = self.L.prob * float(f) / self.total_f

    def parse_tree(self):
        """Return a ``ParseTree`` holding this rule followed by ``self.L``'s."""
        pt = ParseTree()
        pt.add_rule((self.sym, self.prod))
        pt.extend_rules(self.L.parse_tree())
        return pt

    def rule_set(self):
        """Return a ``RuleSet`` with this rule merged with ``self.L``'s rules."""
        rs = RuleSet()
        rs.add_rule(self.sym, self.prod)
        rs.update_set(self.L.rule_set())
        return rs

    def __str__(self):
        return '%s: %s<%s> (%g)' % (self.sym, self.prod, self.L, self.prob)
Exemplo n.º 3
0
class NonT_W(NonT):
    """Word non-terminal: matches a password chunk against word dictionaries.

    Three ``IntDAWG`` dictionaries (English words, first names, last names)
    are loaded once, when the class body executes.  An instance lower-cases
    the given chunk and looks it up in each of them, allowing the l33t
    substitutions in ``l33t_replaces``.  On a single, purely alphabetic
    match it sets ``prod``, ``sym``, ``L`` and ``prob``; otherwise the
    class-level defaults stay in effect and ``L`` is never assigned.
    """

    # Defaults used when the chunk does not match any dictionary entry.
    sym = 'W'
    prod = ''
    prob = 0.0

    word_dawg = IntDAWG().load('data/English_30000.dawg')
    fname_dawg = IntDAWG().load('data/facebook-firstnames-withcount.dawg')
    lname_dawg = IntDAWG().load('data/facebook-lastnames-withcount.dawg')
    # Normaliser for the probability computed in __init__.
    total_f = (word_dawg[u'__total__'] +
               fname_dawg[u'__total__'] +
               lname_dawg[u'__total__'])

    # Substitutions tolerated when searching for similar keys.
    l33t_replaces = DAWG.compile_replaces({
        '3': 'e',
        '4': 'a',
        '@': 'a',
        '$': 's',
        '0': 'o',
        '1': 'i',
        'z': 's'
    })

    def __init__(self, word):
        """Look ``word`` (a password chunk) up in the dictionaries.

        Returns early, leaving the class defaults untouched, when nothing
        matches, when the dictionaries disagree on the matched key, or when
        the matched key is not purely alphabetic.
        """
        lowered = unicode(word.lower())
        hits = []
        for trie in (self.word_dawg, self.fname_dawg, self.lname_dawg):
            # Only the first similar key of each dictionary is kept.
            similar = trie.similar_keys(lowered, self.l33t_replaces)
            if similar:
                hits.append((trie, similar[0]))
        if not hits:
            return

        # Collapse duplicates: several dictionaries may return the same key.
        distinct = list({key for _, key in hits})
        if len(distinct) > 1 or not distinct[0].isalpha():
            return

        match = distinct[0]
        # Add up the stored value of the key in every dictionary that hit.
        freq = sum(trie[match] for trie, _ in hits)

        self.prod = match
        self.sym = 'W%s' % get_nont_class('W', match)
        self.L = NonT_L(match, word)
        self.prob = self.L.prob * float(freq) / self.total_f

    def parse_tree(self):
        """Return a ``ParseTree`` holding this rule plus ``self.L``'s tree."""
        tree = ParseTree()
        tree.add_rule((self.sym, self.prod))
        tree.extend_rule(self.L.parse_tree())
        return tree

    def rule_set(self):
        """Return a ``RuleSet`` with this rule merged with ``self.L``'s rules."""
        rules = RuleSet()
        rules.add_rule(self.sym, self.prod)
        rules.update_set(self.L.rule_set())
        return rules

    def __str__(self):
        fields = (self.sym, self.prod, self.L, self.prob)
        return '%s: %s<%s> (%g)' % fields
Exemplo n.º 4
0
    def load(self, filename):
        """Load the grammar stored as JSON in ``filename`` into ``self.G``.

        Every rule table receives a ``'__total__'`` entry holding the sum of
        its values.  All right-hand sides of tables whose key starts with
        ``'W'`` are then collected into an ``IntDAWG`` stored in
        ``self.Wlist``, and ``self.date`` is set to a fresh ``Date()``.
        """
        # Close the file as soon as it has been parsed instead of leaving the
        # handle open until it happens to be garbage-collected.
        with open_(filename) as grammar_file:
            self.G = json.load(grammar_file, object_pairs_hook=OrderedDict)

        for k, v in self.G.items():
            if self.cal_cdf:
                # The in-place cumulative update of each entry is disabled,
                # so this branch only accumulates the plain total.
                lf = 0
                for f in v.values():
                    lf += f
                v['__total__'] = lf
            else:
                v['__total__'] = sum(v.values())

        # Gather every right-hand side of the W* tables; they are used later
        # when strings are generated.
        words = []
        for k, D in self.G.items():
            if k.startswith('W'):
                words.extend(D)

        # Date object used to manage the date rules.
        self.date = Date()
        # DAWG over the collected word rules.
        self.Wlist = IntDAWG(words)
Exemplo n.º 5
0
    def finalize(self):
        """Freeze the grammar after training.

        Calls ``self.fix_freq()``, records the keys of the current
        ``self.G`` that contain no underscore in ``self.NonT_set``, rebinds
        ``self.G`` to ``self.R.G``, builds ``self.Wdawg`` from the right-hand
        sides of all tables whose key starts with ``'W'``, repairs
        non-positive frequencies and missing ``'__total__'`` entries, builds
        ``self.date`` when a ``'T'`` table exists, and finally sets
        ``self.freeze`` and clears ``self.R.G.default_factory``.
        """
        self.fix_freq()
        # Taken from the current self.G, before it is rebound just below.
        self.NonT_set = [
            sym for sym in list(self.G.keys()) if '_' not in sym
        ]  # + list('Yymd')
        self.G = self.R.G

        w_keys = []
        for lhs, rules in list(self.G.items()):
            if lhs.startswith('W'):
                w_keys.extend(rules)
        self.Wdawg = IntDAWG(w_keys)

        for rules in self.G.values():
            for rhs, freq in rules.items():
                if freq <= 0:
                    print("Zero frequency LHS added, setting frequency to 1")
                    rules[rhs] = 1
                    # Keep an already-present total in step with the bump.
                    if '__total__' in rules:
                        rules['__total__'] += 1
            if '__total__' not in rules:
                print(
                    '__total__ should be there in the keys!!. I am adding one.'
                )
                rules['__total__'] = sum(rules.values())

        if 'T' in self.G:
            t_rules = [
                rhs for rhs in list(self.G['T'].keys()) if rhs != '__total__'
            ]
            self.date = Date(T_rules=t_rules)
        self.freeze = True
        self.R.G.default_factory = None
Exemplo n.º 6
0
 def finalize(self):
     """Freeze the grammar after training.

     Calls ``self.fix_freq()``, records the keys of the current ``self.G``
     that contain no underscore in ``self.NonT_set``, rebinds ``self.G`` to
     ``self.R.G``, builds ``self.Wdawg`` from the right-hand sides of all
     tables whose key starts with ``'W'``, builds ``self.date`` when a
     ``'T'`` table exists, and sets ``self.freeze``.
     """
     self.fix_freq()
     # A real list rather than filter(): on Python 3 filter() returns a
     # one-shot iterator, which would be empty after its first traversal.
     # Taken from the current self.G, before it is rebound just below.
     self.NonT_set = [x for x in self.G.keys()
                      if x.find('_') < 0]  #+ list('Yymd')
     self.G = self.R.G
     Wlist = [x for k, v in self.G.items() for x in v if k.startswith('W')]
     self.Wdawg = IntDAWG(Wlist)
     if 'T' in self.G:
         self.date = Date(
             T_rules=[x for x in self.G['T'].keys() if x != '__total__'])
     self.freeze = True
Exemplo n.º 7
0
 def load(self, filename):
     """Load the grammar stored as JSON in ``filename`` into ``self.G``.

     When ``self.cal_cdf`` is set, the entries of every rule table are
     turned into running (cumulative) sums in table order; otherwise they
     are left as read.  In both cases ``'__total__'`` is set to the sum of
     the original values.  The right-hand sides of all tables whose key
     starts with ``'W'`` are then stored as an ``IntDAWG`` in
     ``self.Wdawg``, and ``self.date`` is set to a fresh ``Date()``.
     """
     # Close the file as soon as it has been parsed instead of leaving the
     # handle open until it happens to be garbage-collected.
     with open_(filename) as grammar_file:
         self.G = json.load(grammar_file, object_pairs_hook=OrderedDict)
     for k, v in self.G.items():
         if self.cal_cdf:
             print_err("Calculating CDF!")
             lf = 0
             # Only existing keys are reassigned, so the table does not
             # change size while it is being iterated.
             for l, f in v.items():
                 v[l] += lf
                 lf += f
             v['__total__'] = lf
         else:
             v['__total__'] = sum(v.values())
     Wlist = [x for k, v in self.G.items() for x in v if k.startswith('W')]
     self.date = Date()
     self.Wdawg = IntDAWG(Wlist)
Exemplo n.º 8
0
def build_int_dawg(filename):
    """Build an ``IntDAWG`` from the word list in ``filename`` and save it.

    Each line is parsed by ``get_f_w_freq`` unless the first line starts
    with ``'#'``; in that case the header's remaining fields are passed to
    ``get_file_data_format`` to choose the parser, and the header itself is
    not parsed as data.  A ``'__total__'`` entry is appended, and the DAWG
    is written next to the input file under the file's base name (up to its
    first dot) plus ``'.dawg'``.  The written file is then spot-checked with
    ``test_dawg`` on the first and last ten entries.
    """
    import os  # local import: only needed to derive the output path

    with open_(filename) as inpf:
        freq_style = get_f_w_freq
        f_line = inpf.readline()
        w = []
        if f_line.startswith('#'):
            words = f_line.strip().split()
            freq_style = get_file_data_format(words[1:])
        else:
            w = [freq_style(f_line)]
        w.extend([freq_style(line) for line in inpf])
        # NOTE(review): sum_freq is not defined in this function; presumably
        # a module-level counter maintained by the line parsers -- confirm.
        w.append(('__total__', sum_freq))
        int_dawg = IntDAWG(w)
        # Strip the extension(s) from the file name only.  Splitting the
        # whole path at its first dot broke inputs such as
        # "./data/words.txt", which produced the output name ".dawg".
        directory, base = os.path.split(filename)
        of = os.path.join(directory, base.split('.')[0] + '.dawg')
        with open(of, 'wb') as o:
            int_dawg.write(o)
        test_dawg(of, w[:10] + w[-10:])
Exemplo n.º 9
0
 def finalize(self):
     """Freeze the grammar after training.

     Calls ``self.fix_freq()``, records the keys of the current ``self.G``
     that contain no underscore in ``self.NonT_set``, rebinds ``self.G`` to
     ``self.R.G``, builds ``self.Wdawg`` from the right-hand sides of all
     tables whose key starts with ``'W'``, adds a missing ``'__total__'``
     entry to any table lacking one, builds ``self.date`` when a ``'T'``
     table exists, and sets ``self.freeze``.
     """
     self.fix_freq()
     # A real list rather than filter(): on Python 3 filter() returns a
     # one-shot iterator, which would be empty after its first traversal.
     # Taken from the current self.G, before it is rebound just below.
     self.NonT_set = [x for x in self.G.keys()
                      if x.find('_') < 0]  #+ list('Yymd')
     self.G = self.R.G
     Wlist = [x
              for k, v in self.G.items()
              for x in v
              if k.startswith('W')]
     self.Wdawg = IntDAWG(Wlist)
     for k, v in self.G.items():
         if '__total__' not in v:
             # Call form with a single argument: prints the same text on
             # Python 2 and is valid syntax on Python 3.
             print('__total__ should be there in the keys!!. I am adding one.')
             v['__total__'] = sum(v.values())

     if 'T' in self.G:
         self.date = Date(T_rules=[x
                                   for x in self.G['T'].keys()
                                   if x != '__total__'])
     self.freeze = True
Exemplo n.º 10
0
def test_dawg(filename, wlist):
    """Check that the DAWG saved in ``filename`` holds the given entries.

    ``wlist`` is a sequence of entries whose first item is the key and whose
    second item is the expected value.  Raises ``AssertionError`` on the
    first entry whose stored value differs.
    """
    loaded = IntDAWG().load(filename)
    for entry in wlist:
        key = str(entry[0])
        assert entry[1] == loaded[key]