示例#1
0
def create_pcfg(vault_leak, password_leak=None):
    # learn the grammar
    vault_d = json.load(open(vault_leak))
    print "# of vaults: ", len(vault_d)
    print "max size of vault:", max(len(x) for x in vault_d.values())
    print "max size of vault:", min(len(x) for x in vault_d.values())

    if not password_leak:
        D = defaultdict(int)
        for k, v in vault_d.items():
            if len(v) > 40: continue
            for x in v:
                D[x] += 1
        password_leak = PW_TMP_FILE
        with open(password_leak, 'w') as f:
            f.write('\n'.join('%d\t%s' % (f, p) for p, f in sorted(
                D.items(), key=lambda x: x[1], reverse=True)))
        print "Password file created"
    parallel_buildpcfg(password_leak)

    # learn the vault distribution
    tg = TrainedGrammar()
    G = cal_size_subG(tg, vault_leak)
    f = os.tmpfile()
    json.dump(G, f)
    f.seek(0)
    cal_stat(fds=[f])
    f.close()
示例#2
0
    # s = [sum(R[i])/float(len(R[i])) for i in range(len(NT))]
    print G.keys()
    # Smooth sparse distributions: for every entry with fewer than 30 points,
    # multiply counts by 5 over an extended index range, treating missing
    # indices as count 1.
    # NOTE(review): the exact rationale for factor 5 and the +30 range is not
    # evident from this chunk -- presumably tuned empirically; confirm.
    for k, v in G.items():
        if len(v) < 30:
            for i in range(1, len(v) + 30):
                v[i] = 5 * v.get(i, 1)
    # Persist the vault-size distribution as pretty-printed, key-sorted JSON.
    json.dump(G,
              open(GRAMMAR_DIR + 'vault_dist.cfg', 'wb'),
              indent=2,
              separators=(',', ':'),
              sort_keys=True)


if __name__ == "__main__":
    # CLI dispatch: the first argument selects the action.
    if sys.argv[1] == '-process':
        # Print size statistics of the sub-grammar for one vault file.
        tg = TrainedGrammar()
        print json.dumps(cal_size_subG(tg, sys.argv[2]), indent=2)
    elif sys.argv[1] == '-stat':
        # give the vaultcleaned files,
        cal_stat(fnames=sys.argv[2:])
    elif sys.argv[1] == '-default':
        # Combine sub-grammar stats over the default vault files
        # (only 'joe' due to the [:1] slice).
        tg = TrainedGrammar()
        files = [
            "data_vault/%s_vaultcleaned.json" % x for x in ['joe', 'weir'][:1]
        ]
        G = {}
        for f in files:
            G.update(cal_size_subG(tg, f))
        # NOTE(review): os.tmpfile() is Python 2 only (removed in Python 3).
        f = os.tmpfile()
        json.dump(G, f)
        f.seek(0)
示例#3
0
 def __init__(self, grammar=None, cal_cdf=False):
     """Keep the supplied grammar, or fall back to a TrainedGrammar."""
     self.G = grammar if grammar else TrainedGrammar(cal_cdf=cal_cdf)
示例#4
0
class DTE_large(DTE):
    """
    encodes a rule
    """
    def __init__(self, grammar=None, cal_cdf=False):
        self.G = grammar
        if not self.G:
            self.G = TrainedGrammar(cal_cdf=cal_cdf)
            # self.G.load(hny_config.GRAMMAR_DIR+'/grammar.cfg')

    def encode(self, lhs, rhs):
        return self.G.encode_rule(lhs,rhs)

    def decode(self, lhs, pt):
        return self.G.decode_rule(lhs, pt)
        
    def get_freq(self, lhs, rhs):
        return self.G.get_freq(lhs, rhs)
        try:
            s, e = self.G.get_freq_range(lhs, rhs)
            return e-s
        except ValueError: 
            print "ValueError in get_freq -- %s is not in %s:" % \
                (rhs,self.G[lhs][0])
            return -1
    
    def encode_grammar(self, G):
        # Encode sub-grammar
        vd = VaultDistribution()
        stack = ['G']
        code_g = []
        done = []
        while stack:
            head = stack.pop()
            assert head not in done
            done.append(head)
            rule_dict = G[head]
            t_set = []
            for rhs, f in rule_dict.items():
                if rhs != '__total__':
                    r = filter(lambda x: x not in done+stack, 
                               self.G.get_actual_NonTlist(head, rhs))
                    if r:
                        for x in r:
                            if (x not in t_set):
                                t_set.append(x)
            t_set.reverse()
            stack.extend(t_set)
            n = len(rule_dict.keys())-1
            code_g.append(vd.encode_vault_size(head, n))
            if n<0: 
                print "Sorry I cannot encode your password! Please choose"
                print "something different, password12"
                exit(0)
            assert n == vd.decode_vault_size(head, code_g[-1])
            code_g.extend([self.encode(head, r) 
                           for r in rule_dict.keys()
                           if r != '__total__'])
        extra = hny_config.HONEY_VAULT_GRAMMAR_SIZE - len(code_g);
        code_g.extend([convert2group(0,1) for x in range(extra)])
        return code_g

    def decode_grammar(self, P):
        g=SubGrammar(self.G)
        vd = VaultDistribution()
        iterp = iter(P)
        stack = ['G']
        done = []
        while stack:
            head = stack.pop()
            assert head not in done
            done.append(head)
            p = iterp.next()
            n = vd.decode_vault_size(head, p)
            #print "RuleSizeDecoding:", head, n
            t_set = []
            for x in range(n):
                rhs = self.decode(head, iterp.next())
                #print "Decoding:", stack, head, '==>', rhs
                if rhs != '__totoal__':
                    r = filter(lambda x: x not in done+stack, 
                               self.G.get_actual_NonTlist(head, rhs))
                    if r:
                        for x in r:
                            if (x not in t_set):
                                t_set.append(x)
                g.add_rule(head, rhs)
            t_set.reverse()
            stack.extend(t_set)
        g.finalize() # fixes the freq and some other book keepings
        return g