Пример #1
0
  def decrypt(self, cipherText):
    """Search for the key that best decodes ``cipherText`` using Gibbs sampling.

    Runs ``self.numIters`` independent sampling chains, each started from a
    fresh ``util.generateKey()`` key, and keeps the highest-scoring key seen.
    A key is scored by applying it to the upper-cased cipher text with
    ``util.encrypt`` and passing the result to ``self.languagemodel.score``.
    Scores are treated as log-weights: score differences are exponentiated
    and normalised to obtain the sampling probabilities.

    Progress is written to stdout: a '.' after every sweep that has not
    converged, a percentage after every chain, and a final summary line.

    Args:
      cipherText: text to decode. Scoring uses its upper-cased form; the
        returned translation is produced from the text exactly as given.

    Returns:
      A tuple ``(translated, key, num_best)``. ``translated`` is
      ``util.encryptCase(cipherText, key)``, ``key`` is the best key found
      (the empty string if no chain ran) and ``num_best`` is the number of
      chains that finished on exactly the best score.
    """
    upperCipher = cipherText.upper()
    max_sweeps = 200  # upper bound on full sweeps per chain
    converge_n = 5    # number of trailing sweeps inspected for convergence

    def swapIndices(a, b, key):
      """Return a copy of ``key`` with positions ``a`` and ``b`` exchanged."""
      tempKey = list(key)
      tempKey[a], tempKey[b] = tempKey[b], tempKey[a]
      return "".join(tempKey)

    def sample(swaps):
      """Draw one entry of ``swaps``, weighted by each entry's first field."""
      r = random.random()
      cumulative = 0.0
      for swap in swaps:
        cumulative += swap[0]
        if r <= cumulative:
          return swap
      # Floating-point rounding can leave the running total marginally below
      # r even though the weights nominally sum to 1. Fall back to the final
      # candidate instead of implicitly returning None.
      return swaps[-1]

    def gibbs(key):
      """Run one chain from ``key``; return the best (score, key) it scored."""
      best_swap = (float('-inf'), "")
      last_n = []
      key_len = len(key)
      for _sweep in range(max_sweeps):
        for a in range(key_len):
          # Score every key reachable by swapping position a with position b
          # (b == a leaves the key unchanged, so staying put is a candidate).
          cand_keys = []
          cand_scores = []
          for b in range(key_len):
            temp_key = swapIndices(a, b, key)
            temp_score = self.languagemodel.score(util.encrypt(upperCipher, temp_key))
            if temp_score > best_swap[0]:
              best_swap = (temp_score, temp_key)
            cand_keys.append(temp_key)
            cand_scores.append(temp_score)

          # Convert scores to probabilities. Subtracting the maximum first
          # keeps every exponent <= 0, so the largest weight is exactly 1.
          top_score = max(cand_scores)
          weights = [math.exp(s - top_score) for s in cand_scores]
          total = sum(weights)
          candidates = [(w / total, k) for w, k in zip(weights, cand_keys)]

          # Move the chain to a randomly sampled candidate.
          key = sample(candidates)[1]

        # Remember the best-so-far result of the last converge_n sweeps.
        last_n.append(best_swap)
        last_n = last_n[-converge_n:]

        # Converged once a full window exists and every score in it lies
        # within 1 of the window's mean.
        if len(last_n) == converge_n:
          mean_score = sum(s[0] for s in last_n) / float(converge_n)
          if all(abs(s[0] - mean_score) <= 1 for s in last_n):
            return best_swap

        sys.stdout.write('.')
        sys.stdout.flush()

      return best_swap

    best_swap = (float('-inf'), "")
    num_best = 0
    for i in range(self.numIters):
      swap = gibbs(util.generateKey())
      if swap[0] > best_swap[0]:
        best_swap = swap
        num_best = 1
      elif swap[0] == best_swap[0]:
        num_best += 1

      sys.stdout.write(str(float(i+1)/self.numIters * 100) + "%")
      sys.stdout.flush()

    translated = util.encryptCase(cipherText, best_swap[1])
    # Formatted as a single string so the line is valid both as a Python 2
    # print statement and as a Python 3 call; the spacing reproduces what the
    # comma-separated Python 2 print statement emitted.
    print("\n%s BEST:  %s %s" % (num_best, best_swap[0], translated))
    return translated, best_swap[1], num_best
Пример #2
0
def main(argv):
    """Compare the solver and the baseline decrypter on lines of a text file.

    Every line of the test file whose length as read (trailing newline
    included) lies between ``minLength`` and ``maxLength`` is passed through
    ``util.add_noise``, encrypted with a fresh ``util.generateKey()`` key and
    handed to both decrypters. Each result is scored with ``score_accuracy``
    against the original, un-noised line, and the mean accuracies are printed
    at the end.

    Options: ``-i <n-gram file>``, ``-t <test file>``, ``-n <noise level>``,
    ``-v`` (verbose per-line report), ``-h`` (help).

    Args:
        argv: option list handed to ``getopt.getopt`` (normally
            ``sys.argv[1:]``).

    Raises:
        SystemExit: with code 2 on an invalid option, or with no code after
            printing help for ``-h``.
    """
    learnfile = "ngrams.txt"
    testfile = "europarl-v7.es-en.en"
    verbose = False
    noise = 0.05
    numIterations = 0
    minLength = 10
    maxLength = 60

    def printHelpMessage():
        print('decryptor.py [-i <n-gram file> -t <testfile> -n <noise level>]')
        print('-v verbose')
        print('-h help')

    def report(*items):
        # Emits the items separated by single spaces, as the comma-separated
        # Python 2 print statement did, while staying valid on Python 3.
        print(" ".join("%s" % (item,) for item in items))

    def average(values):
        # float() keeps the mean from being truncated by Python 2 integer
        # division should the scores happen to be ints.
        return sum(values) / float(len(values))

    try:
        opts, _ = getopt.getopt(argv, "hvi:t:n:")
    except getopt.GetoptError:
        printHelpMessage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            printHelpMessage()
            sys.exit()
        elif opt == "-i":
            learnfile = arg
        elif opt == "-t":
            testfile = arg
        elif opt == "-n":
            noise = float(arg)
        elif opt == "-v":
            verbose = True

    print("Learning...")
    sys.stdout.flush()
    languagemodel = LanguageModel.LanguageModel(learnfile)

    cipher_solver = solver.Solver(languagemodel)
    cipher_baseline = baseline.Baseline()
    solver_accuracy = []
    baseline_accuracy = []
    max_counts = []

    # The context manager guarantees the test file is closed even if one of
    # the decrypters raises part-way through.
    with open(testfile, "r") as original_text_file:
        for original_text in original_text_file:
            if len(original_text) < minLength or len(original_text) > maxLength:
                continue
            numIterations += 1
            encryption_key = util.generateKey()
            original_text_noised = util.add_noise(original_text, noise)
            cipher_text = util.encryptCase(original_text_noised, encryption_key)
            startTime = datetime.datetime.now()

            if verbose:
                print("============================")
                report("Iteration ", numIterations)
                report("Length ", len(original_text))
                report("Start Time", startTime)
                report("Original Text", original_text)
                report("Original Text Noised", original_text_noised)
                report("Key", encryption_key)
                report("Cipher Text Noised", cipher_text)

            baseline_text, baseline_decryption_key = cipher_baseline.decrypt(cipher_text)
            guess_text, guess_decryption_key, num_guesses = cipher_solver.decrypt(cipher_text)

            # Both decrypters are judged against the original (un-noised) text.
            baseline_score = score_accuracy(encryption_key, baseline_decryption_key, cipher_text, original_text)
            baseline_accuracy.append(baseline_score)
            solver_score = score_accuracy(encryption_key, guess_decryption_key, cipher_text, original_text)
            solver_accuracy.append(solver_score)
            max_counts.append(num_guesses)

            if verbose:
                # Sample the clock once so every line of the report agrees.
                endTime = datetime.datetime.now()
                duration = endTime - startTime
                report("End Time", endTime)
                report("Duration", duration)
                report("Length, Accuracy, Duration,", len(original_text), ',', solver_score, ',', duration)
                report("Baseline Accuracy: ", baseline_score)
                report("Average Accuracy of Baseline: ", average(baseline_accuracy))
                report("Solver Accuracy: ", solver_score)
                report("Average Accuracy of Solver: ", average(solver_accuracy))
                report("Reached same thing many times", max_counts)

    # With no qualifying line there is nothing to average; skip the means
    # rather than dividing by zero.
    if solver_accuracy:
        report("Average Accuracy of Baseline: ", average(baseline_accuracy))
        report("Average Accuracy of Solver: ", average(solver_accuracy))
    print("Over %d cipher texts" % len(solver_accuracy))