Пример #1
0
def main():
    """Translate an input file through the module-level ``KEY`` mapping.

    Command-line options:
        -i/--infile           file to translate; the program exits with
                              status 2 when it is omitted
        -o/--outfile          destination file; when omitted the result is
                              printed to stdout
        -t/--training         optional training text fed to a second
                              frequency counter
        -n/--norm             parsed, but not used by this function
        -c/--caseinsensitive  forwarded to both frequency counters

    Every input character found in ``KEY`` is replaced by the lower-cased
    mapped value; every other character is copied unchanged.  A textual
    dump of ``KEY`` is appended to the output.
    """
    # Set up the commandline arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", help="input file")
    parser.add_argument("-o", "--outfile", help="output file")
    parser.add_argument("-t", "--training", help="training file")
    parser.add_argument('-n',
                        "--norm",
                        type=int,
                        default=0,
                        help="Normalise frequencies to...  0=don't normalise")
    # NOTE(review): a store_true flag whose default is already True can
    # never be switched off, so -c is effectively a no-op.  Left untouched
    # because changing the default would alter the tool's behaviour.
    parser.add_argument('-c',
                        '--caseinsensitive',
                        default=True,
                        action='store_true')

    # Parse the commandline arguments
    args = parser.parse_args()

    # Fail fast: without an input file there is nothing to translate, so
    # do not bother reading the training file first.
    if args.infile is None:
        sys.exit(2)

    training = frequency.frequency(caseinsensitive=args.caseinsensitive)
    ciphertext = frequency.frequency(caseinsensitive=args.caseinsensitive)

    if args.training is not None:
        with open(args.training, 'r') as f:
            training.add(f.read())

    with open(args.infile, 'r') as f:
        intext = f.read()
    ciphertext.add(intext)

    # The results are not consumed below; the calls are retained in case
    # chars() has side effects on the counters -- TODO confirm and drop.
    training.chars()
    ciphertext.chars()

    translated = "".join(KEY[c].lower() if c in KEY else c for c in intext)
    outtext = translated + "\nKey: " + str(KEY)

    if args.outfile is not None:
        with open(args.outfile, 'w') as f:
            f.write(outtext)
    else:
        print(outtext)
Пример #2
0
def padding(artificial_payload, raw_payload):
    """Append to ``raw_payload`` the key it is most short of.

    For each key in the frequency table of ``artificial_payload`` the
    frequency of the same key in ``raw_payload`` (when present) is
    subtracted.  The key with the largest non-negative difference is
    appended to ``raw_payload`` in place; on ties the key visited last wins.

    Args:
        artificial_payload: payload whose frequency profile is the target.
        raw_payload: mutable sequence supporting ``append``; modified in
            place.

    Returns:
        None.
    """
    # Get frequency of raw_payload and artificial profile payload
    artificial_frequency = frequency.frequency(artificial_payload)
    raw_payload_frequency = frequency.frequency(raw_payload)

    highest = None
    highest_diff = 0
    # items() and ``in`` work on Python 2 and 3 alike; the former
    # iteritems()/has_key() calls do not exist on Python 3 dicts.
    for key, diff in artificial_frequency.items():
        if key in raw_payload_frequency:
            diff -= raw_payload_frequency[key]
        if diff >= highest_diff:
            highest_diff = diff
            highest = key

    # NOTE(review): ``highest`` is still None when no difference is >= 0
    # (e.g. an empty artificial profile); None is appended in that case,
    # exactly as before, so callers relying on the length growing by one
    # per call keep working.
    raw_payload.append(highest)
def smpl_dstr(filename):
    """Return one word of ``filename`` picked at random, weighted by count.

    Each word of the histogram built from ``filename`` is repeated as many
    times as ``frequency.frequency`` reports for it, and a uniformly random
    element of that expanded list is returned.
    """
    histogram = frequency.histogram(filename)

    weighted_words = []
    for word in histogram:
        occurrences = frequency.frequency(word, histogram)
        weighted_words.extend([word] * occurrences)

    pick = random.randint(0, len(weighted_words) - 1)
    return weighted_words[pick]
Пример #4
0
def frequency_function(args):
    """Run the analysis selected by ``args.analysis_type``.

    ``"tf"`` and ``"df"`` call ``frequency``, ``"u"`` calls ``use_index``
    and ``"a"`` calls ``adoption_index``; any other value does nothing.
    All settings are read from attributes of ``args`` before dispatching.
    """
    dataset_name = args.dataset_name
    ngram_min, ngram_max = args.ngram_min, args.ngram_max
    analysis_type = args.analysis_type
    year_start, year_end = args.year_start, args.year_end
    n = args.n
    preprocessing = args.preprocessing

    if analysis_type in ("tf", "df"):
        # NOTE(review): ``n`` is passed twice here, unlike the two sibling
        # calls below -- confirm against the signature of frequency().
        frequency(dataset_name, year_start, year_end, n, ngram_min, ngram_max,
                  analysis_type, n, preprocessing)
        return

    if analysis_type == "u":
        use_index(dataset_name, year_start, year_end, ngram_min, ngram_max,
                  analysis_type, n, preprocessing)
        return

    if analysis_type == "a":
        adoption_index(dataset_name, year_start, year_end, ngram_min,
                       ngram_max, analysis_type, n, preprocessing)
Пример #5
0
def parseFrequencies(msg, wordDict):
    """Count the words of ``msg`` into ``wordDict`` and weight every entry.

    ``msg`` is lower-cased and split on whitespace; each word increments
    its counter in ``wordDict``.  Afterwards every value in ``wordDict``
    (including entries that were present before the call) is multiplied by
    ``frequency.frequency(key)``.  The dictionary is modified in place and
    also returned.
    """
    for token in msg.lower().split():
        wordDict[token] = wordDict.get(token, 0) + 1

    for entry in wordDict:
        wordDict[entry] *= frequency.frequency(entry)

    return wordDict
Пример #6
0
    def test_frequency_with_valid(self):
        """frequency() of the last random fixture equals the known mapping."""
        *_, gl30_10 = random_test_data()

        # Keys run 0, 10, ..., 100; the values are listed in key order.
        counts = [877, 888, 919, 876, 938, 957, 916, 915, 930, 903, 881]
        expected = OrderedDict(zip(range(0, 101, 10), counts))

        self.assertEqual(expected, frequency(gl30_10))
Пример #7
0
def getSubstitutionTable(artificial_payload, attack_payload):
    """Build a one-to-many substitution table from attack to artificial bytes.

    The i-th entry of the sorted attack frequencies is first mapped to the
    i-th entry of the sorted artificial-profile frequencies.  Every
    remaining artificial entry is then assigned to the attack character
    whose ratio (attack frequency / summed frequency of the entries already
    assigned to it) is currently the highest.

    Assumes the artificial profile has at least as many distinct entries as
    the attack payload (the m <= n case described in the paper).

    Args:
        artificial_payload: payload defining the target profile.
        attack_payload: payload to be disguised.

    Returns:
        dict mapping each attack character to a list of entries taken from
        ``frequency.sorting(artificial_frequency)``.  Each entry is indexed
        as ``entry[1]`` for its frequency -- presumably (character,
        frequency) pairs; see frequency.py.
    """
    # The frequency of each byte is provided in dictionary format.
    artificial_frequency = frequency.frequency(artificial_payload)
    attack_frequency = frequency.frequency(attack_payload)

    sorted_artificial_frequency = frequency.sorting(artificial_frequency)
    sorted_attack_frequency = frequency.sorting(attack_frequency)

    # Initial one-to-one mapping, most frequent to most frequent.
    substitution_table = {}
    for i, val in enumerate(sorted_attack_frequency):
        substitution_table[val[0]] = [sorted_artificial_frequency[i]]

    # Map the rest of the valid characters.  With an empty table there is
    # nothing to attach them to (previously this ended in a KeyError on
    # ``substitution_table[None]``).
    if substitution_table:
        for i in range(len(sorted_attack_frequency),
                       len(sorted_artificial_frequency)):
            highest_ratio = -1
            highest = None
            # items() works on Python 2 and 3; iteritems() is Python 2 only.
            for key, assigned in substitution_table.items():
                assigned_total = sum(sub[1] for sub in assigned)
                ratio = attack_frequency[key] / assigned_total
                if ratio > highest_ratio:
                    highest_ratio = ratio
                    highest = key
            substitution_table[highest].append(sorted_artificial_frequency[i])

    sys.stdout.write('Substitution Table:\n')
    sys.stdout.write('{0}\n'.format(substitution_table))

    # The table only has to be usable by
    # substitute(attack_payload, substitution_table).
    return substitution_table
Пример #8
0
def optimize_gollier_group_size(population, beta, FNR, FPR, tests_per_person_per_week_ub,
                                group_size_min=10, group_size_max=1000, grid_size=100, nreps=100,
                                group_test_name='Gollier',
                                verbose=True):
    """Grid-search the group size that minimises quarantines per person.

    ``grid_size`` evenly spaced group sizes between ``group_size_min`` and
    ``group_size_max`` are simulated (``nreps`` repetitions each).  Among
    the sizes whose tests per person per week do not exceed
    ``tests_per_person_per_week_ub``, the one with the fewest quarantines
    per person is kept.

    Args:
        population: passed to ``StaticSimulation``; its
            ``non_quarantine_alpha`` attribute is passed to ``frequency``.
        beta: forwarded to ``frequency``.
        FNR, FPR: error rates forwarded to the group-test constructors.
        tests_per_person_per_week_ub: upper bound on the testing budget.
        group_size_min, group_size_max, grid_size: definition of the grid.
        nreps: number of simulation repetitions per group size.
        group_test_name: ``'Gollier'`` or ``'ThreeStageHierarchical'``;
            anything else prints a warning and falls back to Gollier.
        verbose: print one summary line per group size.

    Returns:
        Tuple ``(best_group_size, best_quarantines_per_person,
        best_tests_per_person_per_week, best_days_between_tests)``.  When no
        group size satisfies the bound the initial sentinels
        ``(-1, 1.01, -1, -1)`` are returned.
    """
    best_group_size = -1
    # Sentinel -- presumably quarantines per person is a fraction <= 1, so
    # the first feasible candidate always beats it.
    best_quarantines_per_person = 1.01
    best_days_between_tests = -1
    best_tests_per_person_per_week = -1

    for group_size in np.linspace(group_size_min, group_size_max, grid_size):
        if group_test_name == 'ThreeStageHierarchical':
            # NOTE(review): FNR is passed twice and FPR not at all --
            # confirm against the ThreeStageHierarchicalTest signature.
            group_test = ThreeStageHierarchicalTest(group_size, np.sqrt(group_size), 1, FNR, FNR)
        else:
            if group_test_name != 'Gollier':
                print('Unknown group test requested {}.  Using Gollier'.format(group_test_name))
            group_test = HouseholdGroupTest(group_size, 1, FNR, FPR)

        # We want (population size % group size) / (population size) to be small

        QFNR, QFPR, tests_per_person, quarantines_per_person = StaticSimulation(population, group_test).sim(nreps)

        # Number of days between screenings
        days_between_tests = frequency(QFNR, population.non_quarantine_alpha, beta)
        weeks_between_tests = days_between_tests / 7
        tests_per_person_per_week = tests_per_person / weeks_between_tests

        if verbose:
            # The last field used '{:2f}' (width 2, six decimals); '{:.2f}'
            # matches the other fields.
            print('group size={:.2f} QFNR={} QFPR={:.2f} tests/person/wk={:.3f} quarantines/person={:.2f} days between tests={:.2f}'.format(
                group_size, QFNR, QFPR, tests_per_person_per_week, quarantines_per_person, days_between_tests))

        within_budget = tests_per_person_per_week <= tests_per_person_per_week_ub
        if within_budget and quarantines_per_person < best_quarantines_per_person:
            best_quarantines_per_person = quarantines_per_person
            best_group_size = group_size
            best_days_between_tests = days_between_tests
            best_tests_per_person_per_week = tests_per_person_per_week

    return best_group_size, best_quarantines_per_person, best_tests_per_person_per_week, best_days_between_tests
Пример #9
0
 def run_cmd(self):
     """Execute the command named by ``self.args[0]``.

     Recognised commands are 'read', 'frequency', 'set', 'apply_key',
     'reset', 'quit' and 'print'; the remaining arguments are handed to
     the matching helper.  'read' stores its result on ``self.instance``
     and returns None, 'quit' exits the process with status 0, and the
     other commands return the helper's result or a placeholder string.

     Raises:
         InputError: when the command is unknown, or when a ValueError
             escapes from one of the helpers.
     """
     possible_commands = ['read', 'frequency', 'set', 'apply_key', 'reset', 'quit', 'print']
     help_message = "This is where a help message will go"
     try:
         command = self.args[0]
         if command not in possible_commands:
             raise ValueError(command)

         if command == 'read':
             self.instance.set_original_txt(reader(self.args[1:]))
         elif command == 'frequency':
             return frequency(self.args[1:], self.instance.current_text)
         elif command == 'set':
             # Does not (yet) store the result as the instance's current text.
             return set_enc(self.args[1:])
         elif command == 'apply_key':
             # Does not (yet) store the result as the instance's current text.
             return apply_key(self.args[1:])
         elif command == 'reset':
             # Placeholder: the current text is not actually restored yet.
             return "running reset"
         elif command == 'quit':
             sys.exit(0)
         elif command == 'print':
             return "running print"
     except ValueError:
         raise InputError("invalid input: ", self.args)
Пример #10
0
def main():
    """Make a first automated attempt at cracking a substitution cipher.

    The ciphertext is read from ``-i/--infile`` (required) and optionally
    stripped (``-s``).  A ``key.Key`` is seeded from letter frequencies:
    the most frequent ciphertext letter is taken as 'e', the second as 't',
    and ``h_spotter`` proposes 'h'.  Crib words loaded from ``-c/--crib``
    whose score exceeds 0.1 are then applied to the key.

    The report (crib hits, scores, deciphered text, key and key history)
    is written to ``-o/--outfile`` or printed when no output file is given.
    ``-d/--dictionary`` is parsed but not used by this function.
    """
    # Set up the commandline arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        "--infile",
                        required=True,
                        help="ciphertext input file")
    parser.add_argument("-o",
                        "--outfile",
                        help="output file")
    parser.add_argument("-d",
                        "--dictionary",
                        default='dictionary.txt',
                        help="dictionary file")
    parser.add_argument("-c",
                        "--crib",
                        default='cribwords.txt',
                        help="cribwords file")
    # The two literals are joined by the compiler, so the first one needs
    # its trailing space (the help used to read "andpunctuation").
    parser.add_argument("-s",
                        "--stripciphertext",
                        action="store_true",
                        help="should we strip the ciphertext of spaces and "
                             "punctuation before we start to attempt decode")

    # Parse the commandline arguments
    args = parser.parse_args()

    # Read in the ciphertext; --infile is required, so it is always set.
    with open(args.infile, 'r') as f:
        ciphertext = f.read()

    # Strip if requested
    if args.stripciphertext:
        ciphertext = stripciphertext(ciphertext)

    # Initialise the key
    k = key.Key()

    # Do the frequency analysis
    freq = frequency.frequency()
    freq.add(ciphertext)
    freqlist = freq.freq_list()
    print(str(freqlist))

    # Assume e is the most frequent and t is the second most frequent letters
    e, _ = freqlist[0]
    k.set(e, 'e', 'Frequency')
    t, _ = freqlist[1]
    k.set(t, 't', "Frequency")
    # Now we know 't' and 'e', try to find the h from 'the'
    h, _ = freq.h_spotter(t, e, ciphertext)
    k.set(h, 'h', 'h spotter')

    # The partially decrypted text is not used below; the call is retained
    # in case decipher() updates state on the key -- TODO confirm and drop.
    k.decipher(ciphertext)

    # Use the cribs to try and get a start on the cracking
    crib = Crib()
    crib.load(args.crib)
    frequent_hits, hits = crib.search2(ciphertext)

    for cribword in frequent_hits:
        for match in frequent_hits[cribword]:
            score = frequent_hits[cribword][match] / len(hits[cribword])
            if score > 0.1:
                k.set_string(match, cribword, "Cribword: " + cribword)

    # Assemble the report from parts instead of repeated string +=.
    report = [str(hits) + '\n', str(frequent_hits) + '\n']
    for cribword in frequent_hits:
        report.append(cribword + '\n')
        for match in frequent_hits[cribword]:
            score = frequent_hits[cribword][match] / len(hits[cribword])
            report.append('\t' + match + ":\t " + str(score) + '\n')
    report.append('\n\n' + k.decipher(ciphertext) + '\n\n')
    report.append('\n' + str(k.key) + '\n')
    report.append('\n' + str(k.history) + '\n')
    outtext = ''.join(report)

    if args.outfile is not None:
        with open(args.outfile, 'w') as f:
            f.write(outtext)
    else:
        print(outtext)
Пример #11
0
def main2():
    """Interactively crack a substitution cipher, starting from crib words.

    The ciphertext is read from ``-i/--infile`` (required), optionally
    reversed (``-r``) and stripped (``-s``).  Crib words loaded from
    ``-c/--crib`` whose score exceeds ``-t/--cribthreshold`` seed the key;
    the frequency-based e/t/h seeding is disabled in this variant.

    An interactive prompt then accepts:
        <integer>       assign that value to ``k.history``
        <single char>   if it is in ``ENCODE_CHARS``, prompt for its
                        plaintext and set it in the key; ``?`` shows help
        key / freq / history / crib / help / quit

    Whatever ends the loop (``quit`` or an exception), the crib scores,
    deciphered text, key and key history are written to ``-o/--outfile``
    or printed.  ``-d/--dictionary`` is parsed but not used here.
    """
    # Set up the commandline arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        "--infile",
                        required=True,
                        help="ciphertext input file")
    parser.add_argument("-o",
                        "--outfile",
                        help="output file")
    parser.add_argument("-d",
                        "--dictionary",
                        default='dictionary.txt',
                        help="dictionary file")
    parser.add_argument("-c",
                        "--crib",
                        default='cribwords.txt',
                        help="cribwords file")
    parser.add_argument("-r",
                        "--reverse",
                        action="store_true",
                        help="Reverse the ciphertext before decryption")
    # The two literals are joined by the compiler, so the first one needs
    # its trailing space (the help used to read "andpunctuation").
    parser.add_argument("-s",
                        "--stripciphertext",
                        action="store_true",
                        help="should we strip the ciphertext of spaces and "
                             "punctuation before we start to attempt decode")
    parser.add_argument("-t",
                        "--cribthreshold",
                        default=0.1,
                        help="Thresholds for us to use the cribwords",
                        type=float)

    # Parse the commandline arguments
    args = parser.parse_args()

    # Read in the ciphertext; --infile is required, so it is always set.
    with open(args.infile, 'r') as f:
        ciphertext = f.read()

    # Reverse the ciphertext if wanted
    if args.reverse:
        ciphertext = ciphertext[::-1]

    # Strip if requested
    if args.stripciphertext:
        ciphertext = stripciphertext(ciphertext)

    # Initialise the key
    k = key.Key()

    # Do the frequency analysis (only displayed; it does not seed the key)
    freq = frequency.frequency()
    freq.add(ciphertext)
    freqlist = freq.freq_list()
    print(str(freqlist))

    # Use the cribs to try and get a start on the cracking
    crib = Crib()
    crib.load(args.crib)
    frequent_hits, hits = crib.search2(ciphertext)

    for cribword in frequent_hits:
        for match in frequent_hits[cribword]:
            score = frequent_hits[cribword][match] / len(hits[cribword])
            if score > args.cribthreshold:
                k.set_string(match, cribword, "Cribword: " + cribword)

    # Show the partially decrypted ciphertext
    print(k.decipher(ciphertext))

    # The loop sits in a try block so that the state of the key and the
    # ciphertext is saved even when an exception ends the session.
    try:
        while True:
            cmd = input("\n:> ")
            try:
                # An integer is used to go back in the key history.
                k.history = int(cmd)
            except Exception:
                # Not an integer (or the history rejected it): treat the
                # input as a command.  ``Exception`` rather than a bare
                # ``except`` so KeyboardInterrupt/SystemExit still get out.
                if len(cmd) == 1:
                    if cmd.upper() in ENCODE_CHARS:
                        p = input("Plaintext :> ")
                        k.set(cmd, p, 'User')
                    elif cmd == '?':
                        help()
                elif cmd == 'key':
                    print('\n' + str(k.key) + '\n')
                elif cmd == 'freq':
                    print(str(freqlist))
                elif cmd == 'quit':
                    break
                elif cmd == 'history':
                    print(k.history)
                elif cmd == 'crib':
                    frequent_hits, hits = crib.search2(k.decipher(ciphertext))
                    print('\n' + str(hits))
                    print('\n' + str(frequent_hits))
                elif cmd == 'help':
                    help()
            print('\n' + k.decipher(ciphertext))
    finally:
        # Assemble the report from parts instead of repeated string +=.
        report = []
        for cribword in frequent_hits:
            report.append(cribword + '\n')
            for match in frequent_hits[cribword]:
                score = frequent_hits[cribword][match] / len(hits[cribword])
                report.append('\t' + match + ":\t " + str(score) + '\n')
        report.append('\n\n' + k.decipher(ciphertext) + '\n\n')
        report.append('\n' + str(k.key) + '\n')
        report.append('\n' + str(k.history) + '\n')
        outtext = ''.join(report)

        if args.outfile is not None:
            with open(args.outfile, 'w') as f:
                f.write(outtext)
        else:
            print(outtext)
Пример #12
0
# Map a display name (underscores replaced by spaces, title-cased) to the
# path of the matching file under DIRS.driver.
driver_countries = {
    re.sub(r'_', ' ', values[0]).title(): DIRS.driver / values[1]
    for values in names
}

total = len(driver_countries)
records = []
for idx, items in enumerate(driver_countries.items()):
    key, value = items

    with rio.open(value, 'r') as src:
        # presumably band 1 of a rasterio dataset -- TODO confirm
        data = src.read(1)

        try:
            pdd = frequency(data)

        except:
            # NOTE(review): bare except -- any failure in frequency() is
            # reported by name and the entry is skipped.
            print('Error on: ', key)
            continue

        finally:
            # Runs on success and on the `continue` path alike: drop the
            # array and force a collection before the next file is opened.
            del data
            gc.collect()

        transform = src.profile['transform']

        # Lengths between the transform origin and the point one step away
        # along each axis, then their rounded product.
        x = haversine((transform.xoff, transform.yoff), (transform.xoff + transform.a, transform.yoff))
        y = haversine((transform.xoff, transform.yoff), (transform.xoff, transform.yoff + transform.e))
        area = round(x * y)
Пример #13
0
    # Accumulates the text of every report fetched below.
    content = ""

    # presumably a year -> URL mapping scraped from the index page; the
    # 2017-2019 entries are added by hand.
    urls = getAllReportUrls(
        "http://www.gov.cn/guoqing/2006-02/16/content_2616810.htm")
    urls["2017"] = "http://www.gov.cn/premier/2017-03/16/content_5177940.htm"
    urls["2018"] = "http://www.gov.cn/zhuanti/2018lh/2018zfgzbg/zfgzbg.htm"
    urls["2019"] = "http://www.gov.cn/zhuanti/2019qglh/2019lhzfgzbg/index.htm"
    for itemUrl in urls:
        content += getReportText(urls[itemUrl])

    # Message: "computing the 20 most frequent keywords"
    print("计算频次最高的20个关键字")
    for item in getTopnWords(content, 20):
        print(str(item[0]) + ":" + str(item[1]))
    print("=======================================")
    # Message: "extracting the 20 keywords with the highest weight"
    print("提取权重大的20个关键字")
    print(frequency(content, 20))
    print("=======================================")
    # Message: "generating the word cloud"
    print("生成词云")
    wCloudImage(content, "1954-2019-wclound", "chinamap.jpg")
    print("=======================================")
    # Message: "generating the bar chart"
    print("生成条状图")
    histogram(getTopnWords(content, 20), "1954-2019年政府工作报告词频统计",
              "1954-2019-histogram")
    print("=======================================")
    # Message: "finished"
    print("执行完毕")

    # If the text needs to be written to a file, use the following code

    # f = open("report.txt", "a", encoding="utf-8")
    # f.write(content)
Пример #14
0
        "reportData/2016.txt",
        "reportData/2017.txt",
        "reportData/2018.txt",
        "reportData/2019.txt",
    ]

    # Handle file encodings: try each candidate encoding in turn and keep
    # the first one that reads the file without raising.
    for eachFile in filename:
        # "Error" is the sentinel last entry: reaching it means every real
        # encoding failed, and the handler below then raises.
        for encodeStr in ["utf-8", "gb18030", "gb2312", "gbk", "Error"]:
            try:
                # NOTE(review): the file object is never closed explicitly.
                fr += open(eachFile, "r", encoding=encodeStr).read()
                break
            except:
                if encodeStr == "Error":
                    raise Exception("file read error")
                continue

    # Message: "computing the 30 most frequent keywords"
    print("计算频次最高的30个关键字")
    for item in getTopnWords(fr, 30):
        print(item)
    print("=======================================")
    # Message: "extracting the 30 keywords with the highest weight"
    print("提取权重大的30个关键字")
    print(frequency(fr, 30))
    print("=======================================")
    # Message: "generating the word cloud"
    print("生成词云")
    wCloudImage(fr, "wclound2", "map.jpg")
    print("=======================================")
    # Message: "generating the bar chart"
    print("生成条状图")
    histogram(getTopnWords(fr, 30), "2013-2019年海拉尔区政府工作报告词频统计", "histogram2")
    print("=======================================")
    # Message: "finished"
    print("执行完毕")
Пример #15
0
    def test_frequency_with_float(self):
        """frequency() must raise ValueError for a float16 array."""
        float_grid = np.zeros((100, 100), dtype=np.float16)

        self.assertRaises(ValueError, frequency, float_grid)
Пример #16
0
def main():
    """Run the substitution solver on an input file and report the result.

    Command-line options:
        -i/--infile           ciphertext file; the program exits with
                              status 2 when it is omitted
        -o/--outfile          destination file; when omitted the result is
                              printed to stdout
        -t/--training, -d/--dictionary, -n/--norm
                              parsed, but not used by this function
        -c/--caseinsensitive  forwarded to the frequency counter

    The output consists of the text returned by the second ``solve2()``
    pass, the ciphertext character frequencies, the solver's key and its
    per-character table.
    """
    # Set up the commandline arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--infile", help="ciphertext input file")
    parser.add_argument("-o", "--outfile", help="output file")
    parser.add_argument("-t", "--training", help="training file")
    parser.add_argument("-d",
                        "--dictionary",
                        default='dictionary.txt',
                        help="dictionary file")
    parser.add_argument('-n',
                        "--norm",
                        type=int,
                        default=0,
                        help="Normalise frequencies to...  0=don't normalise")
    # NOTE(review): a store_true flag whose default is already True can
    # never be switched off, so -c is effectively a no-op.  Left untouched
    # because changing the default would alter the tool's behaviour.
    parser.add_argument('-c',
                        '--caseinsensitive',
                        default=True,
                        action='store_true')

    # Parse the commandline arguments
    args = parser.parse_args()

    freqs = frequency.frequency(caseinsensitive=args.caseinsensitive)

    # Without an input file there is nothing to solve.
    if args.infile is None:
        sys.exit(2)

    # Read in the ciphertext
    with open(args.infile, 'r') as f:
        ciphertext = f.read()
    freqs.add(ciphertext)
    cipher_freq = freqs.chars()

    # Create a substitution cipher solver
    solver = solve.Substitution()
    solver.ciphertext = ciphertext
    # Make an initial solution; only the second pass's text is reported.
    solver.solve2()
    # Now we have a better idea of the likely words let's have a second go
    solved_text = solver.solve2()

    # Assemble the report from parts instead of repeated string +=.
    report = [
        solved_text,
        "\nCipher Freqs: " + str(cipher_freq),
        "\nKey: ",
        str(solver.key),
        "\nSolve:\n",
    ]
    for c in solver.cipher_chars:
        report.append(c + ": " + str(solver.cipher_chars[c]) + "\n")
    report.append("\n")
    outtext = "".join(report)

    # Runs after the report is assembled, as before; its result is unused.
    solver.solve3()

    if args.outfile is not None:
        with open(args.outfile, 'w') as f:
            f.write(outtext)
    else:
        print(outtext)
Пример #17
0
    def test_numbers(self):
        """frequency() reports 3 for the value 4 in [1, 2, 3, 4, 4, 4]."""
        # Failure message:
        # expected frequency([1, 2, 3, 4, 4, 4], 4) to equal 3
        observed = frequency([1, 2, 3, 4, 4, 4], 4)
        self.assertEqual(observed, 3)
Пример #18
0
    def test_booleans(self):
        """frequency() reports 1 for False in [True, False, True, True]."""
        # Failure message:
        # expected frequency([True, False, True, True], False) to equal 1
        observed = frequency([True, False, True, True], False)
        self.assertEqual(observed, 1)