def ComputeEdges(self, seqs):
     """Build a table of pairwise Hamming distances between sequences.

     Every unordered index pair ``(i, j)`` with ``i < j`` becomes a key; its
     value is whatever ``utils.HammingDistance`` returns for the ``.seq``
     attributes of ``seqs[i]`` and ``seqs[j]``.

     Args:
         seqs: Indexable, sized collection of objects exposing ``.seq``.

     Returns:
         dict mapping ``(i, j)`` tuples to distances; empty when fewer than
         two sequences are given.
     """
     count = len(seqs)
     return {
         (left, right): utils.HammingDistance(seqs[left].seq,
                                              seqs[right].seq)
         for left in range(count)
         for right in range(left + 1, count)
     }
예제 #2
0
 def _ConstructGraph(self, seqs):
     """Create a graph with one vertex per sequence and an edge per pair.

     A fresh ``Graph`` receives ``len(seqs)`` vertices, then one edge for
     every index pair ``(i, j)`` with ``i < j``.  The weight of each edge is
     kept outside the graph, in a dict keyed by the same ``(i, j)`` tuple,
     and is the result of ``utils.HammingDistance(seqs[i], seqs[j])``.

     Args:
         seqs: Indexable, sized collection of sequences.

     Returns:
         Tuple ``(graph, edge_weights)``.
     """
     graph = Graph()
     vertex_count = len(seqs)
     graph.add_vertices(vertex_count)

     # Enumerate the pairs once; the same list feeds both the weight table
     # and the graph so the two always agree.
     pairs = [(src, dst)
              for src in range(vertex_count)
              for dst in range(src + 1, vertex_count)]
     weights = {(src, dst): utils.HammingDistance(seqs[src], seqs[dst])
                for src, dst in pairs}

     graph.add_edges(pairs)
     return graph, weights
예제 #3
0
def NormalizedHammingDistance(bin_text, keysize):
    """Return the normalized Hamming distance between adjacent block pairs.

    ``bin_text`` is walked in non-overlapping pairs of blocks, each block
    being ``keysize * 8`` elements long (presumably one element per bit of
    a ``keysize``-byte block -- confirm against callers).  The Hamming
    distances of all pairs are summed and divided by
    ``number_of_blocks * keysize``.

    A lower result means consecutive blocks look more alike, which suggests
    they were encoded with the same set of characters and that the key may
    therefore be ``keysize`` bytes long.

    Args:
        bin_text: Sliceable, sized sequence holding the text.
        keysize: Candidate key length in bytes.

    Returns:
        The summed pair distance divided by ``number_of_blocks * keysize``.

    Raises:
        ZeroDivisionError: If ``keysize`` is 0, or ``bin_text`` is shorter
            than a single block.

    Note:
        The block count used for normalization includes a trailing block
        that has no partner, even though that block is never compared.
    """
    block_len = keysize * 8
    pair_len = keysize * 16

    # Whole blocks available in the text (a partial tail is not counted).
    block_count = int(len(bin_text) / block_len)

    # Highest offset at which a complete pair of blocks still fits; any
    # leftover data past it is ignored.
    last_pair_start = len(bin_text) - pair_len

    distance_sum = 0
    offset = 0
    while offset <= last_pair_start:
        middle = offset + block_len
        first_block = bin_text[offset:middle]
        second_block = bin_text[middle:offset + pair_len]
        distance_sum += utils.HammingDistance(first_block, second_block)
        # Jump past both blocks so pairs never overlap.
        offset += pair_len

    return distance_sum / (block_count * keysize)
예제 #4
0
#!/usr/bin/env python
'''
Content: solution to a programming assignment for the Bioinformatics Algorithms (Part 1) on Coursera.
Associated textbook: "Bioinformatics Algorithms: An Active-Learning Approach" by Phillip Compeau & Pavel Pevzner.
Assignment: hosted in Stepik.org
Problem Title: Hamming Distance Problem
URL: https://stepik.org/lesson/9/step/3?course=Stepic-Interactive-Text-for-Week-2&unit=8224
Code Challenge:      Hamming Distance Problem: Compute the Hamming distance between two strings.
     Input: Two strings of equal length.
     Output: The Hamming distance between these strings.
'''

import sys
import utils
import numpy as np

if __name__ == '__main__':
    # Read the two input strings, one per line.  Only the line terminator is
    # removed: blindly dropping the last character would truncate a sequence
    # whose line is not newline-terminated (e.g. the final line of the input).
    seq1 = sys.stdin.readline().rstrip('\r\n')
    seq2 = sys.stdin.readline().rstrip('\r\n')
    print(utils.HammingDistance(seq1, seq2))