def calcMutualInformation(self, TreeTable):
    """Compute the average pairwise mutual information over self.SNPList.

    For every unordered pair of SNPs (x, y) the mutual information is
    H(x) + H(y) - H(x, y), where H(x) and H(y) are read from each SNP's
    ``entropy`` attribute and H(x, y) is obtained from
    ``EntropyCalculator.main`` called with both SNPs' A/T/C/G genome groups.

    The result is stored in ``self.avgMutualInformation``; nothing is
    returned. With fewer than two SNPs there are no pairs, so the average
    is set to 0.0 instead of raising ZeroDivisionError.

    TreeTable is passed through unchanged to ``EntropyCalculator.main``.
    """
    snps = self.SNPList
    count = len(snps)
    sumMutualInformation = 0
    pairCount = 0
    for i in range(count):
        first = snps[i]
        # range(i + 1, count) is empty for the last SNP, so no extra guard
        # is needed to stop at the final element.
        for j in range(i + 1, count):
            second = snps[j]
            hxy = EntropyCalculator.main(
                TreeTable,
                first.aGenomes, first.tGenomes,
                first.cGenomes, first.gGenomes,
                second.aGenomes, second.tGenomes,
                second.cGenomes, second.gGenomes)
            sumMutualInformation += first.entropy + second.entropy - hxy
            pairCount += 1
    if pairCount:
        self.avgMutualInformation = sumMutualInformation / pairCount
    else:
        # No pairs to average over.
        self.avgMutualInformation = 0.0
def __init__(self, args):
    """Store the run options found in ``args`` and set up helper state.

    ``args`` is indexed with the module-level ``*_PARAM`` keys; a missing
    key raises whatever the mapping raises on lookup.
    """
    # Counter, starts at zero.
    self.index = 0

    # Parameters
    self.file = args[FILE_PARAM]
    self.entropy_file = args[ENTROPY_FILE_PARAM]
    self.arp_entropy_file = args[ARP_ENTROPY_FILE_PARAM]
    self.timeout = args[TIMEOUT_PARAM]
    self.arp = args[ARP_PARAM_ALIAS]
    self.console = args[CONSOLE_PARAM]

    # Variables
    self.entropyCalculator = EntropyCalculator()
    # Maps the two known operation codes to their display text.
    self.arp_text = {
        SCAPY_WHO_HAS: WHO_HAS_TEXT,
        SCAPY_IS_AT: IS_AT_TEXT,
    }
def calcMutualInformation(self, TreeTable):
    """Store the mean pairwise mutual information of self.SNPList.

    Each unordered pair of SNPs contributes H(x) + H(y) - H(x, y): the
    marginal entropies come from the SNPs' ``entropy`` attributes and the
    joint entropy from ``EntropyCalculator.main``, which receives
    TreeTable followed by the A/T/C/G genome groups of both SNPs.

    The mean is written to ``self.avgMutualInformation``. When the list
    holds fewer than two SNPs there is nothing to average, so 0.0 is
    stored rather than dividing by zero.
    """
    snpList = self.SNPList
    total = 0
    pairCount = 0
    for i, snp in enumerate(snpList):
        # The inner range is empty once i is the last index, so the final
        # SNP is never paired with anything after it.
        for j in range(i + 1, len(snpList)):
            partner = snpList[j]
            hx = snp.entropy
            hy = partner.entropy
            hxy = EntropyCalculator.main(
                TreeTable,
                snp.aGenomes, snp.tGenomes, snp.cGenomes, snp.gGenomes,
                partner.aGenomes, partner.tGenomes,
                partner.cGenomes, partner.gGenomes)
            total += hx + hy - hxy
            pairCount += 1
    # Guard the division: zero or one SNP yields no pairs.
    self.avgMutualInformation = total / pairCount if pairCount else 0.0
class Sniffer(object):
    """Log one tab-separated line per captured packet and feed entropy stats.

    Packets are delivered by ``sniff`` (presumably scapy's -- confirm
    against the file's imports) to ``writePacketToFile``, which writes a
    text line to ``self.file`` and updates ``self.entropyCalculator``.
    """

    def __init__(self, args):
        """Read the run options from ``args`` (indexed by the *_PARAM keys)."""
        self.index = 0
        # Parameters
        self.file = args[FILE_PARAM]
        self.entropy_file = args[ENTROPY_FILE_PARAM]
        self.arp_entropy_file = args[ARP_ENTROPY_FILE_PARAM]
        self.timeout = args[TIMEOUT_PARAM]
        self.arp = args[ARP_PARAM_ALIAS]
        self.console = args[CONSOLE_PARAM]
        # Variables
        self.entropyCalculator = EntropyCalculator()
        self.arp_text = {
            SCAPY_WHO_HAS: WHO_HAS_TEXT,
            SCAPY_IS_AT: IS_AT_TEXT,
        }

    def run(self):
        """Capture until the timeout, then write both entropy reports."""
        try:
            self.file.write(FILE_HEADER)
            sniff(prn=self.writePacketToFile, timeout=self.timeout,
                  filter=self.arp)
        finally:
            # Close the packet log even if the header write or capture fails.
            self.file.close()
        self.entropyCalculator.write_to_file(self.entropy_file)
        self.entropyCalculator.arp_write_to_file(self.arp_entropy_file)

    def writePacketToFile(self, packet):
        """Append one line describing ``packet`` and update the entropy stats.

        The line is also printed when ``self.console`` is truthy.
        """
        packetInfo = time.strftime("%d/%m/%y - %H:%M:%S")
        if ARP in packet:
            arp = packet[1]
            packetInfo += self.decorate(arp.hwsrc)
            packetInfo += self.decorate(arp.hwdst)
            packetInfo += self.decorate(arp.psrc)
            packetInfo += self.decorate(arp.pdst)
            # Fall back to the raw opcode for operations other than the two
            # in arp_text, so an unexpected packet cannot raise KeyError
            # inside the capture callback.
            packetInfo += self.decorate(self.arp_text.get(arp.op, arp.op))
        else:
            packetInfo += self.decorate(packet.src)
            packetInfo += self.decorate(packet.dst)
            if IP in packet:
                packetInfo += self.decorate(packet[IP].src)
                packetInfo += self.decorate(packet[IP].dst)
            else:
                # FAKE_IP is appended as-is, once per missing address column.
                packetInfo += FAKE_IP
                packetInfo += FAKE_IP
            packetInfo += self.decorate(packet[1].name)
        if self.console:
            print(packetInfo)
        packetInfo += '\n'
        self.file.write(packetInfo)
        self.index += 1
        self.entropyCalculator.arp_entropyUpdate(packet)
        self.entropyCalculator.entropyUpdate(packet)

    def decorate(self, obj):
        """Return ``obj`` rendered as a string with a leading tab."""
        return "\t" + str(obj)
def main(inputFile, treeTable):
    """Parse a tab-separated SNP matrix file into a list of SNP objects.

    Layout, as read by this function:
      * line 1: the genome count is the integer found from character
        offset 12 to the end of the line (the first 12 characters are
        skipped -- presumably a fixed label; confirm against the format).
        One is added to include the reference genome.
      * line 2: header row; columns 2 onward hold the genome names.
      * remaining lines: chromosome, position, then one base call per
        genome. Blank lines are skipped.

    For each data row the genomes are grouped by call (A, T, C, G; any
    other call is ignored), an entropy is computed with
    ``EntropyCalculator.main(treeTable, ...)`` from the four groups as
    frozensets, and an ``SNP`` is appended to the result.

    Returns the list of SNP objects in file order. A data row with fewer
    columns than the declared genome count raises IndexError.
    """
    ISGData = []
    # The context manager closes the file even when a row fails to parse.
    with open(inputFile, "r") as fo:
        # read in number of genomes from file
        fo.seek(12)
        numGenomes = int(fo.readline().strip()) + 1  # include reference genome
        lastColumn = numGenomes + 2
        # genome names are header columns 2 .. numGenomes + 1
        arrGenomeName = fo.readline().strip().split("\t")[2:lastColumn]
        # determine which genome group each SNP differentiates
        for line in fo:
            stripped = line.strip()
            if not stripped:
                # A blank or trailing empty line carries no SNP.
                continue
            arrLine = stripped.split("\t")
            strChrom = arrLine[0]
            strPos = arrLine[1]
            # Index explicitly so a short row raises instead of being
            # silently truncated.
            arrSNP = [arrLine[i] for i in range(2, lastColumn)]
            # sort genomes into groups by SNP call
            groups = {'A': [], 'T': [], 'C': [], 'G': []}
            for idx, call in enumerate(arrSNP):
                if call in groups:
                    groups[call].append(arrGenomeName[idx])
            arrA = groups['A']
            arrT = groups['T']
            arrC = groups['C']
            arrG = groups['G']
            # calculate entropy values
            entropy = EntropyCalculator.main(
                treeTable, frozenset(arrA), frozenset(arrT),
                frozenset(arrC), frozenset(arrG))
            ISGData.append(
                SNP(strChrom, int(strPos), entropy, arrA, arrT, arrC, arrG))
    return ISGData
#!/usr/bin/python
"""Invoke as Entropy.py Document Output.csv

Document may be plain text or XML.
Analysis will be performed with a block size of 1000 words.
May add options to customize analysis or output at a later date.

Columns of output are word, Entropy per instance of that word,
proportion of document's entropy from instances of that word.
Summary statistics will be printed to stdout.
"""
import sys
import EntropyCalculator

# NOTE(review): the meaning of the constructor argument 10 is not visible
# from this script -- confirm against EntropyCalculator.
H = EntropyCalculator.EntropyCalculator(10)

# Context managers close both files even if the analysis raises.
with open(sys.argv[1], 'r') as data:
    H.SetText(data)
    H.SetWordsPerPart(1000)
    H.AnalyseText()
    with open(sys.argv[2], 'w') as output:
        H.OutputWords(output)

# Single-argument print(...) with %-formatting produces the same text under
# Python 2 and Python 3. The original referenced the undefined name `argv`
# here; it must be `sys.argv`.
print("%s contains %s" % (sys.argv[1], H.Nwords))
print("Analysed with %s blocks" % (H.Parts,))
print("Total entropy %s bits" % (H.TotalEntropy,))
print("Words with entropy > %s are likely to be particulary significant"
      % (float(H.Parts) / (H.Parts + 1),))
print("Detailed analysis written to  %s" % (sys.argv[2],))
def main(inputFile, treeTable):
    """Read a tab-separated SNP matrix and return one SNP object per row.

    What the parser expects:
      * first line: an integer genome count starting at character offset
        12 (the leading 12 characters are skipped -- presumably a label;
        confirm against the file format). The reference genome is added
        on top of that count.
      * second line: header; genome names start at column 2.
      * every other non-blank line: chromosome, position, then one base
        call per genome.

    Genomes are grouped by their call (A/T/C/G; other calls are ignored).
    The four groups, as frozensets, are passed with treeTable to
    ``EntropyCalculator.main`` and the resulting entropy is stored in a
    new ``SNP`` together with the chromosome, integer position and groups.

    Returns the SNP objects in file order. Raises IndexError if a data row
    has fewer columns than the declared genome count.
    """
    bases = ('A', 'T', 'C', 'G')
    ISGData = []
    # `with` guarantees the handle is released when a row fails to parse.
    with open(inputFile, "r") as fo:
        # read in number of genomes from file
        fo.seek(12)
        numGenomes = int(fo.readline().strip())
        numGenomes += 1  # add one to include reference genome
        # read in header row and keep only the genome-name columns
        header = fo.readline().strip().split("\t")
        arrGenomeName = header[2:numGenomes + 2]
        for line in fo:
            arrLine = line.strip().split("\t")
            if arrLine == ['']:
                # Empty line (for example a trailing newline): nothing to parse.
                continue
            strChrom = arrLine[0]
            strPos = arrLine[1]
            # Explicit indexing keeps the IndexError on truncated rows.
            arrSNP = [arrLine[i] for i in range(2, numGenomes + 2)]
            # sort genomes into groups by SNP call
            byCall = dict((base, []) for base in bases)
            for position, call in enumerate(arrSNP):
                if call in byCall:
                    byCall[call].append(arrGenomeName[position])
            arrA, arrT, arrC, arrG = [byCall[base] for base in bases]
            # calculate entropy values
            entropy = EntropyCalculator.main(
                treeTable, frozenset(arrA), frozenset(arrT),
                frozenset(arrC), frozenset(arrG))
            ISGData.append(
                SNP(strChrom, int(strPos), entropy, arrA, arrT, arrC, arrG))
    return ISGData