Exemplo n.º 1
0
def randomReconWrapper(dirName, D, T, L, numSamples, typeGen):
    """Sample random reconciliations for each .newick file in a directory.

    For every file in ``dirName`` whose name ends with ``.newick`` a report
    named after that file (the last 7 characters, ``.newick``, replaced by
    ``.txt``) is written to the current working directory. The report lists,
    for each distinct sampled reconciliation, whether ``orderGraph.date``
    returned ``'timeTravel'`` (a temporal inconsistency) and how many
    transfers ``findTransfers`` counted, followed by a per-file summary.

    Args:
        dirName: directory holding the newick files.
        D: duplication cost handed to ``DP.DP``.
        T: transfer cost handed to ``DP.DP``.
        L: loss cost handed to ``DP.DP``.
        numSamples: number of random reconciliations drawn per file.
        typeGen: ``"uniform"`` samples with ``uniformRecon``; any other value
            samples with ``biasedRecon`` on the normalized graph. The value
            is also written verbatim in the report header.

    Returns:
        None. The directory-wide totals are printed to standard output.
    """
    totalTimeTravel = 0  # temporal inconsistencies across the whole directory
    outOf = 0  # distinct reconciliations examined across the whole directory
    for fileName in os.listdir(dirName):
        if not fileName.endswith('.newick'):
            continue
        # The context manager closes the report even if a later step raises.
        with open(fileName[:-7] + '.txt', 'w') as f:
            f.write(typeGen + " random reconciliations" + "\n")
            hostTree, parasiteTree, phi = newickFormatReader.getInput(
                dirName + "/" + fileName)
            # Sizes reported in the summary: number of entries plus one.
            parasiteSize = len(parasiteTree) + 1
            hostSize = len(hostTree) + 1
            DTLReconGraph, numRecon = DP.DP(
                hostTree, parasiteTree, phi, D, T, L)
            rootList = rootGenerator(DTLReconGraph, parasiteTree)

            # Initialised once per file, before sampling, so the summary is
            # well defined even when numSamples is 0.
            timeTravelCount = 0
            randomReconList = []
            for _ in range(numSamples):
                startRoot = random.choice(rootList)
                if typeGen == "uniform":
                    currentRecon = uniformRecon(
                        DTLReconGraph, [startRoot], {})
                else:
                    normalizeDTL = normalizer(DTLReconGraph)
                    currentRecon = biasedRecon(
                        normalizeDTL, [startRoot], {})
                # Drop the last element of every entry before comparing and
                # rebuilding the reconciliation.
                for key in currentRecon.keys():
                    currentRecon[key] = currentRecon[key][:-1]
                randomReconList.append(currentRecon)

            # Keep only distinct reconciliations (first occurrence wins).
            uniqueReconList = []
            for recon in randomReconList:
                if recon not in uniqueReconList:
                    uniqueReconList.append(recon)
            outOf += len(uniqueReconList)

            for recon in uniqueReconList:
                graph = reconciliationGraph.buildReconstruction(
                    hostTree, parasiteTree, recon)
                currentOrder = orderGraph.date(graph)
                numTrans = findTransfers(recon)
                if currentOrder == 'timeTravel':
                    f.write("Temporal Inconsistency, reconciliation has "
                            + str(numTrans) + " transfers" + "\n")
                    timeTravelCount += 1
                    totalTimeTravel += 1
                else:
                    f.write("No temporal inconsistencies, reconciliation has "
                            + str(numTrans) + " transfers" + "\n")

            f.write(fileName + " contains " + str(timeTravelCount)
                    + " temporal " + "inconsistencies out of "
                    + str(len(uniqueReconList)) + " reconciliations." + "\n"
                    + "Total number of reconciliations: " + str(numRecon)
                    + "\n" + "Host tree size: " + str(hostSize) + "\n"
                    + "Parasite tree size: " + str(parasiteSize) + "\n")

    # A single pre-formatted string prints identically whether ``print`` is
    # the Python 2 statement or the print function.
    print("Total fraction of temporal inconsistencies in directory:  %s / %s"
          % (totalTimeTravel, outOf))
Exemplo n.º 2
0
def getCostscapeDTLs(DTLPairs, hostTree, parasiteTree, phi):
    """Compute one DTL graph per cost pair in DTLPairs.

    Each element of DTLPairs is indexed as ``pair[0]`` and ``pair[1]``; the
    two values are passed to ``DP`` as the fifth and sixth arguments (the
    transfer and loss costs) with the duplication cost fixed at 1. The first
    item of each ``DP`` result is collected.

    Returns a list with one entry per pair, in the order of DTLPairs.
    """
    return [
        DP(hostTree, parasiteTree, phi, 1, pair[0], pair[1])[0]
        for pair in DTLPairs
    ]
Exemplo n.º 3
0
def newScoreWrapper(newickFile, switchLo, switchHi, lossLo, lossHi, D, T, L):
    """Re-score the DTL graph of a newick file using costscape points.

    The trees and tip mapping are read from ``newickFile`` and solved once
    with the costs D, T and L. ``findCenters`` is then called with the switch
    and loss ranges, its points are converted to cost pairs by
    ``getDTLVals``, one DTL graph is built per pair by ``getCostscapeDTLs``,
    and ``changeDTLScores`` combines those graphs with the original one.

    Returns a tuple ``(newDTL, numRecon, leaves)`` where ``numRecon`` and
    ``leaves`` are the second and third values returned by ``DP``.
    """
    hostTree, parasiteTree, phi = newickFormatReader.getInput(newickFile)
    baseDTL, numRecon, leaves = DP(hostTree, parasiteTree, phi, D, T, L)
    centers = findCenters(newickFile, switchLo, switchHi, lossLo, lossHi)
    costPairs = getDTLVals(centers)
    costscapeDTLs = getCostscapeDTLs(costPairs, hostTree, parasiteTree, phi)
    rescoredDTL = changeDTLScores(baseDTL, costscapeDTLs)
    return rescoredDTL, numRecon, leaves
Exemplo n.º 4
0
def run_test(fileName, max_k):
    """Run K-Means on the reconciliation graph of a newick file for k = 1..max_k.

    The reconciliation graph is computed with fixed costs (D=2, T=3, L=1)
    and cached as its ``repr`` under ``./cache/<basename>.graph``; later runs
    load the cached text instead of recomputing. Representatives returned by
    ``Greedy.Greedy`` are converted with ``ReconGraph.dictRecToSetRec`` and
    extended with ``max_k`` templates from ``KMeans.get_template``; the
    first ``i`` of them are handed to ``KMeans.k_means`` for each ``i``.

    Args:
        fileName: path of the newick file to read.
        max_k: largest number of clusters to try.

    Returns:
        None. Progress messages go to standard error and ``k = <i>`` lines
        to standard output.
    """
    cache_dir = './cache'
    D = 2.
    T = 3.
    L = 1.

    host, paras, phi = newickFormatReader.getInput(fileName)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
        with open('%s/README' % cache_dir, 'w') as readme:
            readme.write(
                'This directory holds a cache of reconciliation graph for the TreeLife data set'
            )

    cache_location = '%s/%s.graph' % (cache_dir, os.path.split(fileName)[1])
    if not os.path.isfile(cache_location):
        sys.stderr.write(
            'A reconciliation graph has not been built yet for this newick file\n')
        sys.stderr.write(
            'Doing so now and caching it in {%s}...\n' % cache_location)

        DictGraph, numRecon = DP.DP(host, paras, phi, D, T, L)

        with open(cache_location, 'w+') as cache_file:
            cache_file.write(repr(DictGraph))

    sys.stderr.write('Loading reonciliation graph from cache\n')
    with open(cache_location) as cache_file:
        # SECURITY NOTE: eval executes whatever the cache file contains.
        # This is only safe while ./cache is written exclusively by this
        # function; do not point it at files from an untrusted source.
        DictGraph = eval(cache_file.read())

    scoresList, dictReps = Greedy.Greedy(DictGraph, paras)

    sys.stderr.write('Found cluster representatives using point-collecting\n')

    graph = ReconGraph.ReconGraph(DictGraph)
    setReps = [
        ReconGraph.dictRecToSetRec(graph, dictRep) for dictRep in dictReps
    ]
    # Fixed seed; presumably makes the templates drawn below reproducible
    # (assumes KMeans.get_template uses the random module - confirm).
    random.seed(0)
    extra_reps = [KMeans.get_template(graph) for _ in range(max_k)]

    representatives = setReps + extra_reps

    sys.stderr.write('Starting K Means algorithm ... \n')
    sys.stderr.write(
        'Printing Average and Maximum cluster radius at each step\n')

    for k in range(1, max_k + 1):
        print('k = %d' % k)
        # NOTE(review): the meaning of the constants 10 and 0 is defined by
        # KMeans.k_means and is not visible from here.
        KMeans.k_means(graph, 10, k, 0, representatives[:k])
Exemplo n.º 5
0
def unitScoreDTL(hostTree, parasiteTree, phi, D, T, L):
    """Return the DTL graph of the given trees with every event score set to 1.0.

    The graph is computed by ``DP.DP`` with the supplied duplication (D),
    transfer (T) and loss (L) costs. For each vertex, every entry except the
    last is copied with its final element replaced by 1.0; the last entry of
    the vertex is carried over unchanged. The result is a new dictionary.
    """
    graph, _numRecon = DP.DP(hostTree, parasiteTree, phi, D, T, L)
    unitDTL = {}
    for vertex, entries in graph.items():
        rescored = [event[:-1] + [1.0] for event in entries[:-1]]
        rescored.append(entries[-1])
        unitDTL[vertex] = rescored
    return unitDTL
def getCostscapeDTLReconGraphs(DTLReconGraphPairs, hostTree, parasiteTree,
                               phi):
    """Compute one DTL reconciliation graph per cost pair.

    Each element of DTLReconGraphPairs is indexed as ``pair[0]`` and
    ``pair[1]``; the two values are passed to ``DP`` as the fifth and sixth
    arguments (the transfer and loss costs) with the duplication cost fixed
    at 1. The first item of each ``DP`` result is collected.

    Returns a list with one graph per pair, in the order of the input.
    """
    return [
        DP(hostTree, parasiteTree, phi, 1, pair[0], pair[1])[0]
        for pair in DTLReconGraphPairs
    ]
def freqSummation(argList):
    """Score the reconciliations of a newick file and write a summary file.

    Args:
        argList: command-line style list where
            ``argList[1]`` is the newick file path,
            ``argList[2:5]`` are the duplication, transfer and loss costs,
            ``argList[5]`` is the scoring type (``"Frequency"``,
            ``"xscape"`` or ``"unit"``), and
            ``argList[6:10]`` are the switch low/high and loss low/high
            bounds, read only when the scoring type is ``"xscape"``.

    Returns:
        None. Writes ``<newick path minus its last 7 characters>freqFile.txt``
        with four lines: the list of scores, their sum, the event cost of
        the last reconciliation returned by ``Greedy.Greedy`` (0 when there
        is none), and the number of reconciliations reported by ``DP.DP``.

    Raises:
        ValueError: if the scoring type is not one of the three above, or a
            numeric argument cannot be converted to float.
        IndexError: if a required entry of ``argList`` is missing.
    """
    newickFile = argList[1]
    D = float(argList[2])
    T = float(argList[3])
    L = float(argList[4])
    freqType = argList[5]
    # Reject an unknown type up front; otherwise it would only show up later
    # as an unbound-variable error.
    if freqType not in ("Frequency", "xscape", "unit"):
        raise ValueError("unknown frequency type: %r" % (freqType,))
    if freqType == "xscape":
        # The ranges are only meaningful for xscape scoring.
        switchLo = float(argList[6])
        switchHi = float(argList[7])
        lossLo = float(argList[8])
        lossHi = float(argList[9])

    host, paras, phi = newickFormatReader.getInput(newickFile)
    DTL, numRecon = DP.DP(host, paras, phi, D, T, L)
    if freqType == "Frequency":
        newDTL = DTL
    elif freqType == "xscape":
        newDTL = calcCostscapeScore.newScoreWrapper(newickFile, switchLo,
                                                    switchHi, lossLo, lossHi,
                                                    D, T, L)
    else:  # "unit"
        newDTL = MasterReconciliation.unitScoreDTL(host, paras, phi, D, T, L)
    scoresList, reconciliation = Greedy.Greedy(newDTL, paras)
    totalSum = sum(scoresList)

    # Defined even when no reconciliation is returned.
    totalCost = 0
    for recon in reconciliation:
        # NOTE(review): the accumulator restarts for every reconciliation, so
        # only the last one's cost is reported - confirm this is intended.
        totalCost = 0
        for key in recon:
            eventType = recon[key][0]
            if eventType == "L":
                totalCost += L
            elif eventType == "T":
                totalCost += T
            elif eventType == "D":
                totalCost += D

    # Open the output only once everything is computed; the context manager
    # guarantees the handle is closed.
    with open(newickFile[:-7] + "freqFile.txt", 'w') as f:
        f.write(str(scoresList) + '\n')
        f.write(str(totalSum) + '\n')
        f.write(str(totalCost) + '\n')
        f.write(str(numRecon))
Exemplo n.º 8
0
def Reconcile(argList):
    """Reconcile the trees of a newick file and emit visualisation files.

    Args:
        argList: command-line style list where
            ``argList[1]`` is the .newick file path,
            ``argList[2:5]`` are the duplication, transfer and loss costs,
            ``argList[5]`` is the scoring type (``"xscape"`` and ``"unit"``
            select alternative scoring; any other value keeps the graph
            returned by ``DP.DP``), and
            ``argList[6:10]`` are the switch low/high and loss low/high
            bounds, read only when the scoring type is ``"xscape"``.

    Returns:
        None. For each reconciliation returned by ``Greedy.Greedy`` the
        functions ``newickToVis.convert`` and ``reconConversion.convert``
        are called (presumably writing the host, parasite and
        reconciliation files - their output is not visible from here).
    """
    fileName = argList[1]  # .newick file
    D = float(argList[2])  # Duplication cost
    T = float(argList[3])  # Transfer cost
    L = float(argList[4])  # Loss cost
    freqType = argList[5]  # Frequency type
    if freqType == "xscape":
        # Only xscape scoring uses the ranges, so only then are they required.
        switchLo = float(argList[6])  # Switch lower boundary
        switchHi = float(argList[7])  # Switch upper boundary
        lossLo = float(argList[8])  # Loss lower boundary
        lossHi = float(argList[9])  # Loss upper boundary

    host, paras, phi = newickFormatReader.getInput(fileName)
    # NOTE(review): hostRoot and Order are not used below; the calls are kept
    # in case they validate the input.
    hostRoot = cycleCheckingGraph.findRoot(host)
    hostv = cycleCheckingGraph.treeFormat(host)
    Order = orderGraph.date(hostv)
    # Default scoring (used as-is unless freqType selects another scheme).
    DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
    # Single pre-formatted string: same output under the Python 2 print
    # statement and the print function.
    print("%s %s" % (DTLReconGraph, numRecon))
    if freqType == "xscape":
        DTLReconGraph = calcCostscapeScore.newScoreWrapper(
            fileName, switchLo, switchHi, lossLo, lossHi, D, T, L)
    elif freqType == "unit":
        DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)

    # Greedy receives a deep copy, so DTLReconGraph is unchanged for
    # the conversion step below.
    DTLGraph = copy.deepcopy(DTLReconGraph)
    scoresList, rec = Greedy.Greedy(DTLGraph, paras)
    for n, recon in enumerate(rec):
        graph = cycleCheckingGraph.buildReconciliation(host, paras, recon)
        currentOrder = orderGraph.date(graph)
        if currentOrder == "timeTravel":
            # Temporal inconsistency: take the reconciliation and graph
            # returned by the cycle detector and date that graph instead.
            recon, repairedGraph = detectCycles.detectCyclesWrapper(
                host, paras, recon)
            rec[n] = recon
            currentOrder = orderGraph.date(repairedGraph)
        hostOrder = hOrder(hostv, currentOrder)
        hostBranchs = branch(hostv, hostOrder)
        # The last argument is 1 for the first reconciliation only.
        newickToVis.convert(fileName, hostBranchs, n, 1 if n == 0 else 0)
        # fileName[:-7] is the file name minus the .newick
        reconConversion.convert(recon, DTLReconGraph, paras, fileName[:-7], n)
Exemplo n.º 9
0
def Reconcile(argList):
    """Compute reconciliations for a newick file and flag infeasible ones.

    ``argList`` is a command-line style list: ``argList[1]`` is the .newick
    file, ``argList[2:5]`` the duplication, transfer and loss costs,
    ``argList[5]`` the scoring type and ``argList[6:10]`` the switch and
    loss bounds. The bounds are parsed but not used, since xscape scoring is
    disabled in this version; only ``"unit"`` changes the scoring.

    Returns a tuple ``(infeasible_recs, recs)``: ``recs`` is the list of
    reconciliations from ``Greedy.Greedy`` and ``infeasible_recs`` holds
    those for which ``orderGraph.date`` compared equal to False.
    """
    fileName = argList[1]  # .newick file
    # Duplication, transfer and loss costs, in that order.
    D, T, L = float(argList[2]), float(argList[3]), float(argList[4])
    freqType = argList[5]  # scoring type
    # Switch low/high and loss low/high bounds (xscape only).
    switchLo, switchHi, lossLo, lossHi = [
        float(argList[i]) for i in (6, 7, 8, 9)
    ]

    host, paras, phi = newickFormatReader.getInput(fileName)
    hostRoot = ReconciliationGraph.findRoot(host)
    # Default (frequency) scoring comes straight from the DP.
    DTLReconGraph, numRecon = DP.DP(host, paras, phi, D, T, L)
    # xscape scoring is disabled here; unit scoring is the only alternative.
    if freqType == "unit":
        DTLReconGraph = unitScoreDTL(host, paras, phi, D, T, L)

    scoresList, recs = Greedy.Greedy(copy.deepcopy(DTLReconGraph), paras)

    infeasible_recs = [
        rec for rec in recs
        if orderGraph.date(
            ReconciliationGraph.buildReconciliation(host, paras, rec)) == False
    ]
    return infeasible_recs, recs
Exemplo n.º 10
0
        # to DP
        # DPParam = DP_fit(z_batch)
        # DPParam = np.ones((batch_size))
        # gamma: 'LPMtx' (batch_size, # of cluster)
        # N : 'Nvec' (# of cluster, )
        # m : 'm' (# of cluster, latent_dim)
        # W : 'B' (# of cluster, latent_dim, latent_dim)
        # v: 'nu' (# of cluster)

        # DPParam = DPObj.fit(z_batch)
        if dataset == 'firstBatch' or dataset == 'secondBatch':
            newinitname = results.initModelPath
            DPObj = DP.DP(output_path=fullOutputPath,
                          initname=newinitname,
                          gamma1=gamma1,
                          gamma0=gamma0,
                          sf=sf,
                          nBatch=nBatch,
                          taskID=taskID)
            DPParam, newinitname = DPObj.fitWithWarmStart(z_batch, newinitname)
        else:
            if epoch == 0 and iteration == 0:
                newinitname = 'randexamples'
                if dataset == 'reuters10k':
                    DPObj = DP.DP(output_path=fullOutputPath,
                                  initname=newinitname,
                                  gamma1=gamma1,
                                  gamma0=gamma0,
                                  Kmax=Kmax,
                                  sf=sf,
                                  nBatch=nBatch,
Exemplo n.º 11
0
import DP
import numpy as np
import sys

if __name__ == "__main__":
    # Expected invocation: <script> CASE_ID METHOD
    # Fail with a readable message instead of an IndexError when arguments
    # are missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s CASE_ID METHOD (METHOD 'pi' runs policy "
                 "iteration, anything else runs value iteration)"
                 % sys.argv[0])

    case_id = sys.argv[1]
    method = sys.argv[2]

    # Named ``grid`` rather than ``map`` so the builtin is not shadowed.
    grid = np.loadtxt("../testcase/case" + case_id + ".txt")
    terminal = np.loadtxt("../testcase/terminal" + case_id + ".txt",
                          dtype='i')
    size_row, size_col = grid.shape
    # Zipping one iterator with itself groups consecutive entries into pairs.
    it = iter(terminal)
    terminal = list(zip(it, it))
    # NOTE(review): the meaning of the trailing -1 is defined by DP.DP and is
    # not visible from here.
    dp = DP.DP(size_row, size_col, grid, terminal, -1)
    if method == "pi":
        count = dp.policyIteration()
        print("Stop after " + str(count) + " iteration of policy iteration")
    else:
        count = dp.valueIteration()
        print("Stop after " + str(count) + " iteration of value iteration")
    dp.plot(case_id, method)
Exemplo n.º 12
0
## Build the full output path for this run from the experiment settings
## (createOutputFolderName is defined elsewhere)
fullOutputPath = createOutputFolderName(outputPath, Kmax, dataset, epoch,
                                        batch_iter, scale, batchsize, rep, sf)
## Name the log file; console output is redirected to it below when requested
logFileName = os.path.join(fullOutputPath, 'log.txt')

## When results.logFile is set, everything printed to stdout goes to log.txt.
## NOTE(review): the file handle is never closed or restored in the visible code.
if results.logFile:
    sys.stdout = open(logFileName, 'w')

## Wrap aa['z'] with XData (defined elsewhere).
## NOTE(review): MNIST_df is not used in the visible part of the script.
MNIST_df = XData(aa['z'], dtype='auto')
##########################################################
## Create a DP object with the 'randexamples' initialisation and fit it on
## aa['z']; fit returns DPParam together with a new init name
DPObj = DP.DP(output_path=fullOutputPath,
              initname='randexamples',
              gamma1=gamma1,
              gamma0=gamma0,
              Kmax=Kmax,
              sf=sf,
              nLap=nLap,
              taskID=taskID)
DPParam, newinitname = DPObj.fit(aa['z'])
## After training the model, DPParam holds the fitted parameters
#########################################################
## Add evaluation summary metrics and save results
#########################################################
## Reuse aa['z'] as z_fit and pass it with DPParam to obtainFittedYFromDP
## (presumably yields the fitted labels for all training data - confirm)
z_fit = aa['z']
fittedY = obtainFittedYFromDP(DPParam, z_fit)


####################################
## Obtain the relationship between the fitted class label and the true label, stored in a dictionary