示例#1
0
    def __init__(self, field):
        """
        Record the DBLP input files and the per-field result file locations,
        then clean the XML dump and match the list of experts against it.
        """
        numpy.random.seed(21)

        # Raw DBLP XML dump and the cleaned copy we generate from it
        dblpDir = PathDefaults.getDataDir() + "dblp/"
        self.xmlFileName = dblpDir + "dblp.xml"
        self.xmlCleanFilename = dblpDir + "dblpClean.xml"

        # Result files for this particular field of expertise
        fieldDir = PathDefaults.getDataDir() + "reputation/" + field + "/"
        self.expertsFileName = fieldDir + "experts.txt"
        self.expertMatchesFilename = fieldDir + "experts_matches.csv"
        self.trainExpertMatchesFilename = fieldDir + "experts_train_matches.csv"
        self.testExpertMatchesFilename = fieldDir + "experts_test_matches.csv"
        self.coauthorsFilename = fieldDir + "coauthors.csv"
        self.publicationsFilename = fieldDir + "publications.csv"

        # Parsing/matching parameters for the (very large) XML file
        self.stepSize = 100000
        self.numLines = 33532888
        self.publicationTypes = {"article", "inproceedings", "proceedings", "book", "incollection", "phdthesis", "mastersthesis", "www"}
        self.p = 0.5
        self.matchCutoff = 0.95

        self.cleanXML()
        self.matchExperts()
        logging.warning("Now you must disambiguate the matched experts if not ready done")
    def __init__(self, maxIter=None, iterStartTimeStamp=None): 
        """
        Set up the file names for the Erasm group-membership dataset and
        preprocess the ratings.

        :param maxIter: maximum number of iterations, or None for no limit
        :param iterStartTimeStamp: starting timestamp (epoch seconds) of the
            iterator; defaults to 1286229600 when None
        """
        outputDir = PathDefaults.getOutputDir() + "recommend/erasm/"

        if not os.path.exists(outputDir): 
            os.mkdir(outputDir)

        # iterStartDate is the starting date of the iterator; use "is not
        # None" so a falsy-but-valid timestamp would not be replaced
        if iterStartTimeStamp is not None: 
            self.iterStartTimeStamp = iterStartTimeStamp
        else: 
            self.iterStartTimeStamp = 1286229600

        # One iteration corresponds to 30 days, in seconds
        self.timeStep = timedelta(30).total_seconds()

        self.ratingFileName = outputDir + "data.npz"
        self.userDictFileName = outputDir + "userIdDict.pkl"
        self.groupDictFileName = outputDir + "groupIdDict.pkl"
        self.isTrainRatingsFileName = outputDir + "is_train.npz"

        self.dataDir = PathDefaults.getDataDir() + "erasm/"
        self.dataFileName = self.dataDir + "groupMembers-29-11-12"

        self.maxIter = maxIter
        self.trainSplit = 4.0/5

        self.processRatings()
        self.splitDataset()
        self.loadProcessedData()
def processSimpleDataset(name, numRealisations, split, ext=".csv", delimiter=",", usecols=None, skiprows=1, converters=None):
    """
    Load a regression dataset from the modelPenalisation data directory,
    split it into features/target, and save shuffled realisation splits.
    """
    numpy.random.seed(21)
    dataDir = PathDefaults.getDataDir() + "modelPenalisation/regression/"
    fileName = dataDir + name + ext
    print("Loading data from file " + fileName)

    outputDir = PathDefaults.getDataDir() + "modelPenalisation/regression/" + name + "/"

    # The last column is the regression target, the rest are features
    XY = numpy.loadtxt(fileName, delimiter=delimiter, skiprows=skiprows, usecols=usecols, converters=converters)
    X, y = XY[:, :-1], XY[:, -1]

    idx = Sampling.shuffleSplit(numRealisations, X.shape[0], split)
    preprocessSave(X, y, outputDir, idx)
    def testGenerateRandomGraph(self):
        """Generate a small-world graph and run the ego simulator over it."""
        egoFileName = PathDefaults.getDataDir() + "infoDiffusion/EgoData.csv"
        alterFileName = PathDefaults.getDataDir() + "infoDiffusion/AlterData.csv"

        numVertices = 1000
        infoProb = 0.1

        # Random small-world graph to diffuse information over
        rewireProb = 0.1
        neighbours = 10
        graph = SparseGraph(VertexList(numVertices, 0))
        graph = SmallWorldGenerator(rewireProb, neighbours).generate(graph)

        self.svmEgoSimulator.generateRandomGraph(egoFileName, alterFileName, infoProb, graph)
示例#5
0
def saveRatingMatrix(): 
    """
    Take the coauthor graph above and make vertices indexed from 0 then save 
    as matrix market format.

    Fixes: uses the builtin int dtype (numpy.int was deprecated and removed
    in NumPy 1.20+), and computes the kept vertex indices from a bincount
    padded to the matrix size so the boolean mask has the correct length.
    """
    edgeFileName = PathDefaults.getOutputDir() + "erasm/edges2.txt"

    logging.debug("Reading edge list")
    edges = numpy.loadtxt(edgeFileName, delimiter=",", dtype=int)
    logging.debug("Total number of edges: " + str(edges.shape[0]))

    # Map each raw vertex id to a contiguous index starting at 0, in
    # first-seen order (edge[0] before edge[1] within each row, as before)
    vertexIdDict = {}
    for edge in edges:
        for vertexId in edge:
            if vertexId not in vertexIdDict:
                vertexIdDict[vertexId] = len(vertexIdDict)

    n = len(vertexIdDict)
    R = scipy.sparse.lil_matrix((n, n))
    logging.debug("Creating sparse matrix")

    # Symmetric coauthorship counts
    for edge in edges:
        R[vertexIdDict[edge[0]], vertexIdDict[edge[1]]] += 1
        R[vertexIdDict[edge[1]], vertexIdDict[edge[0]]] += 1

    logging.debug("Created matrix " + str(R.shape) + " with " + str(R.getnnz()) + " non zeros")

    R = R.tocsr()

    # Keep only vertices with at least minCoauthors distinct coauthors
    # (bincount over the nonzero row indices counts distinct neighbours)
    minCoauthors = 20
    logging.debug("Removing vertices with <" + str(minCoauthors) + " coauthors")
    nonzeros = R.nonzero()
    coauthorCounts = numpy.bincount(nonzeros[0], minlength=R.shape[0])
    inds = numpy.nonzero(coauthorCounts >= minCoauthors)[0]
    R = R[inds, :][:, inds]
    logging.debug("Matrix has shape " + str(R.shape) + " with " + str(R.getnnz()) + " non zeros")

    matrixFileName = PathDefaults.getOutputDir() + "erasm/R"
    scipy.io.mmwrite(matrixFileName, R)
    logging.debug("Wrote matrix to file " + matrixFileName)
示例#6
0
    def __init__(self, trainXIteratorFunc, testXIteratorFunc, cmdLine=None, defaultAlgoArgs = None, dirName=""):
        """
        Store the iterator factories and algorithm parameters for a
        recommendation experiment.

        Priority for default args:
         - best priority: command-line value
         - middle priority: set-by-function value
         - lower priority: class value
        """
        # Merge the class defaults with the user-supplied parameters
        self.algoArgs = RecommendExpHelper.newAlgoParams(defaultAlgoArgs)

        # Factories returning iterators over the training and test matrices
        self.trainXIteratorFunc = trainXIteratorFunc
        self.testXIteratorFunc = testXIteratorFunc

        self.logStep = 10          # how often to print output
        self.sampleSize = 5*10**6  # max observations used for model selection

        # Base directory for the experiment results
        self.resultsDir = PathDefaults.getOutputDir() + "recommend/" + dirName + "/"

        # Finally let the command line override the algorithm parameters
        self.readAlgoParams(cmdLine)
示例#7
0
 def main(argv=None):
     """
     Command-line entry point: parse options, then generate the Bemol data
     files.

     Options: -h/--help, -d/--dir <output dir>, -n/--nb_user <count>,
     -D/--debug. Returns 0 after printing help, 2 on a usage error, and
     None on success.

     Fix: the long-option list had been corrupted to
     ["help", "dir=", "nb_user="******"debug"], which concatenated the last
     two options into one string; the local 'dir' also shadowed the builtin.
     """
     if argv is None:
         argv = sys.argv
     try:
         # read options
         try:
             opts, args = getopt.getopt(argv[1:], "hd:n:D", ["help", "dir=", "nb_user=", "debug"])
         except getopt.error as msg:
             raise RGUsage(msg)
         # apply options
         dataDir = PathDefaults.getDataDir() + "cluster/"
         nb_user = None
         log_level = logging.INFO
         for o, a in opts:
             if o in ("-h", "--help"):
                 print(__doc__)
                 return 0
             elif o in ("-d", "--dir"):
                 dataDir = a
             elif o in ("-n", "--nb_user"):
                 nb_user = int(a)
             elif o in ("-D", "--debug"):
                 log_level = logging.DEBUG
         logging.basicConfig(stream=sys.stdout, level=log_level, format='%(levelname)s (%(asctime)s):%(message)s')
         # process: generate data files
         BemolData.generate_data_file(dataDir, nb_user)
     except RGUsage as err:
         logging.error(err.msg)
         logging.error("for help use --help")
         return 2
    def testLoadParams(self):
        """Check that saved predictor parameters round-trip through a file."""
        try:
            lmbda = 0.01
            alterRegressor = PrimalRidgeRegression(lmbda)
            egoRegressor = PrimalRidgeRegression(lmbda)
            predictor = EgoEdgeLabelPredictor(alterRegressor, egoRegressor)

            fileName = PathDefaults.getTempDir() + "tempParams.pkl"
            predictor.saveParams([0.1, 0.2], [egoRegressor.setLambda, alterRegressor.setLambda], fileName)

            params2 = predictor.loadParams(fileName)

            # Each loaded entry records (class path, setter name, value)
            expected = [("apgl.predictors.PrimalRidgeRegression", "setLambda", 0.1),
                        ("apgl.predictors.PrimalRidgeRegression", "setLambda", 0.2)]
            for i, (className, methodName, value) in enumerate(expected):
                self.assertTrue(params2[i][0] == className)
                self.assertTrue(params2[i][1] == methodName)
                self.assertTrue(params2[i][2] == value)
        except IOError as e:
            logging.warn(e)
示例#9
0
    def testWriteToFile3(self):
        """
        Write an Erdos-Renyi graph and then a small-world graph to Pajek
        files.
        """
        numVertices = 20
        numFeatures = 0
        graph = SparseGraph(VertexList(numVertices, numFeatures))

        # Erdos-Renyi graph with edge probability 0.1
        graph = ErdosRenyiGenerator(0.1).generate(graph)

        pw = PajekWriter()
        directory = PathDefaults.getOutputDir() + "test/"
        pw.writeToFile(directory + "erdosRenyi20", graph)

        # Small-world graph: rewiring probability 0.2, 3 neighbours
        graph.removeAllEdges()
        graph = SmallWorldGenerator(0.2, 3).generate(graph)

        pw.writeToFile(directory + "smallWorld20", graph)
示例#10
0
    def testToyData(self):
        """
        Numerically integrate the toy densities over the grid (trapezoid-style
        corner averaging) and check they integrate to one.

        Fix: assertAlmostEquals is a deprecated alias removed in Python 3.12;
        assertAlmostEqual is used instead.
        """
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

        pxSum = 0
        pY1XSum = 0
        pYminus1XSum = 0

        px2Sum = 0
        # Area of one grid cell (the grid is assumed uniform)
        squareArea = (gridPoints[1]-gridPoints[0])**2

        for i in range(gridPoints.shape[0]-1):
            for j in range(gridPoints.shape[0]-1):
                # Average the density over the four corners of cell (i, j)
                px = (pdfX[i,j]+pdfX[i+1,j]+pdfX[i, j+1]+pdfX[i+1, j+1])/4
                pxSum += px*squareArea

                pY1X = (pdfY1X[i,j]+pdfY1X[i+1,j]+pdfY1X[i, j+1]+pdfY1X[i+1, j+1])/4
                pY1XSum += pY1X*squareArea

                pYminus1X = (pdfYminus1X[i,j]+pdfYminus1X[i+1,j]+pdfYminus1X[i, j+1]+pdfYminus1X[i+1, j+1])/4
                pYminus1XSum += pYminus1X*squareArea

                px2Sum += px*pY1X*squareArea + px*pYminus1X*squareArea

        self.assertAlmostEqual(pxSum, 1)
        print(pY1XSum)
        print(pYminus1XSum)

        self.assertAlmostEqual(px2Sum, 1)
示例#11
0
    def testPredict2(self):
        """
        Train TreeRank on the Gauss2D dataset over a range of max depths and
        compare the train/test AUCs with reference values.

        Fixes: assertAlmostEquals (removed in Python 3.12) replaced with
        assertAlmostEqual; manual index counter replaced with enumerate.
        """
        #Test on Gauss2D dataset
        dataDir = PathDefaults.getDataDir()

        fileName = dataDir + "Gauss2D_learn.csv"
        XY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        X = XY[:, 0:2]
        y = XY[:, 2]

        fileName = dataDir + "Gauss2D_test.csv"
        testXY = numpy.loadtxt(fileName, skiprows=1, usecols=(1,2,3), delimiter=",")
        testX = testXY[:, 0:2]
        testY = testXY[:, 2]

        X = Standardiser().standardiseArray(X)
        testX = Standardiser().standardiseArray(testX)

        maxDepths = range(3, 10)
        trainAucs = numpy.array([0.7194734, 0.7284824, 0.7332185, 0.7348198, 0.7366152, 0.7367508, 0.7367508, 0.7367508])
        testAucs = numpy.array([0.6789078, 0.6844632, 0.6867918, 0.6873420, 0.6874820, 0.6874400, 0.6874400, 0.6874400])

        #The results are approximately the same, but not exactly
        for i, maxDepth in enumerate(maxDepths):
            treeRank = TreeRank(self.leafRanklearner)
            treeRank.setMaxDepth(maxDepth)
            treeRank.learnModel(X, y)
            trainScores = treeRank.predict(X)
            testScores = treeRank.predict(testX)

            self.assertAlmostEqual(Evaluator.auc(trainScores, y), trainAucs[i], 2)
            self.assertAlmostEqual(Evaluator.auc(testScores, testY), testAucs[i], 1)
示例#12
0
    def testEstimate(self):
        """
        Run ABC-SMC on a simple normal model and check the posterior sample
        size and the closeness of the mean to the true parameter.

        Fix: assertEquals is a deprecated alias removed in Python 3.12.

        NOTE(review): theta, abcMetrics and createNormalModel are not defined
        in this block — presumably module-level fixtures; verify they exist.
        """
        #Lets set up a simple model based on normal dist
        abcParams = ABCParameters()

        epsilonArray = numpy.array([0.5, 0.2, 0.1])
        posteriorSampleSize = 20

        #Lets get an empirical estimate of Sprime
        model = NormalModel(abcMetrics)
        model.setMu(theta[0])
        model.setSigma(theta[1])

        Sprime = abcMetrics.summary(model.simulate())
        logging.debug(("Real summary statistic: " + str(Sprime)))

        thetaDir = PathDefaults.getTempDir()

        abcSMC = ABCSMC(epsilonArray, createNormalModel, abcParams, thetaDir)
        abcSMC.maxRuns = 100000
        abcSMC.setPosteriorSampleSize(posteriorSampleSize)
        thetasArray = abcSMC.run()
        thetasArray = numpy.array(thetasArray)

        meanTheta = numpy.mean(thetasArray, 0)
        logging.debug((thetasArray.shape))
        logging.debug(thetasArray)
        logging.debug(meanTheta)

        print(thetasArray.shape[0], posteriorSampleSize)

        #Note only mean needs to be similar
        self.assertTrue(thetasArray.shape[0] >= posteriorSampleSize)
        self.assertEqual(thetasArray.shape[1], 2)
        self.assertTrue(numpy.linalg.norm(theta[0] - meanTheta[0]) < 0.2)
示例#13
0
 def testGraphFromMatFile(self):
     """
     Load the ego/alter transmission graph from a Matlab file and check its
     structure against the raw examples list.

     Fixes: integer division (//) for the feature indices — under Python 3
     the old numFeatures/2 expressions yield floats, which are invalid as
     slice indices; assertEquals (removed in Python 3.12) replaced with
     assertEqual.
     """
     matFileName = PathDefaults.getDataDir() + "infoDiffusion/EgoAlterTransmissions1000.mat"
     sGraph = EgoUtils.graphFromMatFile(matFileName)

     examplesList = ExamplesList.readFromMatFile(matFileName)
     numFeatures = examplesList.getDataFieldSize("X", 1)

     self.assertEqual(examplesList.getNumExamples(), sGraph.getNumEdges())
     self.assertEqual(examplesList.getNumExamples()*2, sGraph.getNumVertices())
     self.assertEqual(numFeatures//2+1, sGraph.getVertexList().getNumFeatures())

     #Every even vertex has information, odd does not
     for i in range(0, sGraph.getNumVertices()):
         vertex = sGraph.getVertex(i)

         if i%2 == 0:
             self.assertEqual(vertex[sGraph.getVertexList().getNumFeatures()-1], 1)
         else:
             self.assertEqual(vertex[sGraph.getVertexList().getNumFeatures()-1], 0)

     #Test the first few vertices are the same
     for i in range(0, 10):
         vertex1 = sGraph.getVertex(i*2)[0:numFeatures//2]
         vertex2 = sGraph.getVertex(i*2+1)[0:numFeatures//2]
         vertexEx1 = examplesList.getSubDataField("X", numpy.array([i])).ravel()[0:numFeatures//2]
         vertexEx2 = examplesList.getSubDataField("X", numpy.array([i])).ravel()[numFeatures//2:numFeatures]

         self.assertTrue((vertex1 == vertexEx1).all())
         self.assertTrue((vertex2 == vertexEx2).all())
示例#14
0
    def testReadFromMatFile(self):
        """Round-trip an ExamplesList through a Matlab file as fields are added."""
        numExamples = 10
        tempDir = PathDefaults.getTempDir()
        fileName = tempDir + "examplesList1"

        ml = ExamplesList(numExamples)

        # Add progressively more data fields, writing and re-reading after
        # each addition to check equality is preserved
        for fieldName, numCols in (("X", 10), ("Y", 20), ("Z", 50)):
            ml.addDataField(fieldName, rand(numExamples, numCols))
            ml.writeToMatFile(fileName)

            ml2 = ExamplesList.readFromMatFile(fileName)
            self.assertTrue(ml == ml2)
    def testComputeIdealPenalty(self):
        """
        Compare the grid-based ideal penalty with the penalty computed from a
        held-out test set on the toy dataset.

        Fix: assertAlmostEquals is a deprecated alias removed in Python 3.12.
        """
        dataDir = PathDefaults.getDataDir() + "modelPenalisation/toy/"
        data = numpy.load(dataDir + "toyData.npz")
        gridPoints, X, y, pdfX, pdfY1X, pdfYminus1X = data["arr_0"], data["arr_1"], data["arr_2"], data["arr_3"], data["arr_4"], data["arr_5"]

        sampleSize = 100
        trainX, trainY = X[0:sampleSize, :], y[0:sampleSize]
        testX, testY = X[sampleSize:, :], y[sampleSize:]

        #We form a test set from the grid points
        fullX = numpy.zeros((gridPoints.shape[0]**2, 2))
        for m in range(gridPoints.shape[0]):
            fullX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 0] = gridPoints
            fullX[m*gridPoints.shape[0]:(m+1)*gridPoints.shape[0], 1] = gridPoints[m]

        C = 1.0
        gamma = 1.0
        args = (trainX, trainY, fullX, C, gamma, gridPoints, pdfX, pdfY1X, pdfYminus1X)
        penalty = computeIdealPenalty(args)

        #Now compute penalty using data
        args = (trainX, trainY, testX, testY, C, gamma)
        penalty2 = computeIdealPenalty2(args)

        self.assertAlmostEqual(penalty2, penalty, 2)
示例#16
0
 def __init__(self):
     """Store the hormone label names, data directory and level boundaries."""
     self.labelNames = ["Cortisol.val", "Testosterone.val", "IGF1.val"]
     self.dataDir = PathDefaults.getDataDir() + "metabolomic/"
     # Boundaries dividing each hormone's concentration range into levels
     self.boundsDict = {
         "Cortisol": numpy.array([0, 89, 225, 573]),
         "Testosterone": numpy.array([0, 3, 9, 13]),
         "IGF1": numpy.array([0, 200, 441, 782]),
     }
示例#17
0
 def getLsos(self):
     """
     Return a function to display R memory usage.
     """
     utilScript = PathDefaults.getSourceDir() + "/apgl/metabolomics/R/Util.R"
     # Source the helper script so the lsos function exists in the R session
     robjects.r["source"](utilScript)
     return robjects.r["lsos"]
示例#18
0
    def __init__(self, YList, X, featuresName, ages, args):
        """
        Store the metabolomics experiment data and configure the leaf-rank
        generators used by the tree learners.
        """
        super(MetabolomicsExpRunner, self).__init__(args=args)
        self.X = X
        self.YList = YList  # the list of concentrations
        self.featuresName = featuresName
        self.args = args
        self.ages = ages

        # Forest / cross-validation configuration
        self.maxDepth = 10
        self.numTrees = 10
        self.sampleSize = 1.0
        self.sampleReplace = True
        self.folds = 5
        self.resultsDir = PathDefaults.getOutputDir() + "metabolomics/"

        # Plain, PCA-based and functional leaf-rank generators
        self.leafRankGenerators = [
            (LinearSvmGS.generate(), "SVM"),
            (SvcGS.generate(), "RBF-SVM"),
            (DecisionTree.generate(), "CART"),
        ]
        self.pcaLeafRankGenerators = [(LinearSvmPca.generate(), "LinearSVM-PCA")]
        self.funcLeafRankGenerators = [
            (LinearSvmFGs.generate, "SVMF"),
            (SvcFGs.generate, "RBF-SVMF"),
            (DecisionTreeF.generate, "CARTF"),
        ]

        # Indicator labels for each hormone, plus their display names
        YIgf1Inds, YICortisolInds, YTestoInds = MetabolomicsUtils.createIndicatorLabels(YList)
        self.hormoneInds = [YIgf1Inds, YICortisolInds, YTestoInds]
        self.hormoneNames = MetabolomicsUtils.getLabelNames()
示例#19
0
    def __init__(self, maxIter=None, iterStartTimeStamp=None):
        """
        Return a training and test set for movielens based on the time each
        rating was made.

        :param maxIter: maximum number of iterations, or None for no limit
        :param iterStartTimeStamp: starting timestamp of the iterator;
            defaults to 789652009 when None
        """
        # One iteration corresponds to 30 days, in seconds
        self.timeStep = timedelta(30).total_seconds()

        # iterStartDate is the starting date of the iterator; "is not None"
        # keeps a legitimate zero timestamp from being silently replaced
        if iterStartTimeStamp is not None:
            self.iterStartTimeStamp = iterStartTimeStamp
        else:
            self.iterStartTimeStamp = 789652009

        outputDir = PathDefaults.getOutputDir() + "recommend/erasm/"

        self.numRatings = 402872
        self.minContacts = 10

        if not os.path.exists(outputDir):
            os.mkdir(outputDir)

        self.ratingFileName = outputDir + "data.npz"
        self.userDictFileName = outputDir + "userIdDict.pkl"
        self.isTrainRatingsFileName = outputDir + "is_train.npz"

        self.maxIter = maxIter
        self.trainSplit = 4.0 / 5

        self.processRatings()
        self.splitDataset()
        self.loadProcessedData()

        if self.maxIter is not None:
            logging.debug("Maximum number of iterations: " + str(self.maxIter))
示例#20
0
    def testWriteToFile3(self):
        """Write random graphs (Erdos-Renyi, then small world) in Pajek format."""
        numVertices = 20
        numFeatures = 0
        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList)

        pw = PajekWriter()
        directory = PathDefaults.getOutputDir() + "test/"

        # First an Erdos-Renyi graph with edge probability 0.1
        erdosProb = 0.1
        graph = ErdosRenyiGenerator(erdosProb).generate(graph)
        pw.writeToFile(directory + "erdosRenyi20", graph)

        # Then a small-world graph over the same (now empty) vertex set
        rewireProb = 0.2
        numNeighbours = 3
        graph.removeAllEdges()
        graph = SmallWorldGenerator(rewireProb, numNeighbours).generate(graph)
        pw.writeToFile(directory + "smallWorld20", graph)
    def testEdgeFile(self):
        """
        Figure out the problem with the edge file.

        Fixes: the file handle is now closed via a context manager (it was
        leaked before), the local no longer shadows the builtin name 'file',
        and the deprecated assertEquals alias (removed in Python 3.12) is
        replaced with assertEqual.
        """
        dataDir = PathDefaults.getDataDir() + "cluster/"
        edgesFilename = dataDir + "Cit-HepTh.txt"

        edges = {}
        vertices = {}

        with open(edgesFilename, 'r') as edgesFile:
            # Skip the four header lines
            for _ in range(4):
                edgesFile.readline()

            for line in edgesFile:
                (vertex1, sep, vertex2) = line.partition("\t")
                vertex1 = vertex1.strip()
                vertex2 = vertex2.strip()
                edges[(vertex1, vertex2)] = 0
                vertices[vertex1] = 0
                vertices[vertex2] = 0

        #It says there are 352807 edges in paper and 27770 vertices
        self.assertEqual(len(edges), 352807)
        self.assertEqual(len(vertices), 27770)
示例#22
0
    def __init__(self, maxIter=None, iterStartTimeStamp=None): 
        """
        Return a training and test set for netflix based on the time each 
        rating was made. There are 62 iterations. 

        :param maxIter: maximum number of iterations, or None for no limit
        :param iterStartTimeStamp: starting timestamp of the iterator;
            defaults to midnight 2001-01-01 (local time) when None
        """
        # One iteration corresponds to 30 days, in seconds
        self.timeStep = timedelta(30).total_seconds()

        # iterStartDate is the starting date of the iterator; "is not None"
        # keeps a legitimate zero/epoch timestamp from being replaced
        if iterStartTimeStamp is not None:
            self.iterStartTimeStamp = iterStartTimeStamp
        else:
            self.iterStartTimeStamp = time.mktime(datetime(2001,1,1).timetuple())

        self.startMovieID = 1
        self.endMovieID = 17770

        # Known statistics of the netflix dataset, used for sanity checks
        self.numMovies = 17770
        self.numRatings = 100480507
        self.numProbeMovies = 16938
        self.numProbeRatings = 1408395
        self.numCustomers = 480189

        outputDir = PathDefaults.getOutputDir() + "recommend/netflix/"

        if not os.path.exists(outputDir):
            os.mkdir(outputDir)

        self.ratingFileName = outputDir + "data.npz"
        self.custDictFileName = outputDir + "custIdDict.pkl"
        self.probeFileName = PathDefaults.getDataDir() + "netflix/probe.txt"
        self.testRatingsFileName = outputDir + "test_data.npz"
        self.isTrainRatingsFileName = outputDir + "is_train.npz"

        self.maxIter = maxIter
        self.trainSplit = 4.0/5

        self.processRatings()
        #self.processProbe()
        self.splitDataset()
        self.loadProcessedData()

        if self.maxIter is not None:
            logging.debug("Maximum number of iterations: " + str(self.maxIter))
示例#23
0
    def processRatings(self): 
        """
        Convert the dataset into a matrix and save the results for faster 
        access. 

        Reads one "mv_*.txt" file per movie from the netflix training set,
        accumulates (movieId, custId, rating, date) quadruples in compact
        arrays, and saves them with numpy.savez together with a pickled
        customer-id -> column-index dict. Skipped entirely when both output
        files already exist.
        """
        if not os.path.exists(self.ratingFileName) or not os.path.exists(self.custDictFileName): 
            dataDir = PathDefaults.getDataDir() + "netflix/training_set/"

            logging.debug("Processing ratings given in " + dataDir)

            # Maps raw customer ids to contiguous indices starting at 0
            custIdDict = {} 
            custIdSet = set([])        
            
            # array.array keeps memory down for ~100M entries:
            # "I" = unsigned int, "B" = unsigned char, "L" = unsigned long
            movieIds = array.array("I")
            custIds = array.array("I")
            ratings = array.array("B")
            dates = array.array("L")
            j = 0
            
            for i in range(self.startMovieID, self.endMovieID+1): 
                Util.printIteration(i-1, 1, self.endMovieID-1)
                # One file per movie, e.g. mv_0000001.txt; the first line is a
                # header, remaining lines are "custId,rating,date"
                # NOTE(review): the file handle is never closed explicitly
                ratingsFile = open(dataDir + "mv_" + str(i).zfill(7) + ".txt")
                ratingsFile.readline()
                
                for line in ratingsFile: 
                    vals = line.split(",")
                    
                    custId = int(vals[0])
                    
                    # Assign a fresh contiguous index to unseen customers
                    if custId not in custIdSet: 
                        custIdSet.add(custId)
                        custIdDict[custId] = j
                        custInd = j 
                        j += 1 
                    else: 
                        custInd = custIdDict[custId]
                    
                    rating = int(vals[1])     
                    t = datetime.strptime(vals[2].strip(), "%Y-%m-%d")
                
                    # Movie indices are stored zero-based; dates as epoch seconds
                    movieIds.append(i-1)
                    custIds.append(custInd)   
                    ratings.append(rating)
                    dates.append(int(time.mktime(t.timetuple()))) 
                    
            movieIds = numpy.array(movieIds, numpy.uint32)
            custIds = numpy.array(custIds, numpy.uint32)
            ratings = numpy.array(ratings, numpy.uint8)
            dates = numpy.array(dates, numpy.uint32)
            
            # Sanity check against the known size of the netflix dataset
            assert ratings.shape[0] == self.numRatings            
            
            numpy.savez(self.ratingFileName, movieIds, custIds, ratings, dates) 
            logging.debug("Saved ratings file as " + self.ratingFileName)
            
            pickle.dump(custIdDict, open(self.custDictFileName, 'wb'))
            logging.debug("Saved custIdDict as " + self.custDictFileName)
        else: 
            logging.debug("Ratings file " + str(self.ratingFileName) + " already processed")
示例#24
0
    def loadData():
        """
        Return the raw spectra and the MDS transformed data as well as the DataFrame
        for the MDS data. 

        Returns the tuple (X, X2, Xs, (Xopls1, Xopls2, Xopls3), YList, ages, df):
        X are the raw NMR spectra, Xs a standardised copy, X2 the log/AP
        processed spectra, the Xopls* arrays the OPLS/OSC-corrected spectra
        for IGF1, cortisol and testosterone, YList the label list and ages
        the standardised ages.

        NOTE(review): df is rebound by every read_table call, so the returned
        df and the labels in YList come from the LAST file read
        ("testo.log.OSC.1.txt") — confirm this is intentional.
        """
        utilsLib = importr('utils')

        dataDir = PathDefaults.getDataDir() +  "metabolomic/"
        # Raw NMR spectra: columns 1..950 of the total data file
        fileName = dataDir + "data.RMN.total.6.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        maxNMRIndex = 951
        X = df.rx(robjects.IntVector(range(1, maxNMRIndex)))
        X = numpy.array(X).T

        #Load age and normalise (missing values are assigned the mean)
        ages = numpy.array(df.rx(robjects.StrVector(["Age"]))).ravel()
        meanAge = numpy.mean(ages[numpy.logical_not(numpy.isnan(ages))])
        ages[numpy.isnan(ages)] = meanAge
        ages = Standardiser().standardiseArray(ages)

        # Standardised copy of the raw spectra
        Xs = X.copy()
        standardiser = Standardiser()
        Xs = standardiser.standardiseArray(X)

        # Log/AP processed spectra
        fileName = dataDir + "data.sportsmen.log.AP.1.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        maxNMRIndex = 419
        X2 = df.rx(robjects.IntVector(range(1, maxNMRIndex)))
        X2 = numpy.array(X2).T

        #Load the OPLS corrected files
        fileName = dataDir + "IGF1.log.OSC.1.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        minNMRIndex = 22
        maxNMRIndex = 441
        Xopls1 = df.rx(robjects.IntVector(range(minNMRIndex, maxNMRIndex)))
        Xopls1 = numpy.array(Xopls1).T

        fileName = dataDir + "cort.log.OSC.1.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        minNMRIndex = 20
        maxNMRIndex = 439
        Xopls2 = df.rx(robjects.IntVector(range(minNMRIndex, maxNMRIndex)))
        Xopls2 = numpy.array(Xopls2).T

        fileName = dataDir + "testo.log.OSC.1.txt"
        df = utilsLib.read_table(fileName, header=True, row_names=1, sep=",")
        minNMRIndex = 22
        maxNMRIndex = 441
        Xopls3 = df.rx(robjects.IntVector(range(minNMRIndex, maxNMRIndex)))
        Xopls3 = numpy.array(Xopls3).T

        #Let's load all the label data here
        labelNames = MetabolomicsUtils.getLabelNames()
        YList = MetabolomicsUtils.createLabelList(df, labelNames)
        
        return X, X2, Xs, (Xopls1, Xopls2, Xopls3), YList, ages, df
示例#25
0
    def testWriteToFile(self):
        """Write both dictionary-backed test graphs to simple-graph files."""
        sgw = SimpleGraphWriter()
        directory = PathDefaults.getOutputDir() + "test/"

        # Have to check the resulting files by hand
        for baseName, dctGraph in (("dictTestUndirected", self.dctGraph1),
                                   ("dictTestDirected", self.dctGraph2)):
            sgw.writeToFile(directory + baseName, dctGraph)
 def profileClusterFromIterator(self):
     """
     Profile clusterFromIterator over an increasing sequence of subgraphs.
     """
     iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
     dataDir = PathDefaults.getDataDir() + "cluster/"
     #iterator = getBemolGraphIterator(dataDir)
     
     # The nested function must be named "run": ProfileUtils.profile
     # evaluates the literal string 'run()' against locals() below
     def run(): 
         clusterList, timeList, boundList = self.clusterer.clusterFromIterator(iterator, verbose=True)
         print(timeList.cumsum(0))
         
     ProfileUtils.profile('run()', globals(), locals())
示例#27
0
 def getIterator(): 
     """
     Return an islice over the Bemol purchase-graph iterator.
     """
     dataDir = PathDefaults.getDataDir() + "cluster/"

     nbUser = 10000           # 'None' would keep all users
     nbPurchasesPerIt = 500   # 'None' would take all purchases per date
     startingIteration = 300
     endingIteration = 600    # 'None' would keep all iterations
     stepSize = 1

     graphIterator = BemolData.getGraphIterator(dataDir, nbUser, nbPurchasesPerIt)
     return itertools.islice(graphIterator, startingIteration, endingIteration, stepSize)
示例#28
0
    def testReadGraph(self):
        """
        Read three simple-graph test files and check vertex/edge counts and
        edge weights.

        Fix: assertEquals is a deprecated alias removed in Python 3.12;
        assertEqual is used throughout.
        """
        fileName = PathDefaults.getDataDir() + "test/simpleGraph.txt"

        graphReader = SimpleGraphReader()
        graph = graphReader.readFromFile(fileName)

        logging.debug((graph.getAllEdges()))

        self.assertEqual(graph.isUndirected(), True)
        self.assertEqual(graph.getNumVertices(), 5)
        self.assertEqual(graph.getNumEdges(), 4)

        self.assertEqual(graph.getEdge(0, 1), 1)
        self.assertEqual(graph.getEdge(2, 4), 1)
        self.assertEqual(graph.getEdge(2, 2), 1)
        self.assertEqual(graph.getEdge(4, 0), 1)

        #Now test reading a file with the same graph but vertices indexed differently
        fileName = PathDefaults.getDataDir() + "test/simpleGraph2.txt"
        graph = graphReader.readFromFile(fileName)

        self.assertEqual(graph.isUndirected(), True)
        self.assertEqual(graph.getNumVertices(), 5)
        self.assertEqual(graph.getNumEdges(), 4)

        self.assertEqual(graph.getEdge(0, 1), 1.1)
        self.assertEqual(graph.getEdge(2, 4), 1)
        self.assertEqual(graph.getEdge(2, 2), 1.6)
        self.assertEqual(graph.getEdge(4, 0), 1)

        #Now test a file with directed edges
        fileName = PathDefaults.getDataDir() + "test/simpleGraph3.txt"
        graph = graphReader.readFromFile(fileName)

        self.assertEqual(graph.isUndirected(), False)
        self.assertEqual(graph.getNumVertices(), 5)
        self.assertEqual(graph.getNumEdges(), 4)

        self.assertEqual(graph.getEdge(0, 1), 1)
        self.assertEqual(graph.getEdge(2, 4), 1)
        self.assertEqual(graph.getEdge(2, 2), 1)
        self.assertEqual(graph.getEdge(4, 0), 1)
示例#29
0
    def testReadGraph(self):
        """
        Read the three simpleGraph test files and verify their structure and
        edge weights.

        Fixes: the repeated assertion batches are factored into a nested
        helper, and the deprecated assertEquals alias (removed in Python
        3.12) is replaced with assertEqual.
        """
        graphReader = SimpleGraphReader()

        def checkGraph(baseName, undirected, edgeWeights):
            # edgeWeights lists the expected weights of the four known edges
            fileName = PathDefaults.getDataDir() + "test/" + baseName
            graph = graphReader.readFromFile(fileName)
            self.assertEqual(graph.isUndirected(), undirected)
            self.assertEqual(graph.getNumVertices(), 5)
            self.assertEqual(graph.getNumEdges(), 4)
            for (u, v), w in zip([(0, 1), (2, 4), (2, 2), (4, 0)], edgeWeights):
                self.assertEqual(graph.getEdge(u, v), w)
            return graph

        graph = checkGraph("simpleGraph.txt", True, [1, 1, 1, 1])
        logging.debug((graph.getAllEdges()))

        #Same graph but with the vertices indexed differently
        checkGraph("simpleGraph2.txt", True, [1.1, 1, 1.6, 1])

        #A file with directed edges
        checkGraph("simpleGraph3.txt", False, [1, 1, 1, 1])
示例#30
0
 def __init__(self):
     """
     Build an iterator over a subsampled sequence of Bemol purchase graphs,
     skipping the first iterations and stepping through the rest.
     """
     dataDir = PathDefaults.getDataDir() + "cluster/"
     # Subsampling parameters; 'None' means "take everything".
     maxUsers = 2000
     maxPurchasesPerIt = 50
     firstIteration = 20
     lastIteration = None
     step = 10

     graphIterator = BemolData.getGraphIterator(dataDir, maxUsers, maxPurchasesPerIt)
     self.iterator = itertools.islice(graphIterator, firstIteration, lastIteration, step)
示例#31
0
    def getOutputFileName(graphType, p, k, infoProb):
        """
        Build the SVM ego-network output file name for the given graph type and
        parameters.  Small-world graphs encode p, k and q; Erdos-Renyi graphs
        encode p and q only.  Raises ValueError for any other graph type.
        """
        baseName = PathDefaults.getOutputDir() + "SvmEgoOutput_type=" + graphType

        if graphType == "SmallWorld":
            return baseName + "_p=" + str(p) + "_k=" + str(k) + "_q=" + str(infoProb)

        if graphType == "ErdosRenyi":
            return baseName + "_p=" + str(p) + "_q=" + str(infoProb)

        raise ValueError("Invalid graph type: " + graphType)
    def testGetTrainIteratorFunc(self):
        """
        Walk the train and test iterators in lockstep and check each pair of
        sparse matrices has the same shape.
        """
        dataFilename = PathDefaults.getDataDir() + "reference/author_document_count" 
        dataset = Static2IdValDataset(dataFilename)

        trainIterator = dataset.getTrainIteratorFunc()()      
        testIterator = dataset.getTestIteratorFunc()()
        
        for trainX in trainIterator: 
            # iterator.next() is Python 2 only; the builtin next() works on
            # both Python 2 and 3.
            testX = next(testIterator)
            
            print(trainX.shape, trainX.nnz, testX.nnz)
            self.assertEqual(trainX.shape, testX.shape)
示例#33
0
    def testCreateIndicatorLabels(self):
        """
        Check that createIndicatorLabel produces one-hot indicator rows (each
        row sums to one) for the cortisol, testosterone and IGF1 labels, then
        compare the testosterone indicators against the precomputed indicator
        columns stored in the raw data file.
        """
        metaUtils = MetabolomicsUtils()
        X, XStd, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData()
        
        # Drop NaN entries before generating indicator labels for each hormone.
        YCortisol = YCortisol[numpy.logical_not(numpy.isnan(YCortisol))]
        YCortisolIndicators = metaUtils.createIndicatorLabel(YCortisol, metaUtils.boundsDict["Cortisol"])
        
        YTesto = YTesto[numpy.logical_not(numpy.isnan(YTesto))]
        YTestoIndicators = metaUtils.createIndicatorLabel(YTesto, metaUtils.boundsDict["Testosterone"])
        
        YIgf1 = YIgf1[numpy.logical_not(numpy.isnan(YIgf1))]
        YIgf1Indicators = metaUtils.createIndicatorLabel(YIgf1, metaUtils.boundsDict["IGF1"])

        # Each indicator matrix should be one-hot: every row sums to exactly 1.
        s = numpy.sum(YCortisolIndicators, 1)
        nptst.assert_array_equal(s, numpy.ones(s.shape[0]))

        s = numpy.sum(YTestoIndicators, 1)
        nptst.assert_array_equal(s, numpy.ones(s.shape[0]))

        s = numpy.sum(YIgf1Indicators, 1)
        nptst.assert_array_equal(s, numpy.ones(s.shape[0]))

        #Now compare to those labels in the file
        # NOTE(review): this second loadData() call unpacks 7 values whereas
        # the first call above unpacks 8 (it has an extra XStd) — confirm
        # which signature is current, one of the two unpacks looks stale.
        X, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData()
        dataDir = PathDefaults.getDataDir() +  "metabolomic/"
        fileName = dataDir + "data.RMN.total.6.txt"
        data = pandas.read_csv(fileName, delimiter=",") 

        YCortisolIndicators = metaUtils.createIndicatorLabel(YCortisol, metaUtils.boundsDict["Cortisol"])
        YCortisolIndicators2 = numpy.array(data[["Ind.Cortisol.1", "Ind.Cortisol.2", "Ind.Cortisol.3"]])
        
        # NOTE(review): the cortisol comparison below is disabled (assertion
        # commented out) — presumably the file's indicators disagree with the
        # computed ones; verify before re-enabling.
        for i in range(YCortisolIndicators.shape[0]): 
            if not numpy.isnan(YCortisol[i]) and not numpy.isnan(YCortisolIndicators2[i, :]).any(): 
                #nptst.assert_almost_equal(YCortisolIndicators2[i, :], YCortisolIndicators[i, :])
                pass 
        
        YTestoIndicators = metaUtils.createIndicatorLabel(YTesto, metaUtils.boundsDict["Testosterone"])
        YTestoIndicators2 = numpy.array(data[["Ind.Testo.1", "Ind.Testo.2", "Ind.Testo.3"]])
        
        # Only testosterone is actively compared against the file's columns;
        # rows with NaNs in either source are skipped.
        for i in range(YTestoIndicators.shape[0]): 
            if not numpy.isnan(YTesto[i]) and not numpy.isnan(YTestoIndicators2[i, :]).any(): 
                #print(i, YTesto[i])
                nptst.assert_almost_equal(YTestoIndicators2[i, :], YTestoIndicators[i, :])
                
        YIgf1Indicators = metaUtils.createIndicatorLabel(YIgf1, metaUtils.boundsDict["IGF1"])
        YIgf1Indicators2 = numpy.array(data[["Ind.IGF1.1", "Ind.IGF1.2", "Ind.IGF1.3"]])
        
        # NOTE(review): the IGF1 comparison is disabled like the cortisol one.
        for i in range(YIgf1Indicators.shape[0]): 
            if not numpy.isnan(YIgf1[i]) and not numpy.isnan(YIgf1Indicators2[i, :]).any(): 
                #print(i, YIgf1[i])
                #nptst.assert_almost_equal(YIgf1Indicators2[i, :], YIgf1Indicators[i, :])
                pass
示例#34
0
    def testWriteToFile(self):
        """
        Populate a DictGraph with random vertex features and check that
        CsvVertexWriter can write it out without error.
        """
        numVertices = 5
        numFeatures = 3
        V = numpy.random.rand(numVertices, numFeatures)

        graph = DictGraph()
        for index in range(numVertices):
            graph.setVertex(index, V[index, :])

        fileName = PathDefaults.getOutputDir() + "test/vertices"
        writer = CsvVertexWriter()
        writer.writeToFile(fileName, graph)

        logging.debug(V)
    def testSaveLoad(self):
        """
        Save a populated VertexList to a temp file, reload it, and check the
        vertices survive the round trip.  An IOError (e.g. unwritable temp
        directory) is logged and ignored, keeping this a best-effort test.
        """
        try:
            vList = VertexList(self.numVertices, self.numFeatures)
            vList.setVertex(0, numpy.array([1, 2, 3]))
            vList.setVertex(1, numpy.array([4, 5, 6]))
            vList.setVertex(2, numpy.array([7, 8, 9]))

            tempDir = PathDefaults.getTempDir()
            fileName = tempDir + "vList"

            vList.save(fileName)
            vList2 = VertexList.load(fileName)

            self.assertTrue(
                (vList.getVertices() == vList2.getVertices()).all())
        except IOError as e:
            # logging.warn is a deprecated alias of logging.warning; the
            # trailing `pass` was redundant and has been dropped.
            logging.warning(e)
    def testSaveLoad(self):
        """
        Save a GeneralVertexList holding heterogeneous vertex values (string,
        int) to a temp file, reload it, and check every vertex round-trips.
        An IOError is logged and ignored, keeping this a best-effort test.
        """
        try:
            vList = GeneralVertexList(self.numVertices)
            vList.setVertex(0, "abc")
            vList.setVertex(1, 12)
            vList.setVertex(2, "num")

            tempDir = PathDefaults.getTempDir()
            fileName = tempDir + "vList"

            vList.save(fileName)

            vList2 = GeneralVertexList.load(fileName)

            for i in range(self.numVertices):
                # assertEquals is a deprecated alias removed in Python 3.12
                self.assertEqual(vList.getVertex(i), vList2.getVertex(i))
        except IOError as e:
            # logging.warn is a deprecated alias of logging.warning; the
            # trailing `pass` was redundant and has been dropped.
            logging.warning(e)
示例#37
0
    def testWriteToFile(self):
        """
        Write each fixture graph (dense, sparse and dict-backed, both
        undirected and directed) to a Pajek file and rely on the writer to
        raise if anything goes wrong.
        """
        pw = PajekWriter()
        directory = PathDefaults.getOutputDir() + "test/"

        namedGraphs = [
            ("denseTestUndirected", self.dGraph1),
            ("denseTestDirected", self.dGraph2),
            ("sparseTestUndirected", self.sGraph1),
            ("sparseTestDirected", self.sGraph2),
            ("dictTestUndirected", self.dctGraph1),
            ("dictTestDirected", self.dctGraph2),
        ]

        #Have to check the files
        for baseName, graph in namedGraphs:
            pw.writeToFile(directory + baseName, graph)
示例#38
0
    def testWriteToFile2(self):
        """
        Exercise PajekWriter's custom vertex/edge colour and size callbacks:
        set each callback, write the graph, then reset the callback so the
        writer is left in its default state.
        """
        pw = PajekWriter()
        directory = PathDefaults.getOutputDir() + "test/"

        def setVertexColour(vertexIndex, graph):
            colours = ["grey05", "grey10", "grey15", "grey20", "grey25"]
            return colours[vertexIndex]

        def setVertexSize(vertexIndex, graph):
            return vertexIndex

        def setEdgeColour(vertexIndex1, vertexIndex2, graph):
            colours = ["grey05", "grey10", "grey15", "grey20", "grey25"]
            return colours[vertexIndex1]

        def setEdgeSize(vertexIndex1, vertexIndex2, graph):
            return vertexIndex1 + vertexIndex2

        pw.setVertexColourFunction(setVertexColour)
        fileName1 = directory + "vertexColourTest"
        pw.writeToFile(fileName1, self.dGraph1)
        pw.setVertexColourFunction(None)

        pw.setVertexSizeFunction(setVertexSize)
        fileName1 = directory + "vertexSizeTest"
        pw.writeToFile(fileName1, self.dGraph1)
        pw.setVertexSizeFunction(None)

        pw.setEdgeColourFunction(setEdgeColour)
        fileName1 = directory + "edgeColourTest"
        pw.writeToFile(fileName1, self.dGraph1)
        pw.setEdgeColourFunction(None)

        pw.setEdgeSizeFunction(setEdgeSize)
        fileName1 = directory + "edgeSizeTest"
        pw.writeToFile(fileName1, self.dGraph1)
        # Bug fix: this previously called setEdgeColourFunction(None) again
        # (a copy-paste slip), leaving the edge size callback set and leaking
        # state into any writer reuse.
        pw.setEdgeSizeFunction(None)
示例#39
0
def test():
    """
    A function which uses the unittest library to find all tests within apgl (those files
    matching "*Test.py"), and run those tests. In python 2.7 and above the unittest framework
    is used otherwise one needs unittest2 for python 2.3-2.6.
    """
    try:
        import traceback
        import sys
        import os
        import logging
        from apgl.util.PathDefaults import PathDefaults

        # Silence warnings so the test run output stays readable.
        logging.disable(logging.WARNING)
        sourceDir = PathDefaults.getSourceDir()
        print("Running tests from " + sourceDir)

        if getPythonVersion() >= 2.7:
            import unittest
        else:
            import unittest2 as unittest

        suite = unittest.TestSuite()
        # Discover *Test.py modules under each package subdirectory.
        for subDir in ["generator", "graph", "util"]:
            discovered = unittest.defaultTestLoader.discover(
                os.path.join(sourceDir, subDir), pattern='*Test.py', top_level_dir=sourceDir)
            suite.addTest(discovered)

        unittest.TextTestRunner(verbosity=1).run(suite)
    except ImportError as error:
        traceback.print_exc(file=sys.stdout)
示例#40
0
 def setUp(self):
     """Create a file name inside the temp directory for the tests to use."""
     self.fileName = PathDefaults.getTempDir() + "abc"
示例#41
0
    def testReadGraph(self):
        """
        Read a multi-graph from a vertex CSV plus two edge CSVs, converting
        categorical columns to integers, and check the resulting vertex
        features and edge lists for both the undirected and directed cases.
        """
        dir = PathDefaults.getDataDir()
        vertexFileName = dir + "test/deggraf10.csv"
        edgeFileNames = [dir + "test/testEdges1.csv", dir + "test/testEdges2.csv"]

        # Categorical columns are mapped to integer codes via dict lookups.
        genderDict = {'"M"': 0, '"F"': 1}
        orientDict = {'"HT"': 0, '"HB"': 1}
        fteDict = {'"INTER"': 0, '"CONTA"': 1}
        provDict = {'"CH"': 0, '"SC"': 1, '"SS"': 2, '"LH"': 3, '"GM"': 4}

        converters = {3: genderDict.__getitem__, 4: orientDict.__getitem__,
                      5: fteDict.__getitem__, 6: provDict.__getitem__}

        idIndex = 0
        featureIndices = list(range(1, 11))
        multiGraphCsvReader = MultiGraphCsvReader(idIndex, featureIndices, converters)
        sparseMultiGraph = multiGraphCsvReader.readGraph(vertexFileName, edgeFileNames)

        # Expected feature rows for the 10 vertices in deggraf10.csv.
        vertexValues = numpy.array([
            [1986, 32, 0, 0, 0, 0, 0, 3, 3, 1],
            [1986, 27, 0, 0, 0, 1, 0, 4, 4, 1],
            [1986, 20, 0, 0, 0, 1, 0, 1, 1, 0],
            [1986, 20, 0, 0, 0, 1, 0, 2, 2, 0],
            [1986, 20, 0, 0, 0, 2, 0, 5, 5, 0],
            [1986, 28, 0, 0, 0, 3, 0, 1, 1, 1],
            [1986, 26, 1, 0, 1, 3, 6, 1, 1, 1],
            [1986, 35, 0, 0, 0, 2, 0, 0, 0, 0],
            [1986, 37, 0, 1, 0, 3, 0, 5, 3, 0],
            [1986, 40, 0, 1, 0, 4, 0, 3, 3, 0]], numpy.float64)

        #Check if the values of the vertices are correct
        for i in range(sparseMultiGraph.getNumVertices()):
            self.assertTrue((sparseMultiGraph.getVertex(i) == vertexValues[i]).all())

        # Expected (vertex1, vertex2, graphIndex) triples, undirected ordering.
        edges = numpy.array([
            [4, 0, 0],
            [4, 1, 0],
            [5, 3, 0],
            [7, 1, 0],
            [8, 0, 0],
            [4, 1, 1],
            [8, 1, 1],
            [8, 2, 1],
            [8, 4, 1],
            [9, 0, 1]], numpy.float64)

        self.assertTrue((sparseMultiGraph.getAllEdges() == edges).all())

        #Now test directed graphs
        sparseMultiGraph = multiGraphCsvReader.readGraph(vertexFileName, edgeFileNames, False)

        for i in range(sparseMultiGraph.getNumVertices()):
            self.assertTrue((sparseMultiGraph.getVertex(i) == vertexValues[i]).all())

        # Directed ordering keeps the edges as written in the CSV files.
        edges = numpy.array([
            [0, 4, 0],
            [1, 7, 0],
            [3, 5, 0],
            [4, 1, 0],
            [8, 0, 0],
            [0, 9, 1],
            [1, 8, 1],
            [2, 8, 1],
            [4, 1, 1],
            [8, 4, 1]], numpy.float64)

        self.assertTrue((sparseMultiGraph.getAllEdges() == edges).all())
示例#42
0
    def testMDLGraphsReader(self):
        """
        Read two molecule graphs from an MDL fixture file and check their
        vertex/edge counts and every expected bond.  Vertex indices in the
        expected lists are 1-based, matching the MDL file format.
        """
        reader = MDLGraphsReader()
        dir = PathDefaults.getDataDir()
        fileName = dir + "test/testGraphs.mdl"

        graphs = reader.readFromFile(fileName)
        # assertEquals is a deprecated alias removed in Python 3.12
        self.assertEqual(len(graphs), 2)

        #Check the first graph
        self.assertEqual(graphs[0].getNumVertices(), 26)
        self.assertEqual(graphs[0].getNumEdges(), 28)

        expectedEdges0 = [
            (1, 6), (1, 2), (1, 18), (2, 3), (2, 19), (3, 4), (3, 20),
            (4, 10), (4, 5), (5, 6), (5, 7), (6, 21), (7, 8), (7, 22),
            (8, 9), (8, 23), (9, 14), (9, 10), (10, 11), (11, 12), (11, 24),
            (12, 13), (12, 25), (13, 14), (13, 15), (14, 26), (15, 16), (15, 17)]

        # MDL vertex indices are 1-based; the graph's are 0-based.
        for i, j in expectedEdges0:
            self.assertEqual(graphs[0].getEdge(i - 1, j - 1), 1)

        #Check the second graph
        self.assertEqual(graphs[1].getNumVertices(), 19)
        self.assertEqual(graphs[1].getNumEdges(), 20)

        expectedEdges1 = [
            (1, 10), (1, 2), (1, 14), (2, 3), (2, 15), (3, 8), (3, 4),
            (4, 5), (4, 16), (5, 6), (5, 17), (6, 7), (6, 18), (7, 8),
            (8, 9), (9, 10), (9, 11), (10, 19), (11, 12), (11, 13)]

        for i, j in expectedEdges1:
            self.assertEqual(graphs[1].getEdge(i - 1, j - 1), 1)
 def testGetOutputDir(self):
     """Smoke test: getOutputDir() returns a printable value without raising."""
     print(PathDefaults.getOutputDir())
 def testGetProjectDir(self):
     """Smoke test: getSourceDir() returns a printable value without raising."""
     print(PathDefaults.getSourceDir())
 def testGetDataDir(self):
     """Smoke test: getDataDir() returns a printable value without raising."""
     print(PathDefaults.getDataDir())
示例#46
0
    def testReadFromFile(self):
        """
        Read a CSV graph three ways — undirected with vertex features,
        directed with vertex features, and undirected with no features — and
        check the vertices, edges and graph properties of each.
        """
        vertex1Indices = [0, 2, 3, 4, 5]
        vertex2Indices = [1, 6, 7, 8, 9]

        def genderConv(x):
            genderDict = {'"M"': 0, '"F"': 1}
            return genderDict[x]

        def orientConv(x):
            orientDict = {'"HT"': 0, '"HB"': 1}
            return orientDict[x]

        converters = {2: genderConv, 6: genderConv, 3: orientConv, 7: orientConv}

        csvGraphReader = CsvGraphReader(vertex1Indices, vertex2Indices, converters)

        dir = PathDefaults.getDataDir()
        fileName = dir + "test/infect5.csv"

        graph = csvGraphReader.readFromFile(fileName)

        self.assertTrue((graph.getVertex(0) == numpy.array([0, 0, 28, 1])).all())
        self.assertTrue((graph.getVertex(1) == numpy.array([1, 0, 26, 1])).all())
        self.assertTrue((graph.getVertex(2) == numpy.array([0, 1, 42, 2])).all())
        self.assertTrue((graph.getVertex(3) == numpy.array([1, 0, 33, 1])).all())
        self.assertTrue((graph.getVertex(4) == numpy.array([0, 1, 35, 37])).all())

        # assertEqual gives clearer failure messages than assertTrue(x == 1);
        # assertEquals is also a deprecated alias removed in Python 3.12.
        self.assertEqual(graph.getEdge(0, 1), 1)
        self.assertEqual(graph.getEdge(2, 3), 1)
        self.assertEqual(graph.getEdge(4, 6), 1)
        self.assertEqual(graph.getEdge(6, 7), 1)
        self.assertEqual(graph.getEdge(5, 8), 1)

        self.assertEqual(graph.getNumEdges(), 5)
        self.assertTrue(graph.isUndirected())

        #Test a directed graph
        csvGraphReader = CsvGraphReader(vertex1Indices, vertex2Indices, converters, undirected=False)
        graph = csvGraphReader.readFromFile(fileName)

        # Reversed edges must be absent in the directed graph.
        self.assertIsNone(graph.getEdge(1, 0))
        self.assertIsNone(graph.getEdge(3, 2))
        self.assertIsNone(graph.getEdge(6, 4))
        self.assertIsNone(graph.getEdge(7, 6))
        self.assertIsNone(graph.getEdge(8, 5))

        self.assertEqual(graph.getNumEdges(), 5)
        self.assertFalse(graph.isUndirected())

        #Test graph with no vertex information
        vertex1Indices = [0]
        vertex2Indices = [1]
        fileName = dir + "test/infect5-0.csv"
        csvGraphReader = CsvGraphReader(vertex1Indices, vertex2Indices, {})
        graph = csvGraphReader.readFromFile(fileName)

        self.assertEqual(graph.getEdge(0, 1), 1)
        self.assertEqual(graph.getEdge(2, 3), 1)
        self.assertEqual(graph.getEdge(4, 6), 1)
        self.assertEqual(graph.getEdge(6, 7), 1)
        self.assertEqual(graph.getEdge(5, 8), 1)

        self.assertEqual(graph.getNumEdges(), 5)
        self.assertTrue(graph.isUndirected())
        self.assertEqual(graph.getVertexList().getNumFeatures(), 0)