def test_2(self):
    """l19 data: the default NMDS fit must have stress below .13"""
    # 19 samples (rows) x 9 features (columns).
    rows = [
        [7, 1, 0, 0, 0, 0, 0, 0, 0],  # idx 0
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],  # idx 5
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],  # idx 10
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],  # idx 15
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ]
    points = array(rows, 'float')

    # Pairwise Euclidean distances between samples feed the ordination.
    distances = dist_euclidean(points)

    # verbosity=0 keeps the fit silent while the test runs.
    stress = NMDS(distances, verbosity=0).getStress()
    self.assertLessThan(stress, .13)
def test_3(self):
    """l19 data: stress must stay below .13 for every dimension 3..17"""
    # Same 19 x 9 fixture as the default-dimension test.
    rows = [
        [7, 1, 0, 0, 0, 0, 0, 0, 0],  # idx 0
        [4, 2, 0, 0, 0, 1, 0, 0, 0],
        [2, 4, 0, 0, 0, 1, 0, 0, 0],
        [1, 7, 0, 0, 0, 0, 0, 0, 0],
        [0, 8, 0, 0, 0, 0, 0, 0, 0],
        [0, 7, 1, 0, 0, 0, 0, 0, 0],  # idx 5
        [0, 4, 2, 0, 0, 0, 2, 0, 0],
        [0, 2, 4, 0, 0, 0, 1, 0, 0],
        [0, 1, 7, 0, 0, 0, 0, 0, 0],
        [0, 0, 8, 0, 0, 0, 0, 0, 0],
        [0, 0, 7, 1, 0, 0, 0, 0, 0],  # idx 10
        [0, 0, 4, 2, 0, 0, 0, 3, 0],
        [0, 0, 2, 4, 0, 0, 0, 1, 0],
        [0, 0, 1, 7, 0, 0, 0, 0, 0],
        [0, 0, 0, 8, 0, 0, 0, 0, 0],
        [0, 0, 0, 7, 1, 0, 0, 0, 0],  # idx 15
        [0, 0, 0, 4, 2, 0, 0, 0, 4],
        [0, 0, 0, 2, 4, 0, 0, 0, 1],
        [0, 0, 0, 1, 7, 0, 0, 0, 0],
    ]
    points = array(rows, 'float')
    distances = dist_euclidean(points)

    # One independent fit per target dimension; each must meet the bound.
    for n_dims in range(3, 18):
        fitted = NMDS(distances, dimension=n_dims, verbosity=0)
        self.assertLessThan(fitted.getStress(), .13)
# Script section: filter the species table, transform it, then ordinate it.
# Relies on `sp`, `colnames`, `rownames`, `np` and `log_mccune` defined earlier.

print("Removing species with less than two occurrences...")
# Presence/absence view: positive entries become 1, all others keep their value.
sp_io = np.where(sp > 0, 1, sp)
column_sums = np.sum(sp_io, axis=0)
to_remove = np.where(column_sums < 2)
sp = np.delete(sp, to_remove, axis=1)
colnames = np.delete(colnames, to_remove)

print("Removing plots with less than two species...")
# Recomputed on the already column-filtered table.
pl_io = np.where(sp > 0, 1, sp)
row_sums = np.sum(pl_io, axis=1)
to_remove = np.where(row_sums < 2)
sp = np.delete(sp, to_remove, axis=0)
rownames = np.delete(rownames, to_remove)

print("Normalizing species coverage data with McCune logarithm...")
sp = log_mccune(sp)

from cogent.cluster.nmds import NMDS, metaNMDS
from cogent.maths.distance_transform import dist_bray_curtis

print("Calculating distance matrix...")
distmtx = dist_bray_curtis(sp)

# Three-dimensional NMDS; print the fitted coordinates and then the stress.
nmds = NMDS(distmtx, dimension=3)
print(nmds.getPoints())
print(nmds.getStress())
def reduce_similarity_matrix(similarity_matrix):
    """Return NMDS point coordinates for a similarity matrix.

    The similarities are converted to dissimilarities as
    ``1 - similarity_matrix`` and handed to ``NMDS`` with its default
    settings; the result of ``getPoints()`` on that fit is returned.
    """
    embedding = NMDS(1 - similarity_matrix)
    return embedding.getPoints()
def setUp(self):
    """Build the shared 4x4 input matrix and a silent NMDS fit of it."""
    rows = [
        [0, 3, 4, 8],
        [3, 0, 1, 27],
        [4, 1, 0, 3.5],
        [8, 27, 3.5, 0],
    ]
    # 'd' requests double-precision storage for the fixture.
    self.mtx = array(rows, 'd')
    # verbosity=0 keeps the fit quiet during test runs.
    self.nm = NMDS(self.mtx, verbosity=0)
def run(self, tempDistanceMetric=None, iDims=2, strDistanceMatrixFile=None, istrmTree=None, istrmEnvr=None):
    """
    Runs analysis on loaded data.

    :param tempDistanceMetric: The name of the distance metric to use when performing PCoA.
                               None indicates a distance matrix was already given when loading and will be used.
                               Supports "braycurtis","canberra","chebyshev","cityblock","correlation",
                               "cosine","euclidean","hamming","sqeuclidean","unifrac_unweighted","unifrac_weighted"
    :type: String Distance matrix name
    :param iDims: How many dimensions to plot the PCoA graphs.
                  (This can be minimally 2; all combinations of dimensions are plotted).
                  iDims start with 1 (not index-based).
    :type: Integer Positive integer 2 or greater.
    :param strDistanceMatrixFile: If the underlying distance matrix should be output, this is the file to output to.
    :type: String Output file for distances or None for indicating it should not be done.
    :param istrmTree: One of two files needed for unifrac calculations, this is the phylogeny of the features.
    :type: String Path to file
    :param istrmEnvr: One of two files needed for unifrac calculations, this is the environment file for the features.
    :type: String Path to file
    :return boolean: Indicator of success (True); False when the inputs are
                     unusable or the distance matrix could not be generated.
    """
    # Values of 1 or less are ignored and the previously stored dimension is kept.
    if iDims > 1:
        self._iDimensions = iDims

    # If distance metric is None, check to see if the loaded matrix is a distance matrix.
    # If so, run NMDS on the distance matrix.
    # Otherwise return False and do not run.
    if tempDistanceMetric is None:
        if ValidateData.funcIsTrue(self.isRawData):
            print("PCoA:run::Error, no distance metric was specified but the previous load was not of a distance matrix.")
            return False
        elif ValidateData.funcIsFalse(self.isRawData):
            # The loaded matrix lives on the instance; the bare name
            # `dataMatrix` used previously raised NameError here.
            # NOTE(review): this path fits with NMDS's default dimension and
            # does not pass self._iDimensions - confirm that is intended.
            self.pcoa = NMDS(self.dataMatrix, verbosity=0)
            return True

    # Make sure the distance metric was a valid string type
    if not ValidateData.funcIsValidString(tempDistanceMetric):
        print("PCoA:run::Error, distance metric was not a valid string type.")
        return False

    # Supported distances. The branches are mutually exclusive so that a
    # Spearman result is not overwritten by the generic beta-metric call.
    distanceMatrix = None
    if tempDistanceMetric == self.c_SPEARMAN:
        distanceMatrix = Metric().funcGetDissimilarity(ldSampleTaxaAbundancies=self.dataMatrix,
                                                       funcDistanceFunction=lambda u, v: spearmanr(u, v)[0])
    elif tempDistanceMetric in [Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted]:
        # Unifrac works from the tree/environment files and also returns
        # the sample labels, which replace the stored ids.
        distanceMatrix, lsLabels = Metric().funcGetBetaMetric(sMetric=tempDistanceMetric,
                                                              istrmTree=istrmTree,
                                                              istrmEnvr=istrmEnvr)
        self.lsIDs = lsLabels
    else:
        distanceMatrix = Metric().funcGetBetaMetric(npadAbundancies=self.dataMatrix,
                                                    sMetric=tempDistanceMetric)

    if ValidateData.funcIsFalse(distanceMatrix):
        print("PCoA:run::Error, when generating distance matrix.")
        return False

    # Make squareform
    distanceMatrix = squareform(distanceMatrix)

    # Writes distance measures if needed.
    if strDistanceMatrixFile:
        # The context manager guarantees the file is flushed and closed.
        with open(strDistanceMatrixFile, 'w') as fhndlDistance:
            csvrDistance = csv.writer(fhndlDistance)
            if self.lsIDs:
                csvrDistance.writerow(["ID"] + self.lsIDs)
            for iRow, adRow in enumerate(distanceMatrix):
                # Prefix each row with its id when ids are available.
                lsRowId = [self.lsIDs[iRow]] if self.lsIDs else []
                csvrDistance.writerow(lsRowId + adRow.tolist())

    # Never fit with fewer than 2 dimensions.
    self.pcoa = NMDS(distanceMatrix, dimension=max(self._iDimensions, 2), verbosity=0)
    self.strRecentMetric = tempDistanceMetric
    return True
def pcoa_coords(dist_arr):
    """Fit a silent NMDS on ``dist_arr`` and return its points row by row.

    Each row of the fitted points is converted with ``list``. The
    conversion uses ``map``, so the result is a list of lists under
    Python 2 and a lazy ``map`` object under Python 3.
    """
    fitted = NMDS(dist_arr, verbosity=0)
    return map(list, fitted.getPoints())