示例#1
0
    files.sort()
    for i in xrange(len(files)):
        filename = files[i]
        data = np.loadtxt(filename)
        pwo = removeoutliers(data)
        postfix = filename[-4:]
        fileout = filename[:-4] + "_wo_outliers" + postfix
        np.savetxt(os.path.basename(fileout), pwo)
        print "wo outliers saved: %s" % os.path.basename(fileout)

        # Calculate the avg and std for the last file.
        # Use that information to convert the z-scores back to the original scores.
        if i == len(files) - 1:
            writeoutStdAndAvg(pwo)

        zscoredata = Utils.zscore(pwo)

        fileout = filename[:-4] + "_zscore_wo_outliers" + postfix
        np.savetxt(os.path.basename(fileout), zscoredata)
        print "wo outliers saved: %s" % os.path.basename(fileout)

        if i == 0:  # store the points from first file, for centroid generation
            print zscoredata
            firstpoints = zscoredata

        k = 3  # 3 features
        initialc = np.array(random.sample(zscoredata, k))
        fileoutcentroids = filename[:-4] + "_zscore_wo_outliers.centroids" + postfix
        np.savetxt(os.path.basename(fileoutcentroids), initialc)
        print "wo outliers saved: %s" % os.path.basename(fileout)
    #
示例#2
0
def removeoutliers(points):
    """Drop every row of *points* that contains an outlier value.

    The data is z-scored with ``Utils.zscore`` and a row is discarded when
    the absolute z-score of any of its entries exceeds 3.  The total number
    of points and the number of removed rows are printed.

    Returns the rows of *points* that were kept, with their original
    (un-normalized) values.

    NOTE(review): the ``.any(1)`` reduction and boolean-mask indexing imply
    *points* is a 2-D NumPy array -- confirm against callers.
    """
    total = len(points)
    # A parenthesized single-argument print emits the same text under the
    # Python 2 print statement used elsewhere in this file.
    print("total number of points: %s" % total)

    # The z-scores are only used to locate outliers; raw values are returned.
    zscores = Utils.zscore(points)
    is_outlier_row = (np.abs(zscores) > 3).any(1)
    kept = points[~is_outlier_row]

    print("removed : %s" % (total - len(kept)))
    return kept
            for k,v in d.iteritems():
    #            filebyvalues.write('%s %s\n' % (str(k), ' '.join(map(str, v))))
                filebyvalues.write('%s\n' % ' '.join(map(str, v)))
    #            i += 1
    #            if i == 10:
    #                break
        
    
        print 'done.'
    print 'Alle done!'
    
    

    #Normalize files and pick random distinct initial centroids
    files = glob.glob(filespath + '.dat')
    for filename in files:
        data = np.loadtxt(filename)
        zscoredata = Utils.zscore(data)
        postfix = filename[-4:]     
        
        fileout = filename[:-4] + '_zscore' + postfix
        np.savetxt(fileout, zscoredata)
        
        k = 3 #3 features
        centroids = Utils.getInitialMeans(zscoredata, k)
        fileoutcentroids = filename[:-4] + '_zscore.centroids' + postfix
        np.savetxt(fileoutcentroids, centroids)
        print 'zscore saved: %s' % fileout