示例#1
0
def loadSingleTensor(inFilePattern):
    """
    Load a tensor together with its axis and class information.

    Parameters:
    inFilePattern - format string with one positional placeholder; it is
                    filled with "data" to name the tensor file and with
                    "info" to name the shelve file

    Output:
    A tuple (X, axisDict, classDict): the result of sptensor.loadTensor and
    the entries stored under the AXIS and CLASS keys of the shelve file
    """
    X = sptensor.loadTensor(inFilePattern.format("data"))
    tensorInfo = shelve.open(inFilePattern.format("info"), "r")
    try:
        axisDict = tensorInfo[AXIS]
        classDict = tensorInfo[CLASS]
    finally:
        # close the shelf even when one of the keys is missing
        tensorInfo.close()
    return X, axisDict, classDict
示例#2
0
def decomposeCountTensor(filename, R, outerIters=20, innerIters=10, convergeTol=1e-2, zeroTol=1e-4):
    """
    Load the tensor stored in a file and decompose it using CP_APR
    with the specified rank.

    Parameters:
    filename - the file that stores the sparse tensor representation using numpy
    R - the rank of the tensor
    outerIters - the maximum number of outer iterations
    innerIters - the maximum number of inner iterations
    convergeTol - the convergence tolerance
    zeroTol - the amount to zero out the factors

    Output:
    A tuple (Y, iterStats, modelStats): the decomposition after
    normalize_sort(1) and zeroSmallFactors have been applied, followed by
    the two statistics objects exactly as returned by CP_APR.cp_apr
    """
    tensor = sptensor.loadTensor(filename)
    aprResult = CP_APR.cp_apr(tensor, R, tol=convergeTol, maxiters=outerIters, maxinner=innerIters)
    factors, iterStats, modelStats = aprResult
    # 1-norm normalization of the factors, then sort in descending order
    factors.normalize_sort(1)
    return zeroSmallFactors(factors, zeroThr=zeroTol), iterStats, modelStats
## experimental setup
# values taken from the parsed command-line arguments
exptID = args.expt
inFile = args.inputFile
R = args.rank
seed = args.seed
outerIters = args.iterations
# hard-coded settings read by factorTensor (cp_apr inner iterations and
# tolerance, threshold handed to zeroSmallFactors)
innerIters = 10
tol = 1e-2
zeroThr = 1e-10

# NOTE(review): presumably the noise settings for the perturbation runs;
# they are not used anywhere in the visible code -- confirm
noiseParam = 2
noisePercent = [0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5]

# input file and output file
# inFile is a format pattern; filling it with "data" names the tensor file
inputFile = inFile.format("data")
X = sptensor.loadTensor(inputFile)

def factorTensor(X):
    """
    Factor X with CP_APR starting from a fixed random seed.

    Reads the module-level settings seed, R, tol, outerIters, innerIters and
    zeroThr. Returns the decomposition after normalize_sort(1) and
    decompTools.zeroSmallFactors have been applied; the two statistics
    objects returned by cp_apr are discarded.
    """
    # reseed so that every call starts from the same initialization
    np.random.seed(seed)
    cpOptions = dict(tol=tol, maxiters=outerIters, maxinner=innerIters)
    factors, _, _ = CP_APR.cp_apr(X, R, **cpOptions)
    factors.normalize_sort(1)
    return decompTools.zeroSmallFactors(factors, zeroThr=zeroThr)

# announce the run (Python 2 print statement)
print "Starting Tensor Factorization with ID:{0}".format(exptID)
# compute the base comparison
baseTF = factorTensor(X)
# number of entries in X.vals -- presumably the stored nonzero values; confirm
totNonzero = len(X.vals)

# results file named after the experiment ID
# NOTE(review): the handle is not closed anywhere in the visible code
outfile = open("results/perturb-{0}.json".format(exptID), 'w') 
示例#4
0
                    help="number of bootstrap samples",
                    default=10)
args = parser.parse_args()

# run configuration taken from the command line
inputFile = args.inputFile
nSample = args.sample
# NOTE(review): nSample is folded into exptID here and formatted into
# predFile again below -- confirm that both are intended
exptID = args.expt + nSample
totSamples = args.bootstrap
testSize = args.testSize
# seed and innerIter are hard-coded; outerIter and R come from the command line
seed = 10
innerIter = 10
outerIter = args.iter
R = args.rank
zeroThr = 1e-4

# inputFile is a format pattern: "data" names the tensor file, "info" the
# shelve file that is read for the axis and class information
X = sptensor.loadTensor(inputFile.format("data"))
yaxis = decompTools.loadAxisInfo(inputFile.format("info"))
tensorInfo = shelve.open(inputFile.format("info"), "r")
# the "class" entry of the shelf, converted to an integer array
Y = np.array(tensorInfo["class"], dtype='int')
tensorInfo.close()

# every pairing of an entry of yaxis[1] with an entry of yaxis[2]
diagMed = [[a, b] for a, b in itertools.product(yaxis[1], yaxis[2])]

predFile = "results/pred-metric-{0}-{1}.csv".format(exptID, nSample)

# splitter built from the labels Y: totSamples iterations, test_size taken
# from the command line, fixed random_state
ttss = StratifiedShuffleSplit(Y,
                              n_iter=totSamples,
                              test_size=testSize,
                              random_state=seed)
print "Starting Tensor Prediction with ID:{0}".format(exptID)
# NOTE(review): n is initialised here; its use is not in the visible code
n = 0
# remaining command-line arguments (parser is created outside the visible code)
parser.add_argument("rank", type=int, help="rank of the decomposition")
parser.add_argument("iter", type=int, help="the number of iterations")
parser.add_argument("-t", "--testSize", type=float, help="test size", default=0.5)
args = parser.parse_args()

rank = args.rank
# NOTE(review): this module-level name shadows the builtin iter()
iter = args.iter
inputFile = args.inputFile
exptID = args.exptID
patientSet = args.patientSet
# SQL script path, named after the experiment ID and the rank
outsql = "results/pred-model-{0}-{1}.sql".format(exptID, rank)

print "Using Rank {0} and iterations {1} and test size {2}".format(rank, iter, args.testSize)

## Load information to run the tests
# inputFile is a format pattern: "data" names the tensor file, "info" the
# shelve file holding the "class" and "axis" entries
X = sptensor.loadTensor(inputFile.format("data"))
tensorInfo = shelve.open(inputFile.format("info"), "r")
Y = tensorInfo["class"]
XAxis = tensorInfo["axis"]
tensorInfo.close()
Y = np.array(Y, dtype=int)
pm = predictionModel.predictionModel(X, XAxis, Y, rank, testSize=args.testSize, outerIter=iter)
output = pm.evaluatePrediction()
# prepend the experiment ID as the first column of every result row
output = np.column_stack((np.repeat(exptID, output.shape[0]), output))
outputFile = "results/pred-model-{0}-{1}.csv".format(exptID, rank)
np.savetxt(outputFile, output, delimiter=",")

# Write the SQL script: one statement that loads the CSV results and one that
# records this model run. open() replaces the Python 2-only file()
# constructor, and the with-block closes the script even if a write fails.
# NOTE(review): the values are interpolated into the SQL text unescaped.
with open(outsql, "w") as sqlOut:
    sqlOut.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel, insert into joyceho.predictive_results;\n".format(outputFile))
    sqlOut.write("insert into joyceho.predictive_models values({0}, {1}, \'{2}\',{3}, {4});\n".format(exptID, rank, patientSet, iter, 10))
## experimental setup
# values taken from the parsed command-line arguments
exptID = args.expt
inFile = args.inputFile
R = args.rank
seed = args.seed
outerIters = args.iterations
# hard-coded settings read by factorTensor (cp_apr inner iterations and
# tolerance, threshold handed to zeroSmallFactors)
innerIters = 10
tol = 1e-2
zeroThr = 1e-10

# NOTE(review): presumably the noise settings for the perturbation runs;
# they are not used anywhere in the visible code -- confirm
noiseParam = 2
noisePercent = [0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5]

# input file and output file
# inFile is a format pattern; filling it with "data" names the tensor file
inputFile = inFile.format("data")
X = sptensor.loadTensor(inputFile)


def factorTensor(X):
    """
    Factor X with CP_APR starting from a fixed random seed.

    Reads the module-level settings seed, R, tol, outerIters, innerIters and
    zeroThr. Returns the decomposition after normalize_sort(1) and
    decompTools.zeroSmallFactors have been applied; the two statistics
    objects returned by cp_apr are discarded.
    """
    # reseed so that every call starts from the same initialization
    np.random.seed(seed)
    cpOptions = dict(tol=tol, maxiters=outerIters, maxinner=innerIters)
    factors, _, _ = CP_APR.cp_apr(X, R, **cpOptions)
    factors.normalize_sort(1)
    return decompTools.zeroSmallFactors(factors, zeroThr=zeroThr)

示例#7
0
import CP_APR
import sptensor

# command line: input tensor file, profile output file, and optional rank and
# outer-iteration count (both default to 100)
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help="input file to parse")
parser.add_argument("outputFile", help="output file for profile information")

parser.add_argument("-r",
                    "--rank",
                    type=int,
                    help="rank of factorization",
                    default=100)
parser.add_argument("-i",
                    "--iters",
                    type=int,
                    help="Number of outer interations",
                    default=100)
args = parser.parse_args()

########## Profile tensor factorization ###############
X = sptensor.loadTensor(args.inputFile)
## Profile
outputFile = args.outputFile
# The statement is handed to cProfile as a string, so it refers to X and
# CP_APR by name; rank and iteration count are formatted into the text.
# The profile data is saved to outputFile.
cProfile.run(
    "CP_APR.cp_apr(X,R={0},tol=1e-2, maxiters={1}, maxinner=10)".format(
        args.rank, args.iters),
    filename=outputFile)

# reload the saved profile and print it sorted by time
p = pstats.Stats(outputFile)
p.sort_stats('time').print_stats()
示例#8
0
# benchmark settings: rank / number of components, iteration limit, and
# number of timing repetitions
R = 50
iters=70
samples=10

pcaModel = RandomizedPCA(n_components=R)
# one all-zero seed row with six columns; timing rows are stacked below it
stats = np.zeros((1, 6))

parser = argparse.ArgumentParser()
parser.add_argument("-i", "--infile", help="input file", default='data/hf-tensor-level1-data.dat')
# no type is given, so a value passed on the command line arrives as a string
# while the default stays the integer 0
parser.add_argument("-e", "--exptID", help="experiment", default=0)
args = parser.parse_args()

# Load the original data
filename = args.infile
X = sptensor.loadTensor(filename)
pn = args.exptID

xprime = X
flatX = sptenmat.sptenmat(xprime, [0]).tocsrmat() # matricize along the first mode
# NOTE(review): stats was already initialised identically above
stats = np.zeros((1,6))

## NMF Timing
# time `samples` independent NMF runs on the matricized tensor
for k in range(samples):
    startTime = time.time()
    nmfModel = nimfa.mf(flatX, method="nmf", max_iter=iters, rank=R)
    nmfResult = nimfa.mf_run(nmfModel)
    # wall-clock seconds for building and running the model
    elapsed = time.time() - startTime
    # one row per run: rank, iterations, experiment ID, run index, method, seconds
    # NOTE(review): the row contains the string "NMF", so stats presumably
    # becomes a string array after the first append -- confirm downstream use
    stats = np.vstack((stats, np.array([R, iters, pn, k, "NMF", elapsed])))
    
## PCA Timing
    -----------------------
    PF : the factor matrix where rows is patients and the column are factor values
    axis : the axis label of patients PIDs
    
    """
    factors = pf.shape[1]  # the number of columns
    rows = pf.shape[0]
    idx = np.flatnonzero(pf[:, 0])
    dbOut = np.column_stack((axis[idx], np.repeat(0, len(idx)), pf[idx, 0]))
    for col in range(1, factors):
        idx = np.flatnonzero(pf[:, col])
        dbOut = np.vstack((dbOut, np.column_stack((axis[idx], np.repeat(col, len(idx)), pf[idx, col]))))
    return dbOut


# Load the reference tensor and its axis information; inputFile is a format
# pattern filled with the index 0 and the "data" / "info" suffix.
refX = sptensor.loadTensor(inputFile.format(0, "data"))
refAxis = decompTools.loadAxisInfo(inputFile.format(0, "info"))
## Find the factors for the first one
klp, M, mstats = findFactors(refX, R=rank, outerIter=outerIter, innerIter=10)

## Store off the factors to be loaded into a database
M.writeRawFile(MrawFile)
Mout = decompTools.getDBOutput(M, refAxis)
# prepend the experiment ID as the first column of every row
Mout = np.column_stack((np.repeat(exptID, Mout.shape[0]), Mout))
# Write the table built above. Passing Yout here left Mout unused and saved
# a different variable under the Moutfile name.
np.savetxt(Moutfile, Mout, fmt="%s", delimiter="|")

sqlOut = file(Ysqlfile, "w")
## write the factors and the models into the database
sqlOut.write(
    "load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_factors;\n".format(
        Youtfile
示例#10
0
import argparse
import cProfile
import pstats

import CP_APR
import sptensor

# command line: input tensor file, profile output file, and optional rank and
# outer-iteration count (both default to 100)
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help="input file to parse")
parser.add_argument("outputFile", help="output file for profile information")

parser.add_argument("-r", "--rank", type=int, help="rank of factorization", default=100)
parser.add_argument("-i", "--iters", type=int, help="Number of outer interations", default=100)
args = parser.parse_args()

########## Profile tensor factorization ###############
X = sptensor.loadTensor(args.inputFile)
## Profile
outputFile = args.outputFile
# The statement is handed to cProfile as a string, so it refers to X and
# CP_APR by name; rank and iteration count are formatted into the text.
# The profile data is saved to outputFile.
cProfile.run("CP_APR.cp_apr(X,R={0},tol=1e-2, maxiters={1}, maxinner=10)".format(args.rank, args.iters), filename=outputFile)

# reload the saved profile and print it sorted by time
p = pstats.Stats(outputFile)
p.sort_stats('time').print_stats()
示例#11
0
    
    """
    factors = pf.shape[1]  # the number of columns
    rows = pf.shape[0]
    idx = np.flatnonzero(pf[:, 0])
    dbOut = np.column_stack((axis[idx], np.repeat(0, len(idx)), pf[idx, 0]))
    for col in range(1, factors):
        idx = np.flatnonzero(pf[:, col])
        dbOut = np.vstack(
            (dbOut,
             np.column_stack((axis[idx], np.repeat(col, len(idx)), pf[idx,
                                                                      col]))))
    return dbOut


# Load the reference tensor and its axis information; inputFile is a format
# pattern filled with the index 0 and the "data" / "info" suffix.
refX = sptensor.loadTensor(inputFile.format(0, "data"))
refAxis = decompTools.loadAxisInfo(inputFile.format(0, "info"))
## Find the factors for the first one
klp, M, mstats = findFactors(refX, R=rank, outerIter=outerIter, innerIter=10)

## Store off the factors to be loaded into a database
M.writeRawFile(MrawFile)
Mout = decompTools.getDBOutput(M, refAxis)
# prepend the experiment ID as the first column of every row
Mout = np.column_stack((np.repeat(exptID, Mout.shape[0]), Mout))
# Write the table built above. Passing Yout here left Mout unused and saved
# a different variable under the Moutfile name.
np.savetxt(Moutfile, Mout, fmt="%s", delimiter="|")

# open() replaces the Python 2-only file() constructor. The handle is left
# open on purpose: any later writes and the close are not in the visible code.
sqlOut = open(Ysqlfile, "w")
## write the factors and the models into the database
# NOTE(review): the factor table is saved under Moutfile, but this statement
# loads Youtfile -- confirm which file is meant
sqlOut.write(
    "load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_factors;\n"
    .format(Youtfile))
示例#12
0
# optional arguments (parser is created outside the visible code)
parser.add_argument("-r", "--rank", type=int, help="rank of factorization", default=40)
parser.add_argument("-s", "--seed", type=int, help="random seed", default=0)
parser.add_argument("-i", "--iterations", type=int, help="Number of outer interations", default=70)
args = parser.parse_args()

R = args.rank
seed = args.seed
iters = args.iterations
filename = args.inputFile
exptID = args.expt

# hard-coded settings
# NOTE(review): patThresh and modeThr look like thresholds; their use is not
# in the visible code
innerIter = 10
patThresh = 1e-50
modeThr = 1e-2

# filename is a format pattern: "data" names the tensor file, "info" the file
# read for the axis information
X = sptensor.loadTensor(filename.format("data"))
yaxis = decompTools.loadAxisInfo(filename.format("info"))

## calculate diagnosis-medication combination
# every pairing of an entry of yaxis[1] with an entry of yaxis[2]
diagMed = [[a, b] for a, b in itertools.product(yaxis[1], yaxis[2])] 

def getDBEntry(featureName, m):
    """
    Build the database rows for the nonzero entries of a factor matrix.

    For each of the first R columns of m, every nonzero entry produces one
    row: experiment ID, featureName, the diagMed pair at that row index, the
    column index, and the entry's value.

    Parameters:
    featureName - label repeated in the second column of every row
    m - 2-D factor matrix; row i is assumed to line up with diagMed[i]
        (TODO confirm against the caller)

    Output:
    2-D array with one row per nonzero entry, laid out as described above

    Relies on the module-level R, diagMed and exptID.
    """
    # convert the pair list once instead of on every pass through the loop
    pairs = np.array(diagMed)
    # The empty float seed keeps the dtype promotion, and the result when
    # R == 0, the same as stacking onto a zero row and deleting it afterwards.
    blocks = [np.zeros((0, 4))]
    for r in range(R):
        # get the nonzero indices
        idx = np.flatnonzero(m[:, r])
        blocks.append(np.column_stack((pairs[idx], np.repeat(r, len(idx)), m[idx, r])))
    # a single stack avoids re-copying the accumulated rows on every iteration
    output = np.vstack(blocks)
    nRows = output.shape[0]
    return np.column_stack((np.repeat(exptID, nRows), np.repeat(featureName, nRows), output))