Example #1
def eigenbase (h1, d1, E, v0, pow0, pow1, rest):
  # compute all eigenvalues and eigenvectors
  pt0 = 'out/impacting-bar/MK_%g_%g_%g_%g_%d_%d'%(h1, d1, E, v0, pow0, pow1)
  sl0 = SOLFEC ('DYNAMIC', 1E-3, pt0)
  bl0 = BULK_MATERIAL (sl0, model = 'KIRCHHOFF', young = E, poisson = PoissonRatio, density = MassDensity)
  bod = BODY (sl0, 'FINITE_ELEMENT', COPY (mesh), bl0)
  eval = [] # selected eigenvalue list
  evec = [] # selected eigenvector list (BODY command takes a tuple (eval, evec) argument for the RO formulation)
  vsel = (0,1,2,3,4,5,13,18,25,33,38)
  if 0:
    BODY_MM_EXPORT (bod, pt0+'/M.mtx', pt0+'/K.mtx')
    M = mmread (pt0+'/M.mtx').todense()
    K = mmread (pt0+'/K.mtx').todense()
    for j in range (0, K.shape[1]):
      for i in range (j+1, K.shape[0]):
        K [j, i] = K [i, j] # above diagonal = below diagonal
    x, y = eigh (K, M) # this produces y.T M y = 1 and y.T K y = x
    for j in vsel:
      eval.append (x[j].real)
      for z in y[:,j]:
        evec.append (z.real)
  else:
    data0 = MODAL_ANALYSIS (bod, 45, pt0 + '/modal.data', verbose = 'ON', abstol = 1E-14)
    ndofs = mesh.nnod * 3
    for j in vsel:
      eval.append (data0[0][j])
      for k in range (j*ndofs,(j+1)*ndofs):
        evec.append (data0[1][k])
  return (eval, evec)
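A quick standalone check (not part of the original example) of the scipy.linalg.eigh convention relied on in the comment above, namely that the generalized solve returns mass-normalized eigenvectors with y.T M y = I and y.T K y = diag(x); small random matrices are used purely for illustration:

import numpy as np
from scipy.linalg import eigh

rng = np.random.RandomState(0)
K = rng.rand(5, 5); K = K + K.T          # symmetric stiffness-like matrix
M = np.diag(rng.rand(5) + 1.0)           # positive definite mass-like matrix

x, y = eigh(K, M)                        # generalized problem K y = x M y
print(np.allclose(y.T.dot(M).dot(y), np.eye(5)))   # True: y.T M y = I
print(np.allclose(y.T.dot(K).dot(y), np.diag(x)))  # True: y.T K y = diag(x)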
Example #2
def get_debug(data):
    full_train = sio.mmread('data/%s_train.mtx' % data).tocsr()
    (nu, nm) = full_train.shape

    print 'sampling'
    debug_mids = sample(range(nm), nm / 5)
    debug_uids = sample(range(nu), nu / 5)

    debug = full_train[debug_uids][:, debug_mids].tocoo()
    nr = debug.nnz
    train_ids, _, test_ids = sample_split(nr)

    # build matrix from given indices
    print 'writing debug_train'
    debug_train = coo_matrix(
        (debug.data[train_ids], (debug.row[train_ids], debug.col[train_ids])), debug.shape)
    sio.mmwrite('data/%s_debug_train.mtx' % data, debug_train)
    print 'writing debug_test'
    debug_test = coo_matrix(
        (debug.data[test_ids], (debug.row[test_ids], debug.col[test_ids])), debug.shape)
    sio.mmwrite('data/%s_debug_test.mtx' % data, debug_test)

    # build movie mtx from debug_mids
    print 'movie debug'
    movies = sio.mmread('data/movies.mtx').tocsr()
    movies_debug = movies[debug_mids]
    sio.mmwrite('data/movies_%s_debug.mtx' % data, movies_debug)

    return debug, debug_train, debug_test, movies_debug
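The helper sample_split used above is not shown in this excerpt. A hypothetical sketch of a splitter with the same calling convention (three index arrays, the middle one discarded by the caller); the 60/20/20 proportions are an assumption, not taken from the original:

import numpy as np

def sample_split(n, train_frac=0.6, valid_frac=0.2, seed=0):
    # shuffle 0..n-1 and cut it into train / valid / test index arrays
    rng = np.random.RandomState(seed)
    perm = rng.permutation(n)
    n_train = int(train_frac * n)
    n_valid = int(valid_frac * n)
    return perm[:n_train], perm[n_train:n_train + n_valid], perm[n_train + n_valid:]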
def read_input_tensor(headers_filename, data_file_names, tensor_slices, adjustDim=False, offerString="Attr: OFFER",
                      wantString="Attr: WANT"):

    #load the header file
    _log.info("Read header input file: " + headers_filename)
    input = codecs.open(headers_filename,'r',encoding='utf8')
    headers = input.read().splitlines()
    input.close()

    # get the largest dimension of all slices
    if adjustDim:
        maxDim = 0
        for data_file in data_file_names:
            matrix = mmread(data_file)
            if maxDim < matrix.shape[0]:
                maxDim = matrix.shape[0]
            if maxDim < matrix.shape[1]:
                maxDim = matrix.shape[1]

    # load the data files
    slice = 0
    tensor = SparseTensor(headers, offerString, wantString)
    for data_file in data_file_names:
        if adjustDim:
            adjusted = adjust_mm_dimension(data_file, maxDim)
            if adjusted:
                _log.warn("Adujst dimension to (%d,%d) of matrix file: %s" % (maxDim, maxDim, data_file))
        _log.info("Read as slice %d the data input file: %s" % (slice, data_file))
        matrix = mmread(data_file)
        tensor.addSliceMatrix(matrix, tensor_slices[slice])
        slice = slice + 1
    return tensor
def applySVMWithPCA():
    '''
    Same as the previous function, just with different file names.
    '''
    data = io.mmread(ROOTDIR+"TRAINDATA.mtx")
    label = np.load(ROOTDIR+"label_train.npy")
    testdata = io.mmread(ROOTDIR+"TESTDATA.mtx")
    testLabel = np.load(ROOTDIR + "label_test.npy")
    
    linear_svm = LinearSVC(C=1.0, class_weight=None, loss='hinge', dual=True, fit_intercept=True,
    intercept_scaling=1, multi_class='ovr', penalty='l2',
    random_state=None, tol=0.0001, verbose=1, max_iter=2000)
     
    data = scale(data, with_mean=False)
     
    linear_svm.fit(data, label)
    joblib.dump(linear_svm, ROOTDIR+'originalTrain_hinge_2000.pkl') 
#     linear_svm = joblib.load(ROOTDIR+'originalTrain_hinge_2000.pkl')
    
    print 'Training Done!'
    scr = linear_svm.score(data, label)
    print 'accuracy on the training set is:' + str(scr)

    predLabel = linear_svm.predict(data)
    calcualteRMSE(label, predLabel)
    
    scr = linear_svm.score(testdata, testLabel)
    print 'accuracy on the testing set is:' + str(scr)

    predLabel = linear_svm.predict(testdata)
    calcualteRMSE(testLabel, predLabel)      
Example #5
def load(ppt, samples, l_tau, l_lc, l_regtype, b_tau, b_lc, b_regtype):

    ln = np.loadtxt('lin-models/bestlinwtln'+l_regtype+samples+'tau'+l_tau+'lc'+l_lc+ppt+'.txt')
    lv = np.loadtxt('lin-models/bestlinwtlv'+l_regtype+samples+'tau'+l_tau+'lc'+l_lc+ppt+'.txt')
    bv = np.loadtxt('bil-models/bestbilwtbn'+b_regtype+samples+'tau'+b_tau+'eta'+b_lc+ppt+'.txt')
    bn = np.loadtxt('bil-models/bestbilwtbv'+b_regtype+samples+'tau'+b_tau+'eta'+b_lc+ppt+'.txt')

    traindata = [(d.strip().split()[1:5], d.strip().split()[5]) for d in open('clean/cleantrain.txt')]
    devdata = [(d.strip().split()[1:5], d.strip().split()[5]) for d in open('clean/cleandev.txt')]
    testdata = [(d.strip().split()[1:5], d.strip().split()[5]) for d in open('clean/cleantest.txt')]

    traindata = traindata[:int(samples)]

    phih = sio.mmread('clean/trh1k.mtx')
    phim = sio.mmread('clean/trm1k.mtx')
    phidh = sio.mmread('clean/devh1k.mtx')
    phidm = sio.mmread('clean/devm1k.mtx')
    maph = np.loadtxt('clean/forhead.txt', dtype=str)
    mapm = np.loadtxt('clean/formod.txt', dtype=str)
    mapdh = np.loadtxt('clean/devheads.txt', dtype=str)
    mapdm = np.loadtxt('clean/devmods.txt', dtype=str)


    trainingdat = bilme.BilinearMaxentFeatEncoding.train(traindata, phih, phim, maph, mapm, pptype=ppt)
    traintoks = trainingdat.train_toks()
    traintokens = [(co.word_features(t),l) for t,l in trainingdat.train_toks()]
    devencode = bilme.BilinearMaxentFeatEncoding.train(devdata, phidh, phidm, mapdh, mapdm, pptype=ppt)
    devtoks = devencode.train_toks()
    devtokens = [(co.word_features(t),l) for t,l in devencode.train_toks()]

    data = [devtoks, devtokens]

    trlinencoding = maxent.BinaryMaxentFeatureEncoding.train(traintokens)

    return trlinencoding, devencode, [ln, lv], [bn, bv], data
Example #6
def generate_valid_repos_and_times(dataset_dir):
    """Function called to generate VALID_REPOS_AND_TIMES in `dataset_dir`
    """
    valid_repos_and_times = []

    repos_users_times_fn = join(dataset_dir, TIMED_INTERESTS_FN)
    u_r_t = mmread(repos_users_times_fn).transpose().tocsr()

    validation_repos_fn = join(dataset_dir, VALIDATING_FN)
    validation_matrix = mmread(validation_repos_fn).tocsr()

    v_u_r_t = u_r_t.multiply(validation_matrix).tolil()

    for uidx in xrange(v_u_r_t.shape[0]):
        v_r_t_coo = v_u_r_t.getrowview(uidx).tocoo()
        sorted_index = np.argsort(v_r_t_coo.data)

        times = v_r_t_coo.data[sorted_index]
        repos = v_r_t_coo.col[sorted_index]
        valid_repos_and_times.append(np.vstack((times,repos)))

    pt_fn = join(dataset_dir, VALID_REPOS_AND_TIMES)
    with open(pt_fn, "wb") as pf:
        cPickle.dump(valid_repos_and_times, pf, cPickle.HIGHEST_PROTOCOL)
    return pt_fn
Example #7
def ro0_modal_base (use_scipy=False, verbose='OFF'):
  sol = ro0_model (1E-3, 0.0)
  bod = sol.bodies[0]
  eval = [] # selected eigenvalue list
  evec = [] # selected eigenvector list
  vsel = (0,1,2,3,4,5,13,18,25,33,38)
  if use_scipy:
    BODY_MM_EXPORT (bod, 'out/reduced-order0/M.mtx',
                         'out/reduced-order0/K.mtx')
    M = mmread ('out/reduced-order0/M.mtx').todense()
    K = mmread ('out/reduced-order0/K.mtx').todense()
    for j in range (0, K.shape[1]):
      for i in range (j+1, K.shape[0]):
        K [j, i] = K [i, j] # above diagonal = below diagonal
    x, y = eigh (K, M) # this produces y.T M y = 1 and y.T K y = x
    for j in vsel:
      eval.append (x[j].real)
      for z in y[:,j]:
        evec.append (z.real)
  else:
    data0 = MODAL_ANALYSIS (bod, 45, 'out/reduced-order0/modal',
                            1E-13, 1000, verbose)
    dofs = len(bod.velo)
    for j in vsel:
      eval.append (data0[0][j])
      for k in range (j*dofs,(j+1)*dofs):
        evec.append (data0[1][k])

  return (eval, evec)
def goMusic(K=80,steps=200,resume=False,normalize=True,R=None,V=None,mean_center=False,beta=0.0,betaO=0.0,normalizer=normalizer,doBias=True,every=1,doFactors=True,biasSteps=10):
    #R = mmread("reviews_Musical_Instruments.mtx").tocsr()
    if R is None:
        R = mmread("training.mtx").tocsr().toarray()
    else:
        R = R.toarray()

    if V is None:
        V = mmread("validation.mtx").todok()
    
    mu = np.finfo(float).eps



    if normalize:
        R = normalizer(R,1,0)
        print "normalizing, min/max", R.min(),R.max()

    
    #R = R[0:424,:]
    if not resume:
        P = normalizer(np.random.rand(R.shape[0],K),.1,0)
        Q = normalizer(np.asfortranarray(np.random.rand(K,R.shape[1])),.1,0)

        #bP,bQ = makeAvgBaseline(R)
        #print bP,bQ
        bP = None # np.zeros(R.shape[0])#None
        bQ = None #np.zeros(R.shape[1])#None#(R > 0).mean(axis=0)
        #bP,bQ = makeAvgBaseline(R)
    else:
        P = np.loadtxt("P.txt")
        Q = np.loadtxt("Q.txt")
        bP = np.loadtxt("bP.txt")
        bQ = np.loadtxt("bQ.txt")

    print R.shape,P.shape,Q.shape
    print "starting doFactO"
    #chunkFactO(R,P,Q,K,steps=steps,chunks=1,discard=0)#chunks=800,discard=0)

    #R,P,Q,bP,bQ = factO(R,P,Q,K,steps=steps,discard=0,bP=bP,bQ=bQ,beta=beta,betaO=betaO)
    rmses,maes,errs = [],[],[]

    def validation(P,Q,bP,bQ):
        rmse,mae,err = validate(T=R,V=V,P=P,Q=Q,bP=bP,bQ=bQ)
        rmses.append(rmse)
        maes.append(mae)
        errs.append(err)

    R,P,Q,bP,bQ,t_rmses = sigFactO(R,P,Q,K,bP=bP,bQ=bQ,steps=steps,discard=0.0,beta=beta,betaO=betaO,mean_center=mean_center,doBias=doBias,validate=validation,every=every,doFactors=doFactors,biasSteps=biasSteps)    

    if normalize:
        R = renormalizer(R,1,0,5,0)

    dumparrays(R,P,Q,bP,bQ)


    return t_rmses,rmses,maes,errs
Example #9
def main():

    import os
    import logging
    import subprocess
    from optparse import OptionParser
    import numpy as np
    from scipy.io import mmread

    from mrec import save_recommender
    from mrec.mf.recommender import MatrixFactorizationRecommender
    from filename_conventions import get_modelfile

    logging.basicConfig(level=logging.INFO,format='[%(asctime)s] %(levelname)s: %(message)s')

    parser = OptionParser()
    parser.add_option('--factor_format',dest='factor_format',help='format of factor files tsv | mm (matrixmarket) | npy (numpy array)')
    parser.add_option('--user_factors',dest='user_factors',help='user factors filepath')
    parser.add_option('--item_factors',dest='item_factors',help='item factors filepath')
    parser.add_option('--train',dest='train',help='filepath to training data, just used to apply naming convention to output model saved here')
    parser.add_option('--outdir',dest='outdir',help='directory for output')
    parser.add_option('--description',dest='description',help='optional description of how factors were computed, will be saved with model so it can be output with evaluation results')

    (opts,args) = parser.parse_args()
    if not opts.factor_format or not opts.user_factors or not opts.item_factors \
            or not opts.outdir:
        parser.print_help()
        raise SystemExit

    model = MatrixFactorizationRecommender()

    logging.info('loading factors...')

    if opts.factor_format == 'npy':
        model.U = np.load(opts.user_factors)
        model.V = np.load(opts.item_factors)
    elif opts.factor_format == 'mm':
        model.U = mmread(opts.user_factors)
        model.V = mmread(opts.item_factors)
    elif opts.factor_format == 'tsv':
        model.U = np.loadtxt(opts.user_factors)
        model.V = np.loadtxt(opts.item_factors)
    else:
        raise ValueError('unknown factor format: {0}'.format(opts.factor_format))

    if opts.description:
        model.description = opts.description

    logging.info('saving model...')

    logging.info('creating output directory {0}...'.format(opts.outdir))
    subprocess.check_call(['mkdir','-p',opts.outdir])

    modelfile = get_modelfile(opts.train,opts.outdir)
    save_recommender(model,modelfile)

    logging.info('done')
Example #10
def fit_lightfm_model():
	""" Fit the lightFM model 
	
	returns d_user_pred, list_user, list_coupon
	list_coupon = list of test coupons 
	list_user = list of user ID 
	d_user_pred : key = user, value = predicted ranking of coupons in list_coupon
	"""

	#Load data
	Mui_train = spi.mmread("../Data/Data_translated/biclass_user_item_train_mtrx.mtx")
	uf        = spi.mmread("../Data/Data_translated/user_feat_mtrx.mtx")
	itrf      = spi.mmread("../Data/Data_translated/train_item_feat_mtrx.mtx")
	itef      = spi.mmread("../Data/Data_translated/test_item_feat_mtrx.mtx")
	
	#Print shapes as a check
	print "user_features shape: %s,\nitem train features shape: %s,\nitem test features shape: %s"   % (uf.shape, itrf.shape, itef.shape)
	
	#Load test coupon  and user lists
	cplte       = pd.read_csv("../Data/Data_translated/coupon_list_test_translated.csv")
	ulist       = pd.read_csv("../Data/Data_translated/user_list_translated.csv")
	list_coupon = cplte["COUPON_ID_hash"].values
	list_user   = ulist["USER_ID_hash"].values
	
	#Build model
	no_comp, lr, ep = 10, 0.01, 5
	model = LightFM(no_components=no_comp, learning_rate=lr, loss='warp')
	model.fit_partial(Mui_train, user_features = uf, item_features = itrf, epochs = ep, num_threads = 4, verbose = True)

	test               = sps.csr_matrix((len(list_user), len(list_coupon)), dtype = np.int32)
	no_users, no_items = test.shape
	pid_array          = np.arange(no_items, dtype=np.int32)

	#Create and initialise dict to store predictions
	d_user_pred = {}
	for user in list_user :
		d_user_pred[user] = []
	
	# Loop over users and compute predictions
	for user_id, row in enumerate(test):
		sys.stdout.write("\rProcessing user " + str(user_id)+"/ "+str(len(list_user)))
		sys.stdout.flush()
		uid_array         = np.empty(no_items, dtype=np.int32)
		uid_array.fill(user_id)
		predictions       = model.predict(uid_array, pid_array,user_features = uf, item_features = itef, num_threads=4)
		user              = str(list_user[user_id])
		# apply MinMaxScaler for blending later on
		MMS               = MinMaxScaler()
		pred              = MMS.fit_transform(np.ravel(predictions))
		d_user_pred[user] = pred

	# Pickle the predictions for future_use
	d_pred = {"list_coupon" : list_coupon.tolist(), "d_user_pred" : d_user_pred}
	with open("../Data/Data_translated/d_pred_lightfm.pickle", "w") as f:
		pickle.dump(d_pred, f, protocol = pickle.HIGHEST_PROTOCOL)

	return d_user_pred, list_user, list_coupon
Example #11
def main():
    train_tfidf = sio.mmread(tfidf_train_file)
    test_tfidf = sio.mmread(tfidf_test_file)

    svd = TruncatedSVD(400)
    svd_X_train = svd.fit_transform(train_tfidf)
    svd_X_test = svd.transform(test_tfidf)

    sio.mmwrite('train_tfidf_2013_svd_400_mtx', svd_X_train)
    sio.mmwrite('test_tfidf_svd_400_mtx', svd_X_test)
Example #12
def main():
    FORMAT = '%(asctime)s %(levelname)s %(message)s'
    logging.basicConfig(format=FORMAT)
    logging.getLogger().setLevel(logging.INFO)
    args = parse_args()
    lang_map = {i: fn for i, fn in enumerate(sorted(listdir(args.lang_map)))}
    if args.train.endswith('.mtx'):
        mtx = mmread(args.train).todense()
        t_mtx = mmread(args.test).todense()
    else:
        with open(args.train) as stream:
            mtx = np.loadtxt(stream, np.float64)
        with open(args.test) as stream:
            t_mtx = np.loadtxt(stream, np.float64)
    labels = np.ravel(mtx[:, 0])
    test_labels = t_mtx[:, 0]
    test_mtx = t_mtx[:, 1:]
    if args.scale:
        train = scale(mtx[:, 1:], with_mean=False)
    else:
        train = mtx[:, 1:]
    kwargs = {}
    for a in args.params:
        k, v = a.split('=')
        try:
            v = int(v)
        except ValueError:
            pass
        kwargs[k] = v
    r = Representation(args.encoder, args.classifier, **kwargs)
    r.encode(train)
    logging.info('Matrix encoded')
    r.train_classifier(labels)
    logging.info('Model trained')
    acc = 0
    N = 0
    for vec_ in test_mtx:
        vec = np.ravel(vec_)
        cl = r.classify_vector(vec, with_probs=args.with_probs)
        try:
            lab = test_labels[N, 0]
        except IndexError:
            lab = test_labels[N]
        N += 1
        if args.with_probs:
            guess = max(enumerate(cl[0, :]), key=lambda x: x[1])[0]
            print('{0}\t{1}\t{2}'.format('\t'.join(map(str, cl[0, :])), lang_map[guess], lang_map[int(lab)]))
        else:
            try:
                guess = int(cl[0, 0])
            except IndexError:
                guess = int(cl + 0.5)
            print('{0}\t{1}'.format(lang_map[guess], lang_map[int(lab)]))
        if int(guess) == int(lab):
            acc += 1
Example #13
def create_tox21(sparsity_cutoff, validation_fold, dtype=np.float32, download_directory=_DATA_DIRECTORY):
    urlbase = "http://www.bioinf.jku.at/research/deeptox/"
    dst = os.path.join(download_directory, "raw")
    fn_x_tr_d = _download_file(urlbase, "tox21_dense_train.csv.gz", dst)
    fn_x_tr_s = _download_file(urlbase, "tox21_sparse_train.mtx.gz", dst)
    fn_y_tr = _download_file(urlbase, "tox21_labels_train.csv", dst)
    fn_x_te_d = _download_file(urlbase, "tox21_dense_test.csv.gz", dst)
    fn_x_te_s = _download_file(urlbase, "tox21_sparse_test.mtx.gz", dst)
    fn_y_te = _download_file(urlbase, "tox21_labels_test.csv", dst)
    cpd = _download_file(urlbase, "tox21_compoundData.csv", dst)

    y_tr = pd.read_csv(fn_y_tr, index_col=0)
    y_te = pd.read_csv(fn_y_te, index_col=0)
    x_tr_dense = pd.read_csv(fn_x_tr_d, index_col=0).values
    x_te_dense = pd.read_csv(fn_x_te_d, index_col=0).values
    x_tr_sparse = io.mmread(fn_x_tr_s).tocsc()
    x_te_sparse = io.mmread(fn_x_te_s).tocsc()

    # filter out very sparse features
    sparse_col_idx = ((x_tr_sparse > 0).mean(0) >= sparsity_cutoff).A.ravel()
    x_tr_sparse = x_tr_sparse[:, sparse_col_idx].A
    x_te_sparse = x_te_sparse[:, sparse_col_idx].A

    dense_col_idx = np.where(x_tr_dense.var(0) > 1e-6)[0]
    x_tr_dense = x_tr_dense[:, dense_col_idx]
    x_te_dense = x_te_dense[:, dense_col_idx]

    # The validation set consists of those samples whose
    # cross-validation fold equals validation_fold
    info = pd.read_csv(cpd, index_col=0)
    f = info.CVfold[info.set != "test"].values
    idx_va = f == float(validation_fold)

    # normalize features
    from sklearn.preprocessing import StandardScaler

    s = StandardScaler()
    s.fit(x_tr_dense[~idx_va])
    x_tr_dense = s.transform(x_tr_dense)
    x_te_dense = s.transform(x_te_dense)

    x_tr_sparse = np.tanh(x_tr_sparse)
    x_te_sparse = np.tanh(x_te_sparse)

    x_tr = np.hstack([x_tr_dense, x_tr_sparse])
    x_te = np.hstack([x_te_dense, x_te_sparse])

    return (
        x_tr[~idx_va].astype(dtype, order="C"),
        y_tr[~idx_va].values.astype(dtype, order="C"),
        x_tr[idx_va].astype(dtype, order="C"),
        y_tr[idx_va].values.astype(dtype, order="C"),
        x_te.astype(dtype, order="C"),
        y_te.values.astype(dtype, order="C"),
    )
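A tiny illustration (toy data, not Tox21) of the sparsity filter used above: a column is kept only if the fraction of its non-zero entries reaches sparsity_cutoff.

import numpy as np
from scipy import sparse

demo = sparse.csc_matrix(np.array([[0, 1, 0],
                                   [2, 3, 0],
                                   [0, 4, 0]]))
frac_nonzero = (demo > 0).mean(0)         # matrix([[0.33, 1.0, 0.0]]), per column
keep = (frac_nonzero >= 0.5).A.ravel()    # array([False, True, False])
print(demo[:, keep].toarray())            # only the middle column survives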
def validate(trunc = False,T = None,V = None,doRound=False,activation=sigmoid,P=None,Q=None,bP=None,bQ=None):
    if T is None:
        Rtraining = mmread('training.mtx').tocsr()
    else:
        Rtraining = T

    if V is None:
        R = mmread('validation.mtx').todok()
    else:
        R = V.todok()
    mean = (Rtraining.sum()) / (Rtraining > 0).sum()
    if P is None and Q is None and bP is None and bQ is None:
        P,Q,bP,bQ = np.loadtxt("P.txt"),np.loadtxt("Q.txt"),np.loadtxt("bP.txt"),np.loadtxt("bQ.txt")

    print R.shape,P.shape,Q.shape
    i = 0
    sum = 0
    sumAbs = 0
    lte1 = 0
    sumlte1 = 0
    errors = []
    for k,v in R.items():
        g = bP[k[0]] + bQ[k[1]] + np.dot(P[k[0],:],Q[:,k[1]]) 
        #if trunc:
        #    g = min(1,max(5,g))
        #for i in xrange(P.shape[1]):
        #    g += (P[k[0],i]) * (Q[i,k[1]])
        #    
        #    if trunc:
        #        g = max(1,min(g,5))
        g = activation(mean + g)
        g = renormalizefloat(g,1,0,5,0)

        
        if doRound:
            g = round(g)
        e = (v - g)**2
        sumAbs += math.sqrt((v - g)**2)
        errors.append(e)
        if e < 1.00001:
            lte1 += 1
            sumlte1 += e
        sum += e
        #if e > 5:
        #print i,v,g,e
        i+=1
    rmse = math.sqrt(sum/R.nnz)
    mae = sumAbs / R.nnz
    print "rmse",rmse
    print "mae",sumAbs / R.nnz
    print "lte1",lte1,len(R.items()), lte1/float(len(R.items()))
    print "lte1 rmse",math.sqrt((sumlte1 +1) / (lte1+1))
    print "validation mean",mean
    return rmse,mae,np.array(errors)
Example #15
def create_bars (h1, E, frict, damp, formulation):

  # compute all eigenvalues and eigenvectors
  if formulation == 'RO':
    pt0 = 'out/16-bars/MK_%g_%g_%g_%g'%(h1, E, frict, damp)
    sl0 = SOLFEC ('DYNAMIC', 1E-3, pt0)
    bl0 = BULK_MATERIAL (sl0, model = 'KIRCHHOFF', young = E, poisson = PoissonRatio, density = MassDensity)
    bod = BODY (sl0, 'FINITE_ELEMENT', COPY (mesh), bl0)
    eval = [] # selected eigenvalue list
    evec = [] # selected eigenvector list (BODY command takes a tuple (eval, evec) argument for the RO formulation)
    vsel = range (0, 32)

    if 0:
      BODY_MM_EXPORT (bod, pt0+'/M.mtx', pt0+'/K.mtx')
      M = mmread (pt0+'/M.mtx').todense()
      K = mmread (pt0+'/K.mtx').todense()
      for j in range (0, K.shape[1]):
        for i in range (j+1, K.shape[0]):
          K [j, i] = K [i, j] # above diagonal = below diagonal
      x, y = eigh (K, M) # this produces y.T M y = 1 and y.T K y = x
      for j in vsel:
        eval.append (x[j].real)
        for z in y[:,j]:
          evec.append (z.real)
    else:
      data0 = MODAL_ANALYSIS (bod, 45, pt0 + '/modal.data', verbose = 'ON', abstol = 1E-14)
      ndofs = mesh.nnod * 3
      for j in vsel:
        eval.append (data0[0][j])
        for k in range (j*ndofs,(j+1)*ndofs):
          evec.append (data0[1][k])
    data = (eval, evec)

  # 16 bars domain
  sl2 = SOLFEC ('DYNAMIC', h1, 'out/16-bars/%s_%g_%g_%g_%g'%(formulation, h1, E, frict, damp))
  SURFACE_MATERIAL (sl2, model = 'SIGNORINI_COULOMB', friction = frict, restitution = 0.0)
  bl2 = BULK_MATERIAL (sl2, model = 'KIRCHHOFF', young = E, poisson = PoissonRatio, density = MassDensity)
  GRAVITY (sl2, (0, 0, -9.8))
  for i in range (0, nw):
    for j in range (0, nw):
      shp = COPY (mesh)
      TRANSLATE (shp, ((1-nw)*0.05+0.1*i, (1-nw)*0.05+0.1*j, 0))
      if formulation == 'RO':
        bd2 = BODY (sl2, 'FINITE_ELEMENT', shp, bl2, form = formulation, modal = data)
        bd2.scheme = 'DEF_LIM'
        bd2.damping = damp
      elif formulation == 'BC':
        bd2 = BODY (sl2, 'FINITE_ELEMENT', shp, bl2, form = formulation)
        bd2.scheme = 'DEF_LIM'
        bd2.damping = damp
      else: bd2 = BODY (sl2, 'RIGID', shp, bl2)
  BODY (sl2, 'OBSTACLE', COPY (obsm), bl2)

  return sl2
Example #16
 def __init__(self,train_file,test_file):
     """
     Read datasets from the specified files.
     """
     train = mmread(train_file)
     test = mmread(test_file)
     
     train = train.tocsc() 
     test = test.tocsc() 
     
     self.trainXList = [train]
     self.testXList = [test]
Example #17
def load_or_create_matrices():
    try:
        csr_sparse_ing = spio.mmread("csr_sparse_ing.mtx")
    except IOError:
        csr_sparse_ing = create_csr_sparse_ing()

    csr_filtered_ing = []
    for i in np.arange(1, 11):
        try:
            csr_filtered_ing.append(spio.mmread("csr_filtered_ing" + str(i) + ".mtx"))
        except IOError:
            csr_filtered_ing.append(create_filtered_csr_ing(csr_sparse_ing, i))
    return csr_sparse_ing, csr_filtered_ing
Example #18
def main(X_fname, Y_fname, result_fname=None): 
    le = LabelEncoder()
    moves = pandas.read_csv(Y_fname, index_col=0)
    Y = moves.values.ravel()
    Y = le.fit_transform(Y)
    X = io.mmread(X_fname)
    print X.shape, Y.shape, len(le.classes_) 

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33)

    xg_train = xgboost.DMatrix( X_train, label=y_train)
    xg_test = xgboost.DMatrix(X_test, label=y_test)

    param = {}
    # use softmax multi-class classification
    param['objective'] = 'multi:softprob'
    param['eta'] = 0.002
    param['max_depth'] = 7
    param['nthread'] = 7
    param['num_class'] = len(le.classes_)
    param['eval_metric'] = 'merror'
    evals = [ (xg_train, 'train'), (xg_test, 'eval') ]

    # Train xgboost
    print "Training"
    t1 = time.time()
    bst = xgboost.train(param, xg_train, 500, evals, early_stopping_rounds=3)
    t2 = time.time()
    print t2-t1

    if result_fname is None:
        result_fname = str(datetime.now())

    bst.save_model("%s.bst"%result_fname)
Example #19
def main(argv):
    assert len(argv) == 2, "Usage: ./%s NAME.mm" % argv[0]
    mm_filename = argv[1]
    x = mmread(mm_filename)

    w, h = matplotlib.figure.figaspect(x)
    fig = plt.figure(figsize=(w,h))
    ax = fig.add_subplot(111)

    if SORT:
        print "Re-arranging rows."
        xd = x.todense()
        m, n = xd.shape
        for i in range(0,m):
            for j in range(i,m):
                if xd[j].tolist() > xd[i].tolist():
                    swap(xd, i, j)
        x = xd

    ax.spy(x, markersize=1)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

    extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted())
    fig.savefig("%s.pdf" % mm_filename[:-3], bbox_inches=extent)
Example #20
def load_sparse_matrix(input_format,filepath):
    """
    Load a scipy.sparse.csr_matrix from an input file of the specified format.

    Parameters
    ----------
    input_format : str
        Specifies the file format:
        - tsv
        - csv
        - mm  (MatrixMarket)
        - npz (scipy.sparse.csr_matrix serialized with mrec.sparse.savez())
        - fsm (mrec.sparse.fast_sparse_matrix)
    filepath : str
        The file to load.
    """
    if input_format == 'tsv':
        return loadtxt(filepath)
    elif input_format == 'csv':
        return loadtxt(filepath,delimiter=',')
    elif input_format == 'mm':
        return mmread(filepath).tocsr()
    elif input_format == 'npz':
        return loadz(filepath).tocsr()
    elif input_format == 'fsm':
        return fast_sparse_matrix.load(filepath).X
    raise ValueError('unknown input format: {0}'.format(input_format))
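A brief usage sketch for the loader above; the file names are placeholders:

# MatrixMarket input goes through scipy.io.mmread and is returned as CSR
ratings = load_sparse_matrix('mm', 'ratings.mtx')
print(ratings.shape, ratings.nnz)

# tsv/csv input is parsed with the loadtxt helper instead
ratings_tsv = load_sparse_matrix('tsv', 'ratings.tsv')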
Example #21
def get_sample_data(n_sess, full_brain=False, subj=1):
    """
    Download the data for the current session and subject

    Parameters
    ----------
    n_sess: int
        number of session, one of {0, 1, 2, 3, 4}
    subj: int 
        number of subject, one of {1, 2}
    """
    DIR = tempfile.mkdtemp()
    ds = np.DataSource(DIR)
    BASEDIR = 'http://fa.bianp.net/projects/hrf_estimation/data'
    BASEDIR_COMMON = BASEDIR + '/data_common/'
    if full_brain:
        BASEDIR += '/full_brain'
    BASEDIR_SUBJ = BASEDIR + '/data_subj%s/' % subj
    event_matrix = io.mmread(ds.open(
        BASEDIR_COMMON + 'event_matrix.mtx')).toarray()
    print('Downloading BOLD signal')
    voxels = np.load(ds.open(
        BASEDIR_SUBJ + 'voxels_%s.npy' % n_sess))
    # print('Downloading Scatting Stim')
    # scatt_stim = np.load(ds.open(
    #     BASEDIR_SUBJ + 'scatt_stim_%s.npy' % n_sess))

    em = sparse.coo_matrix(event_matrix)
    fir_matrix = utils.convolve_events(event_matrix, np.eye(HRF_LENGTH))
    events_train = sparse.block_diag([event_matrix] * 5).toarray()
    conditions_train = sparse.coo_matrix(events_train).col
    onsets_train = sparse.coo_matrix(events_train).row

    return voxels, conditions_train, onsets_train
Example #22
 def __init__(self, interest_metric, dataset_dir, store_atmost):
     self.interest_metric = interest_metric
     self.dataset_dir = dataset_dir
     self.store_atmost = store_atmost
     self.u_r_t = mmread(join(dataset_dir, TIMED_INTERESTS_FN)).transpose()
     self.prediction_times = cPickle.load(open(join(dataset_dir, PREDICTION_TIMES_FN),"rb"))
     self.NU, self.NR = self.u_r_t.shape
Example #23
def check_buy():
    begin_date = datetime.datetime(2014, 11, 18)
    end_date = datetime.datetime(2014, 12, 17)
    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)

    cf_dir = utils.get_data_dir(utils.FLAG_CF)
    frate_str = "%s/rate_%s_%s" % (cf_dir, begin_date.strftime("%m%d"), end_date.strftime("%m%d"))
    user_ids_list, item_ids_list, user_ids_dict, item_ids_dict = bcbf.compute_user_item_list(frate_str)
    rate_matrix = io.mmread("data")
    rate_matrix = rate_matrix.tolil()
    buy_date = datetime.datetime(2014, 12, 18)
    fbuy_str = "%s/data_buy_%s" % (data_dir, buy_date.strftime("%m%d"))
    count = 0

    with open(fbuy_str) as fin:
        for line in fin:
            cols = line.strip().split(",")
            user = cols[0]
            item = cols[1]
            if item in item_ids_dict and user in user_ids_dict:
                u_ix = user_ids_dict[user]

                i_ix = item_ids_dict[item]
                print >> sys.stdout, "%s,%s,%d" % (user, item, rate_matrix[(u_ix, i_ix)])
            else:
                count += 1
def get_content_similarity_scores(readmes, dataset_dir, profile="tfidf",
                                  similarity="cos"):
    """Return CSR matrix of similarity_{r,r} for all r in `readmes`.

       `dataset_dir`      the directory where the similarity scores are
       `profile`    bool or tfidf
       `similarity` cos or ijd (inverse Jaccard distance)
    """
    if profile == "tfidf":
        sim_fn = join(dataset_dir, TF_IDF_FN)

    if exists(sim_fn):
        return mmread(sim_fn).tocsr()

    if profile == "bool":
        #readme_words = COUNTVECTORIZER readmes
        pass
    else:
        tfidf = TfidfVectorizer(input='file', #sublinear_tf=True,
                                max_df=0.5, stop_words='english',
                                decode_error="ignore")
        #max_df=0.5: if a word occurs in more than half of the readmes it is
        #            ignored
        readme_words = tfidf.fit_transform(readmes)

    if similarity == "cos":
        similarity_scores = csr_matrix(cosine_similarity(readme_words))
    else:
        # similarity_scores = csr_matrix(ijd(readme_words))
        pass

    mmwrite(sim_fn, similarity_scores, comment=profile+"_"+similarity+"_similarity_{r,r}")
    return similarity_scores
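The ijd branch above is stubbed out. One plausible reading of "inverse Jaccard distance" is plain Jaccard similarity on binarized rows (1 minus the Jaccard distance); a hedged sketch of such a helper, not taken from the original code:

import numpy as np

def ijd(X):
    # X: sparse document-term matrix; entries are treated as present/absent
    Xb = (X > 0).astype(int)
    inter = Xb.dot(Xb.T).toarray()              # |A & B| for every pair of rows
    sizes = np.asarray(Xb.sum(axis=1)).ravel()  # |A| per row
    union = sizes[:, None] + sizes[None, :] - inter
    return inter.astype(float) / np.maximum(union, 1)   # Jaccard similarity matrix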
Example #25
def loadMatrix(name):
  name=name.split("/").pop()
  fileName=localRoot+"/"+name+".mtx"
  if os.path.exists(fileName):
    return ios.mmread(fileName).tocsc()
  else:
    print "Matrix not found! " + fileName
Example #26
def compute(directory, qdevel, cdevel, constr_dev):
    qdevel = np.array(sio.mmread(qdevel).todense())
    cdevel = np.array(sio.mmread(cdevel).todense()).transpose()
    constrdevel = np.array(sio.mmread(constr_dev).todense())
    os.chdir(directory)
    for file in glob.glob('*.npy'):
        flname = file.split('.npy')[0]
        rep = np.load(file)
        sio.mmwrite(str(flname)+'.mtx', ss.coo_matrix(rep))
        mat = np.dot(qdevel, np.dot(rep, cdevel))
        score = evalEdge(mat, constrdevel)
        fle = open('new_output.txt', 'a')
        fle.write('file: '+flname+'\t'+str(score)+'\n')
        fle.close()

    return 'Done'
Example #27
def generate_training_validating_rt(version, r_to_i, u_to_i, r_u_t_fn,
                                    split, is_test=False):
    """Function called to generate training.mtx, validating.mtx and
       recommendation_times.npy
    """
    if is_test:
        data_processed_dir = join(PROCESSED_DATA_DIR, "test")
    else:
        data_processed_dir = PROCESSED_DATA_DIR

    u_r_times = mmread(r_u_t_fn).transpose().tolil()
    nu, nr = u_r_times.shape

    training_matrix = lil_matrix((nu,nr), dtype=np.int_)
    validating_matrix = lil_matrix((nu,nr), dtype=np.int_)
    recommendation_times = np.zeros(nu, dtype=np.int_)

    valid_repositories_table = version+"_repositories"
    cursor = getDB(is_test=is_test).cursor()

    for uidx in xrange(nu):
        cursor.execute("""SELECT vr.id
                          FROM repositories as r,
                               {} as vr
                          WHERE vr.id = r.id AND r.owner_id = %s
                       """.format(valid_repositories_table), (u_to_i.r(uidx),))
        owned_rs = np.array([r_to_i[r[0]] for r in cursor])
        interests = u_r_times.getrowview(uidx)
        interested_rs = np.unique(interests.tocoo().col)
        ext_rs = np.setdiff1d(interested_rs, owned_rs, assume_unique=True)
        times = interests[0,ext_rs].toarray()[0]
        sorted_indices = times.argsort()
        threshold = int(floor(split*len(ext_rs)))
        training = [ext_rs[i] for i in sorted_indices[:threshold]]
        threshold_time = times[sorted_indices[threshold]]
        training += [r for r in owned_rs if interests[0,r] < threshold_time]
        validating = [ext_rs[i] for i in sorted_indices[threshold:]]

        for t in training:
            training_matrix[uidx,t] = 1
        for v in validating:
            validating_matrix[uidx,v] = 1
        recommendation_times[uidx] = threshold_time

    comment="""
Training interests are before validating interests.
The split is as follows:
    Training: all internals before the first of the last 1/3 externals + the first 2/3 of externals
    Testing: last 1/3 externals"""

    version_dir = join(data_processed_dir, version)
    tfn = join(version_dir, TRAINING_FN)
    vfn = join(version_dir, VALIDATING_FN)
    rtfn = join(version_dir, RECOMMENDATION_TIMES_FN)

    mmwrite(tfn, training_matrix, comment=comment)
    mmwrite(vfn, validating_matrix, comment=comment)
    np.save(rtfn, recommendation_times)

    return (tfn, vfn, rtfn)
Example #28
def load_rate_data(fin_str,user_ids_dict,item_ids_dict,theta = 0.0):
    # split_date is set to 2014-12-17 23
    """
    split_date = datetime.datetime(2014,12,17,23)
    rate_matrix = sparse.lil_matrix((len(user_ids_dict),len(item_ids_dict)))
    i = 0
    with open(fin_str) as fin:
        for line in fin:
            #userid,itemid,cate,rate,lasttime
            i +=1
            '''
            if i%100 == 0:
                print i
            '''
            cols = line.strip().split(',')

            #cur_date = datetime.datetime.strptime(cols[-1],'%Y-%m-%s %H:%M:%S')
            cur_date = datetime.datetime.strptime(cols[-1],'%Y-%m-%d %H')
            days_delta = (split_date-cur_date).days
            # score with time decay
            rate = int(cols[-2]) * math.exp(-theta * days_delta)
            u_ix = user_ids_dict[cols[0]]
            i_ix = item_ids_dict[cols[1]]
            rate_matrix[u_ix,i_ix] = rate
    io.mmwrite('rate_data_buy',rate_matrix)
    print >> sys.stdout,rate_matrix.nnz
    """
    rate_matrix = io.mmread('rate_data_buy')
    rate_matrix = rate_matrix.tolil()
    print rate_matrix.shape
    print rate_matrix[1,[1,2]].toarray()
    return rate_matrix
Example #29
File: kmeans.py Project: eshwaran/matsya
def main():
    """
        Main entry point to script to perform kmeans.

        Returns:

        - `0` or `1` on success or failure respectively.
        - Saves `centroids`, `centroiddict`, and `clusters` in working dir.

    """
    parser = gen_args()
    args = parser.parse_args()
    sessionid = args.sessionid
    data = spio.mmread(args.data).tocsc()
    logger = logging.getLogger(__name__)
    logger.addHandler(logging.StreamHandler())
    if args.verbose:
        logger.setLevel(logging.DEBUG)
    if args.k:
        k = args.k
    kmeans = KMeans(data, k, args.n, args.delta, args.randomcentroids, \
                    args.classical, args.verbose)
    result = kmeans.run()
    clusters = result['clusters']
    centroids = result['centroids']
    centroiddict = result['centroiddict']
    cPickle.dump(clusters, open("data_clusters_" + sessionid + '.pck', 'w'))
    cPickle.dump(centroiddict, open("centroid_dict_" + \
                                    sessionid + '.pck', 'w'))
    spio.mmwrite(open("data_centroids_" + sessionid + '.mtx', 'w'), \
                 centroids, comment="CSC Matrix", field='real')
    logger.info(" %d Clusters Generated ", len(clusters))
    return 0
Example #30
    def reduce_to_eig_problem(self):
        n = self.n
        range_n = range(n)

        self.W.export_mtx(self.file_tmp)
        self.W.delete_rowcols(zeros(n, dtype=int))
        self.W.compress()
        del self.W
        self.W = 0
        collect()
        self.W = csr_matrix(mmread(self.file_tmp))

        # D is a diagonal matrix with sum_j(W[i, j]) at ith diag element
        data = zeros(n, dtype=float16)
        for i in range_n:
            data[i] = self.W.getrow(i).sum()
        D = csr_matrix((data, (range_n, range_n)), shape=(n, n), dtype=float16)

        # D^(-1/2)
        data2 = zeros(n, dtype=float16)
        for i in range_n:
            data2[i] = 1 / sqrt(data[i] + 1E-4)
        D_minus_1_2 = csr_matrix((data2, (range_n, range_n)), shape=(n, n), dtype=float16)

        A = D_minus_1_2 * (D - self.W) * D_minus_1_2
        return A
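The matrix returned above is the symmetrically normalized Laplacian D^(-1/2) (D - W) D^(-1/2); in spectral-partitioning code the next step is usually to take its smallest eigenpairs. A hedged sketch of that step with SciPy (not part of the original class):

from scipy.sparse.linalg import eigsh

# A is the sparse matrix returned by reduce_to_eig_problem()
vals, vecs = eigsh(A.asfptype(), k=2, which='SM')   # two smallest eigenpairs
fiedler = vecs[:, 1]                                # eigenvector used for a 2-way split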
Example #31
import torch

import sys
sys.path.append('.')
sys.path.append('build')
import pygunrock as pyg

import numpy as np
from time import time
from tqdm import trange
from scipy.io import mmread

np.set_printoptions(linewidth=240)

# Load graph
csr = mmread('chesapeake.mtx').tocsr()
# csr = mmread('cit-Patents-sub.mtx').tocsr()

n_vertices = csr.shape[0]
n_edges    = csr.nnz

# Convert data to torch + move to GPU
indptr   = torch.IntTensor(csr.indptr).cuda()
indices  = torch.IntTensor(csr.indices).cuda()
data     = torch.FloatTensor(csr.data).cuda()

# Allocate memory for output
distances    = torch.zeros(csr.shape[0]).float().cuda()
predecessors = torch.zeros(csr.shape[0]).int().cuda()

# Create graph
Example #32
if __name__ == "__main__":

    solver_names = ['tpfa', 'vem', 'rt0', 'mpfa']
    refinements = ['0', '1', '2']

    for refinement in refinements:
        for solver in solver_names:
            folder = "./" + solver + "_results_" + refinement + "/"

            # 1) matrix and grid information
            file_in = folder + "info.txt"
            data = read_csv(file_in)[0]
            data = map(int, map(float, data[:0:-1]))

            file_in = folder + "matrix.mtx"
            A = mmread(file_in)
            data.append(A.shape[0])
            data.append(A.nnz)

            with open(solver + "_results.csv", 'a+') as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(data)

            # 2) $\int_{\Omega_3,3} \porosity c \, \mathrm{d}x$ $([-])$ vs. time
            field = "tracer"
            step = 101

            transport_root = folder + "tracer_3_"

            # in this file the constant data are saved
            file_in = folder + "sol_3_000000.vtu"
Example #33
import scipy.sparse as sparse
from scipy.io import mmread
import petsc4py
petsc4py.init()
from petsc4py import PETSc
import numpy as np

# read mtx
matrix_name = "bcsstk06.mtx"
matrix = mmread(matrix_name)
matrix = matrix.toarray()
N = matrix.shape[0]

# create PETSc comm
comm = PETSc.COMM_WORLD
size = comm.getSize()
rank = comm.getRank()

# create PETSc vectors
x = PETSc.Vec().create(comm=comm)
x.setSizes(N)
x.setFromOptions()

b = x.duplicate()
u = x.duplicate()

rstart, rend = x.getOwnershipRange()
nlocal = x.getLocalSize()

# Create PETSc matrix
A = PETSc.Mat().create(comm=comm)
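The excerpt stops right after the Mat object is created. A hypothetical continuation (an assumption, not part of the original snippet) that sizes the matrix and copies the dense array read above into the locally owned rows using standard petsc4py calls:

A.setSizes([N, N])
A.setFromOptions()
A.setUp()

row_start, row_end = A.getOwnershipRange()
for i in range(row_start, row_end):
    cols = np.nonzero(matrix[i])[0]          # column indices of the nonzeros in row i
    A.setValues(i, cols, matrix[i, cols])
A.assemblyBegin()
A.assemblyEnd()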
Example #34

import numpy as np
from scipy.io import mmread
from scipy.sparse import hstack

import morpheus.normalized_matrix as nm
from morpheus.algorithms.logistic_regression import NormalizedLogisticRegression

s = np.matrix(
    np.genfromtxt('./data/Walmart/MLSraw.txt', skip_header=True, dtype=int)).T

join_set1 = np.genfromtxt('./data/Walmart/MLFK1.csv',
                          skip_header=True,
                          dtype=int)
r1 = mmread('./data/Walmart/MLR1Sparse.txt')

join_set2 = np.genfromtxt('./data/Walmart/MLFK2.csv',
                          skip_header=True,
                          dtype=int)
r2 = mmread('./data/Walmart/MLR2Sparse.txt')

k = [join_set1 - 1, join_set2 - 1]
T = hstack((s, r1.tocsr()[k[0]], r2.tocsr()[k[1]]))
Y = np.matrix(
    np.genfromtxt('./data/Walmart/MLY.csv', skip_header=True, dtype=int)).T

w_init = np.matrix(np.random.randn(T.shape[1], 1))
w_init2 = np.matrix(w_init, copy=True)
gamma = 0.000001
iterations = 20
Example #35
File: mle.py Project: yuqin/statsmodels
def Rp(v):
    """ Gradient """
    result = 2 * (A * v - R(v) * B * v) / dot(v.T, B * v)
    #print "Rp: ", result
    return result


def Rpp(v):
    """ Hessian """
    result = 2 * (A - R(v) * B - outer(B * v, Rp(v)) -
                  outer(Rp(v), B * v)) / dot(v.T, B * v)
    #print "Rpp: ", result
    return result


A = io.mmread('nos4.mtx')  # clustered eigenvalues
#B = io.mmread('bcsstm02.mtx.gz')
#A = io.mmread('bcsstk06.mtx.gz') # clustered eigenvalues
#B = io.mmread('bcsstm06.mtx.gz')
n = A.shape[0]
B = speye(n, n)
random.seed(1)
v_0 = random.rand(n)

print("try fmin_bfgs")
full_output = 1
data = []
v,fopt, gopt, Hopt, func_calls, grad_calls, warnflag, allvecs = \
        optimize.fmin_bfgs(R,v_0,fprime=Rp,full_output=full_output,retall=1)
if warnflag == 0:
    plt.semilogy(np.arange(0, len(data)), data)
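R itself lies outside this excerpt; a minimal definition consistent with the gradient Rp and Hessian Rpp above (the generalized Rayleigh quotient of A and B) would be, as an assumption:

def R(v):
    # Rayleigh quotient R(v) = (v' A v) / (v' B v), matching the Rp/Rpp formulas above
    return dot(v.T, A * v) / dot(v.T, B * v)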
Example #36
def SubNxLost(ts, lost_nodes):
    Nx = nx.from_numpy_matrix(
        mmread(MakeSample_node_prediction_lost_InputDir + '/adjacency' +
               str(ts - 1)).toarray())
    return nx.Graph(Nx.edges(lost_nodes))
Example #37
    parser.add_argument('--cuda', help='If use GPU', action='store_true')
    parser.add_argument('--test', help='If test', action='store_true')
    parser.add_argument('--sparse',
                        help='If use sparse matrix',
                        action='store_true')
    args = parser.parse_args()

    # Prepare the training set
    print('Loading {} data ...'.format('pos'))
    if not args.sparse:
        file = os.path.join(args.input, 'pos/feature.npy')
        with open(file, 'rb') as f:
            pos_x = np.load(f)
    else:
        file = os.path.join(args.input, 'pos/feature.mtx')
        pos_x = io.mmread(file).tocsc()
    print('Pos data: ' + str(pos_x.shape[0]))

    print('Loading {} data ...'.format('neg'))
    if not args.sparse:
        file = os.path.join(args.input, 'neg/feature.npy')
        with open(file, 'rb') as f:
            neg_x = np.load(f)
    else:
        file = os.path.join(args.input, 'neg/feature.mtx')
        neg_x = io.mmread(file).tocsc()
    print('Neg data: ' + str(neg_x.shape[0]))

    if args.feature > 0:
        pos_x = pos_x[:, :args.feature]
        neg_x = neg_x[:, :args.feature]
Example #38
def train_test_split(X: sp.csr_matrix):
    row_indices = get_unique_nonzero_indices(X)

    train_data = []
    test_data = []

    for row_index in row_indices:
        col_indices = X.getrow(row_index).indices

        test_index = np.random.choice(col_indices, 1)[0]
        train_data.extend([(row_index, col_index) for col_index in col_indices if col_index != test_index])
        test_data.append((row_index, test_index))
    return train_data, test_data


def save_pair_data(data: list, file_path):
    with open(file_path, 'w') as f:
        f.write("uid, tid\n")
        for uid, tid in data:
            f.write("%d %d\n" % (uid, tid))
    return

if __name__ == '__main__':
    print("Reading .mtx file...")
    mtx_data: sp.csr_matrix = mmread(percent_subset_data_path).tocsr()
    train_data, test_data = train_test_split(mtx_data)

    train_data_path = "30music_train.txt"
    save_pair_data(train_data, train_data_path)
    test_data_path = "30music_test.txt"
    save_pair_data(test_data, test_data_path)
Example #39
def LoadMTX(path):
    mtx = mmread(str(path))
    hypergraph = FromSparseMatrix(mtx.T)
    return hypergraph
Example #40
from scipy.io import mminfo, mmread
A = mmread('MatrixMarket_MHM_subproblem.mtx')
import matplotlib
matplotlib.use('PDF')
import matplotlib.pyplot as plt
plt.spy(A, markersize=1)
plt.savefig('MatrixMarket_MHM_subproblem')
Example #41
def link_prediction(n_appeared, p_appeared, n_disappeared, p_disappeared,
                    n_new, p_new, n_lost, p_lost, is_train, is_valid, is_test):

    probability_appeared_InputDir, num_appeared_InputDir = get_appeared_InputDirs(
        p_appeared, n_appeared)
    appeared_edge_pred_set_list, appeared_edge_true_set_list, recall_appeared_edge, precision_appeared_edge, f1_score_appeared_edge = get_component_result(
        "edge", probability_appeared_InputDir, num_appeared_InputDir,
        all_node_num, is_train, is_valid, is_test)

    probability_disappeared_InputDir, num_disappeared_InputDir = get_disappeared_InputDirs(
        p_disappeared, n_disappeared)
    disappeared_edge_pred_set_list, disappeared_edge_true_set_list, recall_disappeared_edge, precision_disappeared_edge, f1_score_disappeared_edge = get_component_result(
        "edge", probability_disappeared_InputDir, num_disappeared_InputDir,
        all_node_num, is_train, is_valid, is_test)

    probability_new_InputDir, num_new_InputDir = get_new_InputDirs(
        p_new, n_new)
    new_edge_pred_set_list, new_edge_true_set_list, recall_new_edge, precision_new_edge, f1_score_new_edge = get_component_result(
        "edge", probability_new_InputDir, num_new_InputDir,
        all_node_num + n_expanded, is_train, is_valid, is_test)

    probability_lost_InputDir, num_lost_InputDir = get_lost_InputDirs(
        p_lost, n_lost)
    lost_node_pred_set_list, lost_node_true_set_list, recall_lost_node, precision_lost_node, f1_score_lost_node = get_component_result(
        "node", probability_lost_InputDir, num_lost_InputDir, all_node_num,
        is_train, is_valid, is_test)
    lost_edge_pred_set_list, lost_edge_true_set_list, recall_lost_edge, precision_lost_edge, f1_score_lost_edge = get_edge_connected_lost_node(
        probability_lost_InputDir, lost_node_pred_set_list,
        lost_node_true_set_list, is_train, is_valid, is_test)

    # Compute the combined results
    # "link set at t" + "appeared (link) set" + "new (link) set" - "disappeared (link) set" - "lost (link) set"
    ts_list = get_ts_list(probability_appeared_InputDir, is_train, is_valid,
                          is_test)
    ts_c_pred_A = []
    for i, ts in enumerate(ts_list):
        ts_train, ts_test, ts_all = TsSplit(ts, L)
        t_edge_set = set()
        for edge in nx.from_numpy_matrix(
                mmread(MakeSample_node_prediction_lost_InputDir +
                       '/adjacency' + str(ts_train[-1])).toarray()).edges:
            t_edge_set.add(frozenset({edge[0], edge[1]}))

        appeared_edge_pred_set = appeared_edge_pred_set_list[i]
        appeared_edge_true_set = appeared_edge_true_set_list[i]
        assert len(
            t_edge_set
            & appeared_edge_true_set) == 0, "tのlink集合とappeared(link)集合は被らない"
        assert len(
            t_edge_set
            & appeared_edge_pred_set) == 0, "tのlink集合とappeared(link)集合は被らない"

        disappeared_edge_pred_set = disappeared_edge_pred_set_list[i]
        disappeared_edge_true_set = disappeared_edge_true_set_list[i]
        assert len(t_edge_set & disappeared_edge_true_set) == len(
            disappeared_edge_true_set), "the disappeared (link) set should be contained in the link set at t"
        assert len(t_edge_set & disappeared_edge_pred_set) == len(
            disappeared_edge_pred_set), "the disappeared (link) set should be contained in the link set at t"

        new_edge_pred_set = new_edge_pred_set_list[i]
        new_edge_true_set = new_edge_true_set_list[i]
        assert len(t_edge_set
                   & new_edge_true_set) == 0, "tのlink集合とnew(link)集合は被らない"
        assert len(t_edge_set
                   & new_edge_pred_set) == 0, "tのlink集合とnew(link)集合は被らない"

        lost_node_pred_set = lost_node_pred_set_list[i]
        lost_edge_pred_set = lost_edge_pred_set_list[i]
        lost_edge_true_set = lost_edge_true_set_list[i]
        assert len(t_edge_set & lost_edge_true_set) == len(
            lost_edge_true_set), "the lost (link) set should be contained in the link set at t"
        assert len(t_edge_set & lost_edge_pred_set) == len(
            lost_edge_pred_set), "the lost (link) set should be contained in the link set at t"

        pred_set = [set() for _ in range(16)]

        # appeared : disappeared : new : lost
        # do nothing (baseline) 0000
        pred_set[0] = t_edge_set
        # only lost handled by the best method 0001
        pred_set[1] = t_edge_set - lost_edge_pred_set
        pred_set[1] = delete_lost_node(pred_set[1], lost_node_pred_set)
        # only new handled by the best method 0010
        pred_set[2] = t_edge_set | new_edge_pred_set
        # only lost and new handled by the best method 0011
        pred_set[3] = (t_edge_set | new_edge_pred_set) - lost_edge_pred_set
        pred_set[3] = delete_lost_node(pred_set[3], lost_node_pred_set)
        # only disappeared handled by the best method 0100
        pred_set[4] = t_edge_set - disappeared_edge_pred_set
        # disappeared and lost handled by the best method 0101
        pred_set[5] = (t_edge_set -
                       disappeared_edge_pred_set) - lost_edge_pred_set
        pred_set[5] = delete_lost_node(pred_set[5], lost_node_pred_set)
        # disappeared and new handled by the best method 0110
        pred_set[6] = (t_edge_set
                       | new_edge_pred_set) - disappeared_edge_pred_set
        # disappeared, new and lost handled by the best method 0111
        pred_set[7] = ((t_edge_set | new_edge_pred_set) -
                       disappeared_edge_pred_set) - lost_edge_pred_set
        pred_set[7] = delete_lost_node(pred_set[7], lost_node_pred_set)
        # only appeared handled by the best method 1000
        pred_set[8] = t_edge_set | appeared_edge_pred_set
        # appeared and lost handled by the best method 1001
        pred_set[9] = (t_edge_set
                       | appeared_edge_pred_set) - lost_edge_pred_set
        pred_set[9] = delete_lost_node(pred_set[9], lost_node_pred_set)
        # appeared and new handled by the best method 1010
        pred_set[10] = (t_edge_set
                        | appeared_edge_pred_set) | new_edge_pred_set
        # appeared, new and lost handled by the best method 1011
        pred_set[11] = ((t_edge_set | appeared_edge_pred_set)
                        | new_edge_pred_set) - lost_edge_pred_set
        pred_set[11] = delete_lost_node(pred_set[11], lost_node_pred_set)
        # only appeared and disappeared handled by the best method 1100
        pred_set[12] = (t_edge_set
                        | appeared_edge_pred_set) - disappeared_edge_pred_set
        # only appeared, disappeared and lost handled by the best method 1101
        pred_set[13] = ((t_edge_set | appeared_edge_pred_set) -
                        disappeared_edge_pred_set) - lost_edge_pred_set
        pred_set[13] = delete_lost_node(pred_set[13], lost_node_pred_set)
        # only appeared, disappeared and new handled by the best method 1110
        pred_set[14] = ((t_edge_set | appeared_edge_pred_set)
                        | new_edge_pred_set) - disappeared_edge_pred_set
        # appeared, disappeared, new and lost handled by the best method 1111
        pred_set[15] = ((
            (t_edge_set | appeared_edge_pred_set) | new_edge_pred_set) -
                        disappeared_edge_pred_set) - lost_edge_pred_set
        pred_set[15] = delete_lost_node(pred_set[15], lost_node_pred_set)

        pred_A_list = []
        for c_idx in range(16):
            pred_G = nx.Graph()
            pred_G.add_edges_from(
                [tuple(froset) for froset in pred_set[c_idx]])
            pred_A = np.array(
                nx.to_numpy_matrix(
                    pred_G,
                    nodelist=[
                        node for node in range(all_node_num + n_expanded)
                    ]))
            pred_A_list.append(pred_A)
        ts_c_pred_A.append(pred_A_list)

    return np.array(ts_c_pred_A)
Example #42
# coding: utf-8

# In[3]:

from __future__ import division
import scipy as sp
import numpy as np
from scipy import io
import itertools
import math
import time

# In[4]:

data = io.mmread("data/netflix_mm_10000_1000")
data.shape

# In[18]:


def RMSE(data, latent):
    userOffset = 0
    movieOffset = data.shape[0]

    cx = data.tocoo()
    err = 0
    for user, movie, rating in itertools.izip(cx.row, cx.col, cx.data):
        vUser = latent[user + userOffset]
        vMovie = latent[movie + movieOffset]
        err += (vUser.dot(vMovie) - rating)**2
        #print "%f %f" % (vUser.dot(vMovie), rating)
Example #43
import pandas as pd
from scipy.io import mmread

data_mtx = mmread("salmon_output/alevin/quants_mat.mtx").toarray()

cols = open("salmon_output/alevin/quants_mat_cols.txt", "r")
rows = open("salmon_output/alevin/quants_mat_rows.txt", "r")

cols_list = []
rows_list = []

for line in cols:
    cols_list.append(line.strip("\n"))

for line in rows:
    rows_list.append(line.strip("\n"))

df = pd.DataFrame(data_mtx, columns=cols_list, index=rows_list)

df = df.T

x = open("final_mtx.csv", "w")

x.write(df.to_csv())
Example #44
File: pyprecond.py Project: Sbai7/amgcl
from make_poisson import *

#----------------------------------------------------------------------------
parser = argparse.ArgumentParser(sys.argv[0])

parser.add_argument('-A,--matrix', dest='A', help='System matrix in MatrixMarket format')
parser.add_argument('-f,--rhs',    dest='f', help='RHS in MatrixMarket format')
parser.add_argument('-n,--size',   dest='n', type=int, default=64, help='The size of the Poisson problem to solve when no system matrix is given')
parser.add_argument('-o,--out',    dest='x', help='Output file name')
parser.add_argument('-p,--prm',    dest='p', help='AMGCL parameters: key1=val1 key2=val2', nargs='+', default=[])

args = parser.parse_args(sys.argv[1:])

#----------------------------------------------------------------------------
if args.A:
    A = mmread(args.A)
    f = mmread(args.f).flatten() if args.f else np.ones(A.shape[0])
else:
    A,f = make_poisson(args.n)

# Parse parameters
prm = {p[0]: p[1] for p in map(lambda s: s.split('='), args.p)}

# Create preconditioner
P = amg.make_preconditioner(A, prm)
print(P)

iters = [0]
def count_iters(x):
    iters[0] += 1
from scipy import sparse, io

from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.metrics import precision_recall_fscore_support
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_selection import chi2, RFE
from sklearn.linear_model import LogisticRegression
import pickle
import numpy as np

import matplotlib.pyplot as plt
'''NEW SCRIPT'''

scores = []
stats_path = "./NEW_STATS_1/ALL_BALANCED"
lieb_train = io.mmread('./lieb/lieb_balanced_train.mtx')
lieb_train = lieb_train.tocsc()
gonz_train = io.mmread('./gonzalez/gonz_balanced_train.mtx')
gonz_train = gonz_train.tocsc()
bush_train = io.mmread('./bush/bush_balanced_train.mtx')
bush_train = bush_train.tocsc()
josh_train = io.mmread('./joshi/jc_balanced_train.mtx')
josh_train = josh_train.tocsc()

train_labels = np.loadtxt('./ddata/balanced_train_labels.txt', dtype=np.int32)

all_feat_train = sparse.hstack(
    (lieb_train, gonz_train[:, -7:], bush_train[:, -8:-4],
     josh_train[:, -64:-9], josh_train[:, -5:]))

model = LogisticRegression()
filename = ""

# create variable for output filename
outputFile = "APSP_report.txt"

# loop over the list
for i in graphList:

    # zero out the inFile
    filename = ""

    # create the new one
    filename = str(i) + "-vertices_degree-6.mtx"

    # read graph into a SciPy sparse graph
    A = csc_matrix(sio.mmread(filename), dtype=np.int32)

    # get the start time
    start_time = time.time()

    # run the APSP algorithm on the graph
    path = floyd_warshall(A)

    # get the stop time and compute the difference
    elapsed_time = time.time() - start_time

    # open the report file in APPEND mode
    outFile = open(outputFile, "a")

    # write the runtime for the graph to the file
    outFile.write("Runtime for %s is %.6f ms\n\n" % (filename, (elapsed_time*1000)))
Example #47
# ######################## Initialise AnnData ############################### #
# ########################################################################### #

if not args.loom == "none":

    adata = scv.read(args.loom)
    # get directory with metadata + barcodes
    metadata_dir = args.rdims.split("/")[0]

elif not args.dropest_dir == "none":

    exon_matrix = os.path.join(args.dropest_dir, "exons.mtx.gz")
    intron_matrix = os.path.join(args.dropest_dir, "introns.mtx.gz")
    spanning_matrix = os.path.join(args.dropest_dir, "spanning.mtx.gz")

    exons = io.mmread(exon_matrix).transpose().tocsr()
    introns = io.mmread(intron_matrix).transpose().tocsr()
    spanning = io.mmread(spanning_matrix).transpose().tocsr()

    adata = ad.AnnData(X=exons)
    adata.layers["spliced"] = adata.X
    adata.layers["unspliced"] = introns
    adata.layers["ambiguous"] = spanning

    adata.obs.index = [
        x
        for x in pd.read_csv(os.path.join(args.dropest_dir, "barcodes.tsv.gz"),
                             header=None)[0].values
    ]
    metadata_dir = args.dropest_dir
                "weight":J_E,
                "delay":delay})

nest.CopyModel("static_synapse_hom_w",
               "inhibitory",
               {"weight":J_I, 
                "delay":delay})

nest.CopyModel("static_synapse_hom_w",
               "excitatory-input",
               {"weight":J_E, "delay":delay})

# ###########################################
# Projections
# ###########################################
A=mmread('../ee.wmat')
rows,cols = A.nonzero()
if (plastic):
    nest.Connect(rows+1, cols+1, {"rule": "one_to_one"},  model="excitatory-plastic")
else:
    nest.Connect(rows+1, cols+1, {"rule": "one_to_one"},  model="excitatory-input")

A=mmread('../ei.wmat')
rows,cols = A.nonzero()
nest.Connect(rows+1, cols+N_E+1, {"rule": "one_to_one"},  model="excitatory-input")

A=mmread('../ie.wmat')
rows,cols = A.nonzero()
nest.Connect(rows+1+N_E, cols+1, {"rule": "one_to_one"}, syn_spec="inhibitory")

A=mmread('../ii.wmat')
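# (Hypothetical continuation, following the pattern of the projections above:
#  wire the inhibitory-to-inhibitory weights read from ii.wmat.)
rows,cols = A.nonzero()
nest.Connect(rows+1+N_E, cols+N_E+1, {"rule": "one_to_one"}, syn_spec="inhibitory")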
Example #49
0
File: main.py Project: jleamer/DMM
        #Setup solver parameters
        solver_parameters = NT.SolverParameters()
        solver_parameters.SetConvergeDiff(convergence_threshold)
        solver_parameters.SetThreshold(threshold)
        solver_parameters.SetVerbosity(False)

        #Run the ScipyMatrixGenerator script to generate a random hamiltonian of size rows x rows
        #Also construct the overlap matrix
        subprocess.run([
            "python3", "ScipyMatrixGenerator.py", '--rows',
            str(rows), '--density',
            str(density)
        ])
        #hamiltonian = mmread("hamiltonian.mtx").toarray()

        hamiltonians.append(mmread("hamiltonian.mtx").toarray())

        overlap = sparse.identity(rows, format='coo', dtype='complex')
        mmwrite("overlap", overlap)
        ntpoly_hamiltonian = NT.Matrix_ps("hamiltonian.mtx")
        ntpoly_overlap = NT.Matrix_ps("overlap.mtx")
        Density = NT.Matrix_ps(ntpoly_hamiltonian.GetActualDimension())

        #Compute the density matrix
        energy_value, chemical_potential = \
         NT.DensityMatrixSolvers.TRS2(ntpoly_hamiltonian, ntpoly_overlap, number_of_electrons, Density, solver_parameters)
        print(chemical_potential)

        #Output density matrix
        Density.WriteToMatrixMarket(density_file_out)
        ntpoly_hamiltonian.WriteToMatrixMarket("test.mtx")
Example #50
0
def NodeAttribute(ts):
    return mmread(InputDir + '/node_attribute' + str(ts)).toarray()
Example #51
0
def Nx(ts):
    return  nx.from_numpy_matrix(mmread(InputDir + '/adjacency' + str(ts)).toarray())
Example #52
0
        weights = model.get_weights()
    weights = [np.random.permutation(w.flat).reshape(w.shape) for w in weights]
    model.set_weights(weights)


if args.run_on_cpu:
    with tf.device("/cpu:0"):
        for i in range(0, 1):

            encoding_dim = 2
            n_hidden_1 = 200
            n_hidden_2 = 50

            print "Building positive and negative report matrices..."

            pos_reports = io.mmread('model_0_posreports.mtx')
            pos_reports = pos_reports.tocsr()

            neg_reports = io.mmread('model_0_negreports.mtx')
            neg_reports = neg_reports.tocsr()

            for reportblock in range(1, 50):
                print "Procesing", reportblock
                thispos = io.mmread('model_' + str(reportblock) +
                                    '_posreports.mtx')
                thispos = thispos.tocsr()
                pos_reports = vstack((pos_reports, thispos))

                thisneg = io.mmread('model_' + str(reportblock) +
                                    '_negreports.mtx')
                thisneg = thisneg.tocsr()
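                # (Hypothetical continuation, mirroring the positive-report branch above:
                #  stack this block's negative reports onto the running matrix.)
                neg_reports = vstack((neg_reports, thisneg))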
Example #53
0
# usage: converts a MatrixMarket matrix file into an adjacency-list .graph file

import sys
import numpy as np
import scipy.io as sio

# load the input file
matfile = sys.argv[1]
mat = sio.mmread(matfile)

# symmetrize the matrix
mat = mat + mat.transpose()

# convert to csr
mat = mat.tocsr()

# eliminate the diagonal
mat.setdiag(0)
mat.eliminate_zeros()

# output format fgraph
outfile = matfile.split('.')[0] + '.graph'
f = open(outfile, 'w')

# build the adjacency list (one list of neighbour column indices per row) to write to the outfile
mlol = [list(line.nonzero()[1]) for line in mat]

# number of vertices and non-zeros in the sparse matrix
nv = mat.get_shape()[0]
nnz = mat.getnnz()
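# (Hypothetical continuation: write the graph out in a METIS-like adjacency-list
#  format -- a header with vertex and edge counts, then one line of 1-based
#  neighbour indices per vertex. The exact output layout is an assumption.)
f.write("%d %d\n" % (nv, nnz // 2))
for neighbours in mlol:
    f.write(" ".join(str(v + 1) for v in neighbours) + "\n")
f.close()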
Example #54
0
    # random((K,1)) generates a random K x 1 matrix with entries in [0, 1)
    # reshape(random((K,1))/10*(np.sqrt(K)), K) turns that K x 1 matrix into a flat vector of K entries
    pu[uid] = np.reshape(random((K, 1)) / 10 * (np.sqrt(K)), K)
for pid in range(n_items):
    qi[pid] = np.reshape(random((K, 1)) / 10 * (np.sqrt(K)), K)

# load the model data
# user and item indices
users_index = pickle.load(open("users_index.pkl", 'rb'))
items_index = pickle.load(open("items_index.pkl", 'rb'))

n_users = len(users_index)
n_items = len(items_index)

# user-item rating matrix R
user_item_scores = sio.mmread("user_item_scores")

# inverted indices
## songs played by each user
user_items = pickle.load(open("user_items.pkl", 'rb'))
## users who attended each event
item_users = pickle.load(open("item_users.pkl", 'rb'))

# similarity between all users
#similarity_matrix_users = pickle.load(open("/data/weixin-38664232/my-dataset/users_similarity_playcount.pkl", 'rb'))

# similarity between all items
#similarity_matrix_items = pickle.load(open("/data/weixin-38664232/my-dataset/items_similarity_playcount.pkl", 'rb'))


# average score of each user
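# (Hypothetical continuation, not from the original script: derive each user's
#  average score from the sparse rating matrix and use it as the bias term of
#  the latent-factor prediction pu[u] . qi[i].)
R = user_item_scores.tocsr()
user_mean = np.asarray(R.sum(axis=1)).flatten() / np.maximum(R.getnnz(axis=1), 1)

def predict_score(uid, pid):
    return user_mean[uid] + np.dot(pu[uid], qi[pid])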
Example #55
0
from morpheus.algorithms.kmeans import NormalizedKMeans

import numpy as np
from scipy.io import mmread
from scipy.sparse import hstack
import morpheus.normalized_matrix as nm

s = np.matrix([])

join_set1 = np.genfromtxt('./data/BookCrossing/MLFK1.csv',
                          skip_header=True,
                          dtype=int)
num_s = len(join_set1)
num_r1 = max(join_set1)
r1 = mmread('./data/BookCrossing/MLR1Sparse.txt')

join_set2 = np.genfromtxt('./data/BookCrossing/MLFK2.csv',
                          skip_header=True,
                          dtype=int)
num_s = len(join_set2)
num_r2 = max(join_set2)
r2 = mmread('./data/BookCrossing/MLR2Sparse.txt')

Y = np.matrix(
    np.genfromtxt('./data/BookCrossing/MLY.csv', skip_header=True,
                  dtype=int)).T
k = [join_set1 - 1, join_set2 - 1]
T = hstack((r1.tocsr()[k[0]], r2.tocsr()[k[1]]))

iterations = 1
Example #56
0
def configure_and_run_brian2genn(simtime_in_s, num_threads):
    # ###########################################
    # Configuration
    # ###########################################
    numpy.random.seed(98765)
    set_device('genn')
    defaultclock.dt = 0.1*ms
    #prefs.devices.genn.path="..." alternative to GENN_PATH

    # ###########################################
    # Network parameters
    # ###########################################
    taum = 20*ms
    taue = 5*ms
    taui = 10*ms
    Vt = -50*mV
    Vr = -60*mV
    El = -60*mV
    Erev_exc = 0.*mV
    Erev_inh = -80.*mV
    I = 20. * mvolt

    # ###########################################
    # Neuron model
    # ###########################################
    eqs = '''
    dv/dt  = (ge*(Erev_exc-v)+gi*(Erev_inh-v)-(v-El) + I)*(1./taum) : volt (unless refractory)
    dge/dt = -ge/taue : 1 
    dgi/dt = -gi/taui : 1 
    '''

    # ###########################################
    # Population
    # ###########################################
    NE = 3200
    NI = NE//4
    P = NeuronGroup(NE+NI, eqs, threshold='v>Vt', reset='v = Vr', refractory=5*ms, method='euler')
    P.v = (randn(len(P)) * 5. - 55.) * mvolt
    Pe = P[:NE]
    Pi = P[NE:]

    # ###########################################
    # Projections
    # ###########################################
    we = 0.6 # excitatory synaptic weight (voltage)
    wi = 6.7 # inhibitory synaptic weight
    conn_ee = Synapses(Pe,Pe,model="w:1",on_pre='ge += w', method='euler')
    conn_ei = Synapses(Pe,Pi,model="w:1",on_pre='ge += w', method='euler')
    conn_ie = Synapses(Pi,Pe,model="w:1",on_pre='gi += w', method='euler')
    conn_ii = Synapses(Pi,Pi,model="w:1",on_pre='gi += w', method='euler')

    ee_mat = mmread('ee.wmat')
    ei_mat = mmread('ei.wmat')
    ie_mat = mmread('ie.wmat')
    ii_mat = mmread('ii.wmat')

    conn_ee.connect(i=ee_mat.row, j=ee_mat.col)
    conn_ee.w=we

    conn_ei.connect(i=ei_mat.row, j=ei_mat.col)
    conn_ei.w=we

    conn_ie.connect(i=ie_mat.row, j=ie_mat.col)
    conn_ie.w=wi

    conn_ii.connect(i=ii_mat.row, j=ii_mat.col)
    conn_ii.w=wi



    # ###########################################
    # Simulation
    # ###########################################
    s_mon = SpikeMonitor(P)
    # Run the simulation for the requested amount of biological time
    run(simtime_in_s * second)
    totaltime = device._last_run_time

    print('Done in', totaltime)

    # ###########################################
    # Data analysis
    # ###########################################
    f = figure()
    plot(s_mon.t/ms, s_mon.i, '.')
    xlabel('Time (ms)')
    ylabel('Neuron index')
    f.savefig("brian2genn_raster_plot.png")

    return totaltime
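# (Hypothetical usage of the benchmark helper defined above; the simulated time
#  and thread count are arbitrary choices, not from the original script.)
if __name__ == '__main__':
    runtime = configure_and_run_brian2genn(simtime_in_s=1.0, num_threads=1)
    print('brian2genn wall-clock run time:', runtime)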
Example #57
0
def read_mtx_file(mm_file):
    print('Reading ' + str(mm_file) + '...')
    return mmread(mm_file).asfptype()
Example #58
0
    def populate(self):
        logger.info("Preprocessing dataset")

        was_extracted = False
        if len(self.filenames) > 0:
            file_path = os.path.join(self.save_path, self.filenames[0])
            if not os.path.exists(file_path[:-7]):  # nothing extracted yet
                if tarfile.is_tarfile(file_path):
                    logger.info("Extracting tar file")
                    tar = tarfile.open(file_path, "r:gz")
                    tar.extractall(path=self.save_path)
                    was_extracted = True
                    tar.close()

        # get exact path of the extract, for robustness to changes in the 10X storage logic
        path_to_data, suffix = self.find_path_to_data()

        # get filenames, according to 10X storage logic
        measurements_filename = "genes.tsv" if suffix == "" else "features.tsv.gz"
        barcode_filename = "barcodes.tsv" + suffix

        matrix_filename = "matrix.mtx" + suffix
        expression_data = sp_io.mmread(
            os.path.join(path_to_data, matrix_filename)).T
        if self.dense:
            expression_data = expression_data.A
        else:
            expression_data = csr_matrix(expression_data)

        # group measurements by type (e.g gene, protein)
        # in case there are multiple measurements, e.g protein
        # they are indicated in the third column
        gene_expression_data = expression_data
        measurements_info = pd.read_csv(os.path.join(path_to_data,
                                                     measurements_filename),
                                        sep="\t",
                                        header=None)
        Ys = None
        if measurements_info.shape[1] < 3:
            gene_names = measurements_info[
                self.measurement_names_column].astype(np.str)
        else:
            gene_names = None
            for measurement_type in np.unique(measurements_info[2]):
                # .values required to work with sparse matrices
                measurement_mask = (
                    measurements_info[2] == measurement_type).values
                measurement_data = expression_data[:, measurement_mask]
                measurement_names = measurements_info[
                    self.measurement_names_column][measurement_mask].astype(
                        np.str)
                if measurement_type == "Gene Expression":
                    gene_expression_data = measurement_data
                    gene_names = measurement_names
                else:
                    Ys = [] if Ys is None else Ys
                    if measurement_type == "Antibody Capture":
                        measurement_type = "protein_expression"
                        columns_attr_name = "protein_names"
                        # protein counts do not have many zeros so always make dense
                        if self.dense is not True:
                            measurement_data = measurement_data.A
                    else:
                        measurement_type = measurement_type.lower().replace(
                            " ", "_")
                        columns_attr_name = measurement_type + "_names"
                    measurement = CellMeasurement(
                        name=measurement_type,
                        data=measurement_data,
                        columns_attr_name=columns_attr_name,
                        columns=measurement_names,
                    )
                    Ys.append(measurement)
            if gene_names is None:
                raise ValueError(
                    "When loading measurements, no 'Gene Expression' category was found."
                )

        batch_indices, cell_attributes_dict = None, None
        if os.path.exists(os.path.join(path_to_data, barcode_filename)):
            barcodes = pd.read_csv(os.path.join(path_to_data,
                                                barcode_filename),
                                   sep="\t",
                                   header=None)
            cell_attributes_dict = {
                "barcodes": np.squeeze(np.asarray(barcodes, dtype=str))
            }
            # As of 07/01, 10X barcodes have format "%s-%d" where the digit is a batch index starting at 1
            batch_indices = np.asarray([
                barcode.split("-")[-1]
                for barcode in cell_attributes_dict["barcodes"]
            ])
            batch_indices = batch_indices.astype(np.int64) - 1

        logger.info("Finished preprocessing dataset")

        self.populate_from_data(
            X=gene_expression_data,
            batch_indices=batch_indices,
            gene_names=gene_names,
            cell_attributes_dict=cell_attributes_dict,
            Ys=Ys,
        )
        self.filter_cells_by_count()

        # cleanup if required
        if was_extracted and self.remove_extracted_data:
            logger.info("Removing extracted data at {}".format(file_path[:-7]))
            shutil.rmtree(file_path[:-7])
Example #59
0
    def load(self, data):
        try:
            if isinstance(data, str) and ('.csv' in data or '.tsv' in data
                                          or '.txt' in data):
                logger.info('Reading data...')
                sep = self.which_sep(data)
                if self.to_transpose(sep, data):
                    dat = pd.read_csv(data, sep=sep, header=0, index_col=0).T
                else:
                    dat = pd.read_csv(data, sep=sep, header=0, index_col=0)
            elif isinstance(data, str):
                logger.info(
                    'Importing 10X data from directory. Directory must contain barcodes.tsv, features.tsv, matrix.mtx, tissue_positions_list.csv'
                )
                # find the barcodes file from 10X directory
                file_barcodes = [
                    str(x) for x in Path(data).rglob("*barcodes.tsv*")
                ]
                if len(file_barcodes) == 0:
                    logger.error(
                        'There is no barcode.tsv file in the 10X directory.')
                file_barcodes = file_barcodes[0]
                barcodes = np.asarray(pd.read_csv(file_barcodes,
                                                  header=None)).flatten()
                # find the features file from 10X directory
                file_features = [
                    str(x) for x in Path(data).rglob("*features.tsv*")
                ]
                if len(file_features) == 0:
                    logger.error(
                        'There is no features.tsv file in the 10X directory.')
                file_features = file_features[0]
                genes = np.asarray(
                    pd.read_csv(file_features, sep='\t', header=None))
                genes = genes[:, 1]
                # find the tissue_position_list file from 10X directory
                file_coords = [
                    str(x)
                    for x in Path(data).rglob("*tissue_positions_list.csv*")
                ]
                if len(file_coords) == 0:
                    logger.error(
                        'There is no tissue_positions_list.csv file in the 10X directory.'
                    )
                file_coords = file_coords[0]
                coords = np.asarray(
                    pd.read_csv(file_coords, sep=',', header=None))
                d = dict()
                for row in coords:
                    d[row[0]] = str(row[2]) + 'x' + str(row[3])
                inds = []
                coords2 = []
                for i, barcode in enumerate(barcodes):
                    if barcode in d.keys():
                        inds.append(i)
                        coords2.append(d[barcode])
                # find the count matrix file
                file_matrix = [
                    str(x) for x in Path(data).rglob("*matrix.mtx*")
                ]
                if len(file_matrix) == 0:
                    logger.error(
                        'There is no matrix.mtx file in the 10X directory.')
                file_matrix = file_matrix[0]
                matrix = mmread(file_matrix).toarray()
                logger.info(str(barcodes) + ' ' + str(barcodes.shape))
                logger.info(str(genes) + ' ' + str(genes.shape))
                logger.info(str(coords) + ' ' + str(coords.shape))
                logger.info(str(matrix.shape))

                matrix = matrix[:, inds]
                genes, inds2 = np.unique(genes, return_index=True)
                matrix = matrix[inds2, :]
                dat = pd.DataFrame(matrix, index=genes, columns=coords2)

                logger.info(str(dat))
            else:
                dat = pd.DataFrame(data)
        except Exception as e:
            raise Exception("Incorrect input format") from e
        logger.info('coords ' + str(len(dat.columns.values)))
        logger.info('genes ' + str(len(dat.index.values)))
        data = dat.values
        logger.info(str(data.shape))
        self.rows = dat.index.values
        self.columns = dat.columns.values
        return (dat, data)
Example #60
0
File: clean_test.py Project: jleamer/DMM
    basis='631g',
    symmetry=True,
)

mf = dft.RKS(mol)
#mf.xc = 'blyp' # shorthand for b88,lyp
mf.xc = 'pbe'  # shorthand for pbe,pbe
#mf.xc = 'lda,vwn_rpa'
#mf.xc = 'pbe0'
#mf.xc = 'b3lyp'

# this is where the self-consistent field (SCF) calculation happens
mf.kernel()

# Orbital energies, Mulliken population etc.
mf.analyze()

# Get the converged density matrix (it generates the density matrix)
dm = mf.make_rdm1()
mmwrite('dft_density.mtx', sparse.coo_matrix(dm))

# Get the nuclear-nuclear repulsion energy
e_nuc = mf.energy_nuc()

# Get the 'core' hamiltonian, corresponding to kinetic energy and e-nuclear repulsion terms
h1e = mf.get_hcore()
ovlp = mmread("dft_overlap.mtx").toarray()

h1e_eigs = linalg.eigvalsh(h1e, ovlp)
print(h1e_eigs)
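# (Hypothetical follow-up, mirroring the density-matrix write above: persist the
#  core Hamiltonian in MatrixMarket form so a downstream solver can read it back
#  with mmread. The file name is an assumption.)
mmwrite('dft_hamiltonian.mtx', sparse.coo_matrix(h1e))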