def eigenbase (h1, d1, E, v0, pow0, pow1, rest): # compute all eigenvalues and eigenvectors
  pt0 = 'out/impacting-bar/MK_%g_%g_%g_%g_%d_%d'%(h1, d1, E, v0, pow0, pow1)
  sl0 = SOLFEC ('DYNAMIC', 1E-3, pt0)
  bl0 = BULK_MATERIAL (sl0, model = 'KIRCHHOFF', young = E, poisson = PoissonRatio, density = MassDensity)
  bod = BODY (sl0, 'FINITE_ELEMENT', COPY (mesh), bl0)
  eval = [] # selected eigenvalue list
  evec = [] # selected eigenvector list (BODY command takes a tuple (eval, evec) argument for the RO formulation)
  vsel = (0,1,2,3,4,5,13,18,25,33,38)
  if 0:
    BODY_MM_EXPORT (bod, pt0+'/M.mtx', pt0+'/K.mtx')
    M = mmread (pt0+'/M.mtx').todense()
    K = mmread (pt0+'/K.mtx').todense()
    for j in range (0, K.shape[1]):
      for i in range (j+1, K.shape[0]):
        K [j, i] = K [i, j] # above diagonal = below diagonal
    x, y = eigh (K, M) # this produces y.T M y = 1 and y.T K y = x
    for j in vsel:
      eval.append (x[j].real)
      for z in y[:,j]:
        evec.append (z.real)
  else:
    data0 = MODAL_ANALYSIS (bod, 45, pt0 + '/modal.data', verbose = 'ON', abstol = 1E-14)
    ndofs = mesh.nnod * 3
    for j in vsel:
      eval.append (data0[0][j])
      for k in range (j*ndofs,(j+1)*ndofs):
        evec.append (data0[1][k])
  return (eval, evec)
def get_debug(data): full_train = sio.mmread('data/%s_train.mtx' % data).tocsr() (nu, nm) = full_train.shape print 'sampling' debug_mids = sample(range(nm), nm / 5) debug_uids = sample(range(nu), nu / 5) debug = full_train[debug_uids][:, debug_mids].tocoo() nr = debug.nnz train_ids, _, test_ids = sample_split(nr) # build matrix from given indices print 'writing debug_train' debug_train = coo_matrix( (debug.data[train_ids], (debug.row[train_ids], debug.col[train_ids])), debug.shape) sio.mmwrite('data/%s_debug_train.mtx' % data, debug_train) print 'writing debug_test' debug_test = coo_matrix( (debug.data[test_ids], (debug.row[test_ids], debug.col[test_ids])), debug.shape) sio.mmwrite('data/%s_debug_test.mtx' % data, debug_test) # build movie mtx from debug_mids print 'movie debug' movies = sio.mmread('data/movies.mtx').tocsr() movies_debug = movies[debug_mids] sio.mmwrite('data/movies_%s_debug.mtx' % data, movies_debug) return debug, debug_train, debug_test, movies_debug
def read_input_tensor(headers_filename, data_file_names, tensor_slices, adjustDim=False, offerString="Attr: OFFER", wantString="Attr: WANT"): #load the header file _log.info("Read header input file: " + headers_filename) input = codecs.open(headers_filename,'r',encoding='utf8') headers = input.read().splitlines() input.close() # get the largest dimension of all slices if adjustDim: maxDim = 0 for data_file in data_file_names: matrix = mmread(data_file) if maxDim < matrix.shape[0]: maxDim = matrix.shape[0] if maxDim < matrix.shape[1]: maxDim = matrix.shape[1] # load the data files slice = 0 tensor = SparseTensor(headers, offerString, wantString) for data_file in data_file_names: if adjustDim: adjusted = adjust_mm_dimension(data_file, maxDim) if adjusted: _log.warn("Adujst dimension to (%d,%d) of matrix file: %s" % (maxDim, maxDim, data_file)) _log.info("Read as slice %d the data input file: %s" % (slice, data_file)) matrix = mmread(data_file) tensor.addSliceMatrix(matrix, tensor_slices[slice]) slice = slice + 1 return tensor
def applySVMWithPCA():
    '''
    Same as the previous function, just change the file names..
    '''
    data = io.mmread(ROOTDIR+"TRAINDATA.mtx")
    label = np.load(ROOTDIR+"label_train.npy")
    testdata = io.mmread(ROOTDIR+"TESTDATA.mtx")
    testLabel = np.load(ROOTDIR + "label_test.npy")

    linear_svm = LinearSVC(C=1.0, class_weight=None, loss='hinge', dual=True, fit_intercept=True,
                           intercept_scaling=1, multi_class='ovr', penalty='l2',
                           random_state=None, tol=0.0001, verbose=1, max_iter=2000)
    data = scale(data, with_mean=False)
    linear_svm.fit(data, label)
    joblib.dump(linear_svm, ROOTDIR+'originalTrain_hinge_2000.pkl')
    # linear_svm = joblib.load(ROOTDIR+'originalTrain_hinge_2000.pkl')
    print 'Training Done!'

    scr = linear_svm.score(data, label)
    print 'accuracy on the training set is:' + str(scr)
    predLabel = linear_svm.predict(data)
    calcualteRMSE(label, predLabel)

    scr = linear_svm.score(testdata, testLabel)
    print 'accuracy on the testing set is:' + str(scr)
    predLabel = linear_svm.predict(testdata)
    calcualteRMSE(testLabel, predLabel)
def load(ppt, samples, l_tau, l_lc, l_regtype, b_tau, b_lc, b_regtype): ln = np.loadtxt('lin-models/bestlinwtln'+l_regtype+samples+'tau'+l_tau+'lc'+l_lc+ppt+'.txt') lv = np.loadtxt('lin-models/bestlinwtlv'+l_regtype+samples+'tau'+l_tau+'lc'+l_lc+ppt+'.txt') bv = np.loadtxt('bil-models/bestbilwtbn'+b_regtype+samples+'tau'+b_tau+'eta'+b_lc+ppt+'.txt') bn = np.loadtxt('bil-models/bestbilwtbv'+b_regtype+samples+'tau'+b_tau+'eta'+b_lc+ppt+'.txt') traindata = [(d.strip().split()[1:5], d.strip().split()[5]) for d in open('clean/cleantrain.txt')] devdata = [(d.strip().split()[1:5], d.strip().split()[5]) for d in open('clean/cleandev.txt')] testdata = [(d.strip().split()[1:5], d.strip().split()[5]) for d in open('clean/cleantest.txt')] traindata = traindata[:int(samples)] phih = sio.mmread('clean/trh1k.mtx') phim = sio.mmread('clean/trm1k.mtx') phidh = sio.mmread('clean/devh1k.mtx') phidm = sio.mmread('clean/devm1k.mtx') maph = np.loadtxt('clean/forhead.txt', dtype=str) mapm = np.loadtxt('clean/formod.txt', dtype=str) mapdh = np.loadtxt('clean/devheads.txt', dtype=str) mapdm = np.loadtxt('clean/devmods.txt', dtype=str) trainingdat = bilme.BilinearMaxentFeatEncoding.train(traindata, phih, phim, maph, mapm, pptype=ppt) traintoks = trainingdat.train_toks() traintokens = [(co.word_features(t),l) for t,l in trainingdat.train_toks()] devencode = bilme.BilinearMaxentFeatEncoding.train(devdata, phidh, phidm, mapdh, mapdm, pptype=ppt) devtoks = devencode.train_toks() devtokens = [(co.word_features(t),l) for t,l in devencode.train_toks()] data = [devtoks, devtokens] trlinencoding = maxent.BinaryMaxentFeatureEncoding.train(traintokens) return trlinencoding, devencode, [ln, lv], [bn, bv], data
def generate_valid_repos_and_times(dataset_dir): """Function called to generate VALID_REPOS_AND_TIMES in `dataset_dir` """ valid_repos_and_times = [] repos_users_times_fn = join(dataset_dir, TIMED_INTERESTS_FN) u_r_t = mmread(repos_users_times_fn).transpose().tocsr() validation_repos_fn = join(dataset_dir, VALIDATING_FN) validation_matrix = mmread(validation_repos_fn).tocsr() v_u_r_t = u_r_t.multiply(validation_matrix).tolil() for uidx in xrange(v_u_r_t.shape[0]): v_r_t_coo = v_u_r_t.getrowview(uidx).tocoo() sorted_index = np.argsort(v_r_t_coo.data) times = v_r_t_coo.data[sorted_index] repos = v_r_t_coo.col[sorted_index] valid_repos_and_times.append(np.vstack((times,repos))) pt_fn = join(dataset_dir, VALID_REPOS_AND_TIMES) with open(pt_fn, "wb") as pf: cPickle.dump(valid_repos_and_times, pf, cPickle.HIGHEST_PROTOCOL) return pt_fn
def ro0_modal_base (use_scipy=False, verbose='OFF'): sol = ro0_model (1E-3, 0.0) bod = sol.bodies[0] eval = [] # selected eigenvalue list evec = [] # selected eigenvector list vsel = (0,1,2,3,4,5,13,18,25,33,38) if use_scipy: BODY_MM_EXPORT (bod, 'out/reduced-order0/M.mtx', 'out/reduced-order0/K.mtx') M = mmread ('out/reduced-order0/M.mtx').todense() K = mmread ('out/reduced-order0/K.mtx').todense() for j in range (0, K.shape[1]): for i in range (j+1, K.shape[0]): K [j, i] = K [i, j] # above diagonal = below diagonal x, y = eigh (K, M) # this produces y.T M y = 1 and y.T K y = x for j in vsel: eval.append (x[j].real) for z in y[:,j]: evec.append (z.real) else: data0 = MODAL_ANALYSIS (bod, 45, 'out/reduced-order0/modal', 1E-13, 1000, verbose) dofs = len(bod.velo) for j in vsel: eval.append (data0[0][j]) for k in range (j*dofs,(j+1)*dofs): evec.append (data0[1][k]) return (eval, evec)
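# A standalone sketch (not part of the SOLFEC snippets above) that checks the property
# stated in the comment "y.T M y = 1 and y.T K y = x": for the generalized problem
# K y = x M y, scipy.linalg.eigh returns eigenvectors that are mass-orthonormal and
# diagonalize K.  It assumes the M.mtx / K.mtx files exported by BODY_MM_EXPORT above
# already exist on disk.
import numpy as np
from scipy.io import mmread
from scipy.linalg import eigh

M = mmread('out/reduced-order0/M.mtx').toarray()
K = mmread('out/reduced-order0/K.mtx').toarray()
K = np.tril(K) + np.tril(K, -1).T  # mirror the lower triangle, as in the snippets above
x, y = eigh(K, M)
print(np.allclose(y.T.dot(M).dot(y), np.eye(M.shape[0])))  # mass-orthonormal: y.T M y = I
print(np.allclose(y.T.dot(K).dot(y), np.diag(x)))          # stiffness diagonalized: y.T K y = diag(x)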
def goMusic(K=80,steps=200,resume=False,normalize=True,R=None,V=None,mean_center=False,beta=0.0,betaO=0.0,normalizer=normalizer,doBias=True,every=1,doFactors=True,biasSteps=10): #R = mmread("reviews_Musical_Instruments.mtx").tocsr() if R == None: R = mmread("training.mtx").tocsr().toarray() else: R = R.toarray() if V == None: V = mmread("validation.mtx").todok() mu = np.finfo(float).eps if normalize: R = normalizer(R,1,0) print "normalizing, min/max", R.min(),R.max() #R = R[0:424,:] if not resume: P = normalizer(np.random.rand(R.shape[0],K),.1,0) Q = normalizer(np.asfortranarray(np.random.rand(K,R.shape[1])),.1,0) #bP,bQ = makeAvgBaseline(R) #print bP,bQ bP = None # np.zeros(R.shape[0])#None bQ = None #np.zeros(R.shape[1])#None#(R > 0).mean(axis=0) #bP,bQ = makeAvgBaseline(R) else: P = np.loadtxt("P.txt") Q = np.loadtxt("Q.txt") bP = np.loadtxt("bP.txt") bQ = np.loadtxt("bQ.txt") print R.shape,P.shape,Q.shape print "starting doFactO" #chunkFactO(R,P,Q,K,steps=steps,chunks=1,discard=0)#chunks=800,discard=0) #R,P,Q,bP,bQ = factO(R,P,Q,K,steps=steps,discard=0,bP=bP,bQ=bQ,beta=beta,betaO=betaO) rmses,maes,errs = [],[],[] def validation(P,Q,bP,bQ): rmse,mae,err = validate(T=R,V=V,P=P,Q=Q,bP=bP,bQ=bQ) rmses.append(rmse) maes.append(mae) errs.append(err) R,P,Q,bP,bQ,t_rmses = sigFactO(R,P,Q,K,bP=bP,bQ=bQ,steps=steps,discard=0.0,beta=beta,betaO=betaO,mean_center=mean_center,doBias=doBias,validate=validation,every=every,doFactors=doFactors,biasSteps=biasSteps) if normalize: R = renormalizer(R,1,0,5,0) dumparrays(R,P,Q,bP,bQ) return t_rmses,rmses,maes,errs
def main():
    import os
    import logging
    import subprocess
    from optparse import OptionParser

    import numpy as np
    from scipy.io import mmread

    from mrec import save_recommender
    from mrec.mf.recommender import MatrixFactorizationRecommender
    from filename_conventions import get_modelfile

    logging.basicConfig(level=logging.INFO,format='[%(asctime)s] %(levelname)s: %(message)s')

    parser = OptionParser()
    parser.add_option('--factor_format',dest='factor_format',help='format of factor files tsv | mm (matrixmarket) | npy (numpy array)')
    parser.add_option('--user_factors',dest='user_factors',help='user factors filepath')
    parser.add_option('--item_factors',dest='item_factors',help='item factors filepath')
    parser.add_option('--train',dest='train',help='filepath to training data, just used to apply naming convention to output model saved here')
    parser.add_option('--outdir',dest='outdir',help='directory for output')
    parser.add_option('--description',dest='description',help='optional description of how factors were computed, will be saved with model so it can be output with evaluation results')

    (opts,args) = parser.parse_args()
    if not opts.factor_format or not opts.user_factors or not opts.item_factors \
            or not opts.outdir:
        parser.print_help()
        raise SystemExit

    model = MatrixFactorizationRecommender()

    logging.info('loading factors...')
    if opts.factor_format == 'npy':
        model.U = np.load(opts.user_factors)
        model.V = np.load(opts.item_factors)
    elif opts.factor_format == 'mm':
        model.U = mmread(opts.user_factors)
        model.V = mmread(opts.item_factors)
    elif opts.factor_format == 'tsv':
        model.U = np.loadtxt(opts.user_factors)
        model.V = np.loadtxt(opts.item_factors)
    else:
        raise ValueError('unknown factor format: {0}'.format(opts.factor_format))

    if opts.description:
        model.description = opts.description

    logging.info('saving model...')
    logging.info('creating output directory {0}...'.format(opts.outdir))
    subprocess.check_call(['mkdir','-p',opts.outdir])
    modelfile = get_modelfile(opts.train,opts.outdir)
    save_recommender(model,modelfile)

    logging.info('done')
def fit_lightfm_model(): """ Fit the lightFM model returns d_user_pred, list_user, list_coupon list_coupon = list of test coupons list_user = list of user ID d_user_pred : key = user, value = predicted ranking of coupons in list_coupon """ #Load data Mui_train = spi.mmread("../Data/Data_translated/biclass_user_item_train_mtrx.mtx") uf = spi.mmread("../Data/Data_translated/user_feat_mtrx.mtx") itrf = spi.mmread("../Data/Data_translated/train_item_feat_mtrx.mtx") itef = spi.mmread("../Data/Data_translated/test_item_feat_mtrx.mtx") #Print shapes as a check print "user_features shape: %s,\nitem train features shape: %s,\nitem test features shape: %s" % (uf.shape, itrf.shape, itef.shape) #Load test coupon and user lists cplte = pd.read_csv("../Data/Data_translated/coupon_list_test_translated.csv") ulist = pd.read_csv("../Data/Data_translated/user_list_translated.csv") list_coupon = cplte["COUPON_ID_hash"].values list_user = ulist["USER_ID_hash"].values #Build model no_comp, lr, ep = 10, 0.01, 5 model = LightFM(no_components=no_comp, learning_rate=lr, loss='warp') model.fit_partial(Mui_train, user_features = uf, item_features = itrf, epochs = ep, num_threads = 4, verbose = True) test = sps.csr_matrix((len(list_user), len(list_coupon)), dtype = np.int32) no_users, no_items = test.shape pid_array = np.arange(no_items, dtype=np.int32) #Create and initialise dict to store predictions d_user_pred = {} for user in list_user : d_user_pred[user] = [] # Loop over users and compute predictions for user_id, row in enumerate(test): sys.stdout.write("\rProcessing user " + str(user_id)+"/ "+str(len(list_user))) sys.stdout.flush() uid_array = np.empty(no_items, dtype=np.int32) uid_array.fill(user_id) predictions = model.predict(uid_array, pid_array,user_features = uf, item_features = itef, num_threads=4) user = str(list_user[user_id]) # apply MinMaxScaler for blending later on MMS = MinMaxScaler() pred = MMS.fit_transform(np.ravel(predictions)) d_user_pred[user] = pred # Pickle the predictions for future_use d_pred = {"list_coupon" : list_coupon.tolist(), "d_user_pred" : d_user_pred} with open("../Data/Data_translated/d_pred_lightfm.pickle", "w") as f: pickle.dump(d_pred, f, protocol = pickle.HIGHEST_PROTOCOL) return d_user_pred, list_user, list_coupon
def main(): train_tfidf = sio.mmread(tfidf_train_file) test_tfidf = sio.mmread(tfidf_test_file) svd = TruncatedSVD(400) svd_X_train = svd.fit_transform(train_tfidf) svd_X_test = svd.transform(test_tfidf) sio.mmwrite('train_tfidf_2013_svd_400_mtx', svd_X_train) sio.mmwrite('test_tfidf_svd_400_mtx', svd_X_test)
def main(): FORMAT = '%(asctime)s %(levelname)s %(message)s' logging.basicConfig(format=FORMAT) logging.getLogger().setLevel(logging.INFO) args = parse_args() lang_map = {i: fn for i, fn in enumerate(sorted(listdir(args.lang_map)))} if args.train.endswith('.mtx'): mtx = mmread(args.train).todense() t_mtx = mmread(args.test).todense() else: with open(args.train) as stream: mtx = np.loadtxt(stream, np.float64) with open(args.test) as stream: t_mtx = np.loadtxt(stream, np.float64) labels = np.ravel(mtx[:, 0]) test_labels = t_mtx[:, 0] test_mtx = t_mtx[:, 1:] if args.scale: train = scale(mtx[:, 1:], with_mean=False) else: train = mtx[:, 1:] kwargs = {} for a in args.params: k, v = a.split('=') try: v = int(v) except: pass kwargs[k] = v r = Representation(args.encoder, args.classifier, **kwargs) r.encode(train) logging.info('Matrix encoded') r.train_classifier(labels) logging.info('Model trained') acc = 0 N = 0 for vec_ in test_mtx: vec = np.ravel(vec_) cl = r.classify_vector(vec, with_probs=args.with_probs) try: lab = test_labels[N, 0] except IndexError: lab = test_labels[N] N += 1 if args.with_probs: guess = max(enumerate(cl[0, :]), key=lambda x: x[1])[0] print('{0}\t{1}\t{2}'.format('\t'.join(map(str, cl[0, :])), lang_map[guess], lang_map[int(lab)])) else: try: guess = int(cl[0, 0]) except IndexError: guess = int(cl + 0.5) print('{0}\t{1}'.format(lang_map[guess], lang_map[int(lab)])) if int(guess) == int(lab): acc += 1
def create_tox21(sparsity_cutoff, validation_fold, dtype=np.float32, download_directory=_DATA_DIRECTORY): urlbase = "http://www.bioinf.jku.at/research/deeptox/" dst = os.path.join(download_directory, "raw") fn_x_tr_d = _download_file(urlbase, "tox21_dense_train.csv.gz", dst) fn_x_tr_s = _download_file(urlbase, "tox21_sparse_train.mtx.gz", dst) fn_y_tr = _download_file(urlbase, "tox21_labels_train.csv", dst) fn_x_te_d = _download_file(urlbase, "tox21_dense_test.csv.gz", dst) fn_x_te_s = _download_file(urlbase, "tox21_sparse_test.mtx.gz", dst) fn_y_te = _download_file(urlbase, "tox21_labels_test.csv", dst) cpd = _download_file(urlbase, "tox21_compoundData.csv", dst) y_tr = pd.read_csv(fn_y_tr, index_col=0) y_te = pd.read_csv(fn_y_te, index_col=0) x_tr_dense = pd.read_csv(fn_x_tr_d, index_col=0).values x_te_dense = pd.read_csv(fn_x_te_d, index_col=0).values x_tr_sparse = io.mmread(fn_x_tr_s).tocsc() x_te_sparse = io.mmread(fn_x_te_s).tocsc() # filter out very sparse features sparse_col_idx = ((x_tr_sparse > 0).mean(0) >= sparsity_cutoff).A.ravel() x_tr_sparse = x_tr_sparse[:, sparse_col_idx].A x_te_sparse = x_te_sparse[:, sparse_col_idx].A dense_col_idx = np.where(x_tr_dense.var(0) > 1e-6)[0] x_tr_dense = x_tr_dense[:, dense_col_idx] x_te_dense = x_te_dense[:, dense_col_idx] # The validation set consists of those samples with # cross validation fold #5 info = pd.read_csv(cpd, index_col=0) f = info.CVfold[info.set != "test"].values idx_va = f == float(validation_fold) # normalize features from sklearn.preprocessing import StandardScaler s = StandardScaler() s.fit(x_tr_dense[~idx_va]) x_tr_dense = s.transform(x_tr_dense) x_te_dense = s.transform(x_te_dense) x_tr_sparse = np.tanh(x_tr_sparse) x_te_sparse = np.tanh(x_te_sparse) x_tr = np.hstack([x_tr_dense, x_tr_sparse]) x_te = np.hstack([x_te_dense, x_te_sparse]) return ( x_tr[~idx_va].astype(dtype, order="C"), y_tr[~idx_va].values.astype(dtype, order="C"), x_tr[idx_va].astype(dtype, order="C"), y_tr[idx_va].values.astype(dtype, order="C"), x_te.astype(dtype, order="C"), y_te.values.astype(dtype, order="C"), )
def validate(trunc = False,T = None,V = None,doRound=False,activation=sigmoid,P=None,Q=None,bP=None,bQ=None):
    if T is None:
        Rtraining = mmread('training.mtx').tocsr()
    else:
        Rtraining = T
    if V is None:
        R = mmread('validation.mtx').todok()
    else:
        R = V.todok()
    mean = (Rtraining.sum()) / (Rtraining > 0).sum()
    if not (P is not None or Q is not None or bP is not None or bQ is not None):
        P,Q,bP,bQ = np.loadtxt("P.txt"),np.loadtxt("Q.txt"),np.loadtxt("bP.txt"),np.loadtxt("bQ.txt")
    print R.shape,P.shape,Q.shape
    i = 0
    sum = 0
    sumAbs = 0
    lte1 = 0
    sumlte1 = 0
    errors = []
    for k,v in R.items():
        g = bP[k[0]] + bQ[k[1]] + np.dot(P[k[0],:],Q[:,k[1]])
        #if trunc:
        #    g = min(1,max(5,g))
        #for i in xrange(P.shape[1]):
        #    g += (P[k[0],i]) * (Q[i,k[1]])
        #
        #    if trunc:
        #        g = max(1,min(g,5))
        g = activation(mean + g)
        g = renormalizefloat(g,1,0,5,0)
        if doRound:
            g = round(g)
        e = (v - g)**2
        sumAbs += math.sqrt((v - g)**2)
        errors.append(e)
        if e < 1.00001:
            lte1 += 1
            sumlte1 += e
        sum += e
        #if e > 5:
        #    print i,v,g,e
        i+=1
    rmse = math.sqrt(sum/R.nnz)
    mae = sumAbs / R.nnz
    print "rmse",rmse
    print "mae",sumAbs / R.nnz
    print "lte1",lte1,len(R.items()), lte1/float(len(R.items()))
    print "lte1 rmse",math.sqrt((sumlte1 +1) / (lte1+1))
    print "validation mean",mean
    return rmse,mae,np.array(errors)
def create_bars (h1, E, frict, damp, formulation): # compute all eigenvalues and eigenvectors
  if formulation == 'RO':
    pt0 = 'out/16-bars/MK_%g_%g_%g_%g'%(h1, E, frict, damp)
    sl0 = SOLFEC ('DYNAMIC', 1E-3, pt0)
    bl0 = BULK_MATERIAL (sl0, model = 'KIRCHHOFF', young = E, poisson = PoissonRatio, density = MassDensity)
    bod = BODY (sl0, 'FINITE_ELEMENT', COPY (mesh), bl0)
    eval = [] # selected eigenvalue list
    evec = [] # selected eigenvector list (BODY command takes a tuple (eval, evec) argument for the RO formulation)
    vsel = range (0, 32)
    if 0:
      BODY_MM_EXPORT (bod, pt0+'/M.mtx', pt0+'/K.mtx')
      M = mmread (pt0+'/M.mtx').todense()
      K = mmread (pt0+'/K.mtx').todense()
      for j in range (0, K.shape[1]):
        for i in range (j+1, K.shape[0]):
          K [j, i] = K [i, j] # above diagonal = below diagonal
      x, y = eigh (K, M) # this produces y.T M y = 1 and y.T K y = x
      for j in vsel:
        eval.append (x[j].real)
        for z in y[:,j]:
          evec.append (z.real)
    else:
      data0 = MODAL_ANALYSIS (bod, 45, pt0 + '/modal.data', verbose = 'ON', abstol = 1E-14)
      ndofs = mesh.nnod * 3
      for j in vsel:
        eval.append (data0[0][j])
        for k in range (j*ndofs,(j+1)*ndofs):
          evec.append (data0[1][k])
    data = (eval, evec)

  # 16 bars domain
  sl2 = SOLFEC ('DYNAMIC', h1, 'out/16-bars/%s_%g_%g_%g_%g'%(formulation, h1, E, frict, damp))
  SURFACE_MATERIAL (sl2, model = 'SIGNORINI_COULOMB', friction = frict, restitution = 0.0)
  bl2 = BULK_MATERIAL (sl2, model = 'KIRCHHOFF', young = E, poisson = PoissonRatio, density = MassDensity)
  GRAVITY (sl2, (0, 0, -9.8))
  for i in range (0, nw):
    for j in range (0, nw):
      shp = COPY (mesh)
      TRANSLATE (shp, ((1-nw)*0.05+0.1*i, (1-nw)*0.05+0.1*j, 0))
      if formulation == 'RO':
        bd2 = BODY (sl2, 'FINITE_ELEMENT', shp, bl2, form = formulation, modal = data)
        bd2.scheme = 'DEF_LIM'
        bd2.damping = damp
      elif formulation == 'BC':
        bd2 = BODY (sl2, 'FINITE_ELEMENT', shp, bl2, form = formulation)
        bd2.scheme = 'DEF_LIM'
        bd2.damping = damp
      else:
        bd2 = BODY (sl2, 'RIGID', shp, bl2)
  BODY (sl2, 'OBSTACLE', COPY (obsm), bl2)
  return sl2
def __init__(self,train_file,test_file): """ Read datasets from the specified files. """ train = mmread(train_file) test = mmread(test_file) train = train.tocsc() test = test.tocsc() self.trainXList = [train] self.testXList = [test]
def load_or_create_matrices(): try: csr_sparse_ing = spio.mmread("csr_sparse_ing.mtx") except IOError: csr_sparse_ing = create_csr_sparse_ing() csr_filtered_ing = [] for i in np.arange(1, 11): try: csr_filtered_ing.append(spio.mmread("csr_filtered_ing" + str(i) + ".mtx")) except IOError: csr_filtered_ing.append(create_filtered_csr_ing(csr_sparse_ing, i)) return csr_sparse_ing, csr_filtered_ing
def main(X_fname, Y_fname, result_fname=None): le = LabelEncoder() moves = pandas.read_csv(Y_fname, index_col=0) Y = moves.values.ravel() Y = le.fit_transform(Y) X = io.mmread(X_fname) print X.shape, Y.shape, len(le.classes_) X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33) xg_train = xgboost.DMatrix( X_train, label=y_train) xg_test = xgboost.DMatrix(X_test, label=y_test) param = {} # use softmax multi-class classification param['objective'] = 'multi:softprob' param['eta'] = 0.002 param['max_depth'] = 7 param['nthread'] = 7 param['num_class'] = len(le.classes_) param['eval_metric'] = 'merror' evals = [ (xg_train, 'train'), (xg_test, 'eval') ] # Train xgboost print "Training" t1 = time.time() bst = xgboost.train(param, xg_train, 500, evals, early_stopping_rounds=3) t2 = time.time() print t2-t1 if result_fname is None: result_fname = str(datetime.now()) bst.save_model("%s.bst"%result_fname)
def main(argv): assert len(argv) == 2, "Usage: ./%s NAME.mm" % argv[0] mm_filename = argv[1] x = mmread(mm_filename) w, h = matplotlib.figure.figaspect(x) fig = plt.figure(figsize=(w,h)) ax = fig.add_subplot(111) if SORT: print "Re-arranging rows." xd = x.todense() m, n = xd.shape for i in range(0,m): for j in range(i,m): if xd[j].tolist() > xd[i].tolist(): swap(xd, i, j) x = xd ax.spy(x, markersize=1) ax.xaxis.set_visible(False) ax.yaxis.set_visible(False) extent = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) fig.savefig("%s.pdf" % mm_filename[:-3], bbox_inches=extent)
def load_sparse_matrix(input_format,filepath): """ Load a scipy.sparse.csr_matrix from an input file of the specified format. Parameters ---------- input_format : str Specifies the file format: - tsv - csv - mm (MatrixMarket) - npz (scipy.sparse.csr_matrix serialized with mrec.sparse.savez()) - fsm (mrec.sparse.fast_sparse_matrix) filepath : str The file to load. """ if input_format == 'tsv': return loadtxt(filepath) elif input_format == 'csv': return loadtxt(filepath,delimiter=',') elif input_format == 'mm': return mmread(filepath).tocsr() elif input_format == 'npz': return loadz(filepath).tocsr() elif input_format == 'fsm': return fast_sparse_matrix.load(filepath).X raise ValueError('unknown input format: {0}'.format(input_format))
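# Hypothetical usage of load_sparse_matrix() above; 'ratings.mtx' and 'ratings.tsv'
# are made-up file names used only for illustration.
train = load_sparse_matrix('mm', 'ratings.mtx')     # MatrixMarket file -> csr_matrix
# train = load_sparse_matrix('tsv', 'ratings.tsv')  # whitespace-separated triples
print(train.shape, train.nnz)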
def get_sample_data(n_sess, full_brain=False, subj=1): """ Download the data for the current session and subject Parameters ---------- n_sess: int number of session, one of {0, 1, 2, 3, 4} subj: int number of subject, one of {1, 2} """ DIR = tempfile.mkdtemp() ds = np.DataSource(DIR) BASEDIR = 'http://fa.bianp.net/projects/hrf_estimation/data' BASEDIR_COMMON = BASEDIR + '/data_common/' if full_brain: BASEDIR += '/full_brain' BASEDIR_SUBJ = BASEDIR + '/data_subj%s/' % subj event_matrix = io.mmread(ds.open( BASEDIR_COMMON + 'event_matrix.mtx')).toarray() print('Downloading BOLD signal') voxels = np.load(ds.open( BASEDIR_SUBJ + 'voxels_%s.npy' % n_sess)) # print('Downloading Scatting Stim') # scatt_stim = np.load(ds.open( # BASEDIR_SUBJ + 'scatt_stim_%s.npy' % n_sess)) em = sparse.coo_matrix(event_matrix) fir_matrix = utils.convolve_events(event_matrix, np.eye(HRF_LENGTH)) events_train = sparse.block_diag([event_matrix] * 5).toarray() conditions_train = sparse.coo_matrix(events_train).col onsets_train = sparse.coo_matrix(events_train).row return voxels, conditions_train, onsets_train
def __init__(self, interest_metric, dataset_dir, store_atmost): self.interest_metric = interest_metric self.dataset_dir = dataset_dir self.store_atmost = store_atmost self.u_r_t = mmread(join(dataset_dir, TIMED_INTERESTS_FN)).transpose() self.prediction_times = cPickle.load(open(join(dataset_dir, PREDICTION_TIMES_FN),"rb")) self.NU, self.NR = self.u_r_t.shape
def check_buy(): begin_date = datetime.datetime(2014, 11, 18) end_date = datetime.datetime(2014, 12, 17) data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST) cf_dir = utils.get_data_dir(utils.FLAG_CF) frate_str = "%s/rate_%s_%s" % (cf_dir, begin_date.strftime("%m%d"), end_date.strftime("%m%d")) user_ids_list, item_ids_list, user_ids_dict, item_ids_dict = bcbf.compute_user_item_list(frate_str) rate_matrix = io.mmread("data") rate_matrix = rate_matrix.tolil() buy_date = datetime.datetime(2014, 12, 18) fbuy_str = "%s/data_buy_%s" % (data_dir, buy_date.strftime("%m%d")) count = 0 with open(fbuy_str) as fin: for line in fin: cols = line.strip().split(",") user = cols[0] item = cols[1] if item in item_ids_dict and user in user_ids_dict: u_ix = user_ids_dict[user] i_ix = item_ids_dict[item] print >> sys.stdout, "%s,%s,%d" % (user, item, rate_matrix[(u_ix, i_ix)]) else: count += 1
def get_content_similarity_scores(readmes, dataset_dir, profile="tfidf", similarity="cos"): """Return CSR matrix of similarity_{r,r} for all r in `readmes`. `dataset_dir` the directory where the similarity scores are `profile` bool or tfidf `similarity` cos or ijd (inverse Jacquard Distance) """ if profile == "tfidf": sim_fn = join(dataset_dir, TF_IDF_FN) if exists(sim_fn): return mmread(sim_fn).tocsr() if profile == "bool": #readme_words = COUNTVECTORIZER readmes pass else: tfidf = TfidfVectorizer(input='file', #sublinear_tf=True, max_df=0.5, stop_words='english', decode_error="ignore") #max_df=0.5: if a word occurs in more than half of the readmes it is # ignored readme_words = tfidf.fit_transform(readmes) if similarity == "cos": similarity_scores = csr_matrix(cosine_similarity(readme_words)) else: # similarity_scores = csr_matrix(ijd(readme_words)) pass mmwrite(sim_fn, similarity_scores, comment=profile+"_"+similarity+"_similarity_{r,r}") return similarity_scores
def loadMatrix(name): name=name.split("/").pop() fileName=localRoot+"/"+name+".mtx" if os.path.exists(fileName): return ios.mmread(fileName).tocsc() else: print "Matrix not found! " + fileName
def compute(directory, qdevel, cdevel, constr_dev): qdevel = np.array(sio.mmread(qdevel).todense()) cdevel = np.array(sio.mmread(cdevel).todense()).transpose() constrdevel = np.array(sio.mmread(constr_dev).todense()) os.chdir(directory) for file in glob.glob('*.npy'): flname = file.split('.npy')[0] rep = np.load(file) sio.mmwrite(str(flname)+'.mtx', ss.coo_matrix(rep)) mat = np.dot(qdevel, np.dot(rep, cdevel)) score = evalEdge(mat, constrdevel) fle = open('new_output.txt', 'a') fle.write('file: '+flname+'\t'+str(score)+'\n') fle.close() return 'Done'
def generate_training_validating_rt(version, r_to_i, u_to_i, r_u_t_fn, split, is_test=False): """Function called to generate training.mtx, validating.mtx and recommendation_times.npy """ if is_test: data_processed_dir = join(PROCESSED_DATA_DIR, "test") else: data_processed_dir = PROCESSED_DATA_DIR u_r_times = mmread(r_u_t_fn).transpose().tolil() nu, nr = u_r_times.shape training_matrix = lil_matrix((nu,nr), dtype=np.int_) validating_matrix = lil_matrix((nu,nr), dtype=np.int_) recommendation_times = np.zeros(nu, dtype=np.int_) valid_repositories_table = version+"_repositories" cursor = getDB(is_test=is_test).cursor() for uidx in xrange(nu): cursor.execute("""SELECT vr.id FROM repositories as r, {} as vr WHERE vr.id = r.id AND r.owner_id = %s """.format(valid_repositories_table), (u_to_i.r(uidx),)) owned_rs = np.array([r_to_i[r[0]] for r in cursor]) interests = u_r_times.getrowview(uidx) interested_rs = np.unique(interests.tocoo().col) ext_rs = np.setdiff1d(interested_rs, owned_rs, assume_unique=True) times = interests[0,ext_rs].toarray()[0] sorted_indices = times.argsort() threshold = int(floor(split*len(ext_rs))) training = [ext_rs[i] for i in sorted_indices[:threshold]] threshold_time = times[sorted_indices[threshold]] training += [r for r in owned_rs if interests[0,r] < threshold_time] validating = [ext_rs[i] for i in sorted_indices[threshold:]] for t in training: training_matrix[uidx,t] = 1 for v in validating: validating_matrix[uidx,v] = 1 recommendation_times[uidx] = threshold_time comment=""" Training interests are before validating interests. The split is as follows: Training: all internals before first last 1/3 externals + first 2/3 externals Testing: last 1/3 externals""" version_dir = join(data_processed_dir, version) tfn = join(version_dir, TRAINING_FN) vfn = join(version_dir, VALIDATING_FN) rtfn = join(version_dir, RECOMMENDATION_TIMES_FN) mmwrite(tfn, training_matrix, comment=comment) mmwrite(vfn, validating_matrix, comment=comment) np.save(rtfn, recommendation_times) return (tfn, vfn, rtfn)
def load_rate_data(fin_str,user_ids_dict,item_ids_dict,theta = 0.0):
    # split date is set to 2014-12-17 23
    """
    split_date = datetime.datetime(2014,12,17,23)
    rate_matrix = sparse.lil_matrix((len(user_ids_dict),len(item_ids_dict)))
    i = 0
    with open(fin_str) as fin:
        for line in fin:
            #userid,itemid,cate,rate,lasttime
            i +=1
            '''
            if i%100 == 0:
                print i
            '''
            cols = line.strip().split(',')
            #cur_date = datetime.datetime.strptime(cols[-1],'%Y-%m-%s %H:%M:%S')
            cur_date = datetime.datetime.strptime(cols[-1],'%Y-%m-%d %H')
            days_delta = (split_date-cur_date).days
            # rating with time decay
            rate = int(cols[-2]) * math.exp(-theta * days_delta)
            u_ix = user_ids_dict[cols[0]]
            i_ix = item_ids_dict[cols[1]]
            rate_matrix[u_ix,i_ix] = rate
    io.mmwrite('rate_data_buy',rate_matrix)
    print >> sys.stdout,rate_matrix.nnz
    """
    rate_matrix = io.mmread('rate_data_buy')
    rate_matrix = rate_matrix.tolil()
    print rate_matrix.shape
    print rate_matrix[1,[1,2]].toarray()
    return rate_matrix
def main(): """ Main entry point to script to perform kmeans. Returns: - `0` or `1` on success or failure respectively. - Saves `centroids`, `centroiddict`, and `clusters` in working dir. """ parser = gen_args() args = parser.parse_args() sessionid = args.sessionid data = spio.mmread(args.data).tocsc() logger = logging.getLogger(__name__) logger.addHandler(logging.StreamHandler()) if args.verbose: logger.setLevel(logging.DEBUG) if args.k: k = args.k kmeans = KMeans(data, k, args.n, args.delta, args.randomcentroids, \ args.classical, args.verbose) result = kmeans.run() clusters = result['clusters'] centroids = result['centroids'] centroiddict = result['centroiddict'] cPickle.dump(clusters, open("data_clusters_" + sessionid + '.pck', 'w')) cPickle.dump(centroiddict, open("centroid_dict_" + \ sessionid + '.pck', 'w')) spio.mmwrite(open("data_centroids_" + sessionid + '.mtx', 'w'), \ centroids, comment="CSC Matrix", field='real') logger.info(" %d Clusters Generated ", len(clusters)) return 0
def reduce_to_eig_problem(self): n = self.n range_n = range(n) self.W.export_mtx(self.file_tmp) self.W.delete_rowcols(zeros(n, dtype=int)) self.W.compress() del self.W self.W = 0 collect() self.W = csr_matrix(mmread(self.file_tmp)) # D is a diagonal matrix with sum_j(W[i, j]) at ith diag element data = zeros(n, dtype=float16) for i in range_n: data[i] = self.W.getrow(i).sum() D = csr_matrix((data, (range_n, range_n)), shape=(n, n), dtype=float16) # D^(-1/2) data2 = zeros(n, dtype=float16) for i in range_n: data2[i] = 1 / sqrt(data[i] + 1E-4) D_minus_1_2 = csr_matrix((data2, (range_n, range_n)), shape=(n, n), dtype=float16) A = D_minus_1_2 * (D - self.W) * D_minus_1_2 return A
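# A possible follow-up step (not from the original class): take a few of the smallest
# eigenpairs of the normalized-cut matrix A returned above, e.g. for spectral
# clustering.  'model' is a hypothetical instance of the class, and k=3 is an
# arbitrary illustrative choice; the float64 cast avoids float16 issues in ARPACK.
from scipy.sparse.linalg import eigsh

A = model.reduce_to_eig_problem()
vals, vecs = eigsh(A.astype('float64'), k=3, which='SM')
print(vals)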
import torch import sys sys.path.append('.') sys.path.append('build') import pygunrock as pyg import numpy as np from time import time from tqdm import trange from scipy.io import mmread np.set_printoptions(linewidth=240) # Load graph csr = mmread('chesapeake.mtx').tocsr() # csr = mmread('cit-Patents-sub.mtx').tocsr() n_vertices = csr.shape[0] n_edges = csr.nnz # Convert data to torch + move to GPU indptr = torch.IntTensor(csr.indptr).cuda() indices = torch.IntTensor(csr.indices).cuda() data = torch.FloatTensor(csr.data).cuda() # Allocate memory for output distances = torch.zeros(csr.shape[0]).float().cuda() predecessors = torch.zeros(csr.shape[0]).int().cuda() # Create graph
if __name__ == "__main__": solver_names = ['tpfa', 'vem', 'rt0', 'mpfa'] refinements = ['0', '1', '2'] for refinement in refinements: for solver in solver_names: folder = "./" + solver + "_results_" + refinement + "/" # 1) matrix and grid information file_in = folder + "info.txt" data = read_csv(file_in)[0] data = map(int, map(float, data[:0:-1])) file_in = folder + "matrix.mtx" A = mmread(file_in) data.append(A.shape[0]) data.append(A.nnz) with open(solver + "_results.csv", 'a+') as csvfile: writer = csv.writer(csvfile) writer.writerow(data) # 2) $\int_{\Omega_3,3} \porosity c \, \mathrm{d}x$ $([-])$ vs. time field = "tracer" step = 101 transport_root = folder + "tracer_3_" # in this file the constant data are saved file_in = folder + "sol_3_000000.vtu"
import scipy.sparse as sparse from scipy.io import mmread import petsc4py petsc4py.init() from petsc4py import PETSc import numpy as np # read mtx matrix_name = "bcsstk06.mtx" matrix = mmread(matrix_name) matrix = matrix.toarray() N = matrix.shape[0] # create PETSc comm comm = PETSc.COMM_WORLD size = comm.getSize() rank = comm.getRank() # create PETSc vectors x = PETSc.Vec().create(comm=comm) x.setSizes(N) x.setFromOptions() b = x.duplicate() u = x.duplicate() rstart, rend = x.getOwnershipRange() nlocal = x.getLocalSize() # Create PETSc matrix A = PETSc.Mat().create(comm=comm)
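# A hedged continuation sketch (not part of the original snippet): one way to finish
# assembling A from the dense array read above and solve A x = b with a default KSP.
# The preallocation value and the constant right-hand side are illustrative assumptions.
A.setSizes([N, N])
A.setFromOptions()
A.setPreallocationNNZ(N)
for i in range(rstart, rend):          # fill only the locally owned rows
    for j in range(N):
        if matrix[i, j] != 0.0:
            A.setValue(i, j, matrix[i, j])
A.assemble()

b.set(1.0)                             # simple right-hand side for illustration
ksp = PETSc.KSP().create(comm=comm)
ksp.setOperators(A)
ksp.setFromOptions()
ksp.solve(b, x)
print(ksp.getIterationNumber(), ksp.getResidualNorm())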
# limitations under the License. import numpy as np from scipy.io import mmread from scipy.sparse import hstack import morpheus.normalized_matrix as nm from morpheus.algorithms.logistic_regression import NormalizedLogisticRegression s = np.matrix( np.genfromtxt('./data/Walmart/MLSraw.txt', skip_header=True, dtype=int)).T join_set1 = np.genfromtxt('./data/Walmart/MLFK1.csv', skip_header=True, dtype=int) r1 = mmread('./data/Walmart/MLR1Sparse.txt') join_set2 = np.genfromtxt('./data/Walmart/MLFK2.csv', skip_header=True, dtype=int) r2 = mmread('./data/Walmart/MLR2Sparse.txt') k = [join_set1 - 1, join_set2 - 1] T = hstack((s, r1.tocsr()[k[0]], r2.tocsr()[k[1]])) Y = np.matrix( np.genfromtxt('./data/Walmart/MLY.csv', skip_header=True, dtype=int)).T w_init = np.matrix(np.random.randn(T.shape[1], 1)) w_init2 = np.matrix(w_init, copy=True) gamma = 0.000001 iterations = 20
def Rp(v): """ Gradient """ result = 2 * (A * v - R(v) * B * v) / dot(v.T, B * v) #print "Rp: ", result return result def Rpp(v): """ Hessian """ result = 2 * (A - R(v) * B - outer(B * v, Rp(v)) - outer(Rp(v), B * v)) / dot(v.T, B * v) #print "Rpp: ", result return result A = io.mmread('nos4.mtx') # clustered eigenvalues #B = io.mmread('bcsstm02.mtx.gz') #A = io.mmread('bcsstk06.mtx.gz') # clustered eigenvalues #B = io.mmread('bcsstm06.mtx.gz') n = A.shape[0] B = speye(n, n) random.seed(1) v_0 = random.rand(n) print("try fmin_bfgs") full_output = 1 data = [] v,fopt, gopt, Hopt, func_calls, grad_calls, warnflag, allvecs = \ optimize.fmin_bfgs(R,v_0,fprime=Rp,full_output=full_output,retall=1) if warnflag == 0: plt.semilogy(np.arange(0, len(data)), data)
def SubNxLost(ts, lost_nodes): Nx = nx.from_numpy_matrix( mmread(MakeSample_node_prediction_lost_InputDir + '/adjacency' + str(ts - 1)).toarray()) return nx.Graph(Nx.edges(lost_nodes))
parser.add_argument('--cuda', help='If use GPU', action='store_true') parser.add_argument('--test', help='If test', action='store_true') parser.add_argument('--sparse', help='If use sparse matrix', action='store_true') args = parser.parse_args() # Prepare the training set print('Loading {} data ...'.format('pos')) if not args.sparse: file = os.path.join(args.input, 'pos/feature.npy') with open(file, 'rb') as f: pos_x = np.load(f) else: file = os.path.join(args.input, 'pos/feature.mtx') pos_x = io.mmread(file).tocsc() print('Pos data: ' + str(pos_x.shape[0])) print('Loading {} data ...'.format('neg')) if not args.sparse: file = os.path.join(args.input, 'neg/feature.npy') with open(file, 'rb') as f: neg_x = np.load(f) else: file = os.path.join(args.input, 'neg/feature.mtx') neg_x = io.mmread(file).tocsc() print('Neg data: ' + str(neg_x.shape[0])) if args.feature > 0: pos_x = pos_x[:, :args.feature] neg_x = neg_x[:, :args.feature]
def train_test_split(X: sp.csr_matrix): row_indices = get_unique_nonzero_indices(X) train_data = [] test_data = [] for row_index in row_indices: col_indices = X.getrow(row_index).indices test_index = np.random.choice(col_indices, 1)[0] train_data.extend([(row_index, col_index) for col_index in col_indices if col_index != test_index]) test_data.append((row_index, test_index)) return train_data, test_data def save_pair_data(data: list, file_path): with open(file_path, 'w') as f: f.write("uid, tid\n") for uid, tid in data: f.write("%d %d\n" % (uid, tid)) return if __name__ == '__main__': print("Reading .mtx file...") mtx_data: sp.csr_matrix = mmread(percent_subset_data_path).tocsr() train_data, test_data = train_test_split(mtx_data) train_data_path = "30music_train.txt" save_pair_data(train_data, train_data_path) test_data_path = "30music_test.txt" save_pair_data(test_data, test_data_path)
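# The helper used above is not shown in this snippet; a minimal implementation that is
# consistent with how it is called (unique row indices having at least one nonzero)
# could look like this.  It is an assumption, not the original project code.
import numpy as np
import scipy.sparse as sp

def get_unique_nonzero_indices(X: sp.csr_matrix) -> np.ndarray:
    # rows of X that contain at least one interaction
    return np.unique(X.nonzero()[0])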
def LoadMTX(path): mtx = mmread(str(path)) hypergraph = FromSparseMatrix(mtx.T) return hypergraph
from scipy.io import mminfo, mmread A = mmread('MatrixMarket_MHM_subproblem.mtx') import matplotlib matplotlib.use('PDF') import matplotlib.pyplot as plt plt.spy(A, markersize=1) plt.savefig('MatrixMarket_MHM_subproblem')
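# mminfo is imported above but never used; it can report the MatrixMarket header
# (dimensions, entry count, format, field, symmetry) without loading the full matrix:
rows, cols, entries, fmt, field, symmetry = mminfo('MatrixMarket_MHM_subproblem.mtx')
print(rows, cols, entries, fmt, field, symmetry)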
def link_prediction(n_appeared, p_appeared, n_disappeared, p_disappeared,
                    n_new, p_new, n_lost, p_lost, is_train, is_valid, is_test):
    probability_appeared_InputDir, num_appeared_InputDir = get_appeared_InputDirs(
        p_appeared, n_appeared)
    appeared_edge_pred_set_list, appeared_edge_true_set_list, recall_appeared_edge, precision_appeared_edge, f1_score_appeared_edge = get_component_result(
        "edge", probability_appeared_InputDir, num_appeared_InputDir,
        all_node_num, is_train, is_valid, is_test)

    probability_disappeared_InputDir, num_disappeared_InputDir = get_disappeared_InputDirs(
        p_disappeared, n_disappeared)
    disappeared_edge_pred_set_list, disappeared_edge_true_set_list, recall_disappeared_edge, precision_disappeared_edge, f1_score_disappeared_edge = get_component_result(
        "edge", probability_disappeared_InputDir, num_disappeared_InputDir,
        all_node_num, is_train, is_valid, is_test)

    probability_new_InputDir, num_new_InputDir = get_new_InputDirs(
        p_new, n_new)
    new_edge_pred_set_list, new_edge_true_set_list, recall_new_edge, precision_new_edge, f1_score_new_edge = get_component_result(
        "edge", probability_new_InputDir, num_new_InputDir,
        all_node_num + n_expanded, is_train, is_valid, is_test)

    probability_lost_InputDir, num_lost_InputDir = get_lost_InputDirs(
        p_lost, n_lost)
    lost_node_pred_set_list, lost_node_true_set_list, recall_lost_node, precision_lost_node, f1_score_lost_node = get_component_result(
        "node", probability_lost_InputDir, num_lost_InputDir,
        all_node_num, is_train, is_valid, is_test)
    lost_edge_pred_set_list, lost_edge_true_set_list, recall_lost_edge, precision_lost_edge, f1_score_lost_edge = get_edge_connected_lost_node(
        probability_lost_InputDir, lost_node_pred_set_list,
        lost_node_true_set_list, is_train, is_valid, is_test)

    # compute the combined result:
    # (link set at t) + (appeared link set) + (new link set) - (disappeared link set) - (lost link set)
    ts_list = get_ts_list(probability_appeared_InputDir, is_train, is_valid, is_test)
    ts_c_pred_A = []
    for i, ts in enumerate(ts_list):
        ts_train, ts_test, ts_all = TsSplit(ts, L)
        t_edge_set = set()
        for edge in nx.from_numpy_matrix(
                mmread(MakeSample_node_prediction_lost_InputDir + '/adjacency' +
                       str(ts_train[-1])).toarray()).edges:
            t_edge_set.add(frozenset({edge[0], edge[1]}))

        appeared_edge_pred_set = appeared_edge_pred_set_list[i]
        appeared_edge_true_set = appeared_edge_true_set_list[i]
        assert len(
            t_edge_set & appeared_edge_true_set) == 0, "the link set at t and the appeared (link) set must not overlap"
        assert len(
            t_edge_set & appeared_edge_pred_set) == 0, "the link set at t and the appeared (link) set must not overlap"

        disappeared_edge_pred_set = disappeared_edge_pred_set_list[i]
        disappeared_edge_true_set = disappeared_edge_true_set_list[i]
        assert len(t_edge_set & disappeared_edge_true_set) == len(
            disappeared_edge_true_set), "the disappeared (link) set must be contained in the link set at t"
        assert len(t_edge_set & disappeared_edge_pred_set) == len(
            disappeared_edge_pred_set), "the disappeared (link) set must be contained in the link set at t"

        new_edge_pred_set = new_edge_pred_set_list[i]
        new_edge_true_set = new_edge_true_set_list[i]
        assert len(t_edge_set & new_edge_true_set) == 0, "the link set at t and the new (link) set must not overlap"
        assert len(t_edge_set & new_edge_pred_set) == 0, "the link set at t and the new (link) set must not overlap"

        lost_node_pred_set = lost_node_pred_set_list[i]
        lost_edge_pred_set = lost_edge_pred_set_list[i]
        lost_edge_true_set = lost_edge_true_set_list[i]
        assert len(t_edge_set & lost_edge_true_set) == len(
            lost_edge_true_set), "the lost (link) set must be contained in the link set at t"
        assert len(t_edge_set & lost_edge_pred_set) == len(
            lost_edge_pred_set), "the lost (link) set must be contained in the link set at t"

        pred_set = [set() for _ in range(16)]
        # bit order: appeared : disappeared : new : lost
        # do nothing  0000
        pred_set[0] = t_edge_set
        # only lost uses the best method  0001
        pred_set[1] = t_edge_set - lost_edge_pred_set
        pred_set[1] = delete_lost_node(pred_set[1], lost_node_pred_set)
        # only new uses the best method  0010
        pred_set[2] = t_edge_set | new_edge_pred_set
        # lost and new use the best method  0011
        pred_set[3] = (t_edge_set | new_edge_pred_set) - lost_edge_pred_set
        pred_set[3] = delete_lost_node(pred_set[3], lost_node_pred_set)
        # only disappeared uses the best method  0100
        pred_set[4] = t_edge_set - disappeared_edge_pred_set
        # disappeared and lost use the best method  0101
        pred_set[5] = (t_edge_set - disappeared_edge_pred_set) - lost_edge_pred_set
        pred_set[5] = delete_lost_node(pred_set[5], lost_node_pred_set)
        # disappeared and new use the best method  0110
        pred_set[6] = (t_edge_set | new_edge_pred_set) - disappeared_edge_pred_set
        # disappeared, new and lost use the best method  0111
        pred_set[7] = ((t_edge_set | new_edge_pred_set) -
                       disappeared_edge_pred_set) - lost_edge_pred_set
        pred_set[7] = delete_lost_node(pred_set[7], lost_node_pred_set)
        # only appeared uses the best method  1000
        pred_set[8] = t_edge_set | appeared_edge_pred_set
        # appeared and lost use the best method  1001
        pred_set[9] = (t_edge_set | appeared_edge_pred_set) - lost_edge_pred_set
        pred_set[9] = delete_lost_node(pred_set[9], lost_node_pred_set)
        # appeared and new use the best method  1010
        pred_set[10] = (t_edge_set | appeared_edge_pred_set) | new_edge_pred_set
        # appeared, new and lost use the best method  1011
        pred_set[11] = ((t_edge_set | appeared_edge_pred_set) | new_edge_pred_set) - lost_edge_pred_set
        pred_set[11] = delete_lost_node(pred_set[11], lost_node_pred_set)
        # appeared and disappeared use the best method  1100
        pred_set[12] = (t_edge_set | appeared_edge_pred_set) - disappeared_edge_pred_set
        # appeared, disappeared and lost use the best method  1101
        pred_set[13] = ((t_edge_set | appeared_edge_pred_set) - disappeared_edge_pred_set) - lost_edge_pred_set
        pred_set[13] = delete_lost_node(pred_set[13], lost_node_pred_set)
        # appeared, disappeared and new use the best method  1110
        pred_set[14] = ((t_edge_set | appeared_edge_pred_set) | new_edge_pred_set) - disappeared_edge_pred_set
        # appeared, disappeared, new and lost use the best method  1111
        pred_set[15] = (((t_edge_set | appeared_edge_pred_set) | new_edge_pred_set) -
                        disappeared_edge_pred_set) - lost_edge_pred_set
        pred_set[15] = delete_lost_node(pred_set[15], lost_node_pred_set)

        pred_A_list = []
        for c_idx in range(16):
            pred_G = nx.Graph()
            pred_G.add_edges_from(
                [tuple(froset) for froset in pred_set[c_idx]])
            pred_A = np.array(
                nx.to_numpy_matrix(
                    pred_G,
                    nodelist=[
                        node for node in range(all_node_num + n_expanded)
                    ]))
            pred_A_list.append(pred_A)
        ts_c_pred_A.append(pred_A_list)

    return np.array(ts_c_pred_A)
# coding: utf-8 # In[3]: from __future__ import division import scipy as sp import numpy as np from scipy import io import itertools import math import time # In[4]: data = io.mmread("data/netflix_mm_10000_1000") data.shape # In[18]: def RMSE(data, latent): userOffset = 0 movieOffset = data.shape[0] cx = data.tocoo() err = 0 for user, movie, rating in itertools.izip(cx.row, cx.col, cx.data): vUser = latent[user + userOffset] vMovie = latent[movie + movieOffset] err += (vUser.dot(vMovie) - rating)**2 #print "%f %f" % (vUser.dot(vMovie), rating)
import pandas as pd from scipy.io import mmread data_mtx = mmread("salmon_output/alevin/quants_mat.mtx").toarray() cols = open("salmon_output/alevin/quants_mat_cols.txt", "r") rows = open("salmon_output/alevin/quants_mat_rows.txt", "r") cols_list = [] rows_list = [] for line in cols: cols_list.append(line.strip("\n")) for line in rows: rows_list.append(line.strip("\n")) df = pd.DataFrame(data_mtx, columns=cols_list, index=rows_list) df = df.T x = open("final_mtx.csv", "w") x.write(df.to_csv())
from make_poisson import *

#----------------------------------------------------------------------------
parser = argparse.ArgumentParser(sys.argv[0])

parser.add_argument('-A,--matrix', dest='A', help='System matrix in MatrixMarket format')
parser.add_argument('-f,--rhs', dest='f', help='RHS in MatrixMarket format')
parser.add_argument('-n,--size', dest='n', type=int, default=64, help='The size of the Poisson problem to solve when no system matrix is given')
parser.add_argument('-o,--out', dest='x', help='Output file name')
parser.add_argument('-p,--prm', dest='p', help='AMGCL parameters: key1=val1 key2=val2', nargs='+', default=[])

args = parser.parse_args(sys.argv[1:])

#----------------------------------------------------------------------------
if args.A:
    A = mmread(args.A)
    f = mmread(args.f).flatten() if args.f else np.ones(A.shape[0])
else:
    A,f = make_poisson(args.n)

# Parse parameters
prm = {p[0]: p[1] for p in map(lambda s: s.split('='), args.p)}

# Create preconditioner
P = amg.make_preconditioner(A, prm)
print(P)

iters = [0]
def count_iters(x):
    iters[0] += 1
from scipy import sparse, io from sklearn import svm from sklearn.model_selection import KFold from sklearn.metrics import precision_recall_fscore_support from sklearn.feature_extraction.text import CountVectorizer from sklearn.feature_selection import chi2, RFE from sklearn.linear_model import LogisticRegression import pickle import matplotlib.pyplot as plt '''NEW SCRIPT''' scores = [] stats_path = "./NEW_STATS_1/ALL_BALANCED" lieb_train = io.mmread('./lieb/lieb_balanced_train.mtx') lieb_train = lieb_train.tocsc() gonz_train = io.mmread('./gonzalez/gonz_balanced_train.mtx') gonz_train = gonz_train.tocsc() bush_train = io.mmread('./bush/bush_balanced_train.mtx') bush_train = bush_train.tocsc() josh_train = io.mmread('./joshi/jc_balanced_train.mtx') josh_train = josh_train.tocsc() train_labels = np.loadtxt('./ddata/balanced_train_labels.txt', dtype=np.int32) all_feat_train = sparse.hstack( (lieb_train, gonz_train[:, -7:], bush_train[:, -8:-4], josh_train[:, -64:-9], josh_train[:, -5:])) model = LogisticRegression()
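# A sketch of how the pieces imported above could be wired together (this evaluation
# loop is not part of the original snippet): 5-fold cross-validation over the stacked
# feature matrix, collecting precision/recall/F1 per fold.  average='binary' assumes
# two-class labels.
X = all_feat_train.tocsr()   # sparse.hstack() returns COO; CSR allows row indexing
y = train_labels
kf = KFold(n_splits=5, shuffle=True, random_state=0)
for train_idx, test_idx in kf.split(X):
    model.fit(X[train_idx], y[train_idx])
    pred = model.predict(X[test_idx])
    p, r, f1, _ = precision_recall_fscore_support(y[test_idx], pred, average='binary')
    scores.append((p, r, f1))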
filename = "" # create variable for output filename outputFile = "APSP_report.txt" # loop over the list for i in graphList: # zero out the inFile filename = "" # create the new one filename = str(i) + "-vertices_degree-6.mtx" # read graph into a SciPy sparse graph A = csc_matrix(sio.mmread(filename), dtype=np.int32) # get the start time start_time = time.time() # run the APSP algorithm on the graph path = floyd_warshall(A) # get the stop time and compute the difference elapsed_time = time.time() - start_time # open the report file in APPEND mode outFile = open(outputFile, "a") # write the runtime for the graph to the file outFile.write("Runtime for %s is %.6f ms\n\n" % (filename, (elapsed_time*1000)))
# ######################## Initialise AnnData ############################### # # ########################################################################### # if not args.loom == "none": adata = scv.read(args.loom) # get directory with metadata + barcodes metadata_dir = args.rdims.split("/")[0] elif not args.dropest_dir == "none": exon_matrix = os.path.join(args.dropest_dir, "exons.mtx.gz") intron_matrix = os.path.join(args.dropest_dir, "introns.mtx.gz") spanning_matrix = os.path.join(args.dropest_dir, "spanning.mtx.gz") exons = io.mmread(exon_matrix).transpose().tocsr() introns = io.mmread(intron_matrix).transpose().tocsr() spanning = io.mmread(spanning_matrix).transpose().tocsr() adata = ad.AnnData(X=exons) adata.layers["spliced"] = adata.X adata.layers["unspliced"] = introns adata.layers["ambiguous"] = spanning adata.obs.index = [ x for x in pd.read_csv(os.path.join(args.dropest_dir, "barcodes.tsv.gz"), header=None)[0].values ] metadata_dir = args.dropest_dir
"weight":J_E, "delay":delay}) nest.CopyModel("static_synapse_hom_w", "inhibitory", {"weight":J_I, "delay":delay}) nest.CopyModel("static_synapse_hom_w", "excitatory-input", {"weight":J_E, "delay":delay}) # ########################################### # Projections # ########################################### A=mmread('../ee.wmat') rows,cols = A.nonzero() if (plastic): nest.Connect(rows+1, cols+1, {"rule": "one_to_one"}, model="excitatory-plastic") else: nest.Connect(rows+1, cols+1, {"rule": "one_to_one"}, model="excitatory-input") A=mmread('../ei.wmat') rows,cols = A.nonzero() nest.Connect(rows+1, cols+N_E+1, {"rule": "one_to_one"}, model="excitatory-input") A=mmread('../ie.wmat') rows,cols = A.nonzero() nest.Connect(rows+1+N_E, cols+1, {"rule": "one_to_one"}, syn_spec="inhibitory") A=mmread('../ii.wmat')
#Setup solver parameters solver_parameters = NT.SolverParameters() solver_parameters.SetConvergeDiff(convergence_threshold) solver_parameters.SetThreshold(threshold) solver_parameters.SetVerbosity(False) #Run the ScipyMatrixGenerator script to generate a random hamiltonian of size rows x rows #Also construct the overlap matrix subprocess.run([ "python3", "ScipyMatrixGenerator.py", '--rows', str(rows), '--density', str(density) ]) #hamiltonian = mmread("hamiltonian.mtx").toarray() hamiltonians.append(mmread("hamiltonian.mtx").toarray()) overlap = sparse.identity(rows, format='coo', dtype='complex') mmwrite("overlap", overlap) ntpoly_hamiltonian = NT.Matrix_ps("hamiltonian.mtx") ntpoly_overlap = NT.Matrix_ps("overlap.mtx") Density = NT.Matrix_ps(ntpoly_hamiltonian.GetActualDimension()) #Compute the density matrix energy_value, chemical_potential = \ NT.DensityMatrixSolvers.TRS2(ntpoly_hamiltonian, ntpoly_overlap, number_of_electrons, Density, solver_parameters) print(chemical_potential) #Output density matrix Density.WriteToMatrixMarket(density_file_out) ntpoly_hamiltonian.WriteToMatrixMarket("test.mtx")
def NodeAttribute(ts): return mmread(InputDir + '/node_attribute' + str(ts)).toarray()
def Nx(ts): return nx.from_numpy_matrix(mmread(InputDir + '/adjacency' + str(ts)).toarray())
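# Hypothetical usage of the Nx() helper above: load two consecutive snapshots and
# compare their edge sets (the timestep numbers are arbitrary examples).
G_prev, G_curr = Nx(4), Nx(5)
appeared = set(G_curr.edges()) - set(G_prev.edges())
disappeared = set(G_prev.edges()) - set(G_curr.edges())
print(len(appeared), len(disappeared))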
weights = model.get_weights() weights = [np.random.permutation(w.flat).reshape(w.shape) for w in weights] model.set_weights(weights) if args.run_on_cpu: with tf.device("/cpu:0"): for i in range(0, 1): encoding_dim = 2 n_hidden_1 = 200 n_hidden_2 = 50 print "Building positive and negative report matrices..." pos_reports = io.mmread('model_0_posreports.mtx') pos_reports = pos_reports.tocsr() neg_reports = io.mmread('model_0_negreports.mtx') neg_reports = neg_reports.tocsr() for reportblock in range(1, 50): print "Procesing", reportblock thispos = io.mmread('model_' + str(reportblock) + '_posreports.mtx') thispos = thispos.tocsr() pos_reports = vstack((pos_reports, thispos)) thisneg = io.mmread('model_' + str(reportblock) + '_negreports.mtx') thisneg = thisneg.tocsr()
#usage: converts binary .mat file to csr file import sys import numpy as np import scipy.io as sio # load the input file matfile = sys.argv[1] mat = sio.mmread(matfile) # symmetricize the matrix mat = mat + mat.transpose() # convert to csr mat = mat.tocsr() # eliminate the diagonal mat.setdiag(0) mat.eliminate_zeros() # output format fgraph outfile = matfile.split('.')[0] + '.graph' f = open(outfile, 'w') #writing list of list in the outfile mlol = [list(line.nonzero()[1]) for line in mat] #getting dimension nv = mat.get_shape()[0] nnz = mat.getnnz() # number of non-zeros in the sparse matrix
#random((K,1)) randomly generates a Kx1 matrix with values between 0 and 1
#reshape(random((K,1))/10*(np.sqrt(K)),K) converts the generated K-row, 1-column matrix into a flat list of K elements
    pu[uid] = np.reshape(random((K, 1)) / 10 * (np.sqrt(K)), K)

for pid in range(n_items):
    qi[pid] = np.reshape(random((K, 1)) / 10 * (np.sqrt(K)), K)

# load the model
# user and item indices
users_index = pickle.load(open("users_index.pkl", 'rb'))
items_index = pickle.load(open("items_index.pkl", 'rb'))

n_users = len(users_index)
n_items = len(items_index)

# user-item relation matrix R
user_item_scores = sio.mmread("user_item_scores")

# inverted lists
## songs played by each user
user_items = pickle.load(open("user_items.pkl", 'rb'))
## users attending each event
item_users = pickle.load(open("item_users.pkl", 'rb'))

# similarity between all users
#similarity_matrix_users = pickle.load(open("/data/weixin-38664232/my-dataset/users_similarity_playcount.pkl", 'rb'))
# similarity between all items
#similarity_matrix_items = pickle.load(open("/data/weixin-38664232/my-dataset/items_similarity_playcount.pkl", 'rb'))

# average rating of each user
from morpheus.algorithms.kmeans import NormalizedKMeans import numpy as np from scipy.io import mmread from scipy.sparse import hstack import morpheus.normalized_matrix as nm s = np.matrix([]) join_set1 = np.genfromtxt('./data/BookCrossing/MLFK1.csv', skip_header=True, dtype=int) num_s = len(join_set1) num_r1 = max(join_set1) r1 = mmread('./data/BookCrossing/MLR1Sparse.txt', ) join_set2 = np.genfromtxt('./data/BookCrossing/MLFK2.csv', skip_header=True, dtype=int) num_s = len(join_set2) num_r2 = max(join_set2) r2 = mmread('./data/BookCrossing/MLR2Sparse.txt', ) Y = np.matrix( np.genfromtxt('./data/BookCrossing/MLY.csv', skip_header=True, dtype=int)).T k = [join_set1 - 1, join_set2 - 1] T = hstack((r1.tocsr()[k[0]], r2.tocsr()[k[1]])) iterations = 1
def configure_and_run_brian2genn(simtime_in_s, num_threads): # ########################################### # Configuration # ########################################### numpy.random.seed(98765) set_device('genn') defaultclock.dt = 0.1*ms #prefs.devices.genn.path="..." alternative to GENN_PATH # ########################################### # Network parameters # ########################################### taum = 20*ms taue = 5*ms taui = 10*ms Vt = -50*mV Vr = -60*mV El = -60*mV Erev_exc = 0.*mV Erev_inh = -80.*mV I = 20. * mvolt # ########################################### # Neuron model # ########################################### eqs = ''' dv/dt = (ge*(Erev_exc-v)+gi*(Erev_inh-v)-(v-El) + I)*(1./taum) : volt (unless refractory) dge/dt = -ge/taue : 1 dgi/dt = -gi/taui : 1 ''' # ########################################### # Population # ########################################### NE = 3200 NI = NE/4 P = NeuronGroup(NE+NI, eqs, threshold='v>Vt', reset='v = Vr', refractory=5*ms, method='euler') P.v = (randn(len(P)) * 5. - 55.) * mvolt Pe = P[:NE] Pi = P[NE:] # ########################################### # Projections # ########################################### we = 0.6 # excitatory synaptic weight (voltage) wi = 6.7 # inhibitory synaptic weight conn_ee = Synapses(Pe,Pe,model="w:1",on_pre='ge += w', method='euler') conn_ei = Synapses(Pe,Pi,model="w:1",on_pre='ge += w', method='euler') conn_ie = Synapses(Pi,Pe,model="w:1",on_pre='gi += w', method='euler') conn_ii = Synapses(Pi,Pi,model="w:1",on_pre='gi += w', method='euler') ee_mat = mmread('ee.wmat') ei_mat = mmread('ei.wmat') ie_mat = mmread('ie.wmat') ii_mat = mmread('ii.wmat') conn_ee.connect(i=ee_mat.row, j=ee_mat.col) conn_ee.w=we conn_ei.connect(i=ei_mat.row, j=ei_mat.col) conn_ei.w=we conn_ie.connect(i=ie_mat.row, j=ie_mat.col) conn_ie.w=wi conn_ii.connect(i=ii_mat.row, j=ii_mat.col) conn_ii.w=wi # ########################################### # Simulation # ########################################### s_mon = SpikeMonitor(P) # Run for 0 second in order to measure compilation time run(simtime_in_s * second) totaltime = device._last_run_time print('Done in', totaltime) # ########################################### # Data analysis # ########################################### f = figure() plot(s_mon.t/ms, s_mon.i, '.') xlabel('Time (ms)') ylabel('Neuron index') f.savefig("brian2genn_raster_plot.png") return totaltime
def read_mtx_file(mm_file): print('Reading ' + str(mm_file) + '...') return mmread(mm_file).asfptype()
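# Typical follow-up for the helper above: .asfptype() makes the sparse matrix suitable
# for ARPACK, e.g. computing a few extremal eigenvalues.  The file name and k are
# illustrative only, and eigsh assumes the matrix is symmetric.
from scipy.sparse.linalg import eigsh

A = read_mtx_file('example.mtx')
vals = eigsh(A, k=6, return_eigenvectors=False)
print(vals)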
def populate(self): logger.info("Preprocessing dataset") was_extracted = False if len(self.filenames) > 0: file_path = os.path.join(self.save_path, self.filenames[0]) if not os.path.exists(file_path[:-7]): # nothing extracted yet if tarfile.is_tarfile(file_path): logger.info("Extracting tar file") tar = tarfile.open(file_path, "r:gz") tar.extractall(path=self.save_path) was_extracted = True tar.close() # get exact path of the extract, for robustness to changes is the 10X storage logic path_to_data, suffix = self.find_path_to_data() # get filenames, according to 10X storage logic measurements_filename = "genes.tsv" if suffix == "" else "features.tsv.gz" barcode_filename = "barcodes.tsv" + suffix matrix_filename = "matrix.mtx" + suffix expression_data = sp_io.mmread( os.path.join(path_to_data, matrix_filename)).T if self.dense: expression_data = expression_data.A else: expression_data = csr_matrix(expression_data) # group measurements by type (e.g gene, protein) # in case there are multiple measurements, e.g protein # they are indicated in the third column gene_expression_data = expression_data measurements_info = pd.read_csv(os.path.join(path_to_data, measurements_filename), sep="\t", header=None) Ys = None if measurements_info.shape[1] < 3: gene_names = measurements_info[ self.measurement_names_column].astype(np.str) else: gene_names = None for measurement_type in np.unique(measurements_info[2]): # .values required to work with sparse matrices measurement_mask = ( measurements_info[2] == measurement_type).values measurement_data = expression_data[:, measurement_mask] measurement_names = measurements_info[ self.measurement_names_column][measurement_mask].astype( np.str) if measurement_type == "Gene Expression": gene_expression_data = measurement_data gene_names = measurement_names else: Ys = [] if Ys is None else Ys if measurement_type == "Antibody Capture": measurement_type = "protein_expression" columns_attr_name = "protein_names" # protein counts do not have many zeros so always make dense if self.dense is not True: measurement_data = measurement_data.A else: measurement_type = measurement_type.lower().replace( " ", "_") columns_attr_name = measurement_type + "_names" measurement = CellMeasurement( name=measurement_type, data=measurement_data, columns_attr_name=columns_attr_name, columns=measurement_names, ) Ys.append(measurement) if gene_names is None: raise ValueError( "When loading measurements, no 'Gene Expression' category was found." ) batch_indices, cell_attributes_dict = None, None if os.path.exists(os.path.join(path_to_data, barcode_filename)): barcodes = pd.read_csv(os.path.join(path_to_data, barcode_filename), sep="\t", header=None) cell_attributes_dict = { "barcodes": np.squeeze(np.asarray(barcodes, dtype=str)) } # As of 07/01, 10X barcodes have format "%s-%d" where the digit is a batch index starting at 1 batch_indices = np.asarray([ barcode.split("-")[-1] for barcode in cell_attributes_dict["barcodes"] ]) batch_indices = batch_indices.astype(np.int64) - 1 logger.info("Finished preprocessing dataset") self.populate_from_data( X=gene_expression_data, batch_indices=batch_indices, gene_names=gene_names, cell_attributes_dict=cell_attributes_dict, Ys=Ys, ) self.filter_cells_by_count() # cleanup if required if was_extracted and self.remove_extracted_data: logger.info("Removing extracted data at {}".format(file_path[:-7])) shutil.rmtree(file_path[:-7])
def load(self, data): try: if isinstance(data, str) and ('.csv' in data or '.tsv' in data or '.txt' in data): logger.info('Reading data...') sep = self.which_sep(data) if self.to_transpose(sep, data): dat = pd.read_csv(data, sep=sep, header=0, index_col=0).T else: dat = pd.read_csv(data, sep=sep, header=0, index_col=0) elif isinstance(data, str): logger.info( 'Importing 10X data from directory. Directory must contain barcodes.tsv, features.tsv, matrix.mtx, tissue_positions_list.csv' ) # find the barcodes file from 10X directory file_barcodes = [ str(x) for x in Path(data).rglob("*barcodes.tsv*") ] if len(file_barcodes) == 0: logger.error( 'There is no barcode.tsv file in the 10X directory.') file_barcodes = file_barcodes[0] barcodes = np.asarray(pd.read_csv(file_barcodes, header=None)).flatten() # find the features file from 10X directory file_features = [ str(x) for x in Path(data).rglob("*features.tsv*") ] if len(file_features) == 0: logger.error( 'There is no features.tsv file in the 10X directory.') file_features = file_features[0] genes = np.asarray( pd.read_csv(file_features, sep='\t', header=None)) genes = genes[:, 1] # find the tissue_position_list file from 10X directory file_coords = [ str(x) for x in Path(data).rglob("*tissue_positions_list.csv*") ] if len(file_coords) == 0: logger.error( 'There is no tissue_positions_list.csv file in the 10X directory.' ) file_coords = file_coords[0] coords = np.asarray( pd.read_csv(file_coords, sep=',', header=None)) d = dict() for row in coords: d[row[0]] = str(row[2]) + 'x' + str(row[3]) inds = [] coords2 = [] for i, barcode in enumerate(barcodes): if barcode in d.keys(): inds.append(i) coords2.append(d[barcode]) # find the count matrix file file_matrix = [ str(x) for x in Path(data).rglob("*matrix.mtx*") ] if len(file_matrix) == 0: logger.error( 'There is no matrix.mtx file in the 10X directory.') file_matrix = file_matrix[0] matrix = mmread(file_matrix).toarray() logger.info(str(barcodes) + ' ' + str(barcodes.shape)) logger.info(str(genes) + ' ' + str(genes.shape)) logger.info(str(coords) + ' ' + str(coords.shape)) logger.info(str(matrix.shape)) matrix = matrix[:, inds] genes, inds2 = np.unique(genes, return_index=True) matrix = matrix[inds2, :] dat = pd.DataFrame(matrix, index=genes, columns=coords2) logger.info(str(dat)) else: dat = pd.DataFrame(data) except: raise Exception("Incorrect input format") logger.info('coords ' + str(len(dat.columns.values))) logger.info('genes ' + str(len(dat.index.values))) data = dat.values logger.info(str(data.shape)) self.rows = dat.index.values self.columns = dat.columns.values return (dat, data)
basis='631g', symmetry=True, ) mf = dft.RKS(mol) #mf.xc = 'blyp' # shorthand for b88,lyp mf.xc = 'pbe' # shorthand for pbe,pbe #mf.xc = 'lda,vwn_rpa' #mf.xc = 'pbe0' #mf.xc = 'b3lyp' # this where self-content diagonalization happens mf.kernel() # Orbital energies, Mulliken population etc. mf.analyze() # Get the converged density matrix (it generates the density matrix) dm = mf.make_rdm1() mmwrite('dft_density.mtx', sparse.coo_matrix(dm)) # Get the nuclear-nuclear repulsion energy e_nuc = mf.energy_nuc() # Get the 'core' hamiltonian, corresponding to kinetic energy and e-nuclear repulsion terms h1e = mf.get_hcore() ovlp = mmread("dft_overlap.mtx").toarray() h1e_eigs = linalg.eigvalsh(h1e, ovlp) print(h1e_eigs)