def aff_cluster(Sfn, conv_iter=15, max_iter=2000, damping=0.95, mpi=None,
                verbose=False, debug=False, *args, **kwargs):
    """Cluster the similarity matrix stored in an HDF5 file over MPI.

    The function iterates damped responsibility / availability updates
    (affinity-propagation style message passing) on the square dataset
    'cluster' of the file `Sfn`.  Each rank works on its own block of rows;
    the full responsibility / availability matrices live in a temporary
    shared HDF5 file that is removed at the end.  When the per-rank row
    block does not fit into the available memory, a per-rank temporary
    HDF5 file is used as a cache ("disk" mode).

    On rank 0 the resulting labels and exemplar indices are written back
    to `Sfn` as the datasets 'aff_labels' and 'aff_centers' (existing
    datasets with these names are replaced).  Nothing is returned.

    Parameters
    ----------
    Sfn : str
        Path of the HDF5 file.  It must contain a square dataset
        'cluster' carrying a 'preference' attribute.
    conv_iter : int
        Number of consecutive iterations the exemplar set has to stay
        unchanged; must satisfy 0 < conv_iter < max_iter.
    max_iter : int
        Maximum number of iterations; must be positive.
    damping : float
        Damping factor, 0.5 <= damping < 1.
    mpi : tuple
        (comm, NPROCS, rank) triple describing the MPI communicator.
    verbose : bool
        Print progress and timing information on rank 0.
    debug, *args, **kwargs
        Accepted for interface compatibility; not used by this function.

    Raises
    ------
    TypeError
        If `mpi` is None.
    ValueError
        On rank 0, if the matrix is not square, the preference attribute
        cannot be read, or conv_iter / max_iter / damping are out of range.
    MemoryError
        On rank 0, if not even a single matrix row fits into the memory
        budget computed for one process.

    Notes
    -----
    The environment variable OMPI_COMM_WORLD_LOCAL_SIZE must be set; it is
    used to split the available memory between the local processes.
    """
    if mpi is None:
        raise TypeError('mpi must be a (comm, NPROCS, rank) tuple, got None')
    comm, NPROCS, rank = mpi
    NPROCS_LOCAL = int(os.environ['OMPI_COMM_WORLD_LOCAL_SIZE'])

    # Open the matrix file in parallel (MPI-IO) mode.
    SSf = h5py.File(Sfn, 'r+', driver='mpio', comm=comm)
    SSf.atomic = True
    # Dataset with the data for clusterization and its file dataspace.
    SS = SSf['cluster']
    SSs = SS.id.get_space()

    # Run parameters; filled in on rank 0 and broadcast to every rank.
    params = {
        'N': 0,
        'l': 0,
        'll': 0,
        'TMfn': '',
        'disk': False,
        'preference': 0.0
    }
    P = Bunch(params)

    ft = np.float32

    if rank == 0:
        # NOTE(review): validation happens on rank 0 only while the other
        # ranks proceed to comm.bcast below - confirm how the launcher
        # handles an exception raised here.
        N, N1 = SS.shape
        if N != N1:
            raise ValueError(
                "S must be a square array (shape=%s)" % repr((N, N1)))
        else:
            P.N = N

        try:
            preference = SS.attrs['preference']
        except Exception:
            raise ValueError('Unable to get preference from cluster matrix')

        if max_iter <= 0:
            raise ValueError('max_iter must be > 0')

        if not 0 < conv_iter < max_iter:
            raise ValueError(
                'conv_iter must lie in interval between 0 and max_iter')

        if damping < 0.5 or damping >= 1:
            raise ValueError('damping must lie in interval between 0.5 and 1')

        print('%s Main params %s' % ('#' * 10, '#' * 10))
        print('preference: %.3f' % preference)
        print('damping: %.3f' % damping)
        print('conv_iter: %d' % conv_iter)
        print('max_iter: %d' % max_iter)
        print('#' * 31)

        # Unique base name shared by the temporary files of this run.
        P.TMbfn = str(uuid.uuid1())
        P.TMfn = P.TMbfn + '.hdf5'

        # Magic 4 to fit MPI.Gather: the matrix is truncated so that every
        # rank owns a multiple of 4 rows.
        # NOTE(review): the int8 flag buffers gathered further down are
        # described to MPI as MPI.INT, which appears to be why the
        # per-rank length must be a multiple of 4 - confirm.
        r = N % (NPROCS * 4)
        N -= r
        l = N // NPROCS
        if r > 0:
            print('Truncating matrix to %sx%s to fit on %d procs'
                  % (N, N, NPROCS))
        P.N = N

        # Fit to memory: budget of one process on this node.
        MEM = psutil.virtual_memory().available / NPROCS_LOCAL
        # Size of one matrix row (itemsize is expressed in bytes).
        ts = np.dtype(ft).itemsize * N
        # Sizing factor kept from the original heuristic.
        # NOTE(review): eight (ll, N)-sized float32 work buffers are
        # allocated below (tS, tRold, tR, tRp, tRpa, tAS, tAold, tA),
        # which is presumably what the factor 8 accounts for; 1.1 looks
        # like head-room for the smaller arrays - confirm.
        ts *= 8 * 1.1
        # Number of rows that fit into the budget.
        tl = int(MEM // ts)

        def adjust_cache(tl, l):
            """Shrink `tl` until it divides `l` without remainder."""
            while float(l) % float(tl) > 0:
                tl -= 1
            return tl

        if tl < l:
            # The row block of one rank does not fit: process it in
            # chunks of `tl` rows and keep the rest on disk.
            if tl < 1:
                raise MemoryError(
                    'Not enough memory to hold a single row of the '
                    '%dx%d matrix' % (N, N))
            P.disk = True
            tl = adjust_cache(tl, l)
            P.l = l
            P.ll = tl
        else:
            P.l = l
            P.ll = l

        if verbose:
            print("Available memory per process: %.2fG" % (MEM / 10.0 ** 9))
            print("Memory per row: %.2fM" % (ts / 10.0 ** 6))
            print("Estimated memory per process: %.2fG"
                  % (ts * P.ll / 10.0 ** 9))
            print('Cache size is %d of %d' % (P.ll, P.l))

    P = comm.bcast(P)

    N = P.N      # (possibly truncated) matrix dimension
    l = P.l      # rows owned by one rank
    ll = P.ll    # rows processed per chunk

    # Memory dataspaces for a chunk of rows and for a single row.
    ms = h5s.create_simple((ll, N))
    ms_l = h5s.create_simple((N, ))

    # NOTE(review): presumably the [tb, te) row range owned by this rank;
    # `task` is defined outside this function.
    tb, te = task(N, NPROCS, rank)

    tS = np.ndarray((ll, N), dtype=ft)
    tSl = np.ndarray((N, ), dtype=ft)

    disk = P.disk

    if disk is True:
        # Per-rank scratch file holding this rank's rows of S (and R).
        TMLfd = tempfile.mkdtemp()
        TMLfn = osp(TMLfd, P.TMbfn + '_' + str(rank) + '.hdf5')
        TMLf = h5py.File(TMLfn, 'w')
        TMLf.atomic = True

        S = TMLf.create_dataset('S', (l, N), dtype=ft)
        Ss = S.id.get_space()

    # Most negative float32; used to mask the row maximum below.
    z = -np.finfo(ft).max

    # Copy the input data into tS (and into the scratch file in disk
    # mode).  In the in-memory case ll == l, so tS ends up holding the
    # whole row block of this rank.
    for i in range(tb, te, ll):
        SSs.select_hyperslab((i, 0), (ll, N))
        SS.id.read(ms, SSs, tS)

        if disk is True:
            Ss.select_hyperslab((i - tb, 0), (ll, N))
            S.id.write(ms, Ss, tS)

    if disk is True:
        R = TMLf.create_dataset('R', (l, N), dtype=ft)
        Rs = R.id.get_space()

    tRold = np.zeros((ll, N), dtype=ft)
    tR = np.zeros((ll, N), dtype=ft)
    tdR = np.zeros((l, ), dtype=ft)

    # Shared storage: every rank reads/writes Rp and A collectively.
    TMf = h5py.File(P.TMfn, 'w', driver='mpio', comm=comm)
    TMf.atomic = True

    Rp = TMf.create_dataset('Rp', (N, N), dtype=ft)
    Rps = Rp.id.get_space()

    tRp = np.ndarray((ll, N), dtype=ft)
    tRpa = np.ndarray((N, ll), dtype=ft)

    A = TMf.create_dataset('A', (N, N), dtype=ft)
    As = A.id.get_space()

    tAS = np.ndarray((ll, N), dtype=ft)
    tAold = np.ndarray((N, ll), dtype=ft)
    # tA must start at zero: in the in-memory case it is copied into
    # tAold on the very first iteration, before it has been computed.
    tA = np.zeros((N, ll), dtype=ft)
    tdA = np.ndarray((l, ), dtype=ft)

    # Exemplar flags of the last `conv_iter` iterations (ring buffer),
    # the flags of the current iteration, and this rank's part of them.
    e = np.ndarray((N, conv_iter), dtype=np.int8)
    tE = np.ndarray((N, ), dtype=np.int8)
    ttE = np.ndarray((l, ), dtype=np.int8)

    converged = False
    cK = 0   # consecutive iterations with an unchanged exemplar count
    K = 0    # current number of exemplars
    ind = np.arange(ll)

    for it in range(max_iter):
        if rank == 0:
            if verbose is True:
                print('=' * 10 + 'It %d' % (it) + '=' * 10)
                tit = time.time()

        # Compute responsibilities
        for i in range(tb, te, ll):
            if disk is True:
                il = i - tb
                Ss.select_hyperslab((il, 0), (ll, N))
                S.id.read(ms, Ss, tS)
                Rs.select_hyperslab((il, 0), (ll, N))
                R.id.read(ms, Rs, tRold)
            else:
                tRold = tR.copy()

            As.select_hyperslab((i, 0), (ll, N))
            A.id.read(ms, As, tAS)
            tAS += tS

            # Largest and second largest value of A + S in every row.
            tI = bn.nanargmax(tAS, axis=1)
            tY = tAS[ind, tI]
            tAS[ind, tI[ind]] = z
            tY2 = bn.nanmax(tAS, axis=1)

            tR = tS - tY[:, np.newaxis]
            tR[ind, tI[ind]] = tS[ind, tI[ind]] - tY2[ind]
            tR = (1 - damping) * tR + damping * tRold

            # Rp is R clipped at zero, except for the diagonal entries,
            # which are kept unclipped (and remembered in tdR).
            tRp = np.maximum(tR, 0)

            for il in range(ll):
                tRp[il, i + il] = tR[il, i + il]
                tdR[i - tb + il] = tR[il, i + il]

            if disk is True:
                R.id.write(ms, Rs, tR)

            Rps.select_hyperslab((i, 0), (ll, N))
            Rp.id.write(ms, Rps, tRp)

        if rank == 0:
            if verbose is True:
                teit1 = time.time()
                print('R T %s' % (teit1 - tit))

        # Every rank must have written its rows of Rp before columns of
        # it are read back below.
        comm.Barrier()

        # Compute availabilities
        for j in range(tb, te, ll):
            As.select_hyperslab((0, j), (N, ll))

            if disk is True:
                A.id.read(ms, As, tAold)
            else:
                tAold = tA.copy()

            Rps.select_hyperslab((0, j), (N, ll))
            Rp.id.read(ms, Rps, tRpa)

            tA = bn.nansum(tRpa, axis=0)[np.newaxis, :] - tRpa
            # Save the diagonal before clipping ...
            for jl in range(ll):
                tdA[j - tb + jl] = tA[j + jl, jl]

            tA = np.minimum(tA, 0)

            # ... and restore it afterwards, so only off-diagonal entries
            # are clipped.
            for jl in range(ll):
                tA[j + jl, jl] = tdA[j - tb + jl]

            tA *= (1 - damping)
            tA += damping * tAold

            # Damped diagonal, used for the exemplar test below.
            for jl in range(ll):
                tdA[j - tb + jl] = tA[j + jl, jl]

            A.id.write(ms, As, tA)

        if rank == 0:
            if verbose is True:
                teit2 = time.time()
                print('A T %s' % (teit2 - teit1))

        # A point is flagged as exemplar when A(i, i) + R(i, i) > 0.
        ttE = np.array(((tdA + tdR) > 0), dtype=np.int8)

        if NPROCS > 1:
            comm.Gather([ttE, MPI.INT], [tE, MPI.INT])
            comm.Bcast([tE, MPI.INT])
        else:
            tE = ttE

        e[:, it % conv_iter] = tE
        pK = K
        K = bn.nansum(tE)

        if rank == 0:
            if verbose is True:
                teit = time.time()
                cc = ''
                # Count how long the number of exemplars has been stable
                # (informational only; the real test follows below).
                if K == pK:
                    cK += 1
                    if cK > 1:
                        cc = ' Conv %d of %d' % (cK, conv_iter)
                else:
                    cK = 0

                print('Total K %d T %s%s' % (K, teit - tit, cc))

        if it >= conv_iter:

            if rank == 0:
                # Converged when every point was flagged in all of the
                # last conv_iter iterations or in none of them.
                se = bn.nansum(e, axis=1)
                converged = (bn.nansum((se == conv_iter) + (se == 0)) == N)

                if (converged == np.bool_(True)) and (K > 0):
                    if verbose is True:
                        print("Converged after %d iterations." % (it))
                    converged = True
                else:
                    converged = False

            converged = comm.bcast(converged, root=0)

        if converged is True:
            break

    if not converged and verbose and rank == 0:
        print("Failed to converge after %d iterations." % (max_iter))

    if K > 0:

        # Indices of the points flagged as exemplars.
        I = np.nonzero(e[:, 0])[0]
        C = np.zeros((N, ), dtype=int)
        tC = np.zeros((l, ), dtype=int)

        # Assign every local row to its most similar exemplar.
        for i in range(l):
            if disk is True:
                Ss.select_hyperslab((i, 0), (1, N))
                S.id.read(ms_l, Ss, tSl)
            else:
                tSl = tS[i]

            tC[i] = bn.nanargmax(tSl[I])

        comm.Gather([tC, MPI.INT], [C, MPI.INT])

        if rank == 0:
            C[I] = np.arange(K)

        comm.Bcast([C, MPI.INT])

        # Refine every exemplar: pick the cluster member with the largest
        # summed similarity to the other members.  The members are
        # distributed round-robin over the ranks.
        for k in range(K):
            ii = np.where(C == k)[0]
            tN = ii.shape[0]

            tI = np.zeros((tN, ), dtype=np.float32)
            ttI = np.zeros((tN, ), dtype=np.float32)
            tttI = np.zeros((tN, ), dtype=np.float32)
            ms_k = h5s.create_simple((tN, ))

            j = rank
            while j < tN:
                ind = [(ii[i], ii[j]) for i in range(tN)]
                SSs.select_elements(ind)
                SS.id.read(ms_k, SSs, tttI)

                ttI[j] = bn.nansum(tttI)
                j += NPROCS

            comm.Reduce([ttI, MPI.FLOAT], [tI, MPI.FLOAT])

            if rank == 0:
                I[k] = ii[bn.nanargmax(tI)]

        I.sort()
        comm.Bcast([I, MPI.INT])

        # Re-assign the labels against the refined exemplars.
        for i in range(l):
            if disk is True:
                Ss.select_hyperslab((i, 0), (1, N))
                S.id.read(ms_l, Ss, tSl)
            else:
                tSl = tS[i]

            tC[i] = bn.nanargmax(tSl[I])

        comm.Gather([tC, MPI.INT], [C, MPI.INT])

        if rank == 0:
            C[I] = np.arange(K)

    else:
        if rank == 0:
            # No exemplars: empty results, so that nothing is written
            # below (a 0-d array would report size 1).
            I = np.zeros((0, ), dtype=int)
            C = np.zeros((0, ), dtype=int)

    # Cleanup
    SSf.close()
    TMf.close()

    if disk is True:
        TMLf.close()
        shutil.rmtree(TMLfd)

    # Make sure every rank has closed the shared file before removing it.
    comm.Barrier()

    if rank == 0:

        os.remove(P.TMfn)

        if verbose:
            print('APN: %d' % K)

        if I.size and C.size:

            # Store the result next to the input matrix (serial access).
            Sf = h5py.File(Sfn, 'r+', driver='sec2')

            if 'aff_labels' in Sf.keys():
                del Sf['aff_labels']

            LM = Sf.require_dataset('aff_labels', shape=C.shape, dtype=int)
            LM[:] = C[:]

            if 'aff_centers' in Sf.keys():
                del Sf['aff_centers']

            CM = Sf.require_dataset('aff_centers', shape=I.shape, dtype=int)
            CM[:] = I[:]
            Sf.close()
from os.path import join as osp
import time

# Installed packages.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

########################################################################
# FILES

# Directory with the mnist files. These were decompressed with
# 'gzip -d <file>'.
MNIST_DIR = 'mnist'


def _in_mnist_dir(file_name):
    """Return the path of `file_name` inside MNIST_DIR."""
    return osp(MNIST_DIR, file_name)


TRAIN_IMAGES_FILE = _in_mnist_dir('train-images-idx3-ubyte')
TRAIN_LABELS_FILE = _in_mnist_dir('train-labels-idx1-ubyte')
TEST_IMAGES_FILE = _in_mnist_dir('t10k-images-idx3-ubyte')
TEST_LABELS_FILE = _in_mnist_dir('t10k-labels-idx1-ubyte')
TRAIN_FILE_CHAMBERLAND = _in_mnist_dir('mnist_train.csv')
TEST_FILE_CHAMBERLAND = _in_mnist_dir('mnist_test.csv')

# Output directory.
OUT_DIR = osp('..', '..', 'Challenges', '2Submissions', 'team1')


def _in_out_dir(file_name):
    """Return the path of `file_name` inside OUT_DIR."""
    return osp(OUT_DIR, file_name)


# Output files.
LR_PRED_FILE = _in_out_dir('2challenge_logreg.csv')
LR_COEFF_FILE = _in_out_dir('2challenge_logreg_vectors.csv')
KNN_PRED_FILE = _in_out_dir('2challenge_knn.csv')
########################################################################
#! /usr/bin/env python import os from flask import Flask, redirect, render_template, request, session import yaml import uuid import zipfile import urllib #from urlparse import urlparse as urlparse from mendeley import Mendeley from mendeley.session import MendeleySession from syncrm import cli from argparse import Namespace from os.path import join as osp from os.path import dirname as dirname with open(osp(dirname(__file__), 'config.yml'), 'r') as f: config = yaml.load(f) REDIRECT_URI = 'http://*****:*****@app.route('/') def home(): if 'token' in session: return redirect('/listDocuments')
ms = h5s.create_simple((ll, N)) ms_l = h5s.create_simple((N,)) ms_e = h5s.create_simple((1,)) tb, te = task(rank, l) tS = np.ndarray((ll, N), dtype=ft) tSl = np.ndarray((N,), dtype=ft) tdS = np.ndarray((1,), dtype=ft) disk = P.disk if disk is True: TMLfd = tempfile.mkdtemp() TMLfn = osp(TMLfd, P.TMfn + '_' + str(rank) + '.hdf5') TMLf = h5py.File(TMLfn, 'w') S = TMLf.create_dataset('S', (l, N), dtype=ft) Ss = S.id.get_space() #Copy input data and #place preference on diagonal preference = P.preference random_state = np.random.RandomState(0) x = np.finfo(ft).eps y = np.finfo(ft).tiny * 100 z = - np.finfo(ft).max for i in xrange(tb, te, ll): SSs.select_hyperslab((i, 0), (ll, N))