Example #1
  def build_sparse_crank_nicolson(s):
    """(internal) Set up the sparse matrices for the Crank-Nicolson method. """

    A = sparse.lil_matrix((s.n, s.n))
    C = sparse.lil_matrix((s.n, s.n))

    for j in xrange(0, s.n):
      xd = j+1+s.xs
      ssxx = (s.sigma * xd) ** 2

      A[j,j] = 1.0 - 0.5*s.dt*(ssxx + s.r)
      C[j,j] = 1.0 + 0.5*s.dt*(ssxx + s.r)
      
      if j > 0:
        A[j,j-1] = 0.25*s.dt*(+ssxx - s.r*xd)
        C[j,j-1] = 0.25*s.dt*(-ssxx + s.r*xd)
      if j < s.n-1:
        A[j,j+1] = 0.25*s.dt*(+ssxx + s.r*xd)
        C[j,j+1] = 0.25*s.dt*(-ssxx - s.r*xd)

    s.A = A.tocsr()
    s.C = linsolve.splu(C)              # perform sparse LU decomposition

    # Buffer to store right-hand side of the linear system Cu = v
    s.v = empty((s.n, ))
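The comment above describes the role of v. A minimal sketch, assuming the setup above has run, of how one Crank-Nicolson time step would use these pieces (hypothetical helper, boundary adjustments omitted): form the right-hand side with A, then back-substitute through the stored LU factors of C.

def step_crank_nicolson(s, u):
    # Hypothetical helper, not part of the original snippet.
    # u holds the current grid values of length s.n.
    s.v[:] = s.A.dot(u)       # right-hand side v = A u (boundary terms omitted)
    return s.C.solve(s.v)     # solve C u_new = v with the precomputed LU factors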
Example #2
File: base.py Project: aweinstein/nipype
 def _submit_mapnode(self, jobid):
     if jobid in self.mapnodes:
         return True
     self.mapnodes.append(jobid)
     mapnodesubids = self.procs[jobid].get_subnodes()
     numnodes = len(mapnodesubids)
     logger.info('Adding %d jobs for mapnode %s' % (numnodes,
                                                    self.procs[jobid]._id))
     for i in range(numnodes):
         self.mapnodesubids[self.depidx.shape[0] + i] = jobid
     self.procs.extend(mapnodesubids)
     self.depidx = ssp.vstack((self.depidx,
                               ssp.lil_matrix(np.zeros(
                                   (numnodes, self.depidx.shape[1])))),
                              'lil')
     self.depidx = ssp.hstack((self.depidx,
                               ssp.lil_matrix(
                                   np.zeros((self.depidx.shape[0],
                                             numnodes)))),
                              'lil')
     self.depidx[-numnodes:, jobid] = 1
     self.proc_done = np.concatenate((self.proc_done,
                                      np.zeros(numnodes, dtype=bool)))
     self.proc_pending = np.concatenate((self.proc_pending,
                                         np.zeros(numnodes, dtype=bool)))
     return False
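The vstack/hstack pair above grows the lil-format dependency matrix by one block of rows and columns for the new sub-nodes. A self-contained sketch of that growth pattern with toy sizes (not nipype's actual data):

import numpy as np
import scipy.sparse as ssp

dep = ssp.lil_matrix((2, 2))          # existing dependency matrix
numnodes = 3                          # number of sub-nodes being added
dep = ssp.vstack((dep, ssp.lil_matrix(np.zeros((numnodes, dep.shape[1])))), 'lil')
dep = ssp.hstack((dep, ssp.lil_matrix(np.zeros((dep.shape[0], numnodes)))), 'lil')
print(dep.shape)                      # (5, 5)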
Example #3
    def compute_cond(self, X, y):
        self.knn = NearestNeighbors(self.k).fit(X)
        c = sparse.lil_matrix((self.num_labels, self.k + 1), dtype="i8")
        cn = sparse.lil_matrix((self.num_labels, self.k + 1), dtype="i8")

        label_info = get_matrix_in_format(y, "dok")

        neighbors = self.knn.kneighbors(X, self.k, return_distance=False)
        for instance in xrange(self.num_instances):
            deltas = label_info[neighbors[instance], :].sum(axis=0)
            for label in xrange(self.num_labels):
                if label_info[instance, label] == 1:
                    c[label, deltas[0, label]] += 1
                else:
                    cn[label, deltas[0, label]] += 1

        c_sum = c.sum(axis=1)
        cn_sum = cn.sum(axis=1)

        cond_prob_true = sparse.lil_matrix((self.num_labels, self.k + 1), dtype="float")
        cond_prob_false = sparse.lil_matrix((self.num_labels, self.k + 1), dtype="float")
        for label in xrange(self.num_labels):
            for neighbor in xrange(self.k + 1):
                cond_prob_true[label, neighbor] = (self.s + c[label, neighbor]) / (
                    self.s * (self.k + 1) + c_sum[label, 0]
                )
                cond_prob_false[label, neighbor] = (self.s + cn[label, neighbor]) / (
                    self.s * (self.k + 1) + cn_sum[label, 0]
                )
        return cond_prob_true, cond_prob_false
Example #4
    def __init__(self, name):
        ManyBodyHam.__init__(self)
        self.mbhd = np.zeros((self.mbDim))
        self.mbhc = sparse.lil_matrix((self.mbDim, self.mbDim), dtype=complex)
        self.mbhr = sparse.lil_matrix((self.mbDim, self.mbDim), dtype=float)

        self.name = name
Example #5
	def criarMatrizes(self, listaCompleta):
		# matriz1 = numpy.zeros((self.grupo.numeroDeMembros(), self.grupo.numeroDeMembros()), dtype=numpy.int32)
		# matriz2 = numpy.zeros((self.grupo.numeroDeMembros(), self.grupo.numeroDeMembros()), dtype=numpy.float32)
		matriz1 = sparse.lil_matrix((self.grupo.numeroDeMembros(), self.grupo.numeroDeMembros()))
		matriz2 = sparse.lil_matrix((self.grupo.numeroDeMembros(), self.grupo.numeroDeMembros()))

		keys = listaCompleta.keys()
		keys.sort(reverse=True)
		for k in keys:
			for pub in listaCompleta[k]:

				numeroDeCoAutores = len(pub.idMembro)
				if numeroDeCoAutores>1:
					# For all co-authors of the publication:
					# (1) update the collaboration counter (adjacency)
					# (2) increment the collaboration 'frequency'
					combinacoes = self.calcularCombinacoes(pub.idMembro)
					for c in combinacoes:
						matriz1[c[0] , c[1]] += 1
						matriz1[c[1] , c[0]] += 1

						matriz2[c[0] , c[1]] += 1.0/(numeroDeCoAutores-1)
						matriz2[c[1] , c[0]] += 1.0/(numeroDeCoAutores-1)

		return [matriz1, matriz2]
Example #6
    def compute_Belief_Prop(H):
        ''' generate the matrices P, S_ and B from H '''

        global B, P, S_, q, m, n

        m, n = np.shape(H)
        q = np.count_nonzero(H)
        P = lil_matrix((q, q))
        S_ = lil_matrix((q, q))

        # couple the nonzero entries (edges) of H that share a column
        k = 0
        for j in range(n):
            I = np.nonzero(H[:, j])[0]
            for x in range(len(I)):
                for y in range(x + 1, len(I)):
                    P[k + x, k + y] = 1
                    P[k + y, k + x] = 1
            k += len(I)

        # couple the nonzero entries (edges) of H that share a row
        k = 0
        for i in range(m):
            J = np.nonzero(H[i, :])[0]
            for x in range(len(J)):
                for y in range(x + 1, len(J)):
                    S_[k + x, k + y] = 1
                    S_[k + y, k + x] = 1
            k += len(J)

        # B maps each of the q nonzero entries of H (scanned row by row)
        # to the column it sits in
        B = lil_matrix((q, n))
        row = 0
        for i in range(m):
            for j in np.nonzero(H[i, :])[0]:
                B[row, j] = 1
                row += 1
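A small, hypothetical parity-check matrix to exercise the routine above, assuming it is reachable as a plain function (its enclosing scope is not shown here); the results land in the module-level globals it declares.

H_demo = np.array([[1, 1, 0, 1],
                   [0, 1, 1, 1]])     # q = 6 nonzero entries
compute_Belief_Prop(H_demo)
print((P.shape, S_.shape, B.shape))   # (6, 6), (6, 6), (6, 4)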
Example #7
    def QPModel(self, addW=False):
        A = self.A
        c = self.c
        s = CyClpSimplex()
        
        x = s.addVariable('x', self.nCols)
        if addW:
            w = s.addVariable('w', self.nCols)
        
        s += A * x >= 1
        n = self.nCols
        
        if not addW:
            s += 0 <= x <= 1
        else:
            s += x + w == 1
            s += 0 <= w <= 1

##        s += -1 <= x <= 1

        s.objective = c * x
       
        if addW:
            G = sparse.lil_matrix((2*n, 2*n))
            for i in xrange(n/2, n): #xrange(n-1):
                G[i, i] = 1
            G[2*n-1, 2*n-1] = 10**-10
        else:
            G = sparse.lil_matrix((n, n))
            for i in xrange(n/2, n): #xrange(n-1):
                G[i, i] = 1

    
        s.Hessian = G
        return s
Example #8
def convert_graph_connectivity_to_sparse(G, nodes):
	"""
		Given a networkx graph, return sparse adjacency matrices S and H
		S and H are different in that S's entries contain edge weights
		(if there are multiple edges, the behavior is to overwrite),
		and H just has a 1 for every non-zero entry.

		The edge data right now is ((strand1, start1, end1),(strand2, start2, end2), score)
	"""
	n = len(nodes)
	S = sparse.lil_matrix((n,n))
	H = sparse.lil_matrix((n,n))
	nodes_to_index = dict(zip(nodes,range(n)))
	for e in G.edges_iter(data=True):
		i = nodes_to_index[e[0]]
		j = nodes_to_index[e[1]]
		try:
			w = e[2][2]
		except:
			w = e[2]
		S[i,j] = w
		S[j,i] = w
		H[i,j] = 1
		H[j,i] = 1
	# we do a lot of column-slicing, so convert to CSC for efficiency	
	S = S.tocsc()	
	H = H.tocsr()

	return S,H
Example #9
    def get_corr_pred( self, sctx, u, tn, tn1 ):
        ndofs = self.domain.n_dofs
        #self.K.data[::] = 0.0
        self.K = sparse.lil_matrix((ndofs, ndofs), dtype=float_)
        self.F_int[:]    = 0.0
        e_arr_size      = self.e_arr_size

        for elem in sctx.sdomain.elements:
            e_id = elem.id_number
            ix = elem.get_dof_map()
            sctx.elem = elem
            sctx.elem_state_array = sctx.state_array[ e_id*e_arr_size : (e_id+1)*e_arr_size ]
            sctx.X = elem.get_X_mtx()
            f, k = self.fets_eval.get_corr_pred( sctx, u[ix_(ix)], tn, tn1 )
            #self.K_temp.data[:][:] = 0.
            self.K_temp = sparse.lil_matrix((ndofs, ndofs), dtype=float_)
            a = 0
            for i in ix:
                self.K_temp.rows[i] = ix
                self.K_temp.data[i][:] = k[a][:]
                a += 1
                #print K_temp
            self.K = self.K + self.K_temp
            self.F_int[ ix_(ix) ] += f


        return self.F_int, self.K
Example #10
def partition_train_data(
    counts,
    nonzero,
    percent=0.8,
    num_users=NUM_USER,
    num_items=NUM_SONG
):
    print "Start to partition data...\n"
    t0 = time.time()
    num_train = int(np.floor(nonzero * percent))
    num_validate = int(nonzero - num_train)

    shuffle_index = range(nonzero)
    np.random.shuffle(shuffle_index)

    validate_index = shuffle_index[:num_validate]
    validate_index.sort()  # the matching loop below assumes sorted indices

    validate_counts = sparse.lil_matrix((num_users, num_items), dtype=np.int32)
    idx, curr = 0, 0
    counts = sparse.lil_matrix(counts)
    counts_coo = counts.tocoo()
    for row, col, count in itertools.izip(counts_coo.row,
                                          counts_coo.col,
                                          counts_coo.data):
        if idx < num_validate and validate_index[idx] == curr:
            validate_counts[row, col] = count
            counts[row, col] = 0
            idx += 1
        curr += 1
    t1 = time.time()
    print 'Finished partitioning data in %f seconds\n' % (t1 - t0)
    return counts.tocsr(), validate_counts.tocoo()
Example #11
    def transformation_matrices(self):
        """Returns the sparse transformation matrix to turn quantities
        defined on faces to loop-star basis

        For vector quantities, assumes that the face-based quantity has been
        packed to a 2D array of size (n_basis*3, n_basis*3)

        For scalar quantities, assumes that the face-based quantity has been
        packed to a 2D array of size (n_basis, n_basis)
        """

        num_basis = len(self)
        num_tri = len(self.mesh.polygons)
        # scalar_transform = np.zeros((num_basis, num_tri), np.float64)
        # vector_transform=np.zeros((num_basis, 3*num_tri), np.float64)
        scalar_transform = lil_matrix((num_basis, num_tri))
        vector_transform = lil_matrix((num_basis, 3*num_tri))

        for basis_count, (tri_p, tri_m, node_p, node_m) in enumerate(self):
            scalar_transform[basis_count, tri_p] = 1.0
            scalar_transform[basis_count, tri_m] = -1.0

            vector_transform[basis_count, tri_p*3+node_p] = 1.0
            vector_transform[basis_count, tri_m*3+node_m] = -1.0

        return vector_transform.tocsr(), scalar_transform.tocsr()
Example #12
File: ROI.py Project: losonczylab/sima
def _reformat_mask(mask):
    """Convert mask to a list of sparse matrices (scipy.sparse.lil_matrix)

    Accepts a 2 or 3D array, a list of 2D arrays, or a sequence of sparse
    matrices.

    Parameters
    ----------
    mask : a 2 or 3 dimensional numpy array, a list of 2D numpy arrays, or a
        sequence of sparse matrices.  Masks are assumed to follow a (z, y, x)
        convention.  If mask is a list of 2D arrays or of sparse matrices, each
        element is assumed to correspond to the mask for a single plane (and is
        assumed to follow a (y, x) convention)
    """
    if isinstance(mask, np.ndarray):
        # user passed in a 2D or 3D np.array
        if mask.ndim == 2:
            mask = [lil_matrix(mask, dtype=mask.dtype)]
        elif mask.ndim == 3:
            new_mask = []
            for s in range(mask.shape[0]):
                new_mask.append(lil_matrix(mask[s, :, :], dtype=mask.dtype))
            mask = new_mask
        else:
            raise ValueError('numpy ndarray must be either 2 or 3 dimensions')
    elif issparse(mask):
        # user passed in a single lil_matrix
        mask = [lil_matrix(mask)]
    else:
        new_mask = []
        for plane in mask:
            new_mask.append(lil_matrix(plane, dtype=plane.dtype))
        mask = new_mask
    return mask
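A short, hypothetical usage of the function above (assuming numpy is imported as np): a 3-plane boolean mask comes back as a list of three lil_matrix objects, one per z-plane.

demo_mask = np.zeros((3, 128, 128), dtype=bool)
demo_mask[1, 40:60, 40:60] = True
planes = _reformat_mask(demo_mask)
print((len(planes), planes[1].nnz))   # (3, 400)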
Example #13
    def __init__(self):
        self.review_data = pd.read_csv(review_fileName)
        print 'Finished loading data...'

        # Mapping all business_ids
        self.business_ids = list()
        for business in self.review_data['business_id']:
            self.business_ids.append(business)
        unique_business_ids = list(set(self.business_ids))
        self.n_businesses = len(unique_business_ids)

        self.business_dict = dict()
        for index, b_id in enumerate(unique_business_ids):
            self.business_dict[index] = b_id
            self.business_dict[b_id] = index

        # Mapping all user_ids
        self.user_ids = list()
        for user in self.review_data['user_id']:
            self.user_ids.append(user)
        unique_user_ids = list(set(self.user_ids))
        self.n_users = len(unique_user_ids)

        self.user_dict = dict()
        for index, u_id in enumerate(unique_user_ids):
            self.user_dict[index] = u_id
            self.user_dict[u_id] = index

        self.reviews = lil_matrix((self.n_businesses, n_words))
        self.ratings = lil_matrix((self.n_users, self.n_businesses))
Example #14
def build_CF_matrix(CF_categories, dammage_factors, H, EF_list_for_CF_global, EF_list, 
                           CF_matrices, EF_list_for_CF_per_category, impact_method):
    from scipy.sparse import lil_matrix, find
    from copy import deepcopy
    
    #building a transient matrix (the columns correspond to the system set up by the impact method, NOT the one of ecoinvent)
    transient_CF = lil_matrix((len(CF_categories[impact_method]), len(EF_list_for_CF_global)))
    for [matrix_line, matrix_column, CF] in H:
        transient_CF[matrix_line, matrix_column] = CF
    
    #building the matrix
    CF_matrices[impact_method] = lil_matrix((len(CF_categories[impact_method]), len(EF_list)))
    for category in CF_categories[impact_method]:
        matrix_line = CF_categories[impact_method].index(category)
        for EF in EF_list:
            column_number_EF = EF_list.index(EF)
            if EF_list_for_CF_per_category[category].count(EF): #if the exact EF is found in the list of EFs with a CF in this specific category
                column_number_CF = EF_list_for_CF_global.index(EF) #find the number in the global list
            else:
                EF_transient = deepcopy(EF)
                EF_transient[2] = '(unspecified)'
                #an EF without an exact match will receive the (unspecified) CF if the compartment and EF ID match
                if EF_list_for_CF_per_category[category].count(EF_transient):
                    column_number_CF = EF_list_for_CF_global.index(EF_transient)
                else: #otherwise, no match, the CF is left to zero
                    column_number_CF = 'NA'
            if column_number_CF != 'NA':
                CF_matrices[impact_method][matrix_line, column_number_EF] = transient_CF[matrix_line, column_number_CF]
    del transient_CF
    
    del H, EF_list_for_CF_global, EF_list_for_CF_per_category, dammage_factors

    return CF_matrices
Example #15
def getColumnSum(subTermDoc, avg=False):

    """
    Receives a sub term-document matrix and an optional flag for returning
    the average instead of the sum.
    """

    sumVector = sparse.lil_matrix((2,subTermDoc.shape[1]))
    sumVector = sumVector.todense()

    if avg:
        counter = 0

    for i in range(1, subTermDoc.shape[0]):
        row = subTermDoc.getrow(i)
        row = row.todense()[0,1:]

        sumVector[1,1:] += row

        if avg:
            counter+=1

    if avg:
        sumVector[1,1:]/=counter
        
    return sparse.lil_matrix(sumVector)
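A hypothetical input illustrating the convention above: row 0 and column 0 are skipped, so only the lower-right block contributes to the sums (assumes numpy as np and scipy.sparse as sparse are imported).

std = sparse.lil_matrix(np.array([[0., 0., 0.],
                                  [0., 1., 2.],
                                  [0., 3., 4.]]))
print(getColumnSum(std).todense())            # [[0. 0. 0.], [0. 4. 6.]]
print(getColumnSum(std, avg=True).todense())  # [[0. 0. 0.], [0. 2. 3.]]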
Example #16
File: LSPI.py Project: okkhoy/rlpy
    def __init__(
            self, policy, representation, discount_factor, max_window, steps_between_LSPI,
            lspi_iterations=5, tol_epsilon=1e-3, re_iterations=100, use_sparse=False):

        self.steps_between_LSPI = steps_between_LSPI
        self.tol_epsilon = tol_epsilon
        self.lspi_iterations = lspi_iterations
        self.re_iterations = re_iterations
        self.use_sparse = use_sparse

        # Make A and r incrementally if the representation can not expand
        self.fixedRep = not representation.isDynamic
        if self.fixedRep:
            f_size = representation.features_num * representation.actions_num
            self.b = np.zeros((f_size, 1))
            self.A = np.zeros((f_size, f_size))

            # Cache calculated phi vectors
            if self.use_sparse:
                self.all_phi_s = sp.lil_matrix(
                    (max_window, representation.features_num))
                self.all_phi_ns = sp.lil_matrix(
                    (max_window, representation.features_num))
                self.all_phi_s_a = sp.lil_matrix((max_window, f_size))
                self.all_phi_ns_na = sp.lil_matrix((max_window, f_size))
            else:
                self.all_phi_s = np.zeros(
                    (max_window, representation.features_num))
                self.all_phi_ns = np.zeros(
                    (max_window, representation.features_num))
                self.all_phi_s_a = np.zeros((max_window, f_size))
                self.all_phi_ns_na = np.zeros((max_window, f_size))

        super(LSPI, self).__init__(policy, representation, discount_factor, max_window)
Example #17
File: gp.py Project: dfm/dfm-ml
    def k(self,x1,x2,chi2max=25.0):
        """
        The default kernel function

        Parameters
        ----------
        x1,x2 : numpy.ndarray
            Vectors of positions.

        chi2max : float, optional
            Set clipping for sparseness.

        Returns
        -------
        k : numpy.ndarray
            Covariance matrix between x1 and x2.

        Note
        ----
        This works well for small matrices but it is poorly implemented for larger
        matrices --- especially if they are actually sparse!

        """
        d = (x1-x2)**2/self._l2
        k = sp.lil_matrix(d.shape)
        k = self._a*np.exp(-0.5*d)
        k[d > chi2max] = 0.0
        return sp.lil_matrix(k).tocsc()
Example #18
    def __init__(self, plotid, xmin, xmax, ymin, ymax, step, localRadius, overviewStep, xlabel, ylabel):

        # Initialize local map
        self.localRadius = localRadius / float(step)
        self.step = step
        self.xrange = np.linspace(xmin, xmax, (xmax-xmin)/float(step)+1)
        self.yrange = np.linspace(ymin, ymax, (ymax-ymin)/float(step)+1)
        self.Nx = self.xrange.shape[0]
        self.Ny = self.yrange.shape[0]
        self.localXmin = self.xrange[self.Nx/2-self.localRadius]
        self.localXmax = self.xrange[self.Nx/2+self.localRadius]
        self.localYmin = self.yrange[self.Ny/2-self.localRadius]
        self.localYmax = self.yrange[self.Ny/2+self.localRadius]
        self.sparseSum  = lil_matrix((self.Ny, self.Nx), dtype=np.float32)
        self.sparseNorm = lil_matrix((self.Ny, self.Nx), dtype=np.float32)
        self.localMap   = np.zeros((2*self.localRadius+1, 2*self.localRadius+1))

        # Initialize overview map
        self.overviewXrange = np.linspace(xmin, xmax, (xmax-xmin)/float(overviewStep))
        self.overviewYrange = np.linspace(ymin, ymax, (ymax-ymin)/float(overviewStep))
        overviewNx = self.overviewXrange.shape[0]
        overviewNy = self.overviewYrange.shape[0]
        self.overviewMap = np.zeros((overviewNy, overviewNx))

        # Initialize plots
        self.counter    = 0
        ipc.broadcast.init_data(plotid+' -> Overview', data_type='image', history_length=1, flipy=True, \
                                xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, xlabel=xlabel, ylabel=ylabel)
        ipc.broadcast.init_data(plotid+' -> Local',    data_type='image', history_length=1, flipy=True, \
                                xmin=self.localXmin, xmax=self.localXmax, \
                                ymin=self.localYmin, ymax=self.localYmax, xlabel=xlabel, ylabel=ylabel)
Example #19
def learnProjection(dataset):
    """
    Learn the projection matrix and store it to a file. 
    """
    h = 50 # no. of latent dimensions.
    print "Loading the bipartite matrix...",
    coocData = sio.loadmat("../work/%s/DSxDI.mat" % (dataset))
    M = sp.lil_matrix(coocData['DSxDI'])
    (nDS, nDI) = M.shape
    print "Done."
    print "Computing the Laplacian...",
    D1 = sp.lil_matrix((nDS, nDS), dtype=np.float64)
    D2 = sp.lil_matrix((nDI, nDI), dtype=np.float64)
    for i in range(0, nDS):
        D1[i,i] = 1.0 / np.sqrt(np.sum(M[i,:].data[0]))
    for i in range(0, nDI):
        D2[i,i] = 1.0 / np.sqrt(np.sum(M[:,i].T.data[0]))
    B = (D1.tocsr().dot(M.tocsr())).dot(D2.tocsr())
    print "Done."
    # Perform SVD on B
    print "Perform SVD on the weight matrix...",
    startTime = time.time()
    # ut, s, vt = sparsesvd(B.tocsc(), h)
    B = sp.csc_matrix(B, dtype=float)
    ut, s, vt = sp.linalg.svds(B, h)
    # print ut.shape
    endTime = time.time()
    print "%ss" % str(round(endTime-startTime, 2)),
    # sio.savemat("../work/%s/proj_sfa.mat" % (dataset), {'proj':ut.T}) 
    sio.savemat("../work/%s/proj_sfa.mat" % (dataset), {'proj':ut})
    print "Done."    
    pass
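Restating what the loops above compute (h = 50 latent dimensions): the bipartite co-occurrence matrix M is symmetrically normalized and its top-h left singular vectors are stored as the projection.

$$
B = D_1 M D_2, \qquad (D_1)_{ii} = \frac{1}{\sqrt{\sum_j M_{ij}}}, \qquad (D_2)_{jj} = \frac{1}{\sqrt{\sum_i M_{ij}}},
$$
$$
B \approx U_h \Sigma_h V_h^{\top}, \qquad \mathrm{proj} = U_h.
$$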
Example #20
def get_rsas_rehosps_7x(rehosps_dict, rsas_file_path=rsa_clean_file_path_2013, rsa_format=formats.rsa_2013_format, cll=column_label_list):
    '''
    This method parses the lines of the file rsas_file_path and keeps only those whose line_number (starting from 1) is included in rehosps_dict, i.e.
    the RSAs with rehosp.
    It returns two arrays:
    X : the features according to column_label_list
    Y : response with 1 = rehosp delay of 1 or a multiple of 7 (days), 0 otherwise
    '''
    line_number = 1
    i = 0
    rows_count = len(rehosps_dict)
    cols_count = len(cll)
    sparse_X = sparse.lil_matrix((rows_count, cols_count))
    sparse_y = sparse.lil_matrix((rows_count, 1))

    with open(rsas_file_path) as rsa_file:
        while True:
            rsa_line = rsa_file.readline().strip()
            if (line_number in rehosps_dict):
                rsa_data_dict = get_rsa_data(rsa_line, rsa_format)
                rsa_to_X(rsa_data_dict, sparse_X, i)
                if rehosps_dict[line_number]:
                    sparse_y[i] = 1
                i += 1
            line_number += 1
            if line_number % 10000 == 0:
                print '\rLines processed ', line_number, ', % processed ', (i*100/rows_count),
            if (not rsa_line):
                break

    return sparse_X, sparse_y
Example #21
def one_hot_encode(train_discrete_features, test_discrete_features):
    """ Perform one hot encoding to both train and test set.
        Use this when having memory limitation, otherwise to use
        scikit-learn's OneHotEncoder.
    parameters:
    --------------------------------------------------------
    train_discrete_features: discrete features of training data
    test_discrete_features: discrete features of test data
    """
    m, n = train_discrete_features.shape
    train_encoded_features = lil_matrix((LENGTH_OF_TRAIN, MAX_OF_DIM))
    test_encoded_features = lil_matrix((LENGTH_OF_TEST, MAX_OF_DIM))
    cnt = 0

    for i in range(n):
        print "processing " + str(i) + "th feature..."
        train_column = train_discrete_features[:, i]
        test_column = test_discrete_features[:, i]

        # one hot encode the value in train and test
        encoder = OneHotEncoder(handle_unknown="ignore")
        train_encoded_column = lil_matrix(encoder.fit_transform(np.mat(train_column).T))
        test_encoded_column = lil_matrix(encoder.transform(np.mat(test_column).T))

        # get number of features
        _, num = train_encoded_column.shape

        # put the column into matrix
        for j in range(num):
            train_encoded_features[:,cnt+j] = train_encoded_column[:,j]
            test_encoded_features[:,cnt+j] = test_encoded_column[:,j]
            
        cnt += num

    return csr_matrix(train_encoded_features[:, 0:cnt]), csr_matrix(test_encoded_features[:, 0:cnt])
Example #22
    def advection_matrix(self):
        """ Construct the advection matrix operator """
        M = sprs.lil_matrix((self.N, self.N))
        dz = self.dz
        v = self.aacc

        # Upwinding
        """ Upwind formula from the wiki """
        A = sprs.lil_matrix((self.N, self.N)) 
        if v < 0:
            A.setdiag(np.ones(self.N) * -1.0)
            A.setdiag(np.ones(self.N), k=1)
        elif v == 0:
            A = A * 0
        elif v > 0:
            A.setdiag(np.ones(self.N) * -1.0, k=-1)
            A.setdiag(np.ones(self.N))

        A[0,:] = np.zeros(self.N)
        A[-1,:] = np.zeros(self.N)
        A = A / (2 * dz)
        
        # Set up the final row so that it can handle the base of the system. 
        B = sprs.lil_matrix((self.N, self.N))
        B.setdiag(np.ones(self.N) *  3.0, k= 0)
        B.setdiag(np.ones(self.N) * -4.0, k=-1)
        B.setdiag(np.ones(self.N)       , k=-2)
        B = B / (2 * dz)
        A[-1,:] = B[-1, :]

        return A 
Example #23
def fromList(shape, coords, weights=None):
    if weights is not None:
        assert len(coords) == len(weights)

    if len(shape) == 1:
        NX = NY = shape[0]
    else:
        NX = shape[0]
        NY = shape[1]

    if weights is None:
        X = [a for (a, b) in coords]
        Y = [b for (a, b) in coords]
        G = sps.lil_matrix((NX * NY, 1), dtype=np.float64)
        lin_I = common.sub2ind((NX, NY), X, Y)
        G[lin_I, 0] = 1
        G = G.reshape((NX, NY))
    else:
        G = sps.lil_matrix((NX, NY), dtype=np.float64)

        for (i, w) in enumerate(weights):
            c = coords[i]
            x = c[0]
            y = c[1]
            G[x, y] = w
    return G
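A minimal, hypothetical call to the weighted branch above: each weight is placed directly at its (x, y) coordinate of an NX-by-NY lil_matrix.

G_demo = fromList((4, 4), [(0, 1), (2, 3)], weights=[0.5, 2.0])
print((G_demo[0, 1], G_demo[2, 3]))   # (0.5, 2.0)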
Example #24
def stack_D(Xw,u_hat,W,N,T,R):
	Dstack = ssp.lil_matrix((N*R*T,W*R*T))

	i = 0;
	NW = N * W
	# logger.debug("Calling _stack_D")
	for r in range(R):
		# I expect this to be (N * W) * U
		Xwr = Xw[r*NW:(r+1)*NW,:]
		for t in range(T):
			# I expect this to be (N * W) * 1
			# Xwrt = Xwr.dot(u_hat[r,t,:])
			Xwrt = ssp.csr_matrix(Xwr.dot(u_hat[r,t,:]))
			for n in range(N):
				DSn = (i * N + n)
				DSw = (i * W )
				# Dstack.rows[DSn] = range(DSw,DSw + W)
				# Dstack.data[DSn] = Xwrt[n*W:n*W + W]
				sub = ssp.lil_matrix(Xwrt[:,n*W:n*W + W])
				Dstack.rows[DSn] = [x + DSw for x in sub.rows[0]]
				Dstack.data[DSn] = sub.data[0]
			i+=1 
	# logger.debug("Done")
	return Dstack
	pass
Example #25
File: counter1.py Project: vslovik/NLP
 def initialize(self):
     words = {}
     for en_line, fr_line in self.iterator():
         en_words = en_line.split(" ")
         fr_words = fr_line.split(" ")
         if "NULL" in words:
             words["NULL"] = words["NULL"].union(set(fr_words))
         else:
             words["NULL"] = set(fr_words)
         for en_word in en_words:
             if en_word in words:
                 words[en_word] = words[en_word].union(set(fr_words))
             else:
                 words[en_word] = set(fr_words)
     self.tmatrix = lil_matrix((len(words.keys()), len(words["NULL"])))
     self.cef = lil_matrix((len(words.keys()), len(words["NULL"])))
     i = 0
     for word in words["NULL"]:
         self.fr_dict[word] = i
         i += 1
     i = 0
     for en_word, value in words.iteritems():
         self.en_dict[en_word] = i
         if len(value):
             for fr_word in value:
                 self.tmatrix[i, self.fr_dict[fr_word]] = pow(len(value), -1) 
         i += 1
     del words
Example #26
    def __init__(self):

        # Files
        # Inputs
        self.train_file = "training.txt"
        self.test_file = "testing.txt"
        self.label_training_file = "label_training.txt"

        # Output
        self.nb_classifier_output = "nb_classifier_output"
        self.svm_classifier_output = "svm_classifier_output"

        # Estimator value
        self.estimatorSize = 500

        # Sparse Matrix

        # Training data
        self.sparse_matrix = lil_matrix((1842, 26364), dtype=float)
        self.training_labels_list = []

        # Test data
        self.test_data = lil_matrix((952, 26364), dtype=float)

        # Results
        self.predicted_labels = None
Example #27
 def init_est(self):
   for m in xrange(len(self.trn_data)):
     self.z.append([])
     N = len(self.trn_data[m])
     for n in xrange(N):
       self.z[m].append(0)
   self.nksum = [0 for i in xrange(self.topics)]
   print "init nwk"
   self.nwk = sparse.lil_matrix((self.word_dict_sz, self.topics))
   print self.rank,self.nwk.shape
   self.phi = sparse.lil_matrix((self.topics, self.word_dict_sz))
   self.nwkp = sparse.lil_matrix((self.word_dict_sz, self.topics))
   for i in xrange(self.topics):
     self.p.append(0)
   M = len(self.z)
   self.ndsum = [0 for i in xrange(M)]
   print "init ndk"
   self.ndk = sparse.lil_matrix((M, self.topics))
   self.theta = sparse.lil_matrix((M, self.topics))
   for m in xrange(M):
     N = len(self.z[m])
     for n in xrange(N):
       w = self.trn_data[m][n]
       self.z[m][n] = int(random.random()*self.topics)
       w_topic = self.z[m][n]
       self.nwkp[w, w_topic] += 1
       self.ndk[m, w_topic] += 1
       '''self.nksum[w_topic] += 1'''
     self.ndsum[m] += N
   print "init estimate complete!"
Example #28
def rand_matrices(A, t, nonzero_ids):
    '''Create random matrices A, B for the meetFriend_matrix model.

    The matrices are created as follows. A_t has a diagonal of (t-1)/t and B_t
    is the zero matrix. We make a weighted random choice for each node
    depending on the values of its row on A matrix. Depending on the outcome
    of this choice, either the B[i, i] = 1/t or A[i, r] = 1/t, where r is the
    random choice.

    Args:
        A (NxN numpy array): Weights matrix (its diagonal is the stubbornness)

        t (int): Round number

    Returns:
        Two NxN matrices, A_t and B_t

    '''

    N = A.shape[0]
    A_t = sparse.lil_matrix((N, N))
    A_t.setdiag(np.ones(N) * (t-1)/t)
    B_t = sparse.lil_matrix((N, N))
    for i in xrange(N):
        r = rchoice(A[i, :], nonzero_ids[i])
        if r == i:
            B_t[i, i] = 1/t
        else:
            A_t[i, r] = 1/t

    return A_t.tocsr(), B_t.tocsr()
Example #29
    def setUp(self):

        self.nbr_elements = 1000
        self.size = 10000

        self.A_c = LLSparseMatrix(size=self.size, size_hint=self.nbr_elements, itype=INT32_T, dtype=FLOAT64_T)
        self.A_s = lil_matrix((self.size, self.size), dtype=np.float64)

        self.list_of_matrices = []
        self.list_of_matrices.append(self.A_c)
        self.list_of_matrices.append(self.A_s)

        construct_random_matrices(self.list_of_matrices, self.size, self.nbr_elements)

        self.CSR_c = self.A_c.to_csr()
        self.CSR_s = self.A_s.tocsr()

        self.B_c = LLSparseMatrix(size=self.size, size_hint=self.nbr_elements, itype=INT32_T, dtype=FLOAT64_T)
        self.B_s = lil_matrix((self.size, self.size), dtype=np.float64)

        self.list_of_matrices = []
        self.list_of_matrices.append(self.B_c)
        self.list_of_matrices.append(self.B_s)

        construct_random_matrices(self.list_of_matrices, self.size, self.nbr_elements)

        self.CSC_c = self.B_c.to_csc()
        self.CSC_s = self.B_s.tocsc()


        self.v = np.arange(0, self.size, dtype=np.float64)
Example #30
def _pade(A, m):
    n = np.shape(A)[0]
    c = _padecoeff(m)
    if m != 13:
        apows = [[] for jj in range(int(np.ceil((m + 1) / 2)))]
        apows[0] = sp.eye(n, n, format='csc')
        apows[1] = A * A
        for jj in range(2, int(np.ceil((m + 1) / 2))):
            apows[jj] = apows[jj - 1] * apows[1]
        U = sp.lil_matrix((n, n)).tocsc()
        V = sp.lil_matrix((n, n)).tocsc()
        for jj in range(m, 0, -2):
            U = U + c[jj] * apows[jj // 2]
        U = A * U
        for jj in range(m - 1, -1, -2):
            V = V + c[jj] * apows[(jj + 1) // 2]
        F = spla.spsolve((-U + V), (U + V))
        return F.tocsr()
    elif m == 13:
        A2 = A * A
        A4 = A2 * A2
        A6 = A2 * A4
        U = A * (A6 * (c[13] * A6 + c[11] * A4 + c[9] * A2) +
                 c[7] * A6 + c[5] * A4 + c[3] * A2 +
                 c[1] * sp.eye(n, n).tocsc())
        V = A6 * (c[12] * A6 + c[10] * A4 + c[8] * A2) + c[6] * A6 + c[4] * \
            A4 + c[2] * A2 + c[0] * sp.eye(n, n).tocsc()
        F = spla.spsolve((-U + V), (U + V))
        return F.tocsr()
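In both branches above, U collects the odd-degree terms of the Padé numerator (note the final multiplication by A) and V the even-degree terms, so the returned matrix is the [m/m] Padé approximant of the matrix exponential:

$$
p_m(A) = \sum_{j=0}^{m} c_j A^j = U + V, \qquad q_m(A) = p_m(-A) = V - U, \qquad e^{A} \approx q_m(A)^{-1} p_m(A) = (V - U)^{-1}(U + V).
$$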
Example #31
def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Map word embeddings in two languages into a shared space')
    parser.add_argument('src_input', help='the input source embeddings')
    parser.add_argument('trg_input', help='the input target embeddings')
    parser.add_argument('sense_input', help='the input sense mapping matrix')
    parser.add_argument('src_output', help='the output source embeddings')
    parser.add_argument('trg_output', help='the output target embeddings')
    parser.add_argument('tsns_output',
                        default='tsns.pkl',
                        help='the output target senses pickle file')
    parser.add_argument(
        '--encoding',
        default='utf-8',
        help='the character encoding for input/output (defaults to utf-8)')
    parser.add_argument('--precision',
                        choices=['fp16', 'fp32', 'fp64'],
                        default='fp32',
                        help='the floating-point precision (defaults to fp32)')
    parser.add_argument('--cuda',
                        action='store_true',
                        help='use cuda (requires cupy)')
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='the random seed (defaults to 0)')

    recommended_group = parser.add_argument_group(
        'recommended settings', 'Recommended settings for different scenarios')
    recommended_type = recommended_group.add_mutually_exclusive_group()
    recommended_type.add_argument(
        '--unsupervised',
        action='store_true',
        help=
        'recommended if you have no seed dictionary and do not want to rely on identical words'
    )
    recommended_type.add_argument('--future',
                                  action='store_true',
                                  help='experiment with stuff')
    recommended_type.add_argument('--toy',
                                  action='store_true',
                                  help='experiment with stuff on toy dataset')
    recommended_type.add_argument('--acl2018',
                                  action='store_true',
                                  help='reproduce our ACL 2018 system')

    init_group = parser.add_argument_group(
        'advanced initialization arguments',
        'Advanced initialization arguments')
    init_type = init_group.add_mutually_exclusive_group()
    init_type.add_argument('--init_unsupervised',
                           action='store_true',
                           help='use unsupervised initialization')
    init_group.add_argument(
        '--unsupervised_vocab',
        type=int,
        default=0,
        help=
        'restrict the vocabulary to the top k entries for unsupervised initialization'
    )

    mapping_group = parser.add_argument_group(
        'advanced mapping arguments', 'Advanced embedding mapping arguments')
    mapping_group.add_argument(
        '--normalize',
        choices=['unit', 'center', 'unitdim', 'centeremb', 'none'],
        nargs='*',
        default=[],
        help='the normalization actions to perform in order')
    mapping_group.add_argument('--whiten',
                               action='store_true',
                               help='whiten the embeddings')
    mapping_group.add_argument('--src_reweight',
                               type=float,
                               default=0,
                               nargs='?',
                               const=1,
                               help='re-weight the source language embeddings')
    mapping_group.add_argument('--trg_reweight',
                               type=float,
                               default=0,
                               nargs='?',
                               const=1,
                               help='re-weight the target language embeddings')
    mapping_group.add_argument('--src_dewhiten',
                               choices=['src', 'trg'],
                               help='de-whiten the source language embeddings')
    mapping_group.add_argument('--trg_dewhiten',
                               choices=['src', 'trg'],
                               help='de-whiten the target language embeddings')
    mapping_group.add_argument('--dim_reduction',
                               type=int,
                               default=0,
                               help='apply dimensionality reduction')
    mapping_type = mapping_group.add_mutually_exclusive_group()
    mapping_type.add_argument('-c',
                              '--orthogonal',
                              action='store_true',
                              help='use orthogonal constrained mapping')

    self_learning_group = parser.add_argument_group(
        'advanced self-learning arguments',
        'Advanced arguments for self-learning')
    self_learning_group.add_argument(
        '--vocabulary_cutoff',
        type=int,
        default=0,
        help='restrict the vocabulary to the top k entries')
    self_learning_group.add_argument(
        '--threshold',
        default=0.000001,
        type=float,
        help='the convergence threshold (defaults to 0.000001)')
    self_learning_group.add_argument(
        '--stochastic_initial',
        default=0.1,
        type=float,
        help=
        'initial keep probability stochastic dictionary induction (defaults to 0.1)'
    )
    self_learning_group.add_argument(
        '--stochastic_multiplier',
        default=2.0,
        type=float,
        help='stochastic dictionary induction multiplier (defaults to 2.0)')
    self_learning_group.add_argument(
        '--stochastic_interval',
        default=50,
        type=int,
        help='stochastic dictionary induction interval (defaults to 50)')
    self_learning_group.add_argument(
        '--log',
        default='map.log',
        help='write to a log file in tsv format at each iteration')
    self_learning_group.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='write log information to stderr at each iteration')

    future_group = parser.add_argument_group('experimental arguments',
                                             'Experimental arguments')
    future_group.add_argument('--skip_top',
                              type=int,
                              default=0,
                              help='Top k words to skip, presumably function')
    future_group.add_argument(
        '--start_src',
        action='store_true',
        help='Algorithm starts by tuning sense embeddings based on source')
    future_group.add_argument('--trim_senses',
                              action='store_true',
                              help='Trim sense table to working vocab')
    future_group.add_argument(
        '--lamb',
        type=float,
        default=0.5,
        help='Weight hyperparameter for sense alignment objectives')
    future_group.add_argument('--reglamb',
                              type=float,
                              default=1.,
                              help='Lasso regularization hyperparameter')
    future_group.add_argument(
        '--ccreglamb',
        type=float,
        default=0.1,
        help='Sense embedding regularization hyperparameter')
    future_group.add_argument('--inv_delta',
                              type=float,
                              default=0.0001,
                              help='Delta_I added for inverting sense matrix')
    future_group.add_argument('--lasso_iters',
                              type=int,
                              default=10,
                              help='Number of iterations for LASSO/NMF')
    future_group.add_argument('--iterations',
                              type=int,
                              default=-1,
                              help='Number of overall model iterations')
    future_group.add_argument('--trg_batch',
                              type=int,
                              default=5000,
                              help='Batch size for target steps')
    future_group.add_argument(
        '--trg_knn',
        action='store_true',
        help='Perform target sense mapping by k-nearest neighbors')
    future_group.add_argument(
        '--trg_sns_csls',
        type=int,
        default=10,
        help='K-nearest neighbors for CSLS target sense search')
    future_group.add_argument(
        '--senses_per_trg',
        type=int,
        default=1,
        help='K-max target sense mapping (default = 1 = off)')
    future_group.add_argument(
        '--gd',
        action='store_true',
        help='Apply gradient descent for assignment and synset embeddings')
    future_group.add_argument('--gd_lr',
                              type=float,
                              default=1e-2,
                              help='Learning rate for SGD (default=0.01)')
    future_group.add_argument('--gd_wd',
                              action='store_true',
                              help='Weight decay in SGD')
    future_group.add_argument(
        '--gd_wd_hl',
        type=int,
        default=100,
        help='Weight decay half-life in SGD, default=100')
    future_group.add_argument(
        '--gd_clip',
        type=float,
        default=5.,
        help='Per-coordinate gradient clipping (default=5)')
    future_group.add_argument(
        '--gd_map_steps',
        type=int,
        default=1,
        help='Consecutive steps for each target-sense mapping update phase')
    future_group.add_argument(
        '--gd_emb_steps',
        type=int,
        default=1,
        help='Consecutive steps for each sense embedding update phase')
    future_group.add_argument(
        '--base_prox_lambda',
        type=float,
        default=0.99,
        help='Lambda for proximal gradient in lasso step')
    future_group.add_argument(
        '--prox_decay',
        action='store_true',
        help='Multiply proximal lambda by itself each iteration')
    future_group.add_argument(
        '--sense_limit',
        type=float,
        default=1.1,
        help=
        'Maximum amount of target sense mappings, in terms of source mappings (default=1.1x)'
    )
    future_group.add_argument(
        '--gold_pairs',
        help='Gold data for evaluation, if exists (not for tuning)')
    future_group.add_argument(
        '--gold_threshold',
        type=float,
        default=0.0,
        help='Threshold for gold mapping (0 is fine if sparse)')

    future_group.add_argument('--debug', action='store_true')

    args = parser.parse_args()

    # pre-setting groups
    if args.toy:
        parser.set_defaults(init_unsupervised=True,
                            unsupervised_vocab=4000,
                            normalize=['unit', 'center', 'unit'],
                            whiten=True,
                            src_reweight=0.5,
                            trg_reweight=0.5,
                            src_dewhiten='src',
                            trg_dewhiten='trg',
                            vocabulary_cutoff=50,
                            trim_senses=True,
                            inv_delta=1.,
                            reglamb=0.2,
                            lasso_iters=100,
                            gd_wd=True,
                            log='map-toy.log')
    if args.unsupervised or args.future:
        parser.set_defaults(init_unsupervised=True,
                            unsupervised_vocab=4000,
                            normalize=['unit', 'center', 'unit'],
                            whiten=True,
                            src_reweight=0.5,
                            trg_reweight=0.5,
                            src_dewhiten='src',
                            trg_dewhiten='trg',
                            vocabulary_cutoff=2000,
                            trim_senses=True,
                            gd_wd=True)
    if args.unsupervised or args.acl2018:
        parser.set_defaults(init_unsupervised=True,
                            unsupervised_vocab=4000,
                            normalize=['unit', 'center', 'unit'],
                            whiten=True,
                            src_reweight=0.5,
                            trg_reweight=0.5,
                            src_dewhiten='src',
                            trg_dewhiten='trg',
                            vocabulary_cutoff=20000)
    args = parser.parse_args()

    # Check command line arguments
    if (args.src_dewhiten is not None
            or args.trg_dewhiten is not None) and not args.whiten:
        print('ERROR: De-whitening requires whitening first', file=sys.stderr)
        sys.exit(-1)

    # Choose the right dtype for the desired precision
    if args.precision == 'fp16':
        dtype = 'float16'  # many operations not supported by cupy
    elif args.precision == 'fp32':  # default
        dtype = 'float32'
    elif args.precision == 'fp64':
        dtype = 'float64'

    # Read input embeddings
    print('reading embeddings...')
    srcfile = open(args.src_input,
                   encoding=args.encoding,
                   errors='surrogateescape')
    trgfile = open(args.trg_input,
                   encoding=args.encoding,
                   errors='surrogateescape')
    src_words, x = embeddings.read(srcfile, dtype=dtype)
    trg_words, z = embeddings.read(trgfile, dtype=dtype)
    print('embeddings read')

    # Read input source sense mapping
    print('reading sense mapping')
    src_senses = pickle.load(open(args.sense_input, 'rb'))
    if src_senses.shape[0] != x.shape[0]:
        src_senses = csr_matrix(src_senses.transpose()
                                )  # using non-cuda scipy because of 'inv' impl
    #src_senses = get_sparse_module(src_senses)
    print(
        f'source sense mapping of shape {src_senses.shape} loaded with {src_senses.getnnz()} nonzeros'
    )

    # NumPy/CuPy management
    if args.cuda:
        if not supports_cupy():
            print('ERROR: Install CuPy for CUDA support', file=sys.stderr)
            sys.exit(-1)
        xp = get_cupy()
        x = xp.asarray(x)
        z = xp.asarray(z)
        print('CUDA loaded')
    else:
        xp = np
    xp.random.seed(args.seed)

    # removed word to index map (only relevant in supervised learning or with validation)

    # STEP 0: Normalization
    embeddings.normalize(x, args.normalize)
    embeddings.normalize(z, args.normalize)
    print('normalization complete')

    # removed building the seed dictionary

    # removed validation step

    # Create log file
    if args.log:
        log = open(args.log,
                   mode='w',
                   encoding=args.encoding,
                   errors='surrogateescape')
        print(f'logging into {args.log}')

    # Allocate memory

    # Initialize the projection matrices W(s) = W(t) = I.
    xw = xp.empty_like(x)
    zw = xp.empty_like(z)
    xw[:] = x
    zw[:] = z

    src_size = x.shape[0] if args.vocabulary_cutoff <= 0 else min(
        x.shape[0] - args.skip_top, args.vocabulary_cutoff)
    trg_size = z.shape[0] if args.vocabulary_cutoff <= 0 else min(
        z.shape[0] - args.skip_top, args.vocabulary_cutoff)
    emb_dim = x.shape[1]

    cutoff_end = min(src_size + args.skip_top, x.shape[0])

    if args.trim_senses:
        # reshape sense assignment
        src_senses = src_senses[args.skip_top:cutoff_end]

        # new columns for words with no senses in original input
        ### TODO might also need this if not trimming (probably kinda far away)
        newcols = [csc_matrix(([1],([i],[0])),shape=(src_size,1)) for i in range(src_size)\
                   if src_senses.getrow(i).getnnz() == 0]
        #with open(f'data/synsets/dummy_synsets_v3b_{src_size}','wb') as dummy_cols_file:
        #    dummy_col_idcs = [i for i in range(src_size) if src_senses.getrow(i).getnnz() == 0]
        #    pickle.dump(np.array(dummy_col_idcs), dummy_cols_file)

        # trim senses no longer used, add new ones
        colsums = src_senses.sum(axis=0).tolist()[0]
        kept_senses = [i for i, j in enumerate(colsums) if j > 0]
        #with open(f'data/synsets/kept_synsets_v3b_{src_size}','wb') as kept_save_file:
        #    pickle.dump(np.array(kept_senses), kept_save_file)
        src_senses = hstack([src_senses[:, kept_senses]] + newcols)
        print(
            f'trimmed sense dictionary dimensions: {src_senses.shape} with {src_senses.getnnz()} nonzeros'
        )
    sense_size = src_senses.shape[1]

    if args.gold_pairs is not None:
        with open(args.gold_pairs, 'rb') as gold_pairs_f:
            gold_pairs = pickle.load(gold_pairs_f)
            gold_pairs = [(i-args.skip_top,j) for i,j in gold_pairs \
                          if i >= args.skip_top and i < src_senses.shape[0] and j < src_senses.shape[1]]
        gold_trgs = sorted(set([x[0] for x in gold_pairs]))
        gold_senses = sorted(set([x[1] for x in gold_pairs]))
        gold_domain_size = len(gold_trgs) * len(gold_senses)
        print(
            f'evaluating on {len(gold_pairs)} pairs with {len(gold_trgs)} unique words and {len(gold_senses)} unique senses'
        )

    # Initialize the concept embeddings from the source embeddings
    ### TODO maybe try gradient descent instead?
    ### TODO (pre-)create non-singular alignment matrix
    cc = xp.empty((sense_size, emb_dim), dtype=dtype)  # \tilde{E}
    t01 = time.time()
    print('starting psinv calc')
    src_sns_psinv = psinv(src_senses, dtype, args.inv_delta)
    xecc = x[args.skip_top:cutoff_end].T.dot(
        get_sparse_module(src_senses).toarray()).T  # sense_size * emb_dim
    cc[:] = src_sns_psinv.dot(xecc)
    print(f'initialized concept embeddings in {time.time()-t01:.2f} seconds',
          file=sys.stderr)
    if args.verbose:
        # report precision of the pseudo-inverse operation, checked by inverting
        pseudo_id = src_senses.transpose().dot(src_senses).dot(
            src_sns_psinv.get())
        real_id = sparse_id(sense_size)
        rel_diff = (pseudo_id - real_id).sum() / (sense_size * sense_size)
        print(f'per-coordinate pseudo-inverse precision is {rel_diff:.5f}')

    ### TODO initialize trg_senses using seed dictionary instead?
    trg_sns_size = trg_size if args.trim_senses else z.shape[0]
    trg_senses = csr_matrix(
        (trg_sns_size,
         sense_size))  # using non-cuda scipy because of 'inv' impl
    zecc = xp.empty_like(xecc)  # sense_size * emb_dim
    #tg_grad = xp.empty((trg_sns_size, sense_size))

    if args.gd:
        # everything can be done on gpu
        src_senses = get_sparse_module(src_senses, dtype=dtype)
        trg_senses = get_sparse_module(trg_senses, dtype=dtype)
        if args.sense_limit > 0.0:
            trg_sense_limit = int(args.sense_limit * src_senses.getnnz())
            if args.verbose:
                print(
                    f'limiting target side to {trg_sense_limit} sense mappings'
                )
        else:
            trg_sense_limit = -1

    ### TODO return memory assignment for similarities?

    # Training loop
    if args.gd:
        prox_lambda = args.base_prox_lambda
    else:
        lasso_model = Lasso(alpha=args.reglamb, fit_intercept=False, max_iter=args.lasso_iters,\
                            positive=True, warm_start=True)  # TODO more parametrization

    if args.log is not None:
        if args.gd:
            print(f'gradient descent lr: {args.gd_lr}', file=log)
            print(f'base proximal lambda: {args.base_prox_lambda}', file=log)
        else:
            print(f'lasso regularization: {args.reglamb}', file=log)
            print(f'lasso iterations: {args.lasso_iters}', file=log)
            print(f'inversion epsilon: {args.inv_delta}', file=log)
        if args.gold_pairs is not None:
            print(f'gold mappings: {len(gold_pairs)}', file=log)
        print(
            f'Iteration\tObjective\tSource\tTarget\tL_1\tDuration\tNonzeros\tCorrect_mappings',
            file=log)
        log.flush()

    best_objective = objective = 1000000000.
    correct_mappings = -1
    regularization_lambda = args.base_prox_lambda if args.gd else args.reglamb
    it = 1
    last_improvement = 0
    t = time.time()
    map_gd_lr = args.gd_lr
    emb_gd_lr = args.gd_lr
    end = False
    print('starting training')

    if args.start_src:
        print('starting with converging synset embeddings')
        it_range = range(
            args.iterations
        )  ### TODO possibly add arg, but there's early stopping
        if not args.verbose:
            it_range = tqdm(it_range)
        prev_obj = float('inf')
        for pre_it in it_range:
            if args.gd_wd:
                emb_gd_lr = args.gd_lr * pow(0.5, floor(
                    pre_it / args.gd_wd_hl))

            # Synset embedding
            cc_grad = src_senses.T.dot(
                xw[args.skip_top:cutoff_end] -
                src_senses.dot(cc)) - args.ccreglamb * cc
            cc_grad.clip(-args.gd_clip, args.gd_clip, out=cc_grad)
            cc += emb_gd_lr * cc_grad

            # Source projection
            u, s, vt = xp.linalg.svd(cc.T.dot(xecc))
            wx = vt.T.dot(u.T).astype(dtype)
            x.dot(wx, out=xw)

            pre_objective = ((xp.linalg.norm(
                xw[args.skip_top:cutoff_end] -
                get_sparse_module(src_senses).dot(cc), 'fro'))**2) / 2
            pre_objective = float(pre_objective)

            if args.verbose and pre_it > 0 and pre_it % 10 == 0:
                print(
                    f'source synset embedding objective iteration {pre_it}: {pre_objective:.3f}'
                )

            if pre_objective > prev_obj:
                print(
                    f'stopping at pre-iteration {pre_it}, source-sense objective {prev_obj:.3f}'
                )
                # revert
                cc -= emb_gd_lr * cc_grad
                break

            prev_obj = pre_objective

    while True:
        if it % 50 == 0:
            print(
                f'starting iteration {it}, last objective was {objective}, correct mappings at {correct_mappings}'
            )

        # Increase the keep probability if we have not improved in args.stochastic_interval iterations
        if it - last_improvement > args.stochastic_interval:
            last_improvement = it

        if args.iterations > 0 and it > args.iterations:
            end = True

        ### update target assignments (6) - lasso-esque regression
        time6 = time.time()
        # optimize: 0.5 * (xp.linalg.norm(zw[i] - trg_senses[i].dot(cc))^2) + (regularization_lambda * xp.linalg.norm(trg_senses[i],1))

        if args.trg_knn:
            # for csls-based neighborhoods
            knn_sense = xp.full(sense_size, -100)
            for i in range(0, sense_size, args.trg_batch):
                batch_end = min(i + args.trg_batch, sense_size)
                sim_sense_trg = cc[i:batch_end].dot(
                    zw[args.skip_top:cutoff_end].T)
                knn_sense[i:batch_end] = topk_mean(sim_sense_trg,
                                                   k=args.trg_sns_csls,
                                                   inplace=True)

            # calculate new target mappings
            trg_senses = lil_matrix(trg_senses.shape)
            for i in range(0, trg_size, args.trg_batch):
                sns_batch_end = min(i + args.trg_batch, trg_size)
                z_i = i + args.skip_top
                z_batch_end = min(sns_batch_end + args.skip_top, zw.shape[0])

                sims = zw[z_i:z_batch_end].dot(cc.T)
                sims -= knn_sense / 2  # equivalent to the real CSLS scores for NN
                best_idcs = sims.argmax(1).tolist()
                trg_senses[(list(range(i, sns_batch_end)),
                            best_idcs)] = sims.max(1).tolist()

                # second-to-lth-best
                for l in range(args.senses_per_trg - 1):
                    sims[(list(range(sims.shape[0])), best_idcs)] = 0.
                    best_idcs = sims.argmax(1).tolist()
                    trg_senses[(list(range(i, sns_batch_end)),
                                best_idcs)] = sims.max(1).tolist()

            trg_senses = get_sparse_module(trg_senses.tocsr())

        elif args.gd:
            ### TODO add args.skip_top calculations
            if args.gd_wd:
                true_it = (it - 1) * args.gd_map_steps
                map_gd_lr = args.gd_lr * pow(
                    0.5, floor((1 + true_it) / args.gd_wd_hl))
                if args.verbose:
                    print(f'mapping learning rate: {map_gd_lr}')

            for k in range(args.gd_map_steps):
                # st <- st + eta * (ew - st.dot(es)).dot(es.T)
                # allow up to sense_limit updates, clip gradient

                batch_grads = []
                for i in range(0, trg_size, args.trg_batch):
                    batch_end = min(i + args.trg_batch, trg_size)
                    tg_grad_b = (zw[i:batch_end] -
                                 trg_senses[i:batch_end].dot(cc)).dot(cc.T)

                    # proximal gradient
                    tg_grad_b += prox_lambda
                    tg_grad_b.clip(None, 0.0, out=tg_grad_b)
                    batch_grads.append(batch_sparse(tg_grad_b))

                tg_grad = get_sparse_module(vstack(batch_grads))
                del tg_grad_b

                if args.prox_decay:
                    prox_lambda *= args.base_prox_lambda

                ### TODO consider weight decay here as well (args.gd_wd)
                trg_senses -= map_gd_lr * tg_grad

                # allow up to sense_limit nonzeros
                if trg_sense_limit > 0:
                    trg_senses = trim_sparse(trg_senses,
                                             trg_sense_limit,
                                             clip=None)

            ### TODO consider finishing up with lasso (maybe only in final iteration)

        else:
            ### TODO add args.skip_top calculations
            # parallel LASSO (no cuda impl)
            cccpu = cc.get().T  # emb_dim * sense_size
            lasso_model.fit(cccpu, zw[:trg_size].get().T)
            ### TODO maybe trim, keep only above some threshold (0.05) OR top f(#it)
            trg_senses = lasso_model.sparse_coef_

        if args.verbose:
            print(
                f'target sense mapping step: {(time.time()-time6):.2f} seconds, {trg_senses.getnnz()} nonzeros',
                file=sys.stderr)
            objective = ((xp.linalg.norm(xw[args.skip_top:cutoff_end] - get_sparse_module(src_senses).dot(cc),'fro') ** 2)\
                            + (xp.linalg.norm(zw[args.skip_top:cutoff_end] - get_sparse_module(trg_senses).dot(cc),'fro')) ** 2) / 2 \
                        + regularization_lambda * trg_senses.sum()  # TODO consider thresholding reg part
            objective = float(objective)
            print(f'objective: {objective:.3f}')

        # Write target sense mapping
        with open(f'tmp_outs/{args.tsns_output[:-4]}-it{it:03d}.pkl',
                  mode='wb') as tsnsfile:
            pickle.dump(trg_senses.get(), tsnsfile)

        ### update synset embeddings (10)
        time10 = time.time()
        if args.gd and args.gd_emb_steps > 0:
            ### TODO probably handle sizes and/or threshold sparse matrix
            if args.gd_wd:
                true_it = (it - 1) * args.gd_emb_steps
                emb_gd_lr = args.gd_lr * pow(
                    0.5, floor((1 + true_it) / args.gd_wd_hl))
                if args.verbose:
                    print(f'embedding learning rate: {emb_gd_lr}')

            ### replace block for no-source-tuning mode
            all_senses = trg_senses if args.start_src else get_sparse_module(
                vstack((src_senses.get(), trg_senses.get()), format='csr'),
                dtype=dtype)
            aw = zw[args.skip_top:cutoff_end] if args.start_src else xp.concatenate(
                (xw[args.skip_top:cutoff_end], zw[args.skip_top:cutoff_end]))

            for i in range(args.gd_emb_steps):
                cc_grad = all_senses.T.dot(
                    aw - all_senses.dot(cc)) - args.ccreglamb * cc
                cc_grad.clip(-args.gd_clip, args.gd_clip, out=cc_grad)
                cc += emb_gd_lr * cc_grad

        else:
            ### TODO add args.skip_top calculations
            all_senses = get_sparse_module(
                vstack((src_senses, trg_senses), format='csr'))
            xzecc = xp.concatenate((xw[:src_size], zw[:trg_size])).T\
                        .dot(all_senses.toarray()).T  # sense_size * emb_dim
            all_sns_psinv = psinv(
                all_senses.get(), dtype, args.inv_delta
            )  ### TODO only update target side? We still have src_sns_psinv [it doesn't matter, dimensions are the same]
            cc[:] = all_sns_psinv.dot(xzecc)

        if args.verbose:
            print(f'synset embedding update: {time.time()-time10:.2f}',
                  file=sys.stderr)
            objective = ((xp.linalg.norm(xw[args.skip_top:cutoff_end] - get_sparse_module(src_senses).dot(cc),'fro')) ** 2\
                            + (xp.linalg.norm(zw[args.skip_top:cutoff_end] - get_sparse_module(trg_senses).dot(cc),'fro')) ** 2) / 2 \
                        + regularization_lambda * trg_senses.sum()  # TODO consider thresholding reg part
            objective = float(objective)
            print(f'objective: {objective:.3f}')

        ### update projections (3,5)
        # write to zw and xw
        if args.orthogonal or not end:

            ### remove block for no-source-tuning mode
            # source side - mappings don't change so xecc is constant
            #if not args.start_src:  # need to do this anyway whenever cc updates
            time3 = time.time()
            u, s, vt = xp.linalg.svd(cc.T.dot(xecc))
            wx = vt.T.dot(u.T).astype(dtype)
            x.dot(wx, out=xw)
            if args.verbose:
                print(f'source projection update: {time.time()-time3:.2f}',
                      file=sys.stderr)

            # target side - compute sense mapping first
            time3 = time.time()
            zecc.fill(0.)
            for i in range(0, trg_size, args.trg_batch):
                end_idx = min(i + args.trg_batch, trg_size)
                zecc += z[i:end_idx].T.dot(
                    get_sparse_module(trg_senses[i:end_idx]).toarray()).T
            u, s, vt = xp.linalg.svd(cc.T.dot(zecc))
            wz = vt.T.dot(u.T).astype(dtype)
            z.dot(wz, out=zw)
            if args.verbose:
                print(f'target projection update: {time.time()-time3:.2f}',
                      file=sys.stderr)

        ### TODO add parts from 'advanced mapping' part - transformations, whitening, etc.

        # Objective function evaluation
        time_obj = time.time()
        trg_senses_l1 = float(trg_senses.sum())
        src_obj = (float(
            xp.linalg.norm(
                xw[args.skip_top:cutoff_end] -
                get_sparse_module(src_senses).dot(cc), 'fro'))**2) / 2
        trg_obj = (float(
            xp.linalg.norm(
                zw[args.skip_top:cutoff_end] -
                get_sparse_module(trg_senses).dot(cc), 'fro'))**2) / 2
        objective = src_obj + trg_obj + regularization_lambda * trg_senses_l1  # TODO consider thresholding reg part
        if args.verbose:
            print(f'objective calculation: {time.time()-time_obj:.2f}',
                  file=sys.stderr)

        if objective - best_objective <= -args.threshold:
            last_improvement = it
            best_objective = objective

        # WordNet transduction evaluation (can't tune on this)
        if args.gold_pairs is not None:
            np_trg_senses = trg_senses.get()
            trg_corr = [
                p for p in gold_pairs if np_trg_senses[p] > args.gold_threshold
            ]
            correct_mappings = len(trg_corr)
            domain_trgs = np_trg_senses[gold_trgs][:, gold_senses]
        else:
            correct_mappings = -1

        # Logging
        duration = time.time() - t
        if args.verbose:
            print('ITERATION {0} ({1:.2f}s)'.format(it, duration),
                  file=sys.stderr)
            print('objective: {0:.3f}'.format(objective), file=sys.stderr)
            print('target senses l_1 norm: {0:.3f}'.format(trg_senses_l1),
                  file=sys.stderr)
            if len(gold_pairs) > 0 and domain_trgs.getnnz() > 0:
                print(
                    f'{correct_mappings} correct target mappings: {(correct_mappings/len(gold_pairs)):.3f} recall, {(correct_mappings/domain_trgs.getnnz()):.3f} precision',
                    file=sys.stderr)
            print(file=sys.stderr)
            sys.stderr.flush()
        if args.log is not None:
            print(
                f'{it}\t{objective:.3f}\t{src_obj:.3f}\t{trg_obj:.3f}\t{trg_senses_l1:.3f}\t{duration:.3f}\t{trg_senses.getnnz()}\t{correct_mappings}',
                file=log)
            log.flush()

        if end:
            break

        t = time.time()
        it += 1

    # Write mapped embeddings
    with open(args.src_output,
              mode='w',
              encoding=args.encoding,
              errors='surrogateescape') as srcfile:
        embeddings.write(src_words, xw, srcfile)
    with open(args.trg_output,
              mode='w',
              encoding=args.encoding,
              errors='surrogateescape') as trgfile:
        embeddings.write(trg_words, zw, trgfile)

    # Write target sense mapping
    with open(args.tsns_output, mode='wb') as tsnsfile:
        pickle.dump(trg_senses.get(), tsnsfile)
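Note on the gradient-descent branch above: the step that adds prox_lambda to the gradient and clips to non-positive values is a proximal-gradient (ISTA-style) update for the L1-regularized least-squares objective noted in the comments. As a minimal standalone sketch of the generic soft-thresholding variant, with hypothetical dense arrays and made-up shapes (the code above works on sparse matrices and additionally enforces non-negativity), one iteration looks like this:

import numpy as np

def ista_step(S, Z, C, lr, lam):
    """One proximal-gradient step for 0.5*||Z - S.dot(C)||_F^2 + lam*||S||_1."""
    grad = -(Z - S.dot(C)).dot(C.T)                            # gradient of the smooth part
    S = S - lr * grad                                          # plain gradient step
    return np.sign(S) * np.maximum(np.abs(S) - lr * lam, 0.0)  # soft-thresholding (prox of the L1 term)

rng = np.random.RandomState(0)
Z = rng.randn(20, 8)          # hypothetical stand-in for the word embeddings
C = rng.randn(5, 8)           # hypothetical stand-in for the synset embeddings
S = np.zeros((20, 5))
for _ in range(100):
    S = ista_step(S, Z, C, lr=0.01, lam=0.1)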
Example #32
def _mutual_proximity_gumbel_sparse(S: np.ndarray,
                                    min_nnz: int = 30,
                                    test_set_ind: np.ndarray = None,
                                    verbose: int = 0,
                                    log=None):
    """MP Gumbel for sparse similarity matrices. 

    Please do not directly use this function, but invoke via 
    mutual_proximity_gumbel()
    """
    n = S.shape[0]
    self_value = 1.
    if test_set_ind is None:
        train_set_ind = slice(0, n)
    else:
        train_set_ind = np.setdiff1d(np.arange(n), test_set_ind)

    # mean, variance WITHOUT zero values (missing values), ddof=1
    if S.diagonal().max() != 1. or S.diagonal().min() != 1.:
        raise ValueError("Self similarities must be 1.")
    S_param = S[train_set_ind]
    # the -1 accounts for self similarities that must be excluded from the calc
    mu = np.array((S_param.sum(0) - 1) / (S_param.getnnz(0) - 1)).ravel()
    E2 = mu**2
    X = S_param.copy()
    X.data **= 2
    n_x = (X.getnnz(0) - 1)
    E1 = np.array((X.sum(0) - 1) / (n_x)).ravel()
    del X
    # for an unbiased sample variance
    va = n_x / (n_x - 1) * (E1 - E2)
    del E1, E2
    sd = np.sqrt(va)
    del va

    # Euler-Mascheroni gamma=.57721566490153286 (https://oeis.org/A001620)
    EULER_MASCHERONI = np.euler_gamma
    beta_hat = sd * np.sqrt(6) / np.pi
    mu_hat = mu - EULER_MASCHERONI * beta_hat

    del mu, sd

    S_mp = lil_matrix(S.shape, dtype=np.float32)
    nnz = S.getnnz(axis=1)  # nnz per row

    for i in range(n):
        if verbose and log and ((i + 1) % 1000 == 0 or i + 1 == n):
            log.message("MP_gumbel: {} of {}".format(i + 1, n), flush=True)
        j_idx = slice(i + 1, n)

        Dij = S[i, j_idx].toarray().ravel()  #Extract dense rows temporarily
        tmp = np.empty(n - i)
        tmp[0] = self_value / 2.
        if nnz[i] <= min_nnz:
            tmp[1:] = np.nan
        else:  # Rescale iff there are enough neighbors for current point
            p1 = _gumbelcdf(Dij, mu_hat[i], beta_hat[i])
            p1[Dij == 0] = 0.
            del Dij
            Dji = S[j_idx, i].toarray().ravel()  #for vectorization below.
            p2 = _gumbelcdf(Dji, mu_hat[j_idx], beta_hat[j_idx])
            p2[Dji == 0] = 0.
            del Dji
            tmp[1:] = (p1 * p2).ravel()
        S_mp[i, i:] = tmp
        del tmp, j_idx
    S_mp += S_mp.T

    # Retain original distances for objects with too few neighbors.
    # That is, keep distances FROM these objects to others (rows), but
    # set distances of other objects TO them to NaN (columns).
    # Returned matrix is thus NOT SYMMETRIC.
    for row in np.argwhere(nnz <= min_nnz):
        row = row[0]  # use scalar for indexing instead of array
        S_mp[row, :] = S.getrow(row)
    return S_mp.tocsr()
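The helper _gumbelcdf is not included in this excerpt. Assuming it evaluates the standard Gumbel cumulative distribution function with location mu_hat and scale beta_hat (which is what the moment estimates above suggest), a minimal vectorized sketch would be:

import numpy as np

def _gumbelcdf(x, mu_hat, beta_hat):
    """Gumbel CDF evaluated element-wise: exp(-exp(-(x - mu)/beta))."""
    x = np.asarray(x, dtype=float)
    return np.exp(-np.exp(-(x - mu_hat) / beta_hat))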
Example #33
def main():

    # Load CNN
    original_model = models.alexnet(pretrained=True)
    class AlexNetConv3(nn.Module):
                def __init__(self):
                    super(AlexNetConv3, self).__init__()
                    self.features = nn.Sequential(
                        # stop at conv3
                        *list(original_model.features.children())[:7]
                    )
                def forward(self, x):
                    x = self.features(x)
                    return x

    model = AlexNetConv3()
    model.eval()

    tm = TemporalMemory(
        # Must be the same dimensions as the SP
        columnDimensions=(2048,),
        # How many cells in each mini-column.
        cellsPerColumn=32,
        # A segment is active if it has >= activationThreshold connected synapses
        # that are active due to infActiveState
        activationThreshold=4,#1,4(melhor),
        initialPermanence=0.55,
        connectedPermanence=0.5,
        # Minimum number of active synapses for a segment to be considered during
        # search for the best-matching segments.
        minThreshold=1, #1
        # The max number of synapses added to a segment during learning
        maxNewSynapseCount=20, #6
        permanenceIncrement=0.01,
        permanenceDecrement=0.01,
        predictedSegmentDecrement=0.0005,#0.0001,#0.0005,
        maxSegmentsPerCell=100, #8 16(colou)
        maxSynapsesPerSegment=100, #8 16(colou)
        seed=42
    )

    numberImages = 200
    features = []
    labels = []

    DIR = "/home/cappizzino/Documentos/doutorado/dataset"

    path_im = [os.path.join(DIR,sp) for sp in [
        'fall/',
        'spring/',
        'summer/',
        'winter/']]

    # Seasons to compare.
    # First season is the input one. Second season is the reference season.
    # 0 = fall, 1 = spring, 2 = summer, 3 = winter.
    # simul 1 = 2 and 3
    # simul 2 = 1 and 0
    # simul 3 = 0 and 3
    reference_season = 2
    input_season = 3

    # Extract Features
    reference_features, reference_labels = extractFeatures(numberImages, reference_season, model,path_im)
    input_features, input_labels = extractFeatures(numberImages, input_season, model, path_im)

    #print len(input_features[0])
    #print input_labels[0]
    #print input_features

    # Experiments
    # Ground truth
    print 'Ground truth'
    GT = np.identity(numberImages, dtype = bool)
    for i in range(GT.shape[0]):
        for j in range(GT.shape[0]-1):
            if i==j:
                GT[i,j]=1

    # Pairwise (raw descriptors)
    print 'Pairwise descriptors'
    t = time.time()
    S_pairwise = cosine_similarity(reference_features[:numberImages], input_features[:numberImages])
    elapsed = time.time() - t
    print("Elapsed time: %f seconds\n" %elapsed)              

    # Dimension Reduction and binarization
    print 'Dimension Reduction'
    P = np.random.randn(len(input_features[0]), 1024)
    P = normc(P)

    # sLSBH (binarized descriptors)
    print 'sLSBH'
    t = time.time()
    D1_slsbh = getLSBH(reference_features[:numberImages],P,0.25)
    D2_slsbh = getLSBH(input_features[:numberImages],P,0.25)
    Sb_pairwise = pairwiseDescriptors(D1_slsbh[:numberImages], D2_slsbh[:numberImages])
    elapsed = time.time() - t
    print("Elapsed time: %f seconds\n" %elapsed)   
    #print len(np.nonzero(D1_slsbh[0])[0])

    D1_tm=[]
    D2_tm=[]
    id_max=[]
    id_max1=[]
    id_max2=[]

    print 'Temporal Pooler (1) descriptors'
    t = time.time()
    for i in range(numberImages):
        for _ in range(1):
            activeColumnIndices = np.nonzero(D1_slsbh[i,:])[0]
            tm.compute(activeColumnIndices, learn=True)
            activeCells = tm.getWinnerCells()
            D1_tm.append(activeCells)
            id_max1.append(max(activeCells))

    print 'Temporal Pooler (2) descriptors'
    for i in range(numberImages):
        activeColumnIndices = np.nonzero(D2_slsbh[i,:])[0]
        tm.compute(activeColumnIndices, learn=False)
        activeCells = tm.getWinnerCells()
        D2_tm.append(activeCells)
        id_max2.append(max(activeCells))

    id_max = max(max(id_max1),max(id_max2))
 
    D1_sparse = sparse.lil_matrix((numberImages, id_max+1), dtype='int8')
    for i in range(numberImages):
        D1_sparse[i,D1_tm[i]] = 1

    D2_sparse = sparse.lil_matrix((numberImages, id_max+1), dtype='int8')
    for i in range(numberImages):
        D2_sparse[i,D2_tm[i]] = 1

    S_TM = pairwiseDescriptors(D1_sparse, D2_sparse)
    elapsed = time.time() - t
    print("Elapsed time: %f seconds\n" %elapsed)

    D1_mcn=[]
    D2_mcn=[]
    id_max=[]
    id_max1=[]
    id_max2=[]

    # Simple HTM parameters
    params = Params()
    params.probAdditionalCon = 0.05    # probability for random connection
    params.nCellPerCol = 32            # number of cells per minicolumn
    params.nInConPerCol = 200          # number of connections per minicolumn
    params.minColumnActivity = 0.75    # minicolumn activation threshold
    params.nColsPerPattern = 50        # minimum number of active minicolumns k_min
    params.kActiveColumn = 100         # maximum number of active minicolumns k_max

    # conversion of the parameter to a natural number that contains the
    # required number of 1s for activation
    params.minColumnActivity = np.round(params.minColumnActivity*params.nInConPerCol)

    htm = MCN('htm',params)

    nCols_MCN=[]
    nCols_HTM=[]

    print ('Simple HTM (1)')
    t = time.time()
    for i in range(numberImages):
        htm.compute(D1_slsbh[i,:],0)
        nCols_MCN.append(htm.nCols)
        nCols_HTM.append(tm.columnDimensions[0])
        id_max1.append(max(htm.winnerCells))
        D1_mcn.append(htm.winnerCells)

    print ('Simple HTM (2)')
    for i in range(numberImages):
        htm.compute(D2_slsbh[i,:],1)
        #nCols_MCN.append(htm.nCols)
        #nCols_HTM.append(tm.columnDimensions[0])
        id_max2.append(max(htm.winnerCells))
        D2_mcn.append(htm.winnerCells)

    id_max = max(max(id_max1),max(id_max2))

    D1_sparse = sparse.lil_matrix((numberImages, id_max+1), dtype='int8')
    for i in range(numberImages):
        D1_sparse[i,D1_mcn[i]] = 1

    D2_sparse = sparse.lil_matrix((numberImages, id_max+1), dtype='int8')
    for i in range(numberImages):
        D2_sparse[i,D2_mcn[i]] = 1

    S_MCN = pairwiseDescriptors(D1_sparse, D2_sparse)
    elapsed = time.time() - t
    print("Elapsed time: %f seconds\n" %elapsed)

    # Results
    print 'Results 1'
    fig, ax = plt.subplots()

    P, R = createPR(S_pairwise,GT)
    ax.plot(R, P, label='pairwise / raw (avgP=%f)' %np.trapz(P,R))

    P, R = createPR(S_MCN,GT)
    ax.plot(R, P, label='MCN (avgP=%f)' %np.trapz(P,R))

    P, R = createPR(Sb_pairwise,GT)
    ax.plot(R, P, label='sLSBH / raw (avgP=%f)' %np.trapz(P,R))

    P, R = createPR(S_TM,GT)
    ax.plot(R, P, label='HTM TM (avgP=%f)' %np.trapz(P,R))

    ax.legend()
    ax.grid(True)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.show()

    print 'Results 2'
    fig2, ax2 = plt.subplots()

    ax2.plot(nCols_MCN,'g',label='MCN = %i cols' %htm.nCols)
    ax2.plot(nCols_HTM,'b',label='HTM TM = %i cols' %tm.columnDimensions[0])

    ax2.legend()
    ax2.grid(True)
    plt.xlabel('Number of seen images')
    plt.ylabel('Number of MiniColumns')
    plt.show()

    print 'Results 3'
    fig3, (ax3, ax4) = plt.subplots(nrows=1, ncols=2, gridspec_kw={'width_ratios': [2, 1]}, figsize=(9,4))

    P, R = createPR(S_pairwise,GT)
    ax3.plot(R, P, label='pairwise / raw (AUC=%f)' %np.trapz(P,R))

    P, R = createPR(S_MCN,GT)
    ax3.plot(R, P, label='MCN (AUC=%f)' %np.trapz(P,R))

    P, R = createPR(Sb_pairwise,GT)
    ax3.plot(R, P, label='sLSBH / raw (AUC=%f)' %np.trapz(P,R))

    P, R = createPR(S_TM,GT)
    ax3.plot(R, P, label='HTM TM (AUC=%f)' %np.trapz(P,R))

    ax3.grid(True)

    ax3.set_xlabel("Recall", fontsize = 12.0)
    ax3.set_ylabel("Precision", fontsize = 12.0)
    ax3.legend(fontsize=10)

    ax4.plot(nCols_MCN,'g',label='MCN = %i cols' %htm.nCols)
    ax4.plot(nCols_HTM,'b',label='HTM TM = %i cols' %tm.columnDimensions[0])

    ax4.grid(True)

    ax4.tick_params(axis='both', labelsize=6)
    ax4.set_xlabel('Number of seen images', fontsize = 12.0)
    ax4.set_ylabel('Number of MiniColumns', fontsize = 12.0)
    ax4.legend(fontsize=10)

    fig3.savefig('tes.eps')
    plt.show()
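Helpers such as pairwiseDescriptors, getLSBH and normc come from the surrounding project and are not shown here. As a rough sketch only, under the assumption that pairwiseDescriptors returns a cosine-style normalized overlap between rows of two sparse binary descriptor matrices, it could look like this:

import numpy as np
from scipy import sparse

def pairwise_descriptors(D1, D2):
    """Hypothetical sketch: normalized overlap between rows of two sparse binary matrices."""
    D1 = sparse.csr_matrix(D1, dtype=np.float64)
    D2 = sparse.csr_matrix(D2, dtype=np.float64)
    overlap = D1.dot(D2.T).toarray()            # shared active bits for every pair of rows
    n1 = np.sqrt(np.asarray(D1.sum(axis=1)))    # sqrt of active bits per row (binary data)
    n2 = np.sqrt(np.asarray(D2.sum(axis=1)))
    denom = n1.reshape(-1, 1) * n2.reshape(1, -1)
    denom[denom == 0] = 1.0                     # avoid division by zero for empty rows
    return overlap / denom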
Example #34
def init_parameters():
    """
        This function defines the material parameters for silicon.
        After executing this function the parameters can be altered.
        This function must be executed after **init_geometry()**
        """

    global dt
    dt = 1E-12

    global Chi
    Chi = np.full(n, 4.05)

    global Eg
    Eg = np.full(n, 1.12)

    global Nc
    Nc = np.full(n, 2.81E25)

    global Nv
    Nv = np.full(n, 1.83E25)

    global Epsilon
    Epsilon = np.full(n, Epsilon_r * Epsilon_0)

    global mu_p
    mu_p = np.full(n, 0.045)

    global mu_n
    mu_n = np.full(n, 0.14)

    # Doping-Profile
    global C
    C = np.zeros(n)

    # Sheet charge
    global CA
    CA = np.zeros(n)

    global Cau
    Cau = np.full(n, 0)  # 1E-28

    global generation
    generation = np.full(n, 0.0)

    #
    global u
    u = np.zeros(3 * n)

    #
    global u_old
    u_old = np.zeros(3 * n)

    #
    global b
    b = np.zeros(3 * n)

    #
    global A
    A = sparse.lil_matrix((3 * n, 3 * n))

    # Vector dx to be solved
    global x
    x = np.zeros(3 * n)
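The globals above only allocate the sparse system; filling A and b and solving happens elsewhere in that project. As a generic sketch of the usual pattern (placeholder values; LIL for assembly, CSR for solving):

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve

n = 100                                     # hypothetical number of mesh points
A = sparse.lil_matrix((3 * n, 3 * n))
b = np.zeros(3 * n)

A.setdiag(np.full(3 * n, 2.0))              # placeholder diagonal entries
A.setdiag(np.full(3 * n - 1, -1.0), k=1)    # placeholder off-diagonals
A.setdiag(np.full(3 * n - 1, -1.0), k=-1)
b[0] = 1.0

x = spsolve(A.tocsr(), b)                   # convert once assembly is finished, then solve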
Example #35
    def __init__(self,
                 X,
                 Y,
                 regparam=1.0,
                 qids=None,
                 callbackfun=None,
                 **kwargs):
        self.regparam = regparam
        self.callbackfun = None
        self.Y = array_tools.as_2d_array(Y)
        #Number of training examples
        self.size = Y.shape[0]
        if self.Y.shape[1] > 1:
            raise Exception(
                'CGRankRLS does not currently work in multi-label mode')
        self.learn_from_labels = True
        self.callbackfun = callbackfun
        self.X = csc_matrix(X.T)
        #if qids != None:
        #    self.setQids(qids)
        #else:
        #    self.qidmap = None
        #self.train()
        if qids != None:
            self.qids = map_qids(qids)
            self.splits = qids_to_splits(self.qids)
        else:
            self.qids = None
        regparam = self.regparam
        #regparam = 0.
        qids = self.qids
        if qids != None:
            P = sp.lil_matrix((self.size, len(set(qids))))
            for qidind in range(len(self.splits)):
                inds = self.splits[qidind]
                qsize = len(inds)
                for i in inds:
                    P[i, qidind] = 1. / sqrt(qsize)
            P = P.tocsr()
            PT = P.tocsc().T
        else:
            P = 1. / sqrt(self.size) * (np.mat(
                np.ones((self.size, 1), dtype=np.float64)))
            PT = P.T
        X = self.X.tocsc()
        X_csr = X.tocsr()

        def mv(v):
            v = np.mat(v).T
            return X_csr * (X.T * v) - X_csr * (P * (PT *
                                                     (X.T * v))) + regparam * v

        G = LinearOperator((X.shape[0], X.shape[0]),
                           matvec=mv,
                           dtype=np.float64)
        Y = self.Y
        if not self.callbackfun == None:

            def cb(v):
                self.A = np.mat(v).T
                self.b = np.mat(np.zeros((1, 1)))
                self.callbackfun.callback(self)
        else:
            cb = None
        XLY = X_csr * Y - X_csr * (P * (PT * Y))
        try:
            self.A = np.mat(cg(G, XLY, callback=cb)[0]).T
        except Finished:
            pass
        self.b = np.mat(np.zeros((1, 1)))
        self.predictor = predictor.LinearPredictor(self.A, self.b)
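The training above never materializes the Gram matrix; it wraps the matrix-vector product in a scipy LinearOperator and hands it to the conjugate-gradient solver. A stripped-down sketch of that pattern on a hypothetical small problem (without the query-wise centering term P):

import numpy as np
from scipy.sparse.linalg import LinearOperator, cg

rng = np.random.RandomState(0)
X = rng.randn(20, 500)                      # features x examples, matching the layout above
regparam = 1.0
b = rng.randn(20)

def mv(v):
    # computes (X X^T + regparam * I) v without forming X X^T explicitly
    return X @ (X.T @ v) + regparam * v

G = LinearOperator((20, 20), matvec=mv, dtype=np.float64)
sol, info = cg(G, b)
assert info == 0                            # info == 0 means the solver converged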
Example #36
def _get_projection(n_samples, n_features, density='auto', eps=0.1):
    p = SparseRandomProjection()
    mat = lil_matrix((n_samples, n_features))
    return p.fit(mat)
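With n_components left at 'auto', fitting only needs the shape of the input, so the all-zero LIL matrix above is just a cheap placeholder; the output dimensionality is then chosen from the Johnson-Lindenstrauss bound for the given eps. A small usage sketch with hypothetical sizes:

import numpy as np
from scipy.sparse import lil_matrix
from sklearn.random_projection import SparseRandomProjection

n_samples, n_features = 2000, 10000
proj = SparseRandomProjection(eps=0.1)
proj.fit(lil_matrix((n_samples, n_features)))   # only the shape is used here
print(proj.n_components_)                       # dimensionality picked by the JL bound

X = np.random.rand(5, n_features)
X_reduced = proj.transform(X)                   # shape: (5, proj.n_components_)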
Example #37
File: test.py  Project: verivital/pdev
    alpha_range = (0.99, 1.01)
    beta_range = (0.99, 1.01)
    dPde.set_perturbation(alpha_range, beta_range)

    ########################################################
    # test verifier class

    verifier = Verifier()
    toTimeStep = 2
    dis_reachable_set = verifier.get_dreach_set(
        dPde, toTimeStep)  # compute discrete reachable set
    dis_min_vec, _, dis_max_vec, _ = dis_reachable_set[toTimeStep -
                                                       1].get_min_max()

    unsafe_mat = lil_matrix((1, dPde.matrix_a.shape[0]), dtype=float)
    unsafe_mat[0, dPde.matrix_a.shape[0] - 1] = 1
    unsafe_vector = lil_matrix((1, 1), dtype=float)
    unsafe_vector[0, 0] = -1

    dPde.set_unsafe_set(unsafe_mat.tocsc(), unsafe_vector.tocsc())
    verifier.on_fly_check_dPde(dPde, toTimeStep)

    ############################################################
    # test ReachSetAssembler
    RSA = ReachSetAssembler()
    u_dset, e_dset, bloated_dset = RSA.get_dreachset(dPde, toTimeStep)
    print "\nu_dset = {}".format(u_dset)
    print "\ne_dset = {}".format(e_dset)
    print "\nbloated_dset = {}".format(bloated_dset)
    u_min, u_min_points, u_max, u_max_points = u_dset[toTimeStep].get_min_max()
Example #38
def load_gcn_data(dataset_str):
    """
    Loads input data from gcn/data directory
    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
        object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.
    All objects above must be saved using python pickle module.
    :param dataset_str: Dataset name
    :return: All data input files loaded (as well the training/test data).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/{}/ind.{}.{}".format(dataset_str,dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/{}/ind.{}.test.index".format(dataset_str,dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sparse.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sparse.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    G=nx.from_dict_of_lists(graph, create_using=nx.Graph())
    A = nx.adjacency_matrix(G)

    labels = np.vstack((ally, ty))

    labels[test_idx_reorder, :] = labels[test_idx_range, :]


    idx_test = test_idx_range.tolist()

    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    features = normalize_feature(features)

    features = torch.FloatTensor(np.array(features.todense()))

    if dataset_str == 'citeseer':
        kk = np.zeros(len(labels)).astype(int)
        for i in range(len(labels)):
            t = labels[i]
            if sum(t) == 0:
                kk[i] = len(t)
            else:
                kk[i] = np.argwhere(t != 0)[0]
        labels=kk
        labels = torch.LongTensor(labels)
    else:
        labels = torch.LongTensor(np.where(labels)[1])




    idx_train = torch.LongTensor(idx_train)
    idx_test = torch.LongTensor(idx_test)
    idx_val = torch.LongTensor(idx_val)

    A_processed = preprocess_adj(A)
    return (G,A_processed, features, labels, idx_train, idx_test, idx_val)
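parse_index_file, normalize_feature and preprocess_adj belong to the surrounding project and are not included in this excerpt. For preprocess_adj, GCN pipelines conventionally apply the symmetric renormalization D^-1/2 (A + I) D^-1/2; a sketch under that assumption:

import numpy as np
import scipy.sparse as sp

def preprocess_adj(adj):
    """Hypothetical sketch: GCN renormalization trick, D^-1/2 (A + I) D^-1/2, returned as CSR."""
    adj = sp.csr_matrix(adj) + sp.eye(adj.shape[0], format='csr')   # add self-loops
    deg = np.asarray(adj.sum(axis=1)).ravel()
    d_inv_sqrt = np.zeros_like(deg)
    d_inv_sqrt[deg > 0] = deg[deg > 0] ** -0.5
    D_inv_sqrt = sp.diags(d_inv_sqrt)
    return D_inv_sqrt.dot(adj).dot(D_inv_sqrt).tocsr()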
Example #39
def assembly_quads_mindlin_plate_geometric(nodes,
                                           elements,
                                           thickness,
                                           sigma_x,
                                           sigma_y,
                                           tau_xy,
                                           gauss_order=3):
    from quadrature import legendre_quad
    from shape_functions import iso_quad
    from numpy import sum
    print "The assembly routine is started"
    freedom = 3
    element_nodes = 4
    nodes_count = len(nodes)
    dimension = freedom * nodes_count
    element_dimension = freedom * element_nodes
    geometric = lil_matrix((dimension, dimension))
    elements_count = len(elements)
    (xi, eta, w) = legendre_quad(gauss_order)

    for element_index in range(elements_count):
        kg = zeros((element_dimension, element_dimension))
        vertices = nodes[elements[element_index, :], :]
        sx = sigma_x[elements[element_index, :]]
        sy = sigma_y[elements[element_index, :]]
        txy = tau_xy[elements[element_index, :]]
        for i in range(len(w)):
            (jacobian, shape, shape_dx,
             shape_dy) = iso_quad(vertices, xi[i], eta[i])
            s0 = array([[sum(shape * sx), sum(shape * txy)],
                        [sum(shape * txy), sum(shape * sy)]])
            bb = array([[
                shape_dx[0], 0.0, 0.0, shape_dx[1], 0.0, 0.0, shape_dx[2], 0.0,
                0.0, shape_dx[3], 0.0, 0.0
            ],
                        [
                            shape_dy[0], 0.0, 0.0, shape_dy[1], 0.0, 0.0,
                            shape_dy[2], 0.0, 0.0, shape_dy[3], 0.0, 0.0
                        ]])
            bs1 = array([[
                0.0, shape_dx[0], 0.0, 0.0, shape_dx[1], 0.0, 0.0, shape_dx[2],
                0.0, 0.0, shape_dx[3], 0.0
            ],
                         [
                             0.0, shape_dy[0], 0.0, 0.0, shape_dy[1], 0.0, 0.0,
                             shape_dy[2], 0.0, 0.0, shape_dy[3], 0.0
                         ]])
            bs2 = array([[
                0.0, 0.0, shape_dx[0], 0.0, 0.0, shape_dx[1], 0.0, 0.0,
                shape_dx[2], 0.0, 0.0, shape_dx[3]
            ],
                         [
                             0.0, 0.0, shape_dy[0], 0.0, 0.0, shape_dy[1], 0.0,
                             0.0, shape_dy[2], 0.0, 0.0, shape_dy[3]
                         ]])
            kg = kg + thickness * bb.transpose().dot(s0).dot(
                bb) * jacobian * w[i] + thickness**3.0 / 12.0 * (
                    bs1.transpose().dot(s0).dot(bs1) +
                    bs2.transpose().dot(s0).dot(bs2)) * jacobian * w[i]

        for i in range(element_dimension):
            ii = elements[element_index, i / freedom] * freedom + i % freedom
            for j in range(i, element_dimension):
                jj = elements[element_index,
                              j / freedom] * freedom + j % freedom
                geometric[ii, jj] += kg[i, j]
                if ii != jj:
                    geometric[jj, ii] = geometric[ii, jj]
        print_progress(element_index, elements_count - 1)
    print "\nThe assembly routine is completed"
    return geometric.tocsr()
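The geometric stiffness matrix assembled here is normally paired with the elastic stiffness matrix K in a linearized buckling eigenproblem K v = lambda Kg v, whose smallest eigenvalue is the critical load factor. A deliberately tiny dense sketch of that final step (the 2x2 matrices are hypothetical; a real model would use the sparse matrices with constrained dofs removed):

import numpy as np
from scipy.linalg import eigh

K = np.array([[4.0, -1.0],
              [-1.0, 3.0]])        # hypothetical elastic stiffness (after boundary conditions)
Kg = np.array([[1.0, 0.0],
               [0.0, 2.0]])        # hypothetical geometric stiffness

lam, modes = eigh(K, Kg)           # generalized symmetric eigenproblem K v = lam Kg v
critical_load_factor = lam.min()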
Example #40
File: data.py  Project: MisterFXGuy/pypet
def create_param_dict(param_dict):
    '''Fills a dictionary with some parameters that can be put into a trajectory.
    '''
    param_dict['Normal'] = {}
    param_dict['Numpy'] = {}
    param_dict['Sparse'] ={}
    param_dict['Numpy_2D'] = {}
    param_dict['Numpy_3D'] = {}
    param_dict['Tuples'] ={}
    param_dict['Lists'] ={}
    param_dict['Pickle']={}

    normal_dict = param_dict['Normal']
    normal_dict['string'] = 'Im a test string!'
    normal_dict['int'] = 42
    normal_dict['long'] = compat.long_type(42)
    normal_dict['double'] = 42.42
    normal_dict['bool'] =True
    normal_dict['trial'] = 0

    numpy_dict=param_dict['Numpy']
    numpy_dict['string'] = np.array(['Uno', 'Dos', 'Tres'])
    numpy_dict['int'] = np.array([1,2,3,4])
    numpy_dict['double'] = np.array([1.0,2.0,3.0,4.0])
    numpy_dict['bool'] = np.array([True, False, True])

    param_dict['Numpy_2D']['double'] = np.matrix([[1.0,2.0],[3.0,4.0]])
    param_dict['Numpy_3D']['double'] = np.array([[[1.0,2.0],[3.0,4.0]],[[3.0,-3.0],[42.0,41.0]]])

    spsparse_csc = spsp.lil_matrix((222,22))
    spsparse_csc[1,2] = 44.6
    spsparse_csc[1,9] = 44.5
    spsparse_csc = spsparse_csc.tocsc()

    spsparse_csr = spsp.lil_matrix((222,22))
    spsparse_csr[1,3] = 44.7
    spsparse_csr[17,17] = 44.755555
    spsparse_csr = spsparse_csr.tocsr()

    spsparse_bsr = spsp.bsr_matrix(np.matrix([[1, 1, 0, 0, 2, 2],
        [1, 1, 0, 0, 2, 2],
        [0, 0, 0, 0, 3, 3],
        [0, 0, 0, 0, 3, 3],
        [4, 4, 5, 5, 6, 6],
        [4, 4, 5, 5, 6, 6]]))

    spsparse_dia = spsp.dia_matrix(np.matrix([[1, 0, 3, 0],
        [1, 2, 0, 4],
        [0, 2, 3, 0],
        [0, 0, 3, 4]]))


    param_dict['Sparse']['bsr_mat'] = spsparse_bsr
    param_dict['Sparse']['csc_mat'] = spsparse_csc
    param_dict['Sparse']['csr_mat'] = spsparse_csr
    param_dict['Sparse']['dia_mat'] = spsparse_dia

    param_dict['Tuples']['empty'] = ()
    param_dict['Tuples']['int'] = (1,2,3)
    param_dict['Tuples']['float'] = (44.4,42.1,3.)
    param_dict['Tuples']['str'] = ('1','2wei','dr3i')

    param_dict['Lists']['lempty'] = []
    param_dict['Lists']['lint'] = [1,2,3]
    param_dict['Lists']['lfloat'] = [44.4,42.1,3.]
    param_dict['Lists']['lstr'] = ['1','2wei','dr3i']

    param_dict['Pickle']['list']= ['b','h', 53, (), 0]
    param_dict['Pickle']['list']= ['b','h', 42, (), 1]
    param_dict['Pickle']['list']= ['b',[444,43], 44, (),2]
Example #41
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)
import pandas as pd
import numpy as np
from scipy.sparse import lil_matrix
import scipy as scp
from code_file.utils import get_logs_from
from code_file.model import calculate_matrix

# To build the item co-occurrence (collaborative-filtering) matrix we need the largest item ID
ITEM_NUM = 4318203
# Load the user behaviour logs for the current group

user_logs = get_logs_from('../full_logs/user_logs_group7.txt')

# Convert the dict into a list of (user, logs) pairs
user_logs = list(user_logs.items())

for i in range(0, len(user_logs), 10000):
    print("The %d " % i + 'batch is started...........')
    print("--------------------------")
    mat = lil_matrix((ITEM_NUM, ITEM_NUM), dtype=float)
    mat = calculate_matrix(mat, user_logs[i:i + 10000], alpha=0.5)
    # Save the accumulated matrix after each batch has been processed
    # scp.sparse.save_npz('../tmpData/sparse_matrix_%d_batch_group4.npz' % i, mat.tocsr())
    scp.sparse.save_npz('../tmpdata_iuf/sparse_matrix_%d_batch_group7.npz' % i,
                        mat.tocsr())
    print("save successfully!!!!")
    print("************************")
Example #42
def load_data(dataset_str):
    """
    Loads input data from gcn/data directory

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
        object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.

    All objects above must be saved using python pickle module.

    :param dataset_str: Dataset name
    :return: All data input files loaded (as well the training/test data).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        "data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
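sample_mask and parse_index_file are small project helpers not shown in this excerpt; sample_mask is conventionally just a boolean indicator over the node indices. A sketch under that assumption:

import numpy as np

def sample_mask(idx, n):
    """Hypothetical sketch: length-n boolean mask that is True at the given indices."""
    mask = np.zeros(n, dtype=bool)
    mask[np.asarray(list(idx), dtype=int)] = True
    return mask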
Example #43
def main():
	""" Plasma PIC simulation """
	
	# Simulation parameters
	N         = 40000   # Number of particles
	Nx        = 400     # Number of mesh cells
	t         = 0       # current time of the simulation
	tEnd      = 50      # time at which simulation ends
	dt        = 1       # timestep
	boxsize   = 50      # periodic domain [0,boxsize]
	n0        = 1       # electron number density
	vb        = 3       # beam velocity
	vth       = 1       # beam width
	A         = 0.1     # perturbation
	plotRealTime = True # switch on for plotting as the simulation goes along
	
	# Generate Initial Conditions
	np.random.seed(42)            # set the random number generator seed
	# construct 2 opposite-moving Gaussian beams
	pos  = np.random.rand(N,1) * boxsize  
	vel  = vth * np.random.randn(N,1) + vb
	Nh = int(N/2)
	vel[Nh:] *= -1
	# add perturbation
	vel *= (1 + A*np.sin(2*np.pi*pos/boxsize))
	
	# Construct matrix G to compute the gradient (1st derivative)
	dx = boxsize/Nx
	e = np.ones(Nx)
	diags = np.array([-1,1])
	vals  = np.vstack((-e,e))
	Gmtx = sp.spdiags(vals, diags, Nx, Nx);
	Gmtx = sp.lil_matrix(Gmtx)
	Gmtx[0,Nx-1] = -1
	Gmtx[Nx-1,0] = 1
	Gmtx /= (2*dx)
	Gmtx = sp.csr_matrix(Gmtx)

	# Construct matrix L to compute the Laplacian (2nd derivative)
	diags = np.array([-1,0,1])
	vals  = np.vstack((e,-2*e,e))
	Lmtx = sp.spdiags(vals, diags, Nx, Nx);
	Lmtx = sp.lil_matrix(Lmtx)
	Lmtx[0,Nx-1] = 1
	Lmtx[Nx-1,0] = 1
	Lmtx /= dx**2
	Lmtx = sp.csr_matrix(Lmtx)
	
	# calculate initial accelerations from the electrostatic field
	acc = getAcc( pos, Nx, boxsize, n0, Gmtx, Lmtx )
	
	# number of timesteps
	Nt = int(np.ceil(tEnd/dt))
	
	# prep figure
	fig = plt.figure(figsize=(5,4), dpi=80)
	
	# Simulation Main Loop
	for i in range(Nt):
		# (1/2) kick
		vel += acc * dt/2.0
		
		# drift (and apply periodic boundary conditions)
		pos += vel * dt
		pos = np.mod(pos, boxsize)
		
		# update accelerations
		acc = getAcc( pos, Nx, boxsize, n0, Gmtx, Lmtx )
		
		# (1/2) kick
		vel += acc * dt/2.0
		
		# update time
		t += dt
		
		# plot in real time - color 1/2 particles blue, other half red
		if plotRealTime or (i == Nt-1):
			plt.cla()
			plt.scatter(pos[0:Nh],vel[0:Nh],s=.4,color='blue', alpha=0.5)
			plt.scatter(pos[Nh:], vel[Nh:], s=.4,color='red',  alpha=0.5)
			plt.axis([0,boxsize,-6,6])
			
			plt.pause(0.001)
			
	
	# Save figure
	plt.xlabel('x')
	plt.ylabel('v')
	plt.savefig('pic.png',dpi=240)
	plt.show()
	    
	return 0
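The Gmtx/Lmtx construction above builds periodic central-difference operators in LIL form before converting to CSR. As a quick sanity check (a sketch that repeats only the Gmtx construction with the same parameters), applying the gradient matrix to a periodic test function recovers its analytic derivative:

import numpy as np
import scipy.sparse as sp

Nx, boxsize = 400, 50.0
dx = boxsize / Nx
e = np.ones(Nx)
Gmtx = sp.lil_matrix(sp.spdiags(np.vstack((-e, e)), np.array([-1, 1]), Nx, Nx))
Gmtx[0, Nx - 1] = -1                      # periodic wrap-around entries
Gmtx[Nx - 1, 0] = 1
Gmtx = sp.csr_matrix(Gmtx / (2 * dx))

xgrid = np.arange(Nx) * dx
f = np.sin(2 * np.pi * xgrid / boxsize)
df_exact = (2 * np.pi / boxsize) * np.cos(2 * np.pi * xgrid / boxsize)
assert np.allclose(Gmtx.dot(f), df_exact, atol=1e-3)   # central difference matches the derivative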
Example #44
def assembly_quads_mindlin_plate_laminated(nodes,
                                           elements,
                                           thicknesses,
                                           elasticity_matrices,
                                           gauss_order=3,
                                           kappa=5.0 / 6.0):
    # type: (array, array, array, list, int, float) -> csr_matrix
    """
    Assembly Routine for the Mindlin Plates Analysis
    :param nodes: A two-dimensional array of plate's nodes coordinates
    :param elements: A two-dimensional array of the plate's quadrilateral elements (mesh)
    :param thicknesses: An array of thicknesses that stores thicknesses of each layer
    :param elasticity_matrices: A list or a sequence of two-dimensional arrays. Each array represents stress-strain relations of corresponded layer
    :param gauss_order: An order of gaussian quadratures
    :param kappa: The shear correction factor
    :return: Global stiffness matrix in the CSR sparse format
    Order: u_0, v_0, w_0, theta_x_0, theta_y_0, ..., u_(n-1), v_(n-1), w_(n-1), theta_x_(n-1), theta_y_(n-1); n - nodes count
    """
    from quadrature import legendre_quad
    from shape_functions import iso_quad
    from numpy import sum

    print "The assembly routine is started"
    freedom = 5
    element_nodes = 4
    nodes_count = len(nodes)
    dimension = freedom * nodes_count
    element_dimension = freedom * element_nodes
    global_matrix = lil_matrix((dimension, dimension))
    elements_count = len(elements)
    (xi, eta, w) = legendre_quad(gauss_order)

    h = sum(thicknesses)

    for element_index in range(elements_count):
        local = zeros((element_dimension, element_dimension))
        element = nodes[elements[element_index, :], :]
        for i in range(len(w)):
            (jacobian, shape, shape_dx,
             shape_dy) = iso_quad(element, xi[i], eta[i])
            bm = array([[
                shape_dx[0], 0.0, 0.0, 0.0, 0.0, shape_dx[1], 0.0, 0.0, 0.0,
                0.0, shape_dx[2], 0.0, 0.0, 0.0, 0.0, shape_dx[3], 0.0, 0.0,
                0.0, 0.0
            ],
                        [
                            0.0, shape_dy[0], 0.0, 0.0, 0.0, 0.0, shape_dy[1],
                            0.0, 0.0, 0.0, 0.0, shape_dy[2], 0.0, 0.0, 0.0,
                            0.0, shape_dy[3], 0.0, 0.0, 0.0
                        ],
                        [
                            shape_dy[0], shape_dx[0], 0.0, 0.0, 0.0,
                            shape_dy[1], shape_dx[1], 0.0, 0.0, 0.0,
                            shape_dy[2], shape_dx[2], 0.0, 0.0, 0.0,
                            shape_dy[3], shape_dx[3], 0.0, 0.0, 0.0
                        ]])
            bf = array([[
                0.0, 0.0, 0.0, shape_dx[0], 0.0, 0.0, 0.0, 0.0, shape_dx[1],
                0.0, 0.0, 0.0, 0.0, shape_dx[2], 0.0, 0.0, 0.0, 0.0,
                shape_dx[3], 0.0
            ],
                        [
                            0.0, 0.0, 0.0, 0.0, shape_dy[0], 0.0, 0.0, 0.0,
                            0.0, shape_dy[1], 0.0, 0.0, 0.0, 0.0, shape_dy[2],
                            0.0, 0.0, 0.0, 0.0, shape_dy[3]
                        ],
                        [
                            0.0, 0.0, 0.0, shape_dy[0], shape_dx[0], 0.0, 0.0,
                            0.0, shape_dy[1], shape_dx[1], 0.0, 0.0, 0.0,
                            shape_dy[2], shape_dx[2], 0.0, 0.0, 0.0,
                            shape_dy[3], shape_dx[3]
                        ]])
            bc = array([[
                0.0, 0.0, shape_dx[0], shape[0], 0.0, 0.0, 0.0, shape_dx[1],
                shape[1], 0.0, 0.0, 0.0, shape_dx[2], shape[2], 0.0, 0.0, 0.0,
                shape_dx[3], shape[3], 0.0
            ],
                        [
                            0.0, 0.0, shape_dy[0], 0.0, shape[0], 0.0, 0.0,
                            shape_dy[1], 0.0, shape[1], 0.0, 0.0, shape_dy[2],
                            0.0, shape[2], 0.0, 0.0, shape_dy[3], 0.0, shape[3]
                        ]])
            z0 = -h / 2.0
            for j in range(len(thicknesses)):
                z1 = z0 + thicknesses[j]
                df = elasticity_matrices[j]
                dc = array([[df[2, 2], 0.0], [0.0, df[2, 2]]])
                local = local + (z1 - z0) * (
                    bm.transpose().dot(df).dot(bm)) * jacobian * w[i]
                local = local + (z1**2.0 - z0**2.0) / 2.0 * (
                    bm.transpose().dot(df).dot(bf)) * jacobian * w[i]
                local = local + (z1**2.0 - z0**2.0) / 2.0 * (
                    bf.transpose().dot(df).dot(bm)) * jacobian * w[i]
                local = local + (z1**3.0 - z0**3.0) / 3.0 * (
                    bf.transpose().dot(df).dot(bf)) * jacobian * w[i]
                local = local + (z1 - z0) * kappa * (
                    bc.transpose().dot(dc).dot(bc)) * jacobian * w[i]
                z0 = z1

        for i in range(element_dimension):
            ii = elements[element_index, i / freedom] * freedom + i % freedom
            for j in range(i, element_dimension):
                jj = elements[element_index,
                              j / freedom] * freedom + j % freedom
                global_matrix[ii, jj] += local[i, j]
                if i != j:
                    global_matrix[jj, ii] = global_matrix[ii, jj]
        print_progress(element_index, elements_count - 1)
    print "\nThe assembly routine is completed"
    return global_matrix.tocsr()
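Once the global matrix is assembled, essential (Dirichlet) boundary conditions still have to be imposed before solving. A simple row-replacement sketch (it does not preserve symmetry; K, f and fixed_dofs are hypothetical inputs, not part of the routine above):

import numpy as np
from scipy.sparse.linalg import spsolve

def apply_dirichlet_and_solve(K, f, fixed_dofs, value=0.0):
    """Hypothetical sketch: enforce u[dof] = value by replacing rows with identity rows."""
    K = K.tolil()                            # LIL allows cheap row edits
    f = np.asarray(f, dtype=float).copy()
    for dof in fixed_dofs:
        K.rows[dof] = [int(dof)]             # keep only the diagonal entry in this row
        K.data[dof] = [1.0]
        f[dof] = value
    return spsolve(K.tocsr(), f)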
Example #45
def ridge_regression(X, y, alpha, sample_weight=1.0, solver='auto', tol=1e-3):
    """Solve the ridge equation by the method of normal equations.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data

    y : array-like, shape = [n_samples] or [n_samples, n_responses]
        Target values

    alpha : float
        Regularization strength; larger values specify stronger regularization

    sample_weight : float or numpy array of shape [n_samples]
        Individual weights for each sample

    solver : {'auto', 'dense_cholesky', 'sparse_cg'}, optional
        Solver to use in the computational routines. 'dense_cholesky'
        will use the standard scipy.linalg.solve function, 'sparse_cg'
        will use a conjugate gradient solver as found in
        scipy.sparse.linalg.cg, while 'auto' will choose the most
        appropriate depending on the matrix X.

    tol: float
        Precision of the solution.

    Returns
    -------
    coef: array, shape = [n_features] or [n_responses, n_features]
        Weight vector(s).

    Notes
    -----
    This function won't compute the intercept.
    """

    n_samples, n_features = X.shape
    is_sparse = False

    if hasattr(X, 'todense'):  # lazy import of scipy.sparse
        from scipy import sparse
        is_sparse = sparse.issparse(X)

    if is_sparse:
        if n_features > n_samples or \
           isinstance(sample_weight, np.ndarray) or \
           sample_weight != 1.0:

            I = sparse.lil_matrix((n_samples, n_samples))
            I.setdiag(np.ones(n_samples) * alpha * sample_weight)
            c = _solve(X * X.T + I, y, solver, tol)
            coef = X.T * c
        else:
            I = sparse.lil_matrix((n_features, n_features))
            I.setdiag(np.ones(n_features) * alpha)
            coef = _solve(X.T * X + I, X.T * y, solver, tol)
    else:
        if n_features > n_samples or \
           isinstance(sample_weight, np.ndarray) or \
           sample_weight != 1.0:

            # kernel ridge
            # w = X.T * inv(X X^t + alpha*Id) y
            A = np.dot(X, X.T)
            A.flat[::n_samples + 1] += alpha * sample_weight
            coef = np.dot(X.T, _solve(A, y, solver, tol))
        else:
            # ridge
            # w = inv(X^t X + alpha*Id) * X.T y
            A = np.dot(X.T, X)
            A.flat[::n_features + 1] += alpha
            coef = _solve(A, np.dot(X.T, y), solver, tol)

    return coef.T
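For dense X with more samples than features, the branch above is just the normal equations w = (X^T X + alpha*I)^-1 X^T y. A standalone numeric sketch of that case (synthetic data, made-up coefficients):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 5)
w_true = np.array([1.0, -2.0, 0.5, 0.0, 3.0])
y = X.dot(w_true) + 0.01 * rng.randn(50)

alpha = 1.0
A = X.T.dot(X)
A.flat[::X.shape[1] + 1] += alpha            # add alpha to the diagonal, as in the dense branch
w = np.linalg.solve(A, X.T.dot(y))           # ridge coefficients via the normal equations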
Example #46

if __name__ == '__main__':
    path = argv[1]
    num_movies = argv[2]
    user_file = argv[3]
    #user_file = 'user_ids.csv'

    if num_movies == 'all':
        files = os.listdir(path)
    else:
        files = os.listdir(path)[:int(num_movies)]

    write_user_ids(path, files, user_file)
    users = get_user_dict(user_file)
    data = lil_matrix((len(users), len(files)))

    ct = 0
    for f in files:
        if ct % 100 == 0:
            print ct
        infile = open('%s/%s' % (path, f), 'r')
        j = int(infile.readline().strip()[:-1]) - 1  # Movie number
        for line in infile:
            id, rating, _ = line.split(',')
            i = users[id]
            data[i, j] = int(rating)
        infile.close()
        ct += 1

    mmwrite('ratings_matrix', data)
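The Matrix Market file written above can be read back later with mmread; a short usage sketch (mmwrite appends the .mtx extension when none is given, to the best of my knowledge):

from scipy.io import mmread

ratings = mmread('ratings_matrix.mtx').tocsr()   # coordinate-format file -> COO -> CSR
print(ratings.shape, ratings.nnz)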
Example #47
    def process(self):

        dataset_str = self.dataset_name
        names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
        objects = []
        for i in range(len(names)):
            data_name = "ind.{}.{}".format(dataset_str, names[i])
            data_path = os.path.join(self.raw_root_path, data_name)
            with open(data_path, 'rb') as f:
                if sys.version_info > (3, 0):
                    objects.append(pickle.load(f, encoding='latin1'))
                else:
                    objects.append(pickle.load(f))

        x, y, tx, ty, allx, ally, graph = tuple(objects)

        with open(os.path.join(self.raw_root_path,
                               "ind.{}.test.index".format(dataset_str)),
                  "r",
                  encoding="utf-8") as f:
            test_idx_reorder = [int(line.strip()) for line in f]
            test_idx_range = np.sort(test_idx_reorder)

        if self.dataset_name == 'citeseer':
            # Fix citeseer dataset (there are some isolated nodes in the graph)
            # Find isolated nodes, add them as zero-vecs into the right position
            test_idx_range_full = list(
                range(min(test_idx_reorder),
                      max(test_idx_reorder) + 1))
            tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
            tx_extended[test_idx_range - min(test_idx_range), :] = tx
            tx = tx_extended
            ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
            ty_extended[test_idx_range - min(test_idx_range), :] = ty
            ty = ty_extended

        features = sp.vstack((allx, tx)).tolil()
        features[test_idx_reorder, :] = features[test_idx_range, :]
        # adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

        labels = np.vstack((ally, ty))
        labels[test_idx_reorder, :] = labels[test_idx_range, :]

        test_index = test_idx_range.tolist()
        if self.task == "semi_supervised":
            train_index = list(range(len(y)))
            valid_index = list(range(len(y), len(y) + 500))
        else:
            train_index = range(len(ally) - 500)
            valid_index = range(len(ally) - 500, len(ally))

        x = np.array(features.todense()).astype(np.float32)
        inv_sum_x = 1.0 / np.sum(x, axis=-1, keepdims=True)
        inv_sum_x[np.isnan(inv_sum_x)] = 1.0
        inv_sum_x[np.isinf(inv_sum_x)] = 1.0
        x *= inv_sum_x

        edge_index = np.array(nx.from_dict_of_lists(graph).edges).T
        edge_index, _ = remove_self_loop_edge(edge_index)
        edge_index, _ = convert_edge_to_directed(edge_index)
        y = np.argmax(labels, axis=-1).astype(np.int32)

        graph = Graph(x=x, edge_index=edge_index, y=y)

        return graph, (train_index, valid_index, test_index)
Example #48
            bc_fix[i * ndofV + 1] = True
            bc_val[i * ndofV + 1] = velocity_y(x[i], y[i])
    #end for
#end if

print("setup: boundary conditions: %.3f s" % (time.time() - start))

#################################################################
# build FE matrix
# [ K G ][u]=[f]
# [GT 0 ][p] [h]
#################################################################
start = time.time()

if pnormalise:
    A_mat = lil_matrix((Nfem + 1, Nfem + 1), dtype=np.float64)  # matrix A
    rhs = np.zeros((Nfem + 1), dtype=np.float64)  # right hand side
    A_mat[Nfem, NfemV:Nfem] = 1
    A_mat[NfemV:Nfem, Nfem] = 1
else:
    A_mat = lil_matrix((Nfem, Nfem), dtype=np.float64)  # matrix A
    rhs = np.zeros(Nfem, dtype=np.float64)  # right hand side

b_mat = np.zeros((3, ndofV * mV), dtype=np.float64)  # gradient matrix B
N = np.zeros(mV, dtype=np.float64)  # shape functions
dNdx = np.zeros(mV, dtype=np.float64)  # shape functions derivatives
dNdy = np.zeros(mV, dtype=np.float64)  # shape functions derivatives
dNdr = np.zeros(mV, dtype=np.float64)  # shape functions derivatives
dNds = np.zeros(mV, dtype=np.float64)  # shape functions derivatives
u = np.zeros(NV, dtype=np.float64)  # x-component velocity
v = np.zeros(NV, dtype=np.float64)  # y-component velocity
Example #49
    def calc(self, exposures, impact_funcs, hazard, save_mat=False):
        """Compute impact of an hazard to exposures.

        Parameters:
            exposures (Exposures): exposures
            impact_funcs (ImpactFuncSet): impact functions
            hazard (Hazard): hazard
            save_mat (bool): if True, store the impact matrix (events x exposures)

        Examples:
            Use Entity class:

            >>> haz = Hazard('TC') # Set hazard
            >>> haz.read_mat(HAZ_DEMO_MAT)
            >>> haz.check()
            >>> ent = Entity() # Load entity with default values
            >>> ent.read_excel(ENT_TEMPLATE_XLS) # Set exposures
            >>> ent.check()
            >>> imp = Impact()
            >>> imp.calc(ent.exposures, ent.impact_funcs, haz)
            >>> imp.calc_freq_curve().plot()

            Specify only exposures and impact functions:

            >>> haz = Hazard('TC') # Set hazard
            >>> haz.read_mat(HAZ_DEMO_MAT)
            >>> haz.check()
            >>> funcs = ImpactFuncSet()
            >>> funcs.read_excel(ENT_TEMPLATE_XLS) # Set impact functions
            >>> funcs.check()
            >>> exp = Exposures(pd.read_excel(ENT_TEMPLATE_XLS)) # Set exposures
            >>> exp.check()
            >>> imp = Impact()
            >>> imp.calc(exp, funcs, haz)
            >>> imp.aai_agg
        """
        # 1. Assign centroids to each exposure if not done
        assign_haz = INDICATOR_CENTR + hazard.tag.haz_type
        if assign_haz not in exposures:
            exposures.assign_centroids(hazard)
        else:
            LOGGER.info('Exposures matching centroids found in %s', assign_haz)

        # 2. Initialize values
        self.unit = exposures.value_unit
        self.event_id = hazard.event_id
        self.event_name = hazard.event_name
        self.date = hazard.date
        self.coord_exp = np.stack(
            [exposures.latitude.values, exposures.longitude.values], axis=1)
        self.frequency = hazard.frequency
        self.at_event = np.zeros(hazard.intensity.shape[0])
        self.eai_exp = np.zeros(exposures.value.size)
        self.tag = {
            'exp': exposures.tag,
            'if_set': impact_funcs.tag,
            'haz': hazard.tag
        }
        self.crs = exposures.crs

        # Select exposures with positive value and assigned centroid
        exp_idx = np.where(np.logical_and(exposures.value > 0, \
                           exposures[assign_haz] >= 0))[0]
        if exp_idx.size == 0:
            LOGGER.warning("No affected exposures.")

        num_events = hazard.intensity.shape[0]
        LOGGER.info('Calculating damage for %s assets (>0) and %s events.',
                    exp_idx.size, num_events)

        # Get damage functions for this hazard
        if_haz = INDICATOR_IF + hazard.tag.haz_type
        haz_imp = impact_funcs.get_func(hazard.tag.haz_type)
        if if_haz not in exposures and INDICATOR_IF not in exposures:
            LOGGER.error('Missing exposures impact functions %s.',
                         INDICATOR_IF)
            raise ValueError
        if if_haz not in exposures:
            LOGGER.info('Missing exposures impact functions for hazard %s. ' +\
                        'Using impact functions in %s.', if_haz, INDICATOR_IF)
            if_haz = INDICATOR_IF

        # Check if deductible and cover should be applied
        insure_flag = False
        if ('deductible' in exposures) and ('cover' in exposures) \
        and exposures.cover.max():
            insure_flag = True

        if save_mat:
            self.imp_mat = sparse.lil_matrix(
                (self.date.size, exposures.value.size))

        # 3. Loop over exposures according to their impact function
        tot_exp = 0
        for imp_fun in haz_imp:
            # get indices of all the exposures with this impact function
            exp_iimp = np.where(
                exposures[if_haz].values[exp_idx] == imp_fun.id)[0]
            tot_exp += exp_iimp.size
            exp_step = int(CONFIG['global']['max_matrix_size'] / num_events)
            if not exp_step:
                LOGGER.error(
                    'Increase max_matrix_size configuration parameter'
                    ' to > %s', str(num_events))
                raise ValueError
            # separate into chunks
            chk = -1
            for chk in range(int(exp_iimp.size / exp_step)):
                self._exp_impact( \
                    exp_idx[exp_iimp[chk*exp_step:(chk+1)*exp_step]],\
                    exposures, hazard, imp_fun, insure_flag)
            self._exp_impact(exp_idx[exp_iimp[(chk+1)*exp_step:]],\
                exposures, hazard, imp_fun, insure_flag)

        if not tot_exp:
            LOGGER.warning('No impact functions match the exposures.')
        self.aai_agg = sum(self.at_event * hazard.frequency)

        if save_mat:
            self.imp_mat = self.imp_mat.tocsr()
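The chunk loop above caps the size of the dense intermediates by processing at most max_matrix_size / num_events exposures at a time. A minimal sketch of that chunking pattern, with hypothetical names (process_in_chunks, process_chunk, max_size) standing in for the CLIMADA internals:

import numpy as np

def process_in_chunks(indices, num_events, max_size, process_chunk):
    """Process an index array in chunks of at most max_size / num_events
    entries, mirroring the exp_step logic above."""
    step = int(max_size / num_events)
    if step == 0:
        raise ValueError('max_size must exceed the number of events')
    for start in range(0, len(indices), step):
        process_chunk(indices[start:start + step])

# usage with a hypothetical callback
process_in_chunks(np.arange(10), num_events=3, max_size=9,
                  process_chunk=lambda chunk: print(chunk))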
示例#50
0
all_images = set(all_images)

# create a dict that will provide us with the mapping between the image and the
# index on distance matrix
count = 0
image_index = {}
index_image = {}
for image in all_images:
    image_index[image] = count
    index_image[count] = image
    count += 1

# create a distance matrix for the images (use sparse matrix instead of dense
# to avoid memory issues)
n = len(all_images)
distance_matrix = lil_matrix((n, n))

for pair in all_pairs:
    image1, image2 = extract_pairs(pair, phashes_dict)
    distance = data[pair]
    if distance == 0:
        distance = 0.00000000000001
    index1 = image_index[image1]
    index2 = image_index[image2]
    distance_matrix[index1, index2] = distance
    distance_matrix[index2, index1] = distance
savemat(distance_matrix_file, {'M': distance_matrix.tocsr()})
pickle.dump(index_image, open(index_image_file, 'wb'))
print("Done with dumping data...")

示例#51
0
def prepare_system_matrices(Ybus, Vbus, bus_idx, pqpv, pq, pv, ref):
    """
    Prepare the system matrices
    :param Ybus:
    :param Vbus:
    :param pqpv:
    :param ref:
    :return:
    """
    n_bus = len(Vbus)
    n_bus2 = 2 * n_bus
    npv = len(pv)
    # ##################################################################################################################
    # Compute the starting voltages
    # ##################################################################################################################

    # System matrix
    A = lil_matrix((n_bus2, n_bus2))  # lil matrices are faster to populate

    # Expanded slack voltages
    Vslack = zeros(n_bus2)

    # Populate A
    for a in pqpv:  # rows
        for ii in range(Ybus.indptr[a],
                        Ybus.indptr[a + 1]):  # columns in sparse format
            b = Ybus.indices[ii]

            A[2 * a + 0, 2 * b + 0] = Ybus[a, b].real
            A[2 * a + 0, 2 * b + 1] = -Ybus[a, b].imag
            A[2 * a + 1, 2 * b + 0] = Ybus[a, b].imag
            A[2 * a + 1, 2 * b + 1] = Ybus[a, b].real

    # set vd elements
    for a in ref:
        A[a * 2, a * 2] = 1.0
        A[a * 2 + 1, a * 2 + 1] = 1.0

        Vslack[a * 2] = Vbus[a].real
        Vslack[a * 2 + 1] = Vbus[a].imag

    # Solve starting point voltages
    Vst_expanded = factorized(A.tocsc())(Vslack)
    print('Vst_expanded:\n', Vst_expanded)

    # Invert the voltages obtained: Get the complex voltage and voltage inverse vectors
    Vst = Vst_expanded[2 * bus_idx] + 1j * Vst_expanded[2 * bus_idx + 1]
    Wst = 1.0 / Vst

    # ##################################################################################################################
    # Compute the final system matrix
    # ##################################################################################################################

    # System matrices
    B = lil_matrix((n_bus2, npv))
    C = lil_matrix((npv, n_bus2 + npv))

    for i, a in enumerate(pv):
        # "a" is the actual bus index
        # "i" is the number of the pv bus in the pv buses list

        B[2 * a + 0, i + 0] = Wst[a].imag
        B[2 * a + 1, i + 0] = Wst[a].real

        C[i + 0, 2 * a + 0] = Vst[a].real
        C[i + 0, 2 * a + 1] = Vst[a].imag

    Asys = vstack_s([hstack_s([A, B]), C], format="csc")

    return Asys, Vst, Wst
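The nested loop above expands the complex admittance matrix into a real system with the block pattern [[G, -B], [B, G]], interleaved per bus. A minimal sketch of the same expansion using scipy.sparse.bmat on a toy complex matrix (block-stacked rather than interleaved, so only the idea carries over):

import numpy as np
from scipy.sparse import csr_matrix, bmat

Y = csr_matrix(np.array([[1 + 2j, -1 - 2j],
                         [-1 - 2j, 1 + 2j]]))

G, B = Y.real, Y.imag
# real-equivalent system: [G -B; B G] acting on stacked (real, imag) unknowns
# (the loop in the example interleaves real/imag per bus instead of stacking blocks)
A_real = bmat([[G, -B], [B, G]], format='csr')
print(A_real.toarray())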
示例#52
0
    def buildKirchhoff(self, coords, cutoff=10., gamma=1., **kwargs):
        """Build Kirchhoff matrix for given coordinate set.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray` or :class:`.Atomic`

        :arg cutoff: cutoff distance (Å) for pairwise interactions
            default is 10.0 Å, minimum is 4.0 Å
        :type cutoff: float

        :arg gamma: spring constant, default is 1.0
        :type gamma: float

        :arg sparse: elect to use sparse matrices, default is **False**. If
            Scipy is not found, :class:`ImportError` is raised.
        :type sparse: bool

        :arg kdtree: elect to use KDTree for building Kirchhoff matrix faster,
            default is **True**
        :type kdtree: bool


        Instances of :class:`Gamma` classes and custom functions are
        accepted as *gamma* argument.

        When Scipy is available, user can select to use sparse matrices for
        efficient usage of memory at the cost of computation speed."""

        try:
            coords = (coords._getCoords()
                      if hasattr(coords, '_getCoords') else coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        cutoff, g, gamma = checkENMParameters(cutoff, gamma)
        self._reset()
        self._cutoff = cutoff
        self._gamma = g

        n_atoms = coords.shape[0]
        start = time.time()
        sparse = kwargs.get('sparse', False)
        if sparse:
            try:
                from scipy import sparse as scipy_sparse
            except ImportError:
                raise ImportError('failed to import scipy.sparse, which is '
                                  'required for sparse matrix calculations')
            kirchhoff = scipy_sparse.lil_matrix((n_atoms, n_atoms))
        else:
            kirchhoff = np.zeros((n_atoms, n_atoms), 'd')

        if kwargs.get('kdtree', True):
            kdtree = KDTree(coords)
            kdtree.search(cutoff)
            dist2 = kdtree.getDistances()**2
            r = 0
            for i, j in kdtree.getIndices():
                g = gamma(dist2[r], i, j)
                kirchhoff[i, j] = -g
                kirchhoff[j, i] = -g
                kirchhoff[i, i] = kirchhoff[i, i] + g
                kirchhoff[j, j] = kirchhoff[j, j] + g
                r += 1
        else:
            LOGGER.info('Using slower method for building the Kirchhoff.')
            cutoff2 = cutoff * cutoff
            mul = np.multiply
            for i in range(n_atoms):
                xyz_i = coords[i, :]
                i_p1 = i + 1
                i2j = coords[i_p1:, :] - xyz_i
                mul(i2j, i2j, i2j)
                for j, dist2 in enumerate(i2j.sum(1)):
                    if dist2 > cutoff2:
                        continue
                    j += i_p1
                    g = gamma(dist2, i, j)
                    kirchhoff[i, j] = -g
                    kirchhoff[j, i] = -g
                    kirchhoff[i, i] = kirchhoff[i, i] + g
                    kirchhoff[j, j] = kirchhoff[j, j] + g

        if sparse:
            kirchhoff = kirchhoff.tocsr()

        LOGGER.debug('Kirchhoff was built in {0:.2f}s.'.format(time.time() -
                                                               start))
        self._kirchhoff = kirchhoff
        self._n_atoms = n_atoms
        self._dof = n_atoms
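A standalone sketch of the KDTree branch above, assuming a plain scipy.spatial.cKDTree and a constant spring constant instead of ProDy's KDTree wrapper and Gamma objects:

import numpy as np
from scipy import sparse
from scipy.spatial import cKDTree

coords = np.random.rand(50, 3) * 20.0     # hypothetical coordinate set
cutoff, gamma = 10.0, 1.0

tree = cKDTree(coords)
pairs = tree.query_pairs(cutoff)           # set of (i, j) pairs with i < j within cutoff

kirchhoff = sparse.lil_matrix((len(coords), len(coords)))
for i, j in pairs:
    kirchhoff[i, j] = -gamma
    kirchhoff[j, i] = -gamma
    kirchhoff[i, i] += gamma               # accumulate degree on the diagonal
    kirchhoff[j, j] += gamma
kirchhoff = kirchhoff.tocsr()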
示例#53
0
def generateGS(Mesher, Lvl, RestrictDomain=None, ColTol=0.999999):
    Node, Elem, Supp, Load = Mesher.get()
    if RestrictDomain is None:
        RestrictDomain = NoneRestriction
    #Get element connectivity matrix
    Nn = max([max(el) for el in Elem]) + 1  # largest node index + 1 gives the number of nodes
    Ne = len(Elem)  # how many elements are there
    A1 = lil_matrix((Nn, Nn))  # sparse matrix
    for i in range(0, Ne):
        A1[ix_(Elem[i], Elem[i])] = 1  # nodes of the same element are connected to each other
    A1 = A1 - identity(Nn)  # remove self-connections
    An = A1
    #Level 1 connectivity
    I, J = An.nonzero()  # index pairs where a connection exists (ordering differs from MATLAB, which is column-major)
    Bars = np.column_stack([I, J])
    D = np.column_stack([Node[I, 0] - Node[J, 0], Node[I, 1] - Node[J, 1]])
    L = (np.sqrt(D[:, 0]**2 + D[:, 1]**2))
    D = np.column_stack([D[:, 0].flatten() / L, D[:, 1].flatten() / L])
    #Levels 2 and above
    for i in range(1, Lvl):
        Aold = An
        An = (An * A1).astype(bool)
        Gn = An - Aold
        Gn.setdiag(0)
        I, J = np.nonzero(Gn)
        if len(J) == 0:
            Lvl = i - 1
            print(f'-INFO- No new bars at Level {Lvl}')
            break
        RemoveFlag = RestrictDomain(Node, np.column_stack([I, J]))  #
        I = np.delete(I, np.nonzero(RemoveFlag)[0])
        J = np.delete(J, np.nonzero(RemoveFlag)[0])

        newD = np.column_stack(
            [Node[I, 0] - Node[J, 0], Node[I, 1] - Node[J, 1]])
        L = np.sqrt(newD[:, 0]**2 + newD[:, 1]**2).flatten()
        newD = np.column_stack(
            [newD[:, 0].flatten() / L, newD[:, 1].flatten() / L])
        # Collinearity Check
        p = 0  # where the bars come from
        m = 0
        RemoveFlag = np.zeros(np.size(I))
        Nb = np.size(Bars, 0)
        RemoveFlag = selectRemoveFlag(RemoveFlag, I, Bars, Nn, Nb, ColTol, D,
                                      newD)
        # adjusted because Python indexing starts at 0
        '''Remove collinear bars and make symmetric again. Bars that have one
        angle marked as collinear but the other not, will be spared.
        '''
        ind, = np.nonzero(RemoveFlag == 0)
        H = csr_matrix((np.ones(np.size(ind)), (I[ind], J[ind])),
                       shape=(Nn, Nn))
        I, J = np.nonzero(
            H + H.T
        )  #  guarantees symmetry and eliminates the situation of the node being eliminated in q and not in p
        print(
            f'Lvl {i} - Collinear bars removed: {(len(RemoveFlag)-len(I))/2}')
        Bars = np.concatenate((Bars, np.column_stack([I, J])), axis=0)
        Bars = Bars[Bars[:, 0].argsort()]  # effectively adds the new bars
        D = np.column_stack([
            Node[Bars[:, 0], 0] - Node[Bars[:, 1], 0],
            Node[Bars[:, 0], 1] - Node[Bars[:, 1], 1]
        ])  #directional unit vector
        L = np.sqrt(D[:, 0]**2 + D[:, 1]**2)  #
        D = np.column_stack([D[:, 0].flatten() / L, D[:, 1].flatten() / L])
    A = csr_matrix(
        (np.ones(np.size(Bars, 0)), (Bars[:, 0], Bars[:, 1])),
        shape=(Nn, Nn))  # ends, but still needs to remove repeated bars
    I, J = tril(A).nonzero()  # for this use only the upper triangle
    Bars = np.column_stack([I, J])
    return Bars
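Higher connectivity levels above come from repeated sparse products with the level-1 adjacency matrix and differencing against the previous level. A toy sketch of that idea on a 4-node path graph, using 0/1 CSR matrices:

import numpy as np
from scipy.sparse import csr_matrix

# path graph 0-1-2-3 as a 0/1 adjacency matrix
A1 = csr_matrix(np.array([[0, 1, 0, 0],
                          [1, 0, 1, 0],
                          [0, 1, 0, 1],
                          [0, 0, 1, 0]]))

An = A1.copy()
for level in range(2, 4):
    Aold = An
    An = ((An @ A1) + An).astype(bool).astype(int)  # reachable within `level` hops
    Gn = An - Aold                                  # connections that are new at this level
    I, J = Gn.nonzero()
    new_pairs = sorted((i, j) for i, j in zip(I.tolist(), J.tolist()) if i != j)
    print('level', level, 'new pairs:', new_pairs)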
示例#54
0
文件: ADM_30.py 项目: jpra2/ADM_mod_2
    else:
        print('erro')
        import pdb
        pdb.set_trace()

wirebasket_numbers_nv1 = [
    len(ids_nv1_internos),
    len(ids_nv1_faces),
    len(ids_nv1_arestas),
    len(ids_nv1_vertices)
]
elems_wirebasket_nv1 = ids_nv1_internos + ids_nv1_faces + ids_nv1_arestas + ids_nv1_vertices
elems_wirebasket_nv1_sep = [
    ids_nv1_internos, ids_nv1_faces, ids_nv1_arestas, ids_nv1_vertices
]
G_nv1 = lil_matrix((len(all_ids_nv1), len(all_ids_nv1)))
G_nv1[all_ids_nv1, elems_wirebasket_nv1] = np.ones(len(all_ids_nv1))

#### nivel 1
ids_wirebasket = M1.mb.tag_get_data(M1.ID_reordenado_tag,
                                    elems_wirebasket,
                                    flat=True)
map_global = dict(zip(elems_wirebasket, ids_wirebasket))
faces_boundary = M1.mb.tag_get_data(get_tag('FACES_BOUNDARY'), 0, flat=True)[0]
faces_boundary = M1.mb.get_entities_by_handle(faces_boundary)

T, b = oth.fine_transmissibility_structured(M1.mb,
                                            M1.mtu,
                                            map_global,
                                            faces_in=rng.subtract(
                                                M1.all_faces, faces_boundary))
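G_nv1 above is a 0/1 reordering matrix built by writing ones at (global id, wirebasket id) pairs. A minimal sketch of the same construction and of applying it to reorder a vector, with a hypothetical permutation:

import numpy as np
from scipy.sparse import lil_matrix

perm = np.array([2, 0, 3, 1])          # hypothetical wirebasket ordering
n = len(perm)

G = lil_matrix((n, n))
G[np.arange(n), perm] = np.ones(n)     # row i picks entry perm[i]

v = np.array([10.0, 11.0, 12.0, 13.0])
print(G.tocsr() @ v)                   # -> [12. 10. 13. 11.]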
示例#55
0
def white(reg):
    """
    Calculates the White test to check for heteroscedasticity.

    Parameters
    ----------
    reg             : regression object
                      output instance from a regression model

    Returns
    -------
    white_result    : dictionary
                      contains the statistic (wh), degrees of freedom
                      (df) and the associated p-value (pvalue) for the
                      White test.
    white           : float
                      scalar value for the White test statistic.
    df              : integer
                      degrees of freedom associated with the test
    pvalue          : float
                      p-value associated with the statistic (chi^2
                      distributed with k df)
    
    Note
    ----
    x attribute in the reg object must have a constant term included. This is
    standard for spreg.OLS so no testing done to confirm constant.

    References
    ----------
    .. [1] H. White. 1980. A heteroscedasticity-consistent covariance
       matrix estimator and a direct test for heteroskedasticity.
       Econometrica. 48(4) 817-838. 


    Examples
    --------
    >>> import numpy as np
    >>> import pysal
    >>> import diagnostics
    >>> from ols import OLS

    Read the DBF associated with the Columbus data.

    >>> db = pysal.open(pysal.examples.get_path("columbus.dbf"),"r")

    Create the dependent variable vector. 

    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))

    Create the matrix of independent variables. 

    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T

    Run an OLS regression.

    >>> reg = OLS(y,X)

    Calculate the White test for heteroscedasticity.
    
    >>> testresult = diagnostics.white(reg)

    Print the degrees of freedom for the test.

    >>> testresult['df']
    5

    Print the test statistic.

    >>> print("%12.12f"%testresult['wh'])
    19.946008239903

    Print the associated p-value. 

    >>> print("%12.12f"%testresult['pvalue'])
    0.001279222817

    """
    e = reg.u**2
    k = reg.k
    n = reg.n
    y = reg.y
    X = reg.x
    #constant = constant_check(X)
    
    # Check for constant, if none add one, see Greene 2003, pg. 222
    #if constant == False: 
    #    X = np.hstack((np.ones((n,1)),X))

    # Check for multicollinearity in the X matrix
    ci = condition_index(reg)
    if ci > 30:
        white_result = "Not computed due to multicollinearity."
        return white_result

    # Compute cross-products and squares of the regression variables
    if type(X).__name__ == 'ndarray':
        A = np.zeros((n, (k * (k + 1)) // 2))
    elif type(X).__name__ == 'csc_matrix' or type(X).__name__ == 'csr_matrix':
        # this is probably inefficient
        A = SP.lil_matrix((n, (k * (k + 1)) // 2))
    else:
        raise Exception, "unknown X type, %s" %type(X).__name__
    counter = 0
    for i in range(k):
        for j in range(i,k):
            v = spmultiply(X[:,i], X[:,j], False)
            A[:,counter] = v
            counter += 1

    # Append the original variables
    A = sphstack(X,A)   # note: this also converts a LIL to CSR
    n,k = A.shape

    # Check to identify any duplicate or constant columns in A
    omitcolumn = []
    for i in range(k):
        current = A[:,i]
        # remove all constant terms (will add a constant back later)
        if spmax(current) == spmin(current):
            omitcolumn.append(i)
            pass
        # do not allow duplicates
        for j in range(k):
            check = A[:,j]
            if i < j:
                test = abs(current - check).sum()
                if test == 0:
                    omitcolumn.append(j)
    uniqueomit = set(omitcolumn)
    omitcolumn = list(uniqueomit)

    # Now the identified columns must be removed
    if type(A).__name__ == 'ndarray':
        A = np.delete(A,omitcolumn,1)
    elif type(A).__name__ == 'csc_matrix' or type(A).__name__ == 'csr_matrix':
        # this is probably inefficient
        keepcolumn = range(k)
        for i in omitcolumn:
            keepcolumn.remove(i)
        A = A[:,keepcolumn]
    else:
        raise Exception, "unknown A type, %s" %type(X).__name__
    A = sphstack(np.ones((A.shape[0],1)), A)   # add a constant back in
    n,k = A.shape

    # Conduct the auxiliary regression and calculate the statistic
    import ols as OLS
    aux_reg = OLS.BaseOLS(e,A)
    aux_r2 = r2(aux_reg)
    wh = aux_r2*n
    df = k-1
    pvalue = stats.chisqprob(wh,df)
    white_result={'df':df,'wh':wh, 'pvalue':pvalue}
    return white_result 
示例#56
0
    def graph_setup(self,n,r,p,seed=None):
        """ Creates the graph to use for poisson learning.

            Parameters
            ----------
            n : int
                The number of vertices to sample for the graph.
            r : float
                Radius for graph construction.
            p : float
                Weight matrix parameter.
            seed : int, default is None
                Optional seed for random number generator.
        
            Returns
            -------
            poisson_W_matrix : (n,n) scipy.sparse.lil_matrix
                Weight matrix describing similarities of normal vectors.
            poisson_J_matrix : (num_verts,n) scipy.sparse.lil_matrix
                Matrix with indices of nearest neighbors.
            poisson_node_idx : (num_verts,1) int array
                The indices of the closest point in the sample.
        """

        rng = (
            np.random.default_rng(seed=seed)
            if seed is not None
            else np.random.default_rng()
        )

        if self.poisson_W_matrix is None or self.poisson_J_matrix is None or self.poisson_node_idx is None:

            v = self.vertex_normals()
            N = self.num_verts()
        
            #Random subsample
            ss_idx = np.matrix(rng.choice(self.points.shape[0],n,replace=False))
            y = np.squeeze(self.points[ss_idx,:])
            w = np.squeeze(v[ss_idx,:])

            xTree = spatial.cKDTree(self.points)
            nn_idx = xTree.query_ball_point(y, r)
            yTree = spatial.cKDTree(y)
            nodes_idx = yTree.query_ball_point(y, r)
        
            bn = np.zeros((n,3))
            J = sparse.lil_matrix((N,n))
            for i in range(n):
                vj = v[nn_idx[i],:]
                normal_diff = w[i] - vj
                weights = np.exp(-8 * np.sum(np.square(normal_diff),1,keepdims=True))
                bn[i] = np.sum(weights*vj,0) / np.sum(weights,0)
            
                #Set ith row of J
                normal_diff = bn[i]- vj
                weights = np.exp(-8 * np.sum(np.square(normal_diff),1))#,keepdims=True))
                J[nn_idx[i],i] = weights
            
            #Normalize rows of J
            RSM = sparse.spdiags((1 / np.sum(J,1)).ravel(),0,N,N)
            J = RSM @ J
        
            #Compute weight matrix W
            W = sparse.lil_matrix((n,n))
            for i in range(n):
                nj = bn[nodes_idx[i]]
                normal_diff = bn[i] - nj
                weights = np.exp(-32 * ((np.sqrt(np.sum(np.square(normal_diff),1)))/2)**p)
                W[i,nodes_idx[i]] = weights
        
            #Find nearest node to each vertex
            nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(y)
            instances, node_idx = nbrs.kneighbors(self.points)

            self.poisson_W_matrix = W
            self.poisson_J_matrix = J
            self.poisson_node_idx = node_idx
        
        return self.poisson_W_matrix, self.poisson_J_matrix, self.poisson_node_idx   
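The J matrix above is row-normalised by left-multiplying with a diagonal matrix of inverse row sums (spdiags). A minimal standalone sketch of that trick on a small CSR matrix:

import numpy as np
from scipy import sparse

J = sparse.csr_matrix(np.array([[1.0, 3.0, 0.0],
                                [0.0, 2.0, 2.0]]))

row_sums = np.asarray(J.sum(axis=1)).ravel()            # dense row sums
RSM = sparse.spdiags(1.0 / row_sums, 0, J.shape[0], J.shape[0])
J_norm = RSM @ J                                         # each row now sums to 1
print(J_norm.toarray())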
示例#57
0
def iterative_profiler(X,
                       G,
                       train_ids,
                       test_ids,
                       id_index,
                       label_slices,
                       preserve_coef=0.9,
                       iterations=10,
                       alpha=10,
                       c=0,
                       node_order='random',
                       keep_topK=10,
                       edgexplain__scaler=False):
    if edgexplain__scaler:
        print 'edgexplain is so on!'
    else:
        print 'edgexplain is so off!'
    print 'alpha', alpha, 'preserver', preserve_coef, 'c', c, 'topK', keep_topK, 'edgexplain', edgexplain__scaler
    converged = False
    iter_num = 0
    ids = train_ids + test_ids
    train_ids_set = set(train_ids)
    logging.info("iterating with max_iter = " + str(iterations))
    X = X.tolil()
    while not converged:
        if node_order == 'random':
            random.shuffle(ids)
        logging.info("iter: " + str(iter_num))
        for node in ids:
            node_index = id_index[node]
            neighbors_labeldist = lil_matrix((1, X.shape[1]))
            nbrs = [nbr for nbr in G[node]]
            ################

            nbr_indices = [id_index[n] for n in nbrs]
            #all neighbours
            nbrlabeldists = X[nbr_indices]
            #diagonal matrix for edge weight of each neighbor
            weights = lil_matrix(
                (nbrlabeldists.shape[0], nbrlabeldists.shape[0]))
            if edgexplain__scaler:
                #edge weights are scaled according to EdgExplain scalar
                scales = nbrlabeldists.tocsr().dot(
                    X[node_index].tocsr().transpose(copy=True))
                if iter_num == -1:
                    pdb.set_trace()
                scales = expit(-alpha * scales.toarray() - c)
                weights.setdiag(scales)
            else:
                #all edge weights are 1
                weights.setdiag(np.ones(nbrlabeldists.shape[0]))
            neighbors_labeldist = weights.dot(nbrlabeldists)
            neighbors_labeldist = lil_matrix(
                neighbors_labeldist.sum(axis=0) / weights.sum())

            if node in train_ids_set:
                new_labeldist = (
                    preserve_coef * X[node_index] +
                    (1 - preserve_coef) * neighbors_labeldist).tolil()
            else:
                new_labeldist = neighbors_labeldist

            new_labeldist_normalized = None
            for label_slice in label_slices:
                start_index, end_index = label_slice
                slice = new_labeldist[0, start_index:end_index]
                if keep_topK > 0:
                    if keep_topK < 1:
                        keep_topK = min(1, int(keep_topK * slice.shape[1]))
                    sorted_indices = np.argsort(slice.toarray())
                    #topK_indices = sorted_indices[0, -keep_topK:]
                    zero_indices = sorted_indices[0, 0:-keep_topK]
                    slice[0, zero_indices] = 0
                slice = normalize(slice, norm='l1', axis=1, copy=False)
                if new_labeldist_normalized is None:
                    new_labeldist_normalized = slice
                else:
                    new_labeldist_normalized = sp.hstack(
                        [new_labeldist_normalized, slice])
            new_labeldist = new_labeldist_normalized
            X[node_index] = new_labeldist.tolil()
        iter_num += 1
        if iter_num == iterations:
            converged = True
    X = X.tocsr()
    return X
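In the EdgeExplain branch above, each neighbour's edge weight is a logistic function of the dot product between label distributions, placed on the diagonal of a sparse weight matrix. A tiny sketch of that scaling step with illustrative numbers:

import numpy as np
from scipy.sparse import lil_matrix
from scipy.special import expit

alpha, c = 10.0, 0.0
scores = np.array([0.8, 0.1, -0.3])          # hypothetical similarity scores per neighbour

weights = lil_matrix((3, 3))
weights.setdiag(expit(-alpha * scores - c))  # low similarity -> weight close to 1
print(weights.toarray().diagonal())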
示例#58
0
import numpy as np
from scipy import sparse
from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                           assert_equal)

from sklearn import datasets, svm, linear_model, base
from sklearn.datasets import make_classification, load_digits, make_blobs
from sklearn.svm.tests import test_svm
from sklearn.utils import ConvergenceWarning
from sklearn.utils.extmath import safe_sparse_dot
from sklearn.utils.testing import assert_warns, assert_raise_message

# test sample 1
X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
X_sp = sparse.lil_matrix(X)
Y = [1, 1, 1, 2, 2, 2]
T = np.array([[-1, -1], [2, 2], [3, 2]])
true_result = [1, 2, 2]

# test sample 2
X2 = np.array([[0, 0, 0], [1, 1, 1], [2, 0, 0, ],
               [0, 0, 2], [3, 3, 3]])
X2_sp = sparse.dok_matrix(X2)
Y2 = [1, 2, 2, 2, 3]
T2 = np.array([[-1, -1, -1], [1, 1, 1], [2, 2, 2]])
true_result2 = [1, 2, 3]


iris = datasets.load_iris()
# permute
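The fixtures above exercise the estimators on both dense and sparse (LIL) versions of the same data. A minimal sketch of that dense-versus-sparse comparison with an SVC, assuming a current scikit-learn:

import numpy as np
from scipy import sparse
from sklearn import svm

X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])
Y = [1, 1, 1, 2, 2, 2]
T = np.array([[-1, -1], [2, 2], [3, 2]])

clf_dense = svm.SVC(kernel='linear').fit(X, Y)
clf_sparse = svm.SVC(kernel='linear').fit(sparse.lil_matrix(X), Y)

# both paths should give the same predictions on the test points
print(clf_dense.predict(T), clf_sparse.predict(sparse.lil_matrix(T)))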
示例#59
0

#------------------------------ENCODING-------------------------------------
#seqList = ['abcdab', 'abcdef']
def findIndexOfFeature(subStr, startIndexOfCurSize=0):
    for index in xrange(startIndexOfCurSize, len(listSubString)):
        if listSubString[index] == subStr:
            return index
    return -1


sizeOfSubstr = (2, 3, 4)
numOfSeq = len(seqList)
listSubString = []  #list of substrings or list of feature
#encodingMat= np.zeros((0,numOfSeq), dtype = 'uint8')  # rows are features (substrings), columns are sequences
encodingMat = sparse.lil_matrix((0, numOfSeq), dtype='uint8')

for sizeOfSub in sizeOfSubstr:  # number of characters in the substring
    curNumOfFeature = len(listSubString)
    startIndexOfCurSize = curNumOfFeature

    for indexOfCurSeq in xrange(0, numOfSeq):  # iterate over each sequence
        seq = seqList[indexOfCurSeq]
        sizeOfSeq = len(seq)

        for index in xrange(0, sizeOfSeq - sizeOfSub + 1):  # iterate over substrings of this size
            curSubStr = seq[index:index + sizeOfSub]
            # only search among substrings of the same length, hence startIndexOfCurSize
            foundIndexOfFeature = findIndexOfFeature(curSubStr,
                                                     startIndexOfCurSize)
            if foundIndexOfFeature != -1:
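The snippet above (truncated here) scans each sequence for substrings of the configured sizes and records them in a sparse feature matrix, using a linear search (findIndexOfFeature) for lookups. A compact sketch of the same encoding idea, assuming a dict lookup and occurrence counts in place of the original search:

from scipy import sparse

seqList = ['abcdab', 'abcdef']      # the sample sequences from the comment above
sizeOfSubstr = (2, 3, 4)

feature_index = {}                   # substring -> row index in the matrix
rows, cols, vals = [], [], []
for col, seq in enumerate(seqList):
    for size in sizeOfSubstr:
        for start in range(len(seq) - size + 1):
            sub = seq[start:start + size]
            row = feature_index.setdefault(sub, len(feature_index))
            rows.append(row)
            cols.append(col)
            vals.append(1)

# duplicate (row, col) pairs are summed, so entries are occurrence counts
encodingMat = sparse.coo_matrix((vals, (rows, cols)),
                                shape=(len(feature_index), len(seqList)),
                                dtype='uint8').tolil()
print(encodingMat.shape)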
示例#60
0
mesh = cfm.GmshMeshGenerator(g)
mesh.el_size_factor = el_size_factor
mesh.el_type = el_type
mesh.dofs_per_node = dofs_per_node

# Mesh the geometry:
#  The first four return values are the same as those returned by trimesh2d().
#  The value elementmarkers is a list of markers, used to look up the
#  marker of a given element by its index.

coords, edof, dofs, bdofs, elementmarkers = mesh.create()

# ---- Solve problem --------------------------------------------------------

nDofs = np.size(dofs)
K = lil_matrix((nDofs, nDofs))
ex, ey = cfc.coordxtr(edof, coords, dofs)

cfu.info("Assembling K... (" + str(nDofs) + ")")

for eltopo, elx, ely, elMarker in zip(edof, ex, ey, elementmarkers):

    if el_type == 2:
        Ke = cfc.plante(elx, ely, elprop[elMarker][0], elprop[elMarker][1])
    else:
        Ke = cfc.planqe(elx, ely, elprop[elMarker][0], elprop[elMarker][1])

    cfc.assem(eltopo, K, Ke)

cfu.info("Applying bc and loads...")