示例#1
0
文件: transform.py 项目: jdhenke/uap
def createSparseMatrix(assertions, path, use_left_features = True):
  """Build a labeled sparse matrix from assertions and save it to `path`.

  Each assertion is unpacked as (concept1, relation, concept2). It puts a
  1.0 at row concept1, column ('right', relation, concept2) and, when
  use_left_features is true, another 1.0 at row concept2, column
  ('left', relation, concept1). The finished matrix is written with
  divisi2.save; nothing is returned.
  """
  unit = float(1)
  values, rows, cols = [], [], []
  for concept1, relation, concept2 in assertions:
    cells = [(unit, concept1, ('right', relation, concept2))]
    if use_left_features:
      cells.append((unit, concept2, ('left', relation, concept1)))
    for cell_value, cell_row, cell_col in cells:
      values.append(cell_value)
      rows.append(cell_row)
      cols.append(cell_col)

  # The distinct labels fix both the shape and the labeling of the matrix.
  row_labels = set(rows)
  col_labels = set(cols)
  shape = (len(row_labels), len(col_labels))
  sparseMatrix = SparseMatrix(shape, row_labels=row_labels, col_labels=col_labels)

  assert len(values) == len(rows) and len(rows) == len(cols)
  for cell_value, cell_row, cell_col in zip(values, rows, cols):
    # TODO: more explicit handling of multiple entries for same cell
    sparseMatrix.set_entry_named(cell_row, cell_col, cell_value)
  divisi2.save(sparseMatrix, path)
示例#2
0
    def create(self,
               data,
               row_labels=None,
               col_labels=None,
               foldin=False,
               truncate=False):
        """Build self._matrix from entries, optionally preparing a fold-in.

        Args:
            data: re-iterable collection of indexable entries; item 0 is
                stored as the value, item 1 as the row label and item 2 as
                the column label.
            row_labels: forwarded to from_named_lists. When folding in
                without col_labels it is also the set of known rows, so it
                must then support the `in` operator.
            col_labels: forwarded to from_named_lists. When truthy during a
                fold-in, a row is being folded in and these are the known
                columns.
            foldin: when true, record in self._additional_elements every
                column label (row fold-in) or row label (column fold-in)
                of `data` that is not among the known labels.
            truncate: only used with foldin; when true, drop every entry
                whose label was recorded as additional.
        """
        # Explicit lists (not whatever map() returns) so they can be
        # filtered and extended later on any Python version.
        self._values = [entry[0] for entry in data]
        self._rows = [entry[1] for entry in data]
        self._cols = [entry[2] for entry in data]

        if foldin:  # only one axis is folded in at a time
            if col_labels:  # col_labels given: a row is being folded in
                known, axis = col_labels, self._cols
            else:  # otherwise a column is being folded in
                known, axis = row_labels, self._rows

            # One membership test per entry, reused for both steps below.
            is_known = [label in known for label in axis]
            self._additional_elements = [
                label for label, ok in zip(axis, is_known) if not ok
            ]

            if truncate:
                # Single filtering pass; keeps the surviving entries in
                # their original order, exactly as repeated
                # index()/del did, without the quadratic cost.
                self._values = [
                    v for v, ok in zip(self._values, is_known) if ok
                ]
                self._rows = [
                    r for r, ok in zip(self._rows, is_known) if ok
                ]
                self._cols = [
                    c for c, ok in zip(self._cols, is_known) if ok
                ]

        self._matrix = divisiSparseMatrix.from_named_lists(
            self._values, self._rows, self._cols, row_labels, col_labels)
示例#3
0
def blend(mats, factors=None, symmetric=False, post_weights=None):
    """
    Merge several labeled matrices into one matrix of weighted entries.

    mats: a non-empty list of matrices to combine.
    factors: one scaling factor per matrix.
      If None, blend_factor(mat) is used for each matrix (documented
      originally as the reciprocal of the first singular value).
    post_weights: optional list of weights multiplied pairwise into the
      factors, e.g. to make one matrix count twice as much as another.
      If None, no post-weighting is performed.
    symmetric: build the result with square_from_named_lists instead of
      from_named_lists.

    A single input matrix is returned as-is (or times factors[0] when
    factors is given); post_weights is not consulted in that case.
    """
    assert len(mats) > 0
    if len(mats) == 1:
        only = mats[0]
        return only if factors is None else only * factors[0]

    if factors is None:
        factors = [blend_factor(mat) for mat in mats]
    if post_weights is not None:
        factors = [
            scale * weight for scale, weight in zip(factors, post_weights)
        ]

    all_values, all_rows, all_cols = [], [], []
    for mat, scale in zip(mats, factors):
        # FIXME: using bare find(), multiplying in numpy form, and
        # translating the labels manually would be a bit faster
        values, rows, cols = mat.named_lists()
        all_values += [value * scale for value in values]
        all_rows += rows
        all_cols += cols

    if symmetric:
        build = SparseMatrix.square_from_named_lists
    else:
        build = SparseMatrix.from_named_lists
    return build(all_values, all_rows, all_cols)
示例#4
0
def blend(mats, factors=None, symmetric=False, post_weights=None):
    """
    Blend a list of labeled matrices into one, scaling each matrix's data.

    mats: the matrices to blend; must contain at least one.
    factors: list with a scaling factor for each matrix.
      When None, each factor comes from blend_factor(mat) (described
      originally as the reciprocal of the first singular value).
    post_weights: list of additional weights, one per scaled matrix, for
      saying that one matrix matters more than another. When None, the
      factors are used unchanged.
    symmetric: use square_from_named_lists to build the result.

    With exactly one matrix the function returns early: the matrix itself
    if factors is None, otherwise the matrix times factors[0].
    """
    assert len(mats) > 0
    if len(mats) == 1:
        first = mats[0]
        if factors is not None:
            return first * factors[0]
        return first

    if factors is None:
        factors = [blend_factor(mat) for mat in mats]

    if post_weights is not None:
        weighted = []
        for factor, post_weight in zip(factors, post_weights):
            weighted.append(factor * post_weight)
        factors = weighted

    merged_values = []
    merged_rows = []
    merged_cols = []
    for mat, factor in zip(mats, factors):
        # FIXME: using bare find(), multiplying in numpy form, and
        # translating the labels manually would be a bit faster
        mat_values, mat_rows, mat_cols = mat.named_lists()
        merged_values.extend([entry * factor for entry in mat_values])
        merged_rows.extend(mat_rows)
        merged_cols.extend(mat_cols)

    constructor = (SparseMatrix.square_from_named_lists if symmetric
                   else SparseMatrix.from_named_lists)
    return constructor(merged_values, merged_rows, merged_cols)
示例#5
0
    def update(self, matrix, is_batch=False):
        """Append the stored entries of `matrix` to this object's entries.

        The _values, _rows and _cols lists of `matrix` are appended to the
        matching lists on self. Unless is_batch is true, self._matrix is
        then rebuilt from the combined lists so the stored zeroes are kept.
        Pass is_batch=True while collecting several updates and build the
        final matrix once at the end.
        """
        self._values.extend(matrix._values)
        self._rows.extend(matrix._rows)
        self._cols.extend(matrix._cols)

        if is_batch:
            return
        combined = (self._values, self._rows, self._cols)
        self._matrix = divisiSparseMatrix.from_named_lists(*combined)
示例#6
0
 def index_sparseMatrix(self):
     """Create the divisi2 sparse matrix from the already stored values.

     Reads self._values, self._rows and self._cols and stores the result
     of from_named_lists in self._matrix.
     """
     stored = (self._values, self._rows, self._cols)
     self._matrix = divisiSparseMatrix.from_named_lists(*stored)
示例#7
0
文件: svd.py 项目: jianminsun/cs156b
                mat[j, k] = divisi2.dot(user_mat[i,:], movie_mat[j,:])
    print "Learning process complete."
    start_time = time.time()
    predictions = divisi2.reconstruct(user_mat, axis_weights, movie_mat)
    print "Matrix reconstruction (elapsed time: %f s)." % (time.time() - start_time)    
    return predictions

def predict(mat):
    """Write one prediction per testing line to the output file.

    Every line of TESTING_FILENAME is split on whitespace into exactly
    three fields (user, movie, date). The value of
    mat.entry_named(int(user), int(movie)) is written to OUTPUT_FILENAME,
    one per line. Progress is reported on stdout each time the number of
    processed lines is a multiple of NUM_TESTING / INCR.

    Both files are opened in a `with` block so they are closed even when
    a line is malformed or the lookup raises.
    """
    with open(TESTING_FILENAME, 'r') as f_testing, \
            open(OUTPUT_FILENAME, 'w') as f_out:
        print("Making %d predictions..." % NUM_TESTING)
        start_time = time.time()
        percent_done = 0
        for line_number, line in enumerate(f_testing, 1):
            # The date field is not used, but unpacking three names keeps
            # the check that each line has exactly three fields.
            user, movie, _date = line.strip().split()
            f_out.write(str(mat.entry_named(int(user), int(movie))) + '\n')
            if line_number % (NUM_TESTING / INCR) == 0:
                percent_done += 100.0 / INCR
                sys.stdout.write("\r%.1f%% done (elapsed time: %f s)." % (percent_done, time.time() - start_time))
                sys.stdout.flush()
    print("Predictions complete (elapsed time: %f s)." % (time.time() - start_time))

if __name__ == '__main__':
    # Users and movies are labeled 1..NUM_USERS and 1..NUM_MOVIES.
    user_labels = range(1, NUM_USERS + 1)
    movie_labels = range(1, NUM_MOVIES + 1)
    training_mat = SparseMatrix((NUM_USERS, NUM_MOVIES), user_labels, movie_labels)
    # Fill the matrix, learn from it, then write the predictions.
    add_data_to_matrix(training_mat)
    predictions = learn_iter(training_mat)
    predict(predictions)
示例#8
0
 def create(self, data):
     """Build self._matrix from an iterable of entries.

     Item 0 of every entry is used as the value, item 1 as the row and
     item 2 as the column passed to from_named_lists.
     """
     values, rows, cols = [map(itemgetter(field), data) for field in (0, 1, 2)]
     self._matrix = divisiSparseMatrix.from_named_lists(values, rows, cols)
示例#9
0
from divisi2.sparse import SparseMatrix
from divisi2.reconstructed import ReconstructedMatrix
from divisi2.operators import dot
import numpy as np

# Shared fixture for the tests below. Each entry appears to be
# (value, row label, column label) -- TODO confirm against
# SparseMatrix.from_named_entries. The six entries use four distinct row
# labels (apple, orange, celery, banana) and three distinct column labels
# (red, orange, green), hence the name.
mat_4x3 = SparseMatrix.from_named_entries([
    (2, "apple", "red"),
    (2, "orange", "orange"),
    (1, "apple", "green"),
    (1, "celery", "green"),
    (-1, "apple", "orange"),
    (-1, "banana", "orange")
])

def test_incremental_svd():
    """Check that repeated Hebbian steps approach the rank-2 SVD result.

    The reference `rec` is rebuilt from mat_4x3.svd(2). A
    ReconstructedMatrix created with make_random is then updated with
    hebbian_step for every cell of the 4x3 matrix, 1000 sweeps in total.
    The test passes when both matrices carry the same labels and the norm
    of their difference is below 0.1.
    """
    U_sparse, S_sparse, V_sparse = mat_4x3.svd(2)
    rec = dot(U_sparse * S_sparse, V_sparse.T)
    rec2 = ReconstructedMatrix.make_random(mat_4x3.row_labels,
                                           mat_4x3.col_labels,
                                           2,
                                           learning_rate = 0.01)
    # `_sweep` rather than `iter`, which shadowed the builtin.
    for _sweep in xrange(1000):
        for row in xrange(4):
            for col in xrange(3):
                rec2.hebbian_step(row, col, mat_4x3[row, col])
        # Trace of the remaining error after each sweep.
        print(np.linalg.norm(rec2.to_dense() - rec))
    dense = rec2.to_dense()
    assert rec.same_labels_as(rec2)
    # Reuse the dense copy computed above instead of converting again.
    assert np.linalg.norm(dense - rec) < 0.1
示例#10
0
 def create(self, data):
     """Create self._matrix from `data`.

     `data` is read three times, once per position: position 0 of each
     entry gives the values, position 1 the rows and position 2 the
     columns handed to from_named_lists.
     """
     columns = tuple(map(itemgetter(position), data) for position in range(3))
     self._matrix = divisiSparseMatrix.from_named_lists(*columns)