示例#1
0
文件: transform.py 项目: jdhenke/uap
def createSparseMatrix(assertions, path, use_left_features=True):
  """Build a concept-by-feature sparse matrix from assertions and save it.

  Each assertion is unpacked as ``(concept1, relation, concept2)``. It
  contributes the value 1.0 at row ``concept1``, column
  ``('right', relation, concept2)`` and, when ``use_left_features`` is true,
  another 1.0 at row ``concept2``, column ``('left', relation, concept1)``.

  Args:
    assertions: iterable of 3-item assertions; it is traversed exactly once.
    path: destination handed to ``divisi2.save``.
    use_left_features: whether to also emit the 'left' feature cell.

  Returns:
    None. The matrix is only written out through ``divisi2.save``.
  """

  def _get_matrix_cells(assertion):
    """Yield the ``(value, row, col)`` cells contributed by one assertion."""
    concept1, relation, concept2 = assertion
    yield float(1), concept1, ('right', relation, concept2)
    if use_left_features:
      yield float(1), concept2, ('left', relation, concept1)

  # Keep every cell as a single tuple so value/row/col can never drift out of
  # step; this replaces three parallel lists walked by index.
  cells = [cell
           for assertion in assertions
           for cell in _get_matrix_cells(assertion)]

  # Labels are passed as sets, so the matrix dimensions are the number of
  # distinct rows and distinct columns.
  row_labels = set(row for _, row, _ in cells)
  col_labels = set(col for _, _, col in cells)
  sparseMatrix = SparseMatrix((len(row_labels), len(col_labels)),
                              row_labels=row_labels, col_labels=col_labels)
  for value, row, col in cells:
    # TODO: more explicit handling of multiple entries for same cell
    # (repeated (row, col) pairs are simply passed to set_entry_named in order)
    sparseMatrix.set_entry_named(row, col, value)
  divisi2.save(sparseMatrix, path)
def buildMatrix(csv_filename, pickle_name, languages=['en'], cutoff=2):
    """Turn a CSV of assertions into a squished sparse matrix and save it.

    Args:
        csv_filename: path of the CSV file, opened in binary mode.
        pickle_name: destination handed to ``divisi2.save``.
        languages: forwarded unchanged to ``extractAsserts``.
        cutoff: forwarded to the matrix's ``squish`` method.

    Returns:
        None. The resulting matrix is only written out.
    """
    # NOTE(review): ``languages`` is a shared list default; this is safe only
    # as long as extractAsserts does not mutate it -- confirm.
    # The context manager closes the CSV even when extraction raises.
    with open(csv_filename, 'rb') as csv_file:
        asserts_dict = extractAsserts(csv_file, languages)
    triples = formTriples(asserts_dict)
    matrix = divisi2.make_sparse(triples)
    matrix = matrix.squish(cutoff)
    divisi2.save(matrix, pickle_name)
示例#3
0
def conceptnet_assoc(lang):
    """Return the ConceptNet association matrix for ``lang``.

    The saved matrix is loaded when available. If loading raises IOError,
    the matrix is rebuilt from the language's graph file with
    ``sparse_matrix``, saved under the same matrix path, and returned.
    """
    import divisi2
    matrix_path = 'data:matrices/conceptnet_assoc_%s.gz' % lang
    try:
        cached = divisi2.load(matrix_path)
    except IOError:
        # No saved matrix: rebuild it from the graph and store it for next time.
        graph_path = 'data:graphs/conceptnet_%s.graph.gz' % lang
        source_graph = divisi2.load(graph_path)
        rebuilt = sparse_matrix(source_graph, 'concepts', 'concepts', 3)
        divisi2.save(rebuilt, matrix_path)
        return rebuilt
    else:
        return cached
示例#4
0
def conceptnet_assoc(lang):
    """Load, or build and save, the concept/concept matrix for ``lang``.

    Falls back to building the matrix from the graph file only when loading
    the saved matrix raises IOError.
    """
    import divisi2
    saved_location = 'data:matrices/conceptnet_assoc_%s.gz' % lang
    try:
        return divisi2.load(saved_location)
    except IOError:
        # Saved matrix could not be loaded; derive it from the graph instead.
        lang_graph = divisi2.load('data:graphs/conceptnet_%s.graph.gz' % lang)
        assoc = sparse_matrix(lang_graph, 'concepts', 'concepts', 3)
        divisi2.save(assoc, saved_location)
        return assoc
示例#5
0
def conceptnet_matrix(lang):
    """Return the ConceptNet concept/feature matrix for ``lang``.

    The saved matrix file is tried first. If loading it raises IOError, the
    matrix is built from the language's graph file with ``sparse_matrix``,
    saved under the same matrix path, and returned.
    """
    import divisi2
    matrix_path = 'data:matrices/conceptnet_%s.gz' % lang
    try:
        # Preferred route: load from the included pickle file.
        cached = divisi2.load(matrix_path)
    except IOError:
        graph_path = 'data:graphs/conceptnet_%s.graph.gz' % lang
        source_graph = divisi2.load(graph_path)
        rebuilt = sparse_matrix(source_graph, 'concepts', 'features', 3)
        divisi2.save(rebuilt, matrix_path)
        return rebuilt
    else:
        return cached
示例#6
0
def conceptnet_matrix(lang):
    """Load, or build and save, the concept/feature matrix for ``lang``.

    Building from the graph file happens only when loading the saved matrix
    raises IOError.
    """
    import divisi2
    saved_location = 'data:matrices/conceptnet_%s.gz' % lang
    try:
        # Load from the included pickle file when it is available.
        return divisi2.load(saved_location)
    except IOError:
        lang_graph = divisi2.load('data:graphs/conceptnet_%s.graph.gz' % lang)
        features = sparse_matrix(lang_graph, 'concepts', 'features', 3)
        divisi2.save(features, saved_location)
        return features
示例#7
0
def conceptnet5_assoc():
    """Return the ConceptNet 5.1.1 association matrix.

    The saved matrix is loaded when possible. If loading raises IOError, the
    matrix is built from the sparse-links CSV (read through
    ``divisi2.dataset.conceptnet5_links``), saved, and returned.
    """
    import divisi2
    saved_path = 'data:matrices/conceptnet_assoc_5.1.1.gz'
    try:
        return divisi2.load(saved_path)
    except IOError:
        # Resolve the data: path of the CSV before handing it to the reader.
        links_csv = divisi2.fileIO.data_filename(
            'data:graphs/conceptnet-5.1.1-sparse-links.csv')
        link_entries = divisi2.dataset.conceptnet5_links(links_csv)
        assoc = divisi2.SparseMatrix.from_named_entries(link_entries)
        divisi2.save(assoc, saved_path)
        return assoc
示例#8
0
文件: model.py 项目: blazarus/charmme
    def update_doc_matrix(self, study_name='all'):
        """
        Build and save a dense matrix of document vectors for one study.

        One row is created per document returned by ``docs_in_study``, with
        ``config['num_axes']`` columns; each row is filled with that
        document's ``vector_from_document`` result. The matrix is saved as
        ``<study_name>.dmat`` inside this model's directory.

        FIXME: this filename may conflict with other things like 'tags'.
        """
        doc_ids = self.docs_in_study(study_name)
        blank = np.zeros((len(doc_ids), self.config['num_axes']))
        doc_matrix = divisi2.DenseMatrix(blank, row_labels=doc_ids)
        for doc_id in doc_ids:
            # Resolve the labelled row first, then overwrite it with the vector.
            row_idx = doc_matrix.row_index(doc_id)
            doc_matrix[row_idx] = self.vector_from_document(doc_id)
        target = self.filename_in_dir(study_name + '.dmat')
        divisi2.save(doc_matrix, target)
示例#9
0
文件: model.py 项目: blazarus/charmme
 def update_tag_matrix(self):
     """
     Build, save, and cache a dense matrix of per-tag average vectors.

     There is one row per (key, value) tag from ``database.all_tags()``. Each
     row is the mean of ``vector_from_document`` over the documents carrying
     that tag value; tags with no documents keep an all-zero row. The matrix
     is saved as 'tags.dmat', stored on ``self._tag_matrix``, and returned.
     """
     tags = self.database.all_tags()
     blank = np.zeros((len(tags), self.config['num_axes']))
     tag_matrix = divisi2.DenseMatrix(blank, row_labels=tags)
     for tag_key, tag_value in tags:
         row_idx = tag_matrix.row_index((tag_key, tag_value))
         tagged_docs = self.database.documents_with_tag_value(tag_key, tag_value)
         # Sum the document vectors in place while counting them from 1.
         doc_count = 0
         for doc_count, doc_id in enumerate(tagged_docs, 1):
             tag_matrix[row_idx] += self.vector_from_document(doc_id)
         if doc_count:
             # Turn the accumulated sum into a mean.
             tag_matrix[row_idx] /= doc_count
     divisi2.save(tag_matrix, self.filename_in_dir('tags.dmat'))
     self._tag_matrix = tag_matrix
     return tag_matrix
示例#10
0
from conceptnet.models import *
import divisi2
import os
import codecs

# Build a concept-by-feature matrix from every 'conceptnet_zh_*' file in the
# current directory and save it as 'feature_matrix_zh.smat'.
sparse_pieces = []
for filename in os.listdir('.'):
    if not filename.startswith('conceptnet_zh_'):
        continue
    # Use a context manager so each input file is closed as soon as it has
    # been read, instead of being left open until garbage collection.
    with codecs.open(filename, encoding='utf-8', errors='replace') as dump:
        for line in dump:
            line = line.strip()
            if not line:
                continue
            # Each non-blank line must split into exactly four ', '-separated
            # fields; any other shape raises ValueError on unpacking.
            parts = line.split(', ')
            user, frame_id, concept1, concept2 = parts
            # Frame presumably comes from the conceptnet.models star import.
            relation = Frame.objects.get(id=int(frame_id)).relation
            left_feature = u"%s\\%s" % (concept1, relation)
            right_feature = u"%s/%s" % (relation, concept2)

            # Every assertion yields one cell per direction.
            sparse_pieces.append((1, concept1, right_feature))
            sparse_pieces.append((1, concept2, left_feature))

matrix = divisi2.make_sparse(sparse_pieces)
divisi2.save(matrix, 'feature_matrix_zh.smat')