def createSparseMatrix(assertions, path, use_left_features = True):
    """Build a sparse matrix from assertion triples and save it to ``path``.

    Each assertion is unpacked as ``(concept1, relation, concept2)``.  It
    contributes a cell of value 1.0 at row ``concept1``, column
    ``('right', relation, concept2)``.  When ``use_left_features`` is true it
    also contributes a cell of value 1.0 at row ``concept2``, column
    ``('left', relation, concept1)``.

    The matrix is written with ``divisi2.save``; nothing is returned.
    """
    def _get_matrix_cells(assertion):
        # Yield the (value, row, col) cells produced by a single assertion.
        concept1, relation, concept2 = assertion
        yield float(1), concept1, ('right', relation, concept2)
        if use_left_features:
            yield float(1), concept2, ('left', relation, concept1)

    # Collect every cell first: the matrix constructor needs the complete
    # row and column label sets before any entry can be stored.
    cells = []
    for assertion in assertions:
        cells.extend(_get_matrix_cells(assertion))

    row_labels = set(row for _value, row, _col in cells)
    col_labels = set(col for _value, _row, col in cells)
    sparseMatrix = SparseMatrix((len(row_labels), len(col_labels)),
                                row_labels=row_labels,
                                col_labels=col_labels)

    # Iterating the cell tuples directly keeps value/row/col aligned by
    # construction, so no length check or index arithmetic is needed.
    for value, row, col in cells:
        # TODO: more explicit handling of multiple entries for same cell
        sparseMatrix.set_entry_named(row, col, value)

    divisi2.save(sparseMatrix, path)
def buildMatrix(csv_filename, pickle_name, languages=['en'], cutoff=2):
    """Build a sparse matrix from a CSV file and save it to ``pickle_name``.

    The CSV file is opened in binary mode and handed to ``extractAsserts``
    together with ``languages``.  The result goes through ``formTriples``,
    ``divisi2.make_sparse`` and ``matrix.squish(cutoff)`` before being
    written with ``divisi2.save``.  Nothing is returned.

    NOTE: the default ``languages`` list is a single object shared across
    calls and is passed to ``extractAsserts`` as-is.
    """
    # The context manager closes the file even if extractAsserts raises.
    with open(csv_filename, 'rb') as csv_file:
        asserts_dict = extractAsserts(csv_file, languages)

    triples = formTriples(asserts_dict)
    matrix = divisi2.make_sparse(triples)
    matrix = matrix.squish(cutoff)
    divisi2.save(matrix, pickle_name)
def conceptnet_assoc(lang):
    """Return the matrix stored at 'data:matrices/conceptnet_assoc_<lang>.gz'.

    If loading that file raises IOError, the matrix is rebuilt from
    'data:graphs/conceptnet_<lang>.graph.gz' via
    ``sparse_matrix(graph, 'concepts', 'concepts', 3)``, saved under the
    matrix path, and returned.
    """
    import divisi2

    matrix_path = 'data:matrices/conceptnet_assoc_%s.gz' % lang
    try:
        return divisi2.load(matrix_path)
    except IOError:
        # No saved matrix could be read: build it from the graph and save it.
        source_graph = divisi2.load('data:graphs/conceptnet_%s.graph.gz' % lang)
        built = sparse_matrix(source_graph, 'concepts', 'concepts', 3)
        divisi2.save(built, matrix_path)
        return built
def conceptnet_matrix(lang):
    """Return the matrix stored at 'data:matrices/conceptnet_<lang>.gz'.

    If loading that file raises IOError, the matrix is rebuilt from
    'data:graphs/conceptnet_<lang>.graph.gz' via
    ``sparse_matrix(graph, 'concepts', 'features', 3)``, saved under the
    matrix path, and returned.
    """
    import divisi2

    matrix_path = 'data:matrices/conceptnet_%s.gz' % lang
    try:
        # First choice: the already-saved matrix file.
        return divisi2.load(matrix_path)
    except IOError:
        # No saved matrix could be read: build it from the graph and save it.
        source_graph = divisi2.load('data:graphs/conceptnet_%s.graph.gz' % lang)
        built = sparse_matrix(source_graph, 'concepts', 'features', 3)
        divisi2.save(built, matrix_path)
        return built
def conceptnet5_assoc():
    """Return the matrix stored at 'data:matrices/conceptnet_assoc_5.1.1.gz'.

    If loading that file raises IOError, the matrix is rebuilt: the links
    CSV is resolved with ``divisi2.fileIO.data_filename``, read with
    ``divisi2.dataset.conceptnet5_links``, turned into a matrix with
    ``divisi2.SparseMatrix.from_named_entries``, saved under the matrix
    path, and returned.
    """
    import divisi2

    try:
        return divisi2.load('data:matrices/conceptnet_assoc_5.1.1.gz')
    except IOError:
        links_csv = divisi2.fileIO.data_filename(
            'data:graphs/conceptnet-5.1.1-sparse-links.csv')
        entries = divisi2.dataset.conceptnet5_links(links_csv)
        built = divisi2.SparseMatrix.from_named_entries(entries)
        divisi2.save(built, 'data:matrices/conceptnet_assoc_5.1.1.gz')
        return built
def update_doc_matrix(self, study_name='all'):
    """
    Build a dense matrix with one row per document in the given study,
    fill each row with that document's vector in this semantic space, and
    save it as '<study_name>.dmat' in this object's directory.

    FIXME: this filename may conflict with other things like 'tags'.
    """
    doc_ids = self.docs_in_study(study_name)

    # Start from an all-zero (documents x axes) array labelled by document.
    shape = (len(doc_ids), self.config['num_axes'])
    doc_matrix = divisi2.DenseMatrix(np.zeros(shape), row_labels=doc_ids)

    for doc_id in doc_ids:
        position = doc_matrix.row_index(doc_id)
        doc_matrix[position] = self.vector_from_document(doc_id)

    target = self.filename_in_dir(study_name+'.dmat')
    divisi2.save(doc_matrix, target)
def update_tag_matrix(self):
    """
    Build a dense matrix with one row per (key, value) tag known to the
    database. Each row is the average of the vectors of the documents
    carrying that tag; a tag with no documents keeps a zero row.

    The matrix is saved as 'tags.dmat', cached on ``self._tag_matrix`` and
    returned.
    """
    tags = self.database.all_tags()
    zeros = np.zeros((len(tags), self.config['num_axes']))
    tag_matrix = divisi2.DenseMatrix(zeros, row_labels=tags)

    for key, value in tags:
        row = tag_matrix.row_index((key, value))
        tagged_docs = self.database.documents_with_tag_value(key, value)

        # Sum the document vectors into the row while counting them.
        doc_count = 0
        for doc_count, docid in enumerate(tagged_docs, 1):
            tag_matrix[row] += self.vector_from_document(docid)

        # Turn the sum into a mean; skip tags with no documents so the
        # row stays zero instead of dividing by zero.
        if doc_count > 0:
            tag_matrix[row] /= doc_count

    target = self.filename_in_dir('tags.dmat')
    divisi2.save(tag_matrix, target)
    self._tag_matrix = tag_matrix
    return tag_matrix
from conceptnet.models import *
import divisi2
import os
import codecs

# Build a sparse concept-by-feature matrix from every file in the current
# directory whose name starts with 'conceptnet_zh_', and save it as
# 'feature_matrix_zh.smat'.
#
# Each non-empty input line must split on ', ' into exactly four fields:
# user, frame_id, concept1, concept2 (any other count raises ValueError on
# unpacking).  Frame is not imported by name here; presumably it is provided
# by the star import from conceptnet.models.
sparse_pieces = []
for filename in os.listdir('.'):
    if filename.startswith('conceptnet_zh_'):
        # The context manager closes each input file as soon as it has been
        # read, instead of leaving one open handle per file.
        with codecs.open(filename, encoding='utf-8', errors='replace') as infile:
            for line in infile:
                line = line.strip()
                if line:
                    parts = line.split(', ')
                    # `user` is unused but kept so the four-field unpacking
                    # still validates the line format.
                    user, frame_id, concept1, concept2 = parts
                    relation = Frame.objects.get(id=int(frame_id)).relation
                    left_feature = u"%s\\%s" % (concept1, relation)
                    right_feature = u"%s/%s" % (relation, concept2)
                    # One entry per direction: concept1 gets the right-hand
                    # feature, concept2 gets the left-hand feature.
                    sparse_pieces.append((1, concept1, right_feature))
                    sparse_pieces.append((1, concept2, left_feature))

matrix = divisi2.make_sparse(sparse_pieces)
divisi2.save(matrix, 'feature_matrix_zh.smat')