def context_users_similarity(self):
    """Compute the similarity between users using context features.

    Loads per-user context features from a pickle cache when present;
    otherwise extracts them with a ContextProcessor and writes the cache.
    Then draws the euclidean-distance similarity matrix of the first
    user's documents ("alexis").
    """
    filename = os.path.join(OUTPUT_PATH, "pickle", "context_users_features.pickle")
    if os.path.isfile(filename):
        # Pickle data is binary: must open with "rb" (text mode breaks
        # pickle.load on Python 3).
        with open(filename, "rb") as f:
            features = pickle.load(f)
    else:
        self._processor = ContextProcessor()
        features = []
        # get all the features for each user
        for user, docs in self._processor.iterate():
            features.append(self._processor.get_features(docs, user))
        # Binary mode ("wb") for pickle output, matching the read side.
        with open(filename, "wb") as f:
            pickle.dump(features, f)

    # NOTE(review): removed a leftover ipdb set_trace() breakpoint and an
    # unused `reduced_features` computation (np.mean per doc) — the result
    # was never read.

    # it is possible to cluster each user's documents
    #
    # for alexis, let's print the similarity matrix of his documents
    draw_matrix(euclidean_distances(features[0], features[0]),
                "context_alexis", OUTPUT_PATH)
def run_processors(self):
    """Run the text and context processors, with 2D and pie drawing enabled."""
    options = {'draw_2d': True, 'draw_pie': True}
    for proc in (TextProcessor(**options), ContextProcessor(**options)):
        proc.run()