def save_sparse_matrix(self, sm, index, created=None):
    """Replace all stored Similarity rows with the entries of ``sm``.

    Every existing ``Similarity`` row is deleted first. Then each cell of
    ``sm`` whose value is greater than ``self.min_sim`` and whose row label
    differs from its column label is stored as a new ``Similarity``.

    Args:
        sm: Table exposing ``itertuples()`` and ``columns``; the first
            element of each tuple is used as the source and the column
            label as the target (presumably a pandas DataFrame -- confirm
            against the caller).
        index: Not used by this implementation; kept so existing callers
            keep working.
        created: Timestamp stored on every row. Defaults to the time of
            the call.
    """
    # A ``datetime.now()`` default is evaluated once, when the function is
    # defined, so every later call would reuse that stale timestamp.
    if created is None:
        created = datetime.now()

    start_time = datetime.now()
    Similarity.objects.all().delete()

    batch_size = 1000
    sims = []
    no_saved = 0
    for record in sm.itertuples():
        row = record[0]
        # record[1:] lines up positionally with sm.columns.
        for col, sim in zip(sm.columns, record[1:]):
            if sim > self.min_sim and row != col:
                # Flush in batches so the pending list stays bounded.
                if len(sims) == batch_size:
                    Similarity.objects.bulk_create(sims)
                    sims = []
                sims.append(Similarity(
                    created=created,
                    source=row,
                    target=col,
                    # Go through str() so the Decimal gets the printed
                    # value rather than the binary float expansion.
                    similarity=decimal.Decimal(str(sim)),
                ))
                no_saved += 1

    # Write whatever is left in the last, partially filled batch.
    Similarity.objects.bulk_create(sims)
    print('{} Similarity items saved, done in {} seconds'.format(
        no_saved, datetime.now() - start_time))
def save_sparse_matrix(self, sm, index, created=None):
    """Replace all stored Similarity rows with selected cells of ``sm``.

    Every existing ``Similarity`` row is deleted first. The cells to store
    are the ``(row, col)`` positions returned by
    ``self.interesting_indexes(sm, 0.2)``; positions on the diagonal
    (``row == col``) are skipped.

    Args:
        sm: Two-dimensional matrix supporting ``shape`` and ``sm[row, col]``
            indexing.
        index: Lookup from a matrix position to the identifier stored as
            ``source`` / ``target``.
        created: Timestamp stored on every row. Defaults to the time of
            the call.
    """
    # A ``datetime.datetime.now()`` default is evaluated once, when the
    # function is defined, so every later call would reuse that timestamp.
    if created is None:
        created = datetime.datetime.now()

    print('saving similarities (number:{})'.format(sm.shape[0] * sm.shape[1]))
    Similarity.objects.all().delete()

    batch_size = 1000
    sims = []
    rows, cols = self.interesting_indexes(sm, 0.2)
    for row, col in zip(rows, cols):
        if row == col:
            continue
        # Flush in batches so the pending list stays bounded.
        if len(sims) == batch_size:
            Similarity.objects.bulk_create(sims)
            sims = []
        sims.append(Similarity(
            created=created,
            source=index[row],
            target=index[col],
            # Go through str() so the Decimal gets the printed value
            # rather than the binary float expansion.
            similarity=decimal.Decimal(str(sm[row, col])),
        ))

    # Write whatever is left in the last, partially filled batch.
    Similarity.objects.bulk_create(sims)
    print('Similarity items saved')
def save_cf(similarites):
    """Save every similarity record as its own ``Similarity`` row.

    Each element of ``similarites`` is read positionally: ``[0]`` is the
    creation timestamp, ``[1]`` the source, ``[2]`` the target and ``[3]``
    the similarity score. The score is converted to ``Decimal`` through its
    string form. The record and its score are printed before the row is
    saved.
    """
    for record in similarites:
        print(record)
        score = record[3]
        print(score)

        entry = Similarity(
            created=record[0],
            source=record[1],
            target=record[2],
            similarity=decimal.Decimal(str(score)),
        )
        entry.save()
def _save_with_django(self, sm, index, created=None):
    """Replace all stored Similarity rows with the non-zero cells of ``sm``.

    Every existing ``Similarity`` row is deleted first. ``sm`` is converted
    to a COO matrix (to enumerate non-zero positions) and a CSR matrix (to
    read the values). Diagonal cells and cells whose value is below
    ``self.min_sim`` are skipped; the rest are written with ``bulk_create``
    in batches of 500000.

    Args:
        sm: Anything ``coo_matrix`` accepts.
        index: Lookup from a matrix position to the identifier stored as
            ``source`` / ``target``.
        created: Timestamp stored on every row. Defaults to the time of
            the call.
    """
    # NOTE(review): the original carried the remark "cor, places" here;
    # its meaning is unclear -- confirm with the author.

    # A ``datetime.now()`` default is evaluated once, when the function is
    # defined, so every later call would reuse that stale timestamp.
    if created is None:
        created = datetime.now()

    start_time = datetime.now()
    Similarity.objects.all().delete()
    logger.info(
        f'truncating table in {datetime.now() - start_time} seconds')

    batch_size = 500000
    sims = []
    no_saved = 0

    # Restart the clock: from here on it times the conversion and the save.
    start_time = datetime.now()
    coo = coo_matrix(sm)
    csr = coo.tocsr()
    logger.debug(
        f'instantiation of coo_matrix in {datetime.now() - start_time} seconds'
    )
    logger.debug(f'{coo.count_nonzero()} similarities to save')

    xs, ys = coo.nonzero()
    for x, y in zip(xs, ys):
        if x == y:
            continue

        sim = csr[x, y]
        if sim < self.min_sim:
            continue

        # Flush in batches so the pending list stays bounded.
        if len(sims) == batch_size:
            Similarity.objects.bulk_create(sims)
            sims = []
            logger.debug(
                f"{no_saved} saved in {datetime.now() - start_time}")

        sims.append(Similarity(source=index[x],
                               target=index[y],
                               created=created,
                               similarity=sim))
        no_saved += 1

    # Write whatever is left in the last, partially filled batch.
    Similarity.objects.bulk_create(sims)
    logger.info('{} Similarity items saved, done in {} seconds'.format(
        no_saved, datetime.now() - start_time))
def save_cf(self, df, created=None):
    """Replace all stored Similarity rows with the rows of ``df``.

    Every existing ``Similarity`` row is deleted first. For each row
    yielded by ``df.iterrows()`` the row label becomes the source, the
    first value the target and the second value the similarity score.
    Rows where target equals source, or where the score is not positive,
    are skipped.

    Args:
        df: Table exposing ``iterrows()`` whose rows carry at least two
            values (presumably a pandas DataFrame -- confirm against the
            caller).
        created: Timestamp stored on every row. Defaults to the time of
            the call.
    """
    # A ``datetime.datetime.now()`` default is evaluated once, when the
    # function is defined, so every later call would reuse that timestamp.
    if created is None:
        created = datetime.datetime.now()

    print("Save item-item model")
    Similarity.objects.all().delete()

    batch_size = 100
    sims = []
    for label, values in df.iterrows():
        # Batch on the number of pending objects. The previous row counter
        # was never reset, so after 100 rows every iteration issued its
        # own bulk_create call.
        if len(sims) >= batch_size:
            Similarity.objects.bulk_create(sims)
            sims = []

        new_similarity = Similarity(
            created=created,
            source=int(label),
            target=int(values.values[0]),
            # Go through str() so the Decimal gets the printed value
            # rather than the binary float expansion.
            similarity=decimal.Decimal(str(values.values[1])),
        )
        if (new_similarity.target != new_similarity.source
                and new_similarity.similarity > 0):
            sims.append(new_similarity)

    # Write whatever is left in the last, partially filled batch.
    Similarity.objects.bulk_create(sims)
    print('Similarity items saved')