示例#1
0
    def save_sparse_matrix(self, sm, index, created=None):
        """Replace all stored ``Similarity`` rows with the entries of ``sm``.

        Every existing ``Similarity`` row is deleted first. Each cell whose
        value is strictly greater than ``self.min_sim`` and whose row label
        differs from its column label is then stored as one ``Similarity``
        row, written to the database in batches of 1000.

        Args:
            sm: Object exposing ``itertuples()`` and ``columns``. The first
                element of each tuple is used as the source id and the
                column labels as target ids (presumably a pandas DataFrame
                indexed by item id -- confirm against the caller).
            index: Not used by this method; kept for signature compatibility.
            created: Timestamp stored on every row. Defaults to the time of
                the call.
        """
        # A ``datetime.now()`` default in the signature is evaluated only once,
        # when the function is defined, so every call would share that stale
        # timestamp. Resolve the default per call instead.
        if created is None:
            created = datetime.now()

        start_time = datetime.now()
        Similarity.objects.all().delete()

        batch_size = 1000
        sims = []
        no_saved = 0
        columns = sm.columns

        for record in sm.itertuples():
            row = record[0]
            # record[1:] holds the cell values, aligned with ``columns``.
            for col, sim in zip(columns, record[1:]):
                if sim > self.min_sim and row != col:
                    # Flush before appending so the pending list never
                    # grows beyond one batch.
                    if len(sims) >= batch_size:
                        Similarity.objects.bulk_create(sims)
                        sims = []

                    sims.append(
                        Similarity(
                            created=created,
                            source=row,
                            target=col,
                            # Go through str() so the Decimal carries the
                            # printed value, not the binary float expansion.
                            similarity=decimal.Decimal(str(sim)),
                        )
                    )
                    no_saved += 1

        # Write whatever is left in the last, partially filled batch.
        Similarity.objects.bulk_create(sims)
        print('{} Similarity items saved, done in {} seconds'.format(no_saved, datetime.now() - start_time))
示例#2
0
    def save_sparse_matrix(self, sm, index, created=None):
        """Replace all stored ``Similarity`` rows with selected cells of ``sm``.

        Every existing ``Similarity`` row is deleted first. The cell
        positions returned by ``self.interesting_indexes(sm, 0.2)`` are then
        stored, skipping positions on the diagonal, in batches of 1000.

        Args:
            sm: Two-dimensional matrix supporting ``shape`` and
                ``sm[row, col]`` indexing.
            index: Maps a matrix position to the id stored as source/target.
            created: Timestamp stored on every row. Defaults to the time of
                the call.
        """
        # A ``datetime.datetime.now()`` default in the signature is evaluated
        # only once, at definition time; resolve it per call instead.
        if created is None:
            created = datetime.datetime.now()

        print('saving similarities (number:{})'.format(sm.shape[0] *
                                                       sm.shape[1]))
        Similarity.objects.all().delete()

        batch_size = 1000
        sims = []

        # NOTE(review): interesting_indexes is defined elsewhere; presumably it
        # returns the row/column positions of cells above 0.2 -- confirm.
        rows, cols = self.interesting_indexes(sm, 0.2)
        for row, col in zip(rows, cols):
            # Flush a full batch before collecting more rows.
            if len(sims) >= batch_size:
                Similarity.objects.bulk_create(sims)
                sims = []

            if row == col:
                # Diagonal cells pair an item with itself; not stored.
                continue

            sims.append(
                Similarity(
                    created=created,
                    source=index[row],
                    target=index[col],
                    similarity=decimal.Decimal(str(sm[row, col])),
                )
            )

        # Write whatever is left in the last, partially filled batch.
        Similarity.objects.bulk_create(sims)
        print('Similarity items saved')
def save_cf(similarites):
    """Store each similarity record as its own ``Similarity`` row.

    Every record is indexed positionally: 0 is the creation timestamp,
    1 the source, 2 the target and 3 the similarity score. The record and
    its score are printed before the row is saved.
    """
    for record in similarites:
        print(record)
        print(record[3])

        fields = {
            'created': record[0],
            'source': record[1],
            'target': record[2],
            # str() first so the Decimal matches the printed score.
            'similarity': decimal.Decimal(str(record[3])),
        }
        entry = Similarity(**fields)
        entry.save()
    def _save_with_django(self, sm, index, created=None):
        """Replace all stored ``Similarity`` rows with the non-zero cells of ``sm``.

        Every existing ``Similarity`` row is deleted first. Each non-zero,
        off-diagonal cell whose value is not below ``self.min_sim`` is then
        stored as one row, written in batches of 500000.

        Args:
            sm: Matrix accepted by ``scipy.sparse.coo_matrix``.
            index: Maps a matrix position to the id stored as source/target.
            created: Timestamp stored on every row. Defaults to the time of
                the call.
        """
        # A ``datetime.now()`` default in the signature is evaluated only once,
        # at definition time, so every call would reuse that stale timestamp.
        if created is None:
            created = datetime.now()

        start_time = datetime.now()
        Similarity.objects.all().delete()
        logger.info(
            f'truncating table in {datetime.now() - start_time} seconds')

        batch_size = 500000
        sims = []
        no_saved = 0

        # Restart the clock: the remaining messages time the save itself,
        # not the table truncation above.
        start_time = datetime.now()
        coo = coo_matrix(sm)
        # CSR form is kept for the per-cell value lookups in the loop.
        csr = coo.tocsr()

        logger.debug(
            f'instantiation of coo_matrix in {datetime.now() - start_time} seconds'
        )
        logger.debug(f'{coo.count_nonzero()} similarities to save')

        xs, ys = coo.nonzero()
        for x, y in zip(xs, ys):
            if x == y:
                # Diagonal cells pair an item with itself; not stored.
                continue

            sim = csr[x, y]
            if sim < self.min_sim:
                continue

            # Flush a full batch before collecting more rows.
            if len(sims) >= batch_size:
                Similarity.objects.bulk_create(sims)
                sims = []
                logger.debug(
                    f"{no_saved} saved in {datetime.now() - start_time}")

            sims.append(
                Similarity(source=index[x],
                           target=index[y],
                           created=created,
                           similarity=sim))
            no_saved += 1

        # Write whatever is left in the last, partially filled batch.
        Similarity.objects.bulk_create(sims)
        logger.info('{} Similarity items saved, done in {} seconds'.format(
            no_saved,
            datetime.now() - start_time))
示例#5
0
    def save_cf(self, df, created=None):
        """Replace all stored ``Similarity`` rows with the rows of ``df``.

        Every existing ``Similarity`` row is deleted first. For each row of
        ``df`` the row label is used as the source, the first value as the
        target and the second value as the similarity. Rows where source
        equals target, or where the similarity is not greater than zero,
        are skipped. Rows are written in batches of 100.

        Args:
            df: Object whose ``iterrows()`` yields ``(label, values)`` pairs,
                where ``values.values`` holds at least two entries
                (presumably a pandas DataFrame -- confirm against the caller).
            created: Timestamp stored on every row. Defaults to the time of
                the call.
        """
        # A ``datetime.datetime.now()`` default in the signature is evaluated
        # only once, at definition time; resolve it per call instead.
        if created is None:
            created = datetime.datetime.now()

        print("Save item-item model")

        Similarity.objects.all().delete()

        batch_size = 100
        sims = []
        for label, series in df.iterrows():
            # Flush on the pending-list size. The previous row counter was
            # never reset after reaching 100, so every later row issued its
            # own bulk_create call.
            if len(sims) >= batch_size:
                Similarity.objects.bulk_create(sims)
                sims = []

            values = series.values
            source = int(label)
            target = int(values[0])
            similarity = decimal.Decimal(str(values[1]))

            if not target == source and similarity > 0:
                sims.append(
                    Similarity(created=created,
                               source=source,
                               target=target,
                               similarity=similarity))

        # Write whatever is left in the last, partially filled batch.
        Similarity.objects.bulk_create(sims)
        print('Similarity items saved')