Exemplo n.º 1
0
    def divide_dataset(self):
        """
        Create ``self.n_folds`` random train/test partitions of the dataset.

        The entries of ``tp.triple_dataset`` (available after
        ``ReadFile.split_dataset()`` has run) are de-duplicated, shuffled and
        cut so that the first ``1 - self.test_ratio`` fraction becomes the
        train part and the remainder the test part. For every element of
        ``self.dataset`` (position ``i``) the value stored at
        ``tp.individual_interaction[i][entry[0]][entry[1]]`` is looked up for
        each entry; entries for which that lookup raises ``KeyError`` are
        skipped for that position.

        Results are stored on the instance, keyed by fold number:

        * ``self.dict_feedback_folds[fold]`` -- a list with one nested dict
          ``{entry[0]: {entry[1]: value}}`` per element of ``self.dataset``,
          built from the train entries only.
        * ``self.dict_folds[fold]`` -- ``{'train': rows, 'test': rows}`` where
          each row is ``[entry[0], entry[1], value]``; rows are accumulated
          over all elements of ``self.dataset``.

        NOTE(review): ``entry[0]`` / ``entry[1]`` are presumably user and item
        identifiers -- confirm against ``ReadFile``.
        """
        tp = ReadFile(self.dataset, space_type=self.space_type)
        tp.split_dataset()

        # ``range`` instead of ``xrange`` so this runs on Python 2 and 3 alike.
        for fold in range(self.n_folds):
            dict_feedback = []

            # Drop duplicated entries, then randomise their order for this fold.
            tp.triple_dataset = list(set(tp.triple_dataset))
            random.shuffle(tp.triple_dataset)

            # Index separating the train part from the test part.
            sp = int((1 - self.test_ratio) * len(tp.triple_dataset))
            train = tp.triple_dataset[:sp]
            test = tp.triple_dataset[sp:]
            train.sort()
            # The test part is ordered by the first field only.
            test.sort(key=lambda x: x[0])

            train_set = []
            test_set = []

            # Only the position is needed; the dataset element itself is unused.
            for i, _feedback in enumerate(self.dataset):
                dict_individual = {}

                for triple in train:
                    # The outer key is registered before the lookup, so it is
                    # kept (with an empty dict) even when the lookup fails.
                    # NOTE(review): confirm whether consumers rely on these
                    # empty entries before removing them.
                    inner = dict_individual.setdefault(triple[0], {})
                    try:
                        value = tp.individual_interaction[i][triple[0]][triple[1]]
                    except KeyError:
                        # No value for this entry at position i: skip it.
                        continue
                    inner[triple[1]] = value
                    train_set.append([triple[0], triple[1], value])

                for triple_test in test:
                    try:
                        value = tp.individual_interaction[i][triple_test[0]][triple_test[1]]
                    except KeyError:
                        # No value for this entry at position i: skip it.
                        continue
                    test_set.append([triple_test[0], triple_test[1], value])

                dict_feedback.append(dict_individual)

            self.dict_feedback_folds[fold] = dict_feedback
            self.dict_folds[fold] = {'train': train_set, 'test': test_set}