Exemplo n.º 1
0
    def train(self, train_samples, train_labels, **kwargs):
        """Accumulate per-person order vectors from the training data.

        Samples are expected in the form ``[person_id, month, day]``; only
        the leading person id is used here. Every label is converted to a
        NumPy array and summed element-wise into ``self.orders`` under the
        person id of its sample.

        Required keyword arguments (a missing one raises ``KeyError``):
            most_popular_goods: stored on the instance unchanged.
            most_popular_good_ids: stored on the instance; its length is
                validated against ``self.num_popular_ids``.
            max_good_id: stored on the instance unchanged.

        After all labels are accumulated, ``self.process_orders()`` is
        called.
        """
        checks.check_equality(
            len(train_samples),
            len(train_labels),
            message="Samples and labels have different sizes",
        )

        self.most_popular_goods = kwargs["most_popular_goods"]
        self.most_popular_good_ids = kwargs["most_popular_good_ids"]

        # NOTE(review): presumably enforces that at least
        # ``num_popular_ids`` ids were supplied -- confirm against
        # ``checks.check_value``.
        checks.check_value(
            len(self.most_popular_good_ids),
            lower=self.num_popular_ids,
            strict_less=False,
            var_name="most_popular_good_ids",
        )

        self.max_good_id = kwargs["max_good_id"]

        # The person id is the first field of every sample; collect them all
        # up front so a malformed sample fails before any order is touched.
        owner_ids = [sample[0] for sample in train_samples]
        for owner_id, label in zip(owner_ids, train_labels):
            if self.orders.get(owner_id) is not None:
                # Already seen: add this label to the running total.
                self.orders[owner_id] += np.array(label)
            else:
                # First occurrence: start the total from a fresh array.
                self.orders[owner_id] = np.array(label)

        self.process_orders()
Exemplo n.º 2
0
    def __init__(self, num_popular_ids):
        """Validate ``num_popular_ids`` and create the empty instance state.

        Args:
            num_popular_ids: checked via ``checks.check_types`` against
                ``int`` and via ``checks.check_value`` against the bounds
                0 and 100.
        """
        super().__init__()
        self.num_popular_ids = num_popular_ids

        checks.check_types(
            self.num_popular_ids, int, var_name="num_popular_ids"
        )
        # NOTE(review): the two positional flags presumably select strict /
        # non-strict comparison for the 0 and 100 bounds -- confirm against
        # the signature of ``checks.check_value``.
        checks.check_value(
            self.num_popular_ids,
            0,
            100,
            True,
            False,
            var_name="num_popular_ids",
        )

        # Empty containers and a zero maximum until other methods fill them.
        self.orders = {}
        self.most_popular_goods = {}
        self.most_popular_good_ids = []
        self.max_good_id = 0
    def __init__(
        self,
        proportion=0.7,
        raw_date=True,
        n_rows=None,
        num_popular_ids=5,
        debug=False,
    ):
        """Store the configuration options and create empty internal state.

        Each option is assigned to a private attribute and then validated
        through the ``checks`` helpers.

        Args:
            proportion: must pass a ``float`` type check and a value check
                against the bounds 0.0 and 1.0.
            raw_date: must pass a ``bool`` type check.
            n_rows: ``None`` or an ``int``; when not ``None`` it is also
                value-checked with a lower bound of 0.
            num_popular_ids: must pass an ``int`` type check.
            debug: must pass a ``bool`` type check.
        """
        # Internal state, empty until populated elsewhere.
        self._train_samples_num = 0
        self._help_data = {}
        self._list_of_instances = []
        self._list_of_labels = []
        self._list_of_samples = []
        self._chknums = []
        self._most_popular_good_ids = []
        self._answers_for_train = []

        self._proportion = proportion
        checks.check_types(self._proportion, float, var_name="proportion")
        # NOTE(review): the two positional flags presumably select strict /
        # non-strict comparison for the bounds -- confirm against the
        # signature of ``checks.check_value``.
        checks.check_value(
            self._proportion, 0.0, 1.0, True, False, var_name="proportion"
        )

        self._raw_date = raw_date
        checks.check_types(self._raw_date, bool, var_name="raw_date")

        self._n_rows = n_rows
        checks.check_types(self._n_rows, type(None), int, var_name="n_rows")
        # The bound only applies when a row count was actually given.
        if self._n_rows is not None:
            checks.check_value(self._n_rows, 0, None, var_name="n_rows")

        self._num_popular_ids = num_popular_ids
        checks.check_types(
            self._num_popular_ids, int, var_name="num_popular_ids"
        )

        self._debug = debug
        checks.check_types(self._debug, bool, var_name="debug")