Example #1
    def apply_action(self):
        tgt = self.target
        g = Game.getgame()
        n = min(len([p for p in g.players if not p.dead]), 5)
        cards = g.deck.getcards(n)

        assert cards == g.deck.getcards(n)

        tgt.reveal(cards)
        rst = tgt.user_input('ran_prophet', cards, timeout=40)
        if not rst: return False
        try:
            check_type([[int, Ellipsis]]*2, rst)
            upcards = rst[0]
            downcards = rst[1]
            check(sorted(upcards+downcards) == range(n))
        except CheckFailed:
            try:
                # upcards/downcards may be unbound if check_type failed early
                print 'RAN PROPHET:', upcards, downcards
            except NameError:
                pass
            return False

        deck = g.deck.cards
        for i, j in enumerate(downcards):
            deck[i] = cards[j]
        deck.rotate(-len(downcards))
        for i, j in enumerate(upcards):
            deck[i] = cards[j]

        cl = [cards[i] for i in upcards]
        assert g.deck.getcards(len(upcards)) == cl

        return True
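Most of the game-logic snippets below (Examples 1-4, 6-11, and 13) lean on the same trio of helpers: check(), a pattern-based check_type(), and the CheckFailed exception. As a reading aid, here is a minimal sketch of those helpers inferred purely from the call sites in this listing; the owning project's actual utils module may differ.

class CheckFailed(Exception):
    """Signals a failed validation; callers catch it and bail out."""

def check(cond):
    # Any falsy value aborts the surrounding try block.
    if not cond:
        raise CheckFailed

def check_type(pattern, obj):
    # A pattern is either a type (plain isinstance test) or a list of
    # sub-patterns. A trailing Ellipsis means "zero or more elements
    # matching the sub-pattern before it"; otherwise lengths must match.
    if isinstance(pattern, list):
        check(isinstance(obj, (list, tuple)))
        if pattern and pattern[-1] is Ellipsis:
            for item in obj:
                check_type(pattern[-2], item)
        else:
            check(len(obj) == len(pattern))
            for sub, item in zip(pattern, obj):
                check_type(sub, item)
    else:
        check(isinstance(obj, pattern))

# e.g. check_type([[int, Ellipsis]] * 2, [[1, 2], []]) passes, while
# check_type([int] * 3, [1, 2]) raises CheckFailed.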
Example #2
def user_choose_cards(initiator, actor, categories, timeout=None, trans=None):
    check_type([str, Ellipsis], categories)

    _, rst = ask_for_action(initiator, [actor], categories, (), timeout=timeout, trans=trans)
    if not rst:
        return None

    return rst[0]  # cards
Example #3
def user_choose_cards(initiator, actor, categories):
    check_type([str, Ellipsis], categories)

    _, rst = ask_for_action(initiator, [actor], categories, [])
    if not rst:
        return None

    return rst[0]  # cards
Example #4
    def parse(self, data):
        n = self.num
        try:
            check(data)
            check_type([int] * n, data)
            check(set(data) == set(range(n)))
            return data

        except CheckFailed:
            return range(n)
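A quick usage sketch for parse() above, with hypothetical values and num == 3: a well-formed permutation of range(3) passes through unchanged, and anything else falls back to the identity ordering.

# parse([2, 0, 1])  -> [2, 0, 1]
# parse([1, 1, 1])  -> range(3)   (set-equality check fails)
# parse('junk')     -> range(3)   (type check fails)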
Example #5
 def test_check_type__invalid(self):
     """with invalid value."""
     self.assertTrue(utils.check_type('loan_type', 11, params) is None)
     self.assertTrue(utils.check_type('rate_structure', 11, params) is None)
     self.assertTrue(utils.check_type('arm_type', 'String', params) is None)
     self.assertTrue(utils.check_type('loan_term', 'A Week', params) is None)
     self.assertTrue(utils.check_type('price', 'String', params) is None)
     self.assertTrue(utils.check_type('loan_amount', 'String', params) is None)
     self.assertTrue(utils.check_type('state', 'Virginia', params) is None)
     self.assertTrue(utils.check_type('fico', 'ABC', params) is None)
     self.assertTrue(utils.check_type('minfico', 'ABC', params) is None)
     self.assertTrue(utils.check_type('maxfico', 'ABC', params) is None)
Example #6
 def parse(self, i):
     m = self.mapping
     actor = self.actor
     try:
         check(actor in m)
         check_type(int, i)
         check(0 <= i < len(m[actor]))
         choice = m[actor][i]
         check(not choice.chosen)
         return choice
     except CheckFailed:
         return None
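For context, a hypothetical call pattern for this parse(): with self.mapping == {actor: [choice_a, choice_b]} and neither choice taken yet, a valid index returns the choice object, while anything else returns None.

# parse(1)   -> choice_b
# parse(7)   -> None   (index out of range)
# parse('x') -> None   (not an int)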
Example #7
    def parse(self, data):
        # data = [
        #     [skill_index1, ...],
        #     [card_syncid1, ...],
        #     [player_id1, ...],
        #     {'action_param1': 'AttackCard'},
        # ]

        actor = self.actor
        g = Game.getgame()
        categories = self.categories
        categories = [getattr(actor, i) for i in categories] if categories else None
        candidates = self.candidates

        skills = []
        cards = []
        players = []
        params = {}

        _ = Ellipsis
        try:
            check_type([[int, _]] * 3 + [dict], data)

            sid_list, cid_list, pid_list, params = data

            if candidates:
                check(candidates)
                pl = [g.player_fromid(i) for i in pid_list]
                check(all([p in candidates for p in pl]))
                players = pl

            if categories:
                cards = g.deck.lookupcards(cid_list)
                check(len(cards) == len(cid_list))  # Invalid id

                cs = set(cards)
                check(len(cs) == len(cid_list))  # repeated ids

                if sid_list:
                    assert actor.cards in categories or actor.showncards in categories
                    check(all(cat.owner is actor for cat in categories))
                    check(all(c.resides_in.owner is actor for c in cards))  # Cards belong to actor?
                    for skill_id in sid_list:
                        check(0 <= skill_id < len(actor.skills))
                    skills = [actor.skills[i] for i in sid_list]
                else:
                    check(all(c.resides_in in categories for c in cards))  # Cards in desired categories?

            return [skills, cards, players, params]

        except CheckFailed:
            return None
Example #8
def user_choose_players_logic(input, act, target, candidates):
    try:
        g = Game.getgame()
        check_type([[int, Ellipsis]] * 3, input)
        _, _, pids = input
        check(pids)
        pl = [g.player_fromid(i) for i in pids]
        check(all(p in candidates for p in pl))
        pl, valid = act.choose_player_target(pl)
        check(valid)
        return pl
    except CheckFailed:
        return None
Example #9
    def parse(self, data):
        try:
            check_type([[int, Ellipsis]] * 2, data)
            upcards = data[0]
            downcards = data[1]
            check(sorted(upcards+downcards) == range(len(self.cards)))
        except CheckFailed:
            return [self.cards, []]

        cards = self.cards
        upcards = [cards[i] for i in upcards]
        downcards = [cards[i] for i in downcards]

        return [upcards, downcards]
Example #10
    def apply_action(self):
        g = Game.getgame()
        target = self.target
        if target.dead: return False

        shuffle_here()

        try:
            while not target.dead:
                g.emit_event('action_stage_action', target)
                input = target.user_input('action_stage_usecard')
                check_type([[int, Ellipsis]] * 3, input)

                skill_ids, card_ids, target_list = input

                if card_ids:
                    cards = g.deck.lookupcards(card_ids)
                    check(cards)
                    check(all(c.resides_in.owner is target for c in cards))
                else:
                    cards = []

                target_list = [g.player_fromid(i) for i in target_list]
                from game import AbstractPlayer
                check(all(isinstance(p, AbstractPlayer) for p in target_list))

                # skill selected
                if skill_ids:
                    card = skill_wrap(target, skill_ids, cards)
                    check(card)
                else:
                    check(len(cards) == 1)
                    g.players.exclude(target).reveal(cards)
                    card = cards[0]
                    from .cards import HiddenCard
                    assert not card.is_card(HiddenCard)
                    check(card.resides_in in (target.cards, target.showncards))
                if not g.process_action(ActionStageLaunchCard(target, target_list, card)):
                    # invalid input
                    log.debug('ActionStage: LaunchCard failed.')
                    break

                shuffle_here()

        except CheckFailed:
            pass

        return True
Example #11
    def parse(self, data):
        _ = Ellipsis
        try:
            check_type([[int, _]] * 2, data)
            putback = data[0]
            acquire = data[1]
            check(sorted(putback + acquire) == range(len(self.cards)))

            cards = self.cards
            putback = [cards[i] for i in putback]
            acquire = [cards[i] for i in acquire]

        except CheckFailed:
            return [self.cards, []]

        return [putback, acquire]
Example #12
    def tokens(self):
        """
        Breaks a call number into tokens, which are its atomic parts. A token
        will contain either letters or a number but not both.

        :return: a list of Token objects
        """
        tokens_list = []
        new_token = self.value[0]

        for i in range(len(self.value) - 1):
            c = self.value[i]
            d = self.value[i + 1]

            # d is part of token
            if check_type(c) == check_type(d):
                new_token += d

            # d is the beginning of a new token
            else:
                # Prevents adding a space as a token
                if check_type(new_token) != 2:
                    tokens_list.append(Token(new_token))

                if (check_type(c) == 0) and (check_type(d) == 1) and (i > 1):
                    """
                    In call number sorting rules, the first number should be
                    treated as a whole number and all following numbers should
                    be treated as decimals.

                    For example, M101 K78 would be M, 101, K, 0.78
                    """

                    d = '0.' + d

                new_token = d

        tokens_list.append(Token(new_token))
        return tokens_list
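Note that check_type in this snippet is a different, one-argument helper: a character classifier rather than a validator. Inferred from the comparisons above (letters compare equal to letters, digits to digits, and code 2 marks a space that must not become a token), a plausible sketch:

def check_type(s):
    # Classify a homogeneous token by its first character:
    # 0 = letters, 1 = digits, 2 = space or other separator.
    c = s[0]
    if c.isalpha():
        return 0
    if c.isdigit():
        return 1
    return 2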
Example #13
def user_choose_cards_logic(input, act, target, categories=None):
    from utils import check, CheckFailed
    g = Game.getgame()

    try:
        check_type([[int, Ellipsis]] * 3, input)

        sid_list, cid_list, pid_list = input

        cards = g.deck.lookupcards(cid_list)
        check(len(cards) == len(cid_list)) # Invalid id

        cs = set(cards)
        check(len(cs) == len(cid_list)) # repeated ids

        if not categories:
            categories = [target.cards, target.showncards]

        if sid_list:
            check(all(cat.owner is target for cat in categories))
            check(all(c.resides_in.owner is target for c in cards)) # Cards belong to target?

            # associated_cards will be revealed here
            c = skill_wrap(target, sid_list, cards)
            check(c)
            cards = [c]
        else:
            check(all(c.resides_in in categories for c in cards)) # Cards in desired categories?
            if not getattr(act, 'no_reveal', False):
                g.players.exclude(target).reveal(cards)

        check(act.cond(cards))

        log.debug('user_choose_cards: %s %s %s', repr(act), target.__class__.__name__, repr(cards))
        return cards
    except CheckFailed:
        log.debug('user_choose_cards FAILED: %s %s', repr(act), target.__class__.__name__)
        return None
Example #14
    def fit(self, X, y, split_type: str = "extreme"):
        """Split multi-label y dataset into train and test subsets.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).

        y : {array-like, sparse matrix} of shape (n_samples, n_labels).

        split_type : str, one of {'naive', 'extreme', 'iterative'}.

        Returns
        -------
        data partition : two lists of indices representing the resulting data split
        """

        if X is None:
            raise Exception("Please provide a dataset.")
        if y is None:
            raise Exception("Please provide labels for the dataset.")
        assert X.shape[0] == y.shape[0]

        check, X = check_type(X=X, return_list=False)
        if not check:
            tmp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
            raise Exception(tmp)
        check, y = check_type(X=y, return_list=False)
        if not check:
            tmp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
            raise Exception(tmp)

        num_examples, num_labels = y.shape

        # check whether data is singly labeled
        if num_labels == 1:
            # transform it to multi-label data
            classes = list(set([i[0] if i else 0 for i in y.data]))
            mlb = LabelBinarizer(labels=classes)
            y = mlb.transform(y)

        if not self.is_fit:
            desc = '\t>> Building Graph...'
            print(desc)
            # Construct graph
            if self.shuffle:
                sample_idx = custom_shuffle(num_examples=num_examples)
                X = X[sample_idx, :]
                y = y[sample_idx, :]
            P = lil_matrix(cosine_similarity(X=X))
            P = normalize_laplacian(A=P, sigma=self.sigma, return_adj=True, norm_adj=True)
            P = triu(P)
            D = y
            for epoch in range(self.num_epochs):
                D = self.alpha * P * D + (1 - self.alpha) * y
            idx = np.random.choice(a=list(range(num_examples)), size=self.num_subsamples, replace=True)
            self.community_labels = self.__graph_construction(D[idx])
        mlb = LabelBinarizer(labels=list(range(self.num_communities)))
        y = mlb.reassign_labels(y, mapping_labels=self.community_labels)
        self.is_fit = True

        # perform splitting
        if split_type == "extreme":
            st = ExtremeStratification(swap_probability=self.swap_probability,
                                       threshold_proportion=self.threshold_proportion, decay=self.decay,
                                       shuffle=self.shuffle, split_size=self.split_size,
                                       num_epochs=self.num_epochs, verbose=False)
            train_list, test_list = st.fit(X=X, y=y)
        elif split_type == "iterative":
            st = IterativeStratification(shuffle=self.shuffle, split_size=self.split_size, verbose=False)
            train_list, test_list = st.fit(y=y)
        else:
            st = NaiveStratification(shuffle=self.shuffle, split_size=self.split_size, batch_size=self.batch_size,
                                     num_jobs=self.num_jobs, verbose=False)
            train_list, test_list = st.fit(y=y)
        return train_list, test_list
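Examples 14, 24, and 29 share a third check_type variant: a keyword-argument validator that accepts scipy.sparse, numpy.ndarray, or list input and returns an (ok, data) pair. A rough sketch consistent with those call sites; the helper's internals and the choice of sparse format are assumptions:

import numpy as np
from scipy.sparse import lil_matrix, spmatrix

def check_type(X, return_list=False):
    # Accept list, ndarray, or any scipy sparse matrix; reject other types.
    if not isinstance(X, (list, np.ndarray, spmatrix)):
        return False, X
    X = lil_matrix(X)                  # normalize to one sparse format
    if return_list:
        X = X.toarray().tolist()
    return True, X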
Example #15
    def __init__(self, input=None, eps=.001, diff_order=5, verbose=None):
        if not scipy_imported:
            raise ImportError, 'Scipy must be installed to use NormApprox and MAP.'

        Model.__init__(self, input, verbose=verbose)

        # Allocate memory for internal traces and get stochastic slices
        self._slices = {}
        self.len = 0
        self.stochastic_len = {}
        self.fitted = False

        self.stochastic_list = list(self.stochastics)
        self.N_stochastics = len(self.stochastic_list)
        self.stochastic_indices = []
        self.stochastic_types = []
        self.stochastic_type_dict = {}

        for i in xrange(len(self.stochastic_list)):

            stochastic = self.stochastic_list[i]

            # Check types of all stochastics.
            type_now = check_type(stochastic)[0]
            self.stochastic_type_dict[stochastic] = type_now

            if type_now is not float:
                print "Warning: Stochastic " + stochastic.__name__ + "'s value is neither numerical nor array with " + \
                            "floating-point dtype. Recommend fitting method fmin (default)."

            # Inspect shapes of all stochastics and create stochastic slices.
            if isinstance(stochastic.value, ndarray):
                self.stochastic_len[stochastic] = len(ravel(stochastic.value))
            else:
                self.stochastic_len[stochastic] = 1
            self._slices[stochastic] = slice(self.len, self.len + self.stochastic_len[stochastic])
            self.len += self.stochastic_len[stochastic]

            # Record indices that correspond to each stochastic.
            for j in range(len(ravel(stochastic.value))):
                self.stochastic_indices.append((stochastic, j))
                self.stochastic_types.append(type_now)

        self.data_len = 0
        for datum in self.observed_stochastics:
            self.data_len += len(ravel(datum.value))

        # Unpack step
        self.eps = zeros(self.len, dtype=float)
        if isinstance(eps, dict):
            for stochastic in self.stochastics:
                self.eps[self._slices[stochastic]] = eps[stochastic]
        else:
            self.eps[:] = eps

        self.diff_order = diff_order

        self._len_range = arange(self.len)

        # Initialize gradient and Hessian matrix.
        self.grad = zeros(self.len, dtype=float)
        self.hess = asmatrix(zeros((self.len, self.len), dtype=float))

        self._mu = None

        # Initialize NormApproxMu object.
        self.mu = NormApproxMu(self)

        def func_for_diff(val, index):
            """
            The function that gets passed to the derivatives.
            """
            self[index] = val
            return self.i_logp(index)

        self.func_for_diff = func_for_diff
Example #16
    def test_check_types(self):
        i = ['42', 'c', 'test', '42.0']
        self.assertEqual(check_types(self.func1.params, i), [42, 'c', 'test', 42.0])

        i = ['c', 'c', 'test', '42.0']
        self.assertEqual(check_types(self.func1.params, i), None)

        i = ['42', '42', 'test', '42.0']
        self.assertEqual(check_types(self.func1.params, i), None)

        i = ['42', 'c', 23, '42.0']
        self.assertEqual(check_types(self.func1.params, i), [42, 'c', 23, 42.0])

        i = ['42', 'c', 'test', 'test']
        self.assertEqual(check_types(self.func1.params, i), None)

        i = ['42.0', '1.2,3.4,5.6', '1,2,3,4', '"lorem","ipsum","dolor,sit"']
        exp_out = [42.0,
                   [1.2, 3.4, 5.6],
                   [1, 2, 3, 4],
                   ['lorem', 'ipsum', 'dolor,sit']]

        self.assertEqual(check_types(self.func2.params, i), exp_out)
        self.assertIsNone(check_type('float_array', '1.2a,2.3'))
        self.assertEqual(check_type('float_array', '12, 23.0'), [12.0, 23.0])
        self.assertIsNone(check_type('int_array', '1,2,3a'))
        self.assertIsNone(check_type('char_array', 'a,,b'))
        self.assertIsNone(check_type('string_array', '"asd",asd,"asd"'))
        self.assertEqual(check_type('string_array', r'"lorem\"ipsum","dolor"'),
                         ['lorem"ipsum', 'dolor'])
        self.assertEqual(check_type('string_array',
                                    '"lorem, ipsum","dolor","sit123","amet",'),
                         ['lorem, ipsum', 'dolor', 'sit123', 'amet'])
        self.assertIsNone(check_type('string_array', '"lorem",ipsum,"dolor"'))
        self.assertIsNone(check_type('string_array', '"lor"em,"ipsum"'))
        self.assertEqual(check_type('string_array', '"lorem ipsum"'),
                         ['lorem ipsum'])
        self.assertIsNone(check_type('string_array', 'lorem,ipsum'))
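The *_array assertions above suggest comma-separated parsers that return None as soon as one element is malformed. A sketch of just the 'float_array' branch (the helper name is hypothetical):

def parse_float_array(raw):
    # Coerce each comma-separated piece; any failure rejects the whole input.
    out = []
    for piece in raw.split(','):
        try:
            out.append(float(piece))
        except ValueError:
            return None
    return out

# parse_float_array('12, 23.0')  -> [12.0, 23.0]
# parse_float_array('1.2a,2.3')  -> None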
Example #17
 def delete_razred(self, razred):
     check_type(razred, Razred)
     self.session.delete(razred)
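Several snippets from here on (Examples 17, 20-23, 25-27, and the per-event checks in Example 28) call a two-argument check_type(value, ExpectedClass) that is simply a type assertion. A minimal sketch, assuming it raises on mismatch (the real helper might use assert instead):

def check_type(obj, cls):
    # Bare isinstance guard with a descriptive error.
    if not isinstance(obj, cls):
        raise TypeError('expected {}, got {}'.format(
            cls.__name__, type(obj).__name__))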
Example #18
 def test_check_type__empty(self):
     """with empty value."""
     result = utils.check_type('item_name', None, params)
     self.assertTrue(result is None)
Example #19
 def test_check_type__valid(self):
     """with valid value."""
     self.assertEqual(utils.check_type('loan_type', 'conf', params), 'conf')
     self.assertEqual(utils.check_type('rate_structure', 'Fixed', params), 'Fixed')
     self.assertEqual(utils.check_type('arm_type', '3-1', params), '3/1')
     self.assertEqual(utils.check_type('loan_term', '111', params), 111)
     self.assertEqual(utils.check_type('loan_term', 114, params), 114)
     self.assertEqual(utils.check_type('price', 20.10, params), 20.10)
     self.assertEqual(utils.check_type('price', '20.20', params), 20.20)
     self.assertEqual(utils.check_type('loan_amount', 19.99, params), 19.99)
     self.assertEqual(utils.check_type('loan_amount', '29.99', params), 29.99)
     self.assertEqual(utils.check_type('state', 'VA', params), 'VA')
     self.assertEqual(utils.check_type('state', 'va', params), 'VA')
     self.assertEqual(utils.check_type('state', 'Va', params), 'VA')
     self.assertEqual(utils.check_type('fico', 100, params), 100)
     self.assertEqual(utils.check_type('fico', '200', params), 200)
     self.assertEqual(utils.check_type('minfico', 300, params), 300)
     self.assertEqual(utils.check_type('minfico', '400', params), 400)
     self.assertEqual(utils.check_type('maxfico', 500, params), 500)
     self.assertEqual(utils.check_type('maxfico', '600', params), 600)
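Taken together, the valid, invalid, and empty cases in Examples 5, 18, and 19 imply that check_type(name, value, params) looks up a per-field rule and either coerces or rejects the raw value. The field-by-field behavior those assertions pin down (the params structure itself is not shown in this listing):

# loan_type, rate_structure: keyword strings, passed through
# arm_type:  normalized ('3-1' -> '3/1')
# loan_term, fico, minfico, maxfico: int ('111' -> 111; 'A Week' -> None)
# price, loan_amount: float ('20.20' -> 20.20; 'String' -> None)
# state: two-letter code, upper-cased ('va' -> 'VA'; 'Virginia' -> None)
# any field: None input -> None output (Example 18)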
Example #20
 def build(cls, pred, use_lemma=True):
     check_type(pred, Predicate)
     word = pred.get_representation(use_lemma=use_lemma)
     return cls(word, pred.neg, pred.prt)
Example #21
 def __init__(self, pred_pointer, arguments):
     check_type(pred_pointer, RichTreePointer)
     self.pred_pointer = pred_pointer
     self.arguments = arguments
Example #22
 def add_razred(self, razred):
     check_type(razred, Razred)
     self.session.add(razred)
Example #23
 def get_mention(self, idx):
     assert 0 <= idx < self.num_mentions, \
         'Mention index {} out of range'.format(idx)
     result = self._mentions[idx]
     check_type(result, Mention)
     return result
Example #24
    def fit(self, X, y):
        """Split multi-label y dataset into train and test subsets.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_examples, n_features).

        y : {array-like, sparse matrix} of shape (n_examples, n_labels).

        Returns
        -------
        data partition : two lists of indices representing the resulting data split
        """

        if X is None:
            raise Exception("Please provide a dataset.")
        if y is None:
            raise Exception("Please provide labels for the dataset.")
        assert X.shape[0] == y.shape[0]

        check, X = check_type(X=X, return_list=False)
        if not check:
            temp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
            raise Exception(temp)
        check, y = check_type(X=y, return_list=False)
        if not check:
            temp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
            raise Exception(temp)

        # collect properties from data
        num_examples, num_features = X.shape
        self.num_labels = y.shape[1]

        # check whether data is singly labeled
        if self.num_labels == 1:
            # transform it to multi-label data
            classes = list(set([i[0] if i else 0 for i in y.data]))
            mlb = LabelBinarizer(labels=classes)
            y = mlb.transform(y)
            self.num_labels = y.shape[1]

        if not self.is_fit:
            print('\t>> Training to learn a model...')
            self.__init_variables(num_labels=self.num_labels,
                                  num_features=num_features)

            old_cost = np.inf
            optimal_init = self.__optimal_learning_rate(alpha=self.lr)
            n_epochs = self.num_epochs + 1
            timeref = time.time()

            for epoch in np.arange(start=1, stop=n_epochs):
                desc = '\t   {0:d})- Epoch count ({0:d}/{1:d})...'.format(
                    epoch, n_epochs - 1)
                print(desc)

                # shuffle dataset
                if epoch == 1:
                    example_idx = custom_shuffle(num_examples=num_examples)
                    example_idx = list(example_idx)
                    X = X[example_idx, :]
                    y = y[example_idx, :]
                else:
                    if self.calc_ads:
                        temp = [
                            s for s in range(num_examples)
                            if s not in example_idx
                        ]
                        sub_sampled_size = int(self.ads_percent * len(temp))
                        temp = list(
                            np.random.choice(a=temp,
                                             size=sub_sampled_size,
                                             replace=False))
                        example_idx.extend(temp)

                # usual optimization technique
                learning_rate = 1.0 / (self.lr * (optimal_init + epoch - 1))

                # set epoch time
                start_epoch = time.time()

                self.__parallel_backward(X=X,
                                         y=y,
                                         learning_rate=learning_rate,
                                         examples_idx=example_idx)
                prob = self.__parallel_forward(X=X,
                                               y=y,
                                               example_idx=example_idx)
                H = self.__predictive_uncertainty(prob=prob, y=y[example_idx])
                if self.calc_ads:
                    example_idx = self.__subsample_strategy(
                        H=H, num_examples=num_examples)
                    example_idx = list(example_idx)
                    H = H[example_idx]

                end_epoch = time.time()
                self.is_fit = True

                # Save models parameters based on test frequencies
                if (epoch %
                        self.display_interval) == 0 or epoch == n_epochs - 1:
                    # compute loss
                    new_cost = self.__parallel_cost(X=X[example_idx],
                                                    y=y[example_idx])
                    print('\t\t\t--> New cost: {0:.4f}; Old cost: {1:.4f}'.
                          format(new_cost, old_cost))
                    if old_cost >= new_cost or epoch == n_epochs - 1:
                        old_cost = new_cost
                print('\t\t\t--> Epoch {0} took {1} seconds...'.format(
                    epoch, round(end_epoch - start_epoch, 3)))
            print('\t  --> Training consumed %.2f minutes' % (round(
                (time.time() - timeref) / 60., 3)))
        else:
            print('\t>> Estimating examples scores...')
            example_idx = list(range(num_examples))
            prob = self.__parallel_forward(X=X, y=y, example_idx=example_idx)
            H = self.__predictive_uncertainty(prob=prob, y=y[example_idx])
            if self.calc_ads:
                example_idx = self.__subsample_strategy(
                    H=H, num_examples=num_examples)
                example_idx = list(example_idx)
                H = H[example_idx]

        X = X[example_idx]
        y = y[example_idx]
        example_idx = list(range(len(example_idx)))
        examples_scores = dict(list(zip(example_idx, H)))

        # perform calibrated splitting
        extreme = ExtremeStratification(
            swap_probability=self.swap_probability,
            threshold_proportion=self.threshold_proportion,
            decay=self.decay,
            shuffle=self.shuffle,
            split_size=self.split_size,
            num_epochs=self.num_epochs,
            verbose=False)
        train_list, test_list = extreme.fit(X=X,
                                            y=y,
                                            examples_scores=examples_scores)
        return train_list, test_list
Example #25
 def get_token(self, idx):
     assert 0 <= idx < self.num_tokens, \
         'Token idx {} out of range'.format(idx)
     result = self._tokens[idx]
     check_type(result, Token)
     return result
Example #26
 def add_dep(self, dep):
     check_type(dep, Dependency)
     self._deps.append(dep)
Example #27
 def set_embedding_model(self, embedding_model):
     check_type(embedding_model, Word2VecModel)
     self.logger.info('set embedding model: {}'.format(
         embedding_model.name))
     self.embedding_model = embedding_model
     self.embedding_model_name = embedding_model.name
Example #28
    def compute_coherence_score(self,
                                event_comp_model,
                                use_max_score=True,
                                missing_labels_mapping=None):
        assert len(self.all_rich_predicates) > 0

        if type(event_comp_model) == list:
            assert len(event_comp_model) == self.n_splits
            word2vec_model = event_comp_model[0].word2vec
        else:
            word2vec_model = event_comp_model.word2vec

        self.get_index(word2vec_model)
        context_input_list_mapping = \
            self.get_context_input_list_mapping(word2vec_model)

        exclude_pred_idx_list = []

        pbar = tqdm(total=len(self.all_rich_predicates),
                    desc='Processed',
                    ncols=100)

        for fold_idx in range(self.n_splits):
            for pred_idx in self.train_test_folds[fold_idx][1]:
                pbar.update(1)
                rich_predicate = self.all_rich_predicates[pred_idx]
                if len(rich_predicate.imp_args) == 0:
                    continue

                for imp_arg in rich_predicate.imp_args:
                    imp_arg.reset_coherence_score_list()

                if missing_labels_mapping is not None:
                    missing_labels = missing_labels_mapping[str(
                        self.all_predicates[pred_idx].pred_pointer)]
                else:
                    missing_labels = None

                if missing_labels is not None and len(missing_labels) == 0:
                    continue

                context_input_list = \
                    context_input_list_mapping[rich_predicate.fileid]
                num_context = len(context_input_list)

                if num_context == 0:
                    exclude_pred_idx_list.append(pred_idx)
                    continue

                if type(event_comp_model) == list:
                    pair_composition_network = \
                        event_comp_model[fold_idx].pair_composition_network
                else:
                    pair_composition_network = \
                        event_comp_model.pair_composition_network

                coherence_fn = pair_composition_network.coherence_fn
                use_salience = pair_composition_network.use_salience
                salience_features = pair_composition_network.salience_features

                pred_input_a = np.zeros(num_context, dtype=np.int32)
                subj_input_a = np.zeros(num_context, dtype=np.int32)
                obj_input_a = np.zeros(num_context, dtype=np.int32)
                pobj_input_a = np.zeros(num_context, dtype=np.int32)
                for context_idx, context_input in enumerate(
                        context_input_list):
                    check_type(context_input, IndexedEvent)
                    pred_input_a[context_idx] = context_input.pred_input
                    subj_input_a[context_idx] = context_input.subj_input
                    obj_input_a[context_idx] = context_input.obj_input
                    pobj_input_a[context_idx] = context_input.pobj_input

                eval_input_list_all = \
                    rich_predicate.get_eval_input_list_all(
                        include_salience=True, missing_labels=missing_labels)

                num_candidates = rich_predicate.num_candidates

                coherence_score_list_all = []

                for label, arg_idx, eval_input_list in eval_input_list_all:
                    coherence_score_list = []

                    arg_idx_input = \
                        np.asarray([float(arg_idx)] * num_context).astype(
                            np.float32)

                    for eval_input, arg_salience in eval_input_list:
                        check_type(eval_input, IndexedEvent)
                        pred_input_b = np.asarray([eval_input.pred_input] *
                                                  num_context).astype(np.int32)
                        subj_input_b = np.asarray([eval_input.subj_input] *
                                                  num_context).astype(np.int32)
                        obj_input_b = np.asarray([eval_input.obj_input] *
                                                 num_context).astype(np.int32)
                        pobj_input_b = np.asarray([eval_input.pobj_input] *
                                                  num_context).astype(np.int32)

                        if use_salience:
                            if arg_salience is not None:
                                salience_feature = \
                                    arg_salience.get_feature_list(
                                        salience_features)
                            else:
                                # NOBUG: this should never happen
                                log.warning(
                                    'salience feature = None, filled with 0')
                                salience_feature = [0.0
                                                    ] * len(salience_features)

                            salience_input = np.tile(salience_feature,
                                                     [num_context, 1]).astype(
                                                         np.float32)

                            coherence_output = coherence_fn(
                                pred_input_a, subj_input_a, obj_input_a,
                                pobj_input_a, pred_input_b, subj_input_b,
                                obj_input_b, pobj_input_b, arg_idx_input,
                                salience_input)
                        else:
                            coherence_output = coherence_fn(
                                pred_input_a, subj_input_a, obj_input_a,
                                pobj_input_a, pred_input_b, subj_input_b,
                                obj_input_b, pobj_input_b, arg_idx_input)

                        if use_max_score:
                            coherence_score_list.append(coherence_output.max())
                        else:
                            coherence_score_list.append(coherence_output.sum())

                    assert len(coherence_score_list) == num_candidates + 1
                    coherence_score_list_all.append(
                        (label, coherence_score_list))

                num_label = len(eval_input_list_all)
                coherence_score_matrix = np.ndarray(shape=(num_label,
                                                           num_candidates + 1))
                row_idx = 0
                for label, coherence_score_list in coherence_score_list_all:
                    coherence_score_matrix[row_idx, :] = np.array(
                        coherence_score_list)
                    row_idx += 1

                for column_idx in range(1, num_candidates):
                    max_coherence_score_idx = \
                        coherence_score_matrix[:, column_idx].argmax()
                    for row_idx in range(num_label):
                        if row_idx != max_coherence_score_idx:
                            coherence_score_matrix[row_idx, column_idx] = -1.0
                '''
                max_coherence_score_idx_list = []
                for row_idx in range(num_label):
                    max_coherence_score_idx_list.append(
                        coherence_score_matrix[row_idx, 1:].argmax())
                '''

                label_list = [label for label, _ in coherence_score_list_all]
                for imp_arg in rich_predicate.imp_args:
                    if imp_arg.label in label_list:
                        row_idx = label_list.index(imp_arg.label)
                        imp_arg.set_coherence_score_list(
                            coherence_score_matrix[row_idx, :])
                '''
                for row_idx in range(num_label):
                    assert coherence_score_list_all[row_idx][0] == \
                           rich_predicate.imp_args[row_idx].label
                    rich_predicate.imp_args[row_idx].set_coherence_score_list(
                        coherence_score_matrix[row_idx, :])
                '''
        pbar.close()

        log.info('Predicates with no context events:')
        for pred_idx in exclude_pred_idx_list:
            rich_predicate = self.all_rich_predicates[pred_idx]
            log.info('Predicate #{}: {}, missing_imp_args = {}, '
                     'imp_args = {}'.format(
                         pred_idx, rich_predicate.n_pred,
                         len(rich_predicate.imp_args),
                         len([
                             imp_arg for imp_arg in rich_predicate.imp_args
                             if imp_arg.exist
                         ])))
Example #29
    def fit(self, X, y, examples_scores=None):
        """Split multi-label y dataset into train and test subsets.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features).

        y : {array-like, sparse matrix} of shape (n_samples, n_labels).

        examples_scores : a dictionary of shape (n_samples, 1) that contains
            an uncertainty score for each example.

        Returns
        -------
        data partition : two lists of indices representing the resulting data split
        """

        if X is None:
            raise Exception("Please provide a dataset.")
        if y is None:
            raise Exception("Please provide labels for the dataset.")
        assert X.shape[0] == y.shape[0]

        check, X = check_type(X=X, return_list=False)
        if not check:
            tmp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
            raise Exception(tmp)

        check, y = check_type(X=y, return_list=False)
        if not check:
            tmp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
            raise Exception(tmp)

        num_examples, num_labels = y.shape

        # check whether data is singly labeled
        if num_labels == 1:
            # transform it to multi-label data
            classes = list(set([i[0] if i else 0 for i in y.data]))
            mlb = LabelBinarizer(labels=classes)
            y = mlb.transform(y)

        if self.shuffle:
            sample_idx = custom_shuffle(num_examples=num_examples)
            X = X[sample_idx, :]
            y = y[sample_idx, :]

        # Keep track of how many instances have been swapped to train or test
        swap_counter = {'to_train': 0, 'to_test': 0}

        # 1. Create instances_dict to keep track of instance information:
        instances_dict = self.__create_instances_dict(X, y)

        # 2 Get average number of labels per instance
        labels_per_instance = [
            len(instance_dict['labels'])
            for idx, instance_dict in instances_dict.items()
        ]
        average_labels_per_instance = sum(labels_per_instance) / len(
            labels_per_instance)

        # 3. Create labels_dict to keep track of label information:
        labels_dict = self.__create_labels_dict(instances_dict)

        # 4. Calculate the label score for each label in labels_dict
        # Positive score if too much of the label is in the test set
        # Negative score if too much of the label is in the train set
        self.__score_labels(labels_dict, average_labels_per_instance)

        # 5. Calculate the instance score for each instance in instances_dict
        # A high score means the instance is a good candidate for swapping
        self.__score_instances(instances_dict,
                               labels_dict,
                               examples_scores=examples_scores)

        # 6. Calculate the total score
        # The higher the score, the more 'imbalanced' the distribution of labels between train and test sets
        total_score = self.__calculate_total_score(instances_dict)
        desc = '\t>> Perform splitting (extreme)...'
        print(desc)
        print('\t\t--> Starting score: {0}'.format(round(total_score)))

        # Main loop to create stratified train-test split
        for epoch in range(self.num_epochs):
            # To keep track of how long each iteration takes

            # 1. Calculate the threshold score for swapping
            threshold_score = self.__calculte_threshold_score(
                instances_dict=instances_dict,
                average_labels_per_instance=average_labels_per_instance,
                epoch=epoch)

            # 2. Swap the instances with instance_score that is greater than the threshold score
            # Probability of swapping an instance is swap_probability
            self.__swap_instances(
                instances_dict=instances_dict,
                threshold_score=threshold_score,
                swap_counter=swap_counter,
                average_labels_per_instance=average_labels_per_instance,
                epoch=epoch)

            # 3. Recreate labels_dict with updated train-test split
            labels_dict = self.__create_labels_dict(
                instances_dict=instances_dict)

            # 4. Recalculate the label score for each label in labels_dict
            self.__score_labels(
                labels_dict=labels_dict,
                average_labels_per_instance=average_labels_per_instance)

            # 5. Recalculate the instance score for each instance in instances_dict
            self.__score_instances(instances_dict=instances_dict,
                                   labels_dict=labels_dict,
                                   examples_scores=examples_scores)

            # 6. Recalculate the total score
            total_score = self.__calculate_total_score(
                instances_dict=instances_dict)
            desc = '\t\t--> Splitting progress: {0:.2f}%; score: {1:.2f}'.format(
                ((epoch + 1) / self.num_epochs * 100), total_score)
            if epoch + 1 == self.num_epochs:
                print(desc)
            else:
                print(desc, end="\r")

        # Prepare train_list, test_list
        train_list = []
        test_list = []
        for idx, instance_dict in instances_dict.items():
            if instance_dict['train_or_test'] == 'train':
                train_list.append(idx)
            elif instance_dict['train_or_test'] == 'test':
                test_list.append(idx)
            else:
                print(f'Something went wrong: {idx}')
        return sorted(train_list), sorted(test_list)
Example #30
    def from_doc(cls, doc):
        check_type(doc, document.Document)
        script = cls(doc.doc_name)

        # add all entities from document
        for coref in doc.corefs:
            entity = Entity.from_coref(coref)
            script.add_entity(entity)

        if not script.has_entities():
            log.warning('script {} has no entities'.format(doc.doc_name))

        # add all events from document
        for sent in doc.sents:
            # iterate through all tokens
            for pred_token in sent.tokens:
                if pred_token.pos.startswith('VB'):
                    # exclude "be" verbs
                    if pred_token.lemma == 'be':
                        continue
                    # exclude modifying verbs
                    if sent.dep_graph.lookup_label('head',
                                                   pred_token.token_idx,
                                                   'xcomp'):
                        continue
                    # TODO: exclude verbs in quotes
                    # NOBUG: do not exclude stop verbs now
                    # both negation and particle need to be counted in
                    # detecting a stop verb, we will remove stop verbs
                    # in constructing RichScript

                    # find whether the verb has negation
                    neg = False
                    if sent.dep_graph.lookup_label('head',
                                                   pred_token.token_idx,
                                                   'neg'):
                        neg = True

                    # find whether the verb has particle
                    prt = ''
                    prt_tokens = sent.dep_graph.lookup_label(
                        'head', pred_token.token_idx, 'compound:prt')
                    if prt_tokens:
                        if len(prt_tokens) > 1:
                            log.warning(
                                'Predicate {} contains {} particles'.format(
                                    pred_token.pretty_print(),
                                    len(prt_tokens)))
                        prt = prt_tokens[0].lemma

                    subj_list = sent.get_subj_list(pred_token.token_idx)
                    dobj_list = sent.get_dobj_list(pred_token.token_idx)
                    pobj_list = sent.get_pobj_list(pred_token.token_idx)

                    if (not subj_list) and (not dobj_list):
                        continue
                    if not subj_list:
                        subj_list.append(None)
                    if not dobj_list:
                        dobj_list.append(None)

                    for arg_tuple in product(subj_list, dobj_list):
                        event = Event.from_tokens(pred_token,
                                                  arg_tuple[0],
                                                  arg_tuple[1],
                                                  pobj_list,
                                                  neg=neg,
                                                  prt=prt)
                        script.add_event(event)

        if not script.has_events():
            log.warning('script {} has no events'.format(doc.doc_name))

        return script
Example #31
 def get_index(self, model, include_type=True, use_unk=True):
     check_type(model, Word2VecModel)
     self.core_wv = \
         self.core.get_index(
             model, self.arg_type if include_type else '', use_unk=use_unk)