def apply_action(self):
    tgt = self.target
    g = Game.getgame()
    n = min(len([p for p in g.players if not p.dead]), 5)
    cards = g.deck.getcards(n)

    assert cards == g.deck.getcards(n)

    tgt.reveal(cards)

    rst = tgt.user_input('ran_prophet', cards, timeout=40)
    if not rst:
        return False

    try:
        check_type([[int, Ellipsis]] * 2, rst)
        upcards = rst[0]
        downcards = rst[1]
        check(sorted(upcards + downcards) == range(n))
    except CheckFailed:
        try:
            print 'RAN PROPHET:', upcards, downcards
        except:
            pass
        return False  # was `return act`: `act` is undefined in this scope

    deck = g.deck.cards
    for i, j in enumerate(downcards):
        deck[i] = cards[j]
    deck.rotate(-len(downcards))
    for i, j in enumerate(upcards):
        deck[i] = cards[j]

    cl = [cards[i] for i in upcards]
    assert g.deck.getcards(len(upcards)) == cl

    return True
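# --- Illustrative sketch, not the project's real `utils` module ---
# The game-logic snippets in this file lean on three helpers: CheckFailed,
# check(), and the pattern-based check_type(). (Other snippets below use
# different, unrelated check_type APIs.) Their behavior is inferable from
# usage: check() raises CheckFailed on a falsy value, and check_type()
# validates a value against a nested pattern where a trailing Ellipsis means
# "any number of elements matching the preceding pattern". A minimal
# compatible implementation could look like this:

class CheckFailed(Exception):
    pass


def check(cond):
    # Raise instead of returning False so callers can bail out of deeply
    # nested validation with a single except clause.
    if not cond:
        raise CheckFailed(cond)


def check_type(pattern, value):
    if isinstance(pattern, list):
        check(isinstance(value, (list, tuple)))
        if len(pattern) == 2 and pattern[1] is Ellipsis:
            # Variable-length list: every element matches pattern[0],
            # e.g. [int, Ellipsis] accepts [1, 2, 3] or [].
            for v in value:
                check_type(pattern[0], v)
        else:
            # Fixed-length list: element-wise match,
            # e.g. [[int, Ellipsis]] * 3 + [dict].
            check(len(value) == len(pattern))
            for p, v in zip(pattern, value):
                check_type(p, v)
    else:
        check(isinstance(value, pattern))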
def user_choose_cards(initiator, actor, categories, timeout=None, trans=None):
    check_type([str, Ellipsis], categories)

    _, rst = ask_for_action(initiator, [actor], categories, (), timeout=timeout, trans=trans)
    if not rst:
        return None

    return rst[0]  # cards
def user_choose_cards(initiator, actor, categories):
    check_type([str, Ellipsis], categories)

    _, rst = ask_for_action(initiator, [actor], categories, [])
    if not rst:
        return None

    return rst[0]  # cards
def parse(self, data):
    n = self.num
    try:
        check(data)
        check_type([int] * n, data)
        check(set(data) == set(range(n)))
        return data
    except CheckFailed:
        return range(n)
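# Hypothetical usage of the parse() above (the enclosing inputlet class and
# its `num` attribute are assumptions): a well-formed permutation of
# range(n) passes through unchanged, and anything else degrades to the
# identity ordering.
#
#   ilet.num = 3
#   ilet.parse([2, 0, 1])  # -> [2, 0, 1]
#   ilet.parse([2, 2, 1])  # -> [0, 1, 2]  (duplicate index rejected)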
def test_check_type__invalid(self):
    """with invalid value."""
    self.assertIsNone(utils.check_type('loan_type', 11, params))
    self.assertIsNone(utils.check_type('rate_structure', 11, params))
    self.assertIsNone(utils.check_type('arm_type', 'String', params))
    self.assertIsNone(utils.check_type('loan_term', 'A Week', params))
    self.assertIsNone(utils.check_type('price', 'String', params))
    self.assertIsNone(utils.check_type('loan_amount', 'String', params))
    self.assertIsNone(utils.check_type('state', 'Virginia', params))
    self.assertIsNone(utils.check_type('fico', 'ABC', params))
    self.assertIsNone(utils.check_type('minfico', 'ABC', params))
    self.assertIsNone(utils.check_type('maxfico', 'ABC', params))
def parse(self, i):
    m = self.mapping
    actor = self.actor
    try:
        check(actor in m)
        check_type(int, i)
        check(0 <= i < len(m[actor]))
        choice = m[actor][i]
        check(not choice.chosen)
        return choice
    except CheckFailed:
        return None
def parse(self, data):
    # data = [
    #     [skill_index1, ...],
    #     [card_syncid1, ...],
    #     [player_id1, ...],
    #     {'action_param1': 'AttackCard'},
    # ]
    actor = self.actor
    g = Game.getgame()
    categories = self.categories
    categories = [getattr(actor, i) for i in categories] if categories else None
    candidates = self.candidates

    skills = []
    cards = []
    players = []
    params = {}
    _ = Ellipsis

    try:
        check_type([[int, _]] * 3 + [dict], data)

        sid_list, cid_list, pid_list, params = data

        if candidates:
            check(candidates)
            pl = [g.player_fromid(i) for i in pid_list]
            check(all([p in candidates for p in pl]))
            players = pl

        if categories:
            cards = g.deck.lookupcards(cid_list)
            check(len(cards) == len(cid_list))  # Invalid id

            cs = set(cards)
            check(len(cs) == len(cid_list))  # repeated ids

            if sid_list:
                assert actor.cards in categories or actor.showncards in categories
                check(all(cat.owner is actor for cat in categories))
                check(all(c.resides_in.owner is actor for c in cards))  # Cards belong to actor?

                for skill_id in sid_list:
                    check(0 <= skill_id < len(actor.skills))

                skills = [actor.skills[i] for i in sid_list]
            else:
                check(all(c.resides_in in categories for c in cards))  # Cards in desired categories?

        return [skills, cards, players, params]

    except CheckFailed:
        return None
def user_choose_players_logic(input, act, target, candidates):
    try:
        g = Game.getgame()
        check_type([[int, Ellipsis]] * 3, input)
        _, _, pids = input
        check(pids)
        pl = [g.player_fromid(i) for i in pids]
        from game import AbstractPlayer
        check(all(p in candidates for p in pl))
        pl, valid = act.choose_player_target(pl)
        check(valid)
        return pl
    except CheckFailed:
        return None
def parse(self, data):
    try:
        check_type([[int, Ellipsis]] * 2, data)
        upcards = data[0]
        downcards = data[1]
        check(sorted(upcards + downcards) == range(len(self.cards)))
    except CheckFailed:
        return [self.cards, []]

    cards = self.cards
    upcards = [cards[i] for i in upcards]
    downcards = [cards[i] for i in downcards]
    return [upcards, downcards]
def apply_action(self):
    g = Game.getgame()
    target = self.target
    if target.dead:
        return False

    shuffle_here()

    try:
        while not target.dead:
            g.emit_event('action_stage_action', target)
            input = target.user_input('action_stage_usecard')
            check_type([[int, Ellipsis]] * 3, input)

            skill_ids, card_ids, target_list = input

            if card_ids:
                cards = g.deck.lookupcards(card_ids)
                check(cards)
                check(all(c.resides_in.owner is target for c in cards))
            else:
                cards = []

            target_list = [g.player_fromid(i) for i in target_list]
            from game import AbstractPlayer
            check(all(isinstance(p, AbstractPlayer) for p in target_list))

            # skill selected
            if skill_ids:
                card = skill_wrap(target, skill_ids, cards)
                check(card)
            else:
                check(len(cards) == 1)
                g.players.exclude(target).reveal(cards)
                card = cards[0]
                from .cards import HiddenCard
                assert not card.is_card(HiddenCard)
                check(card.resides_in in (target.cards, target.showncards))

            if not g.process_action(ActionStageLaunchCard(target, target_list, card)):
                # invalid input
                log.debug('ActionStage: LaunchCard failed.')
                break

            shuffle_here()

    except CheckFailed:
        pass

    return True
def parse(self, data):
    _ = Ellipsis
    try:
        check_type([[int, _]] * 2, data)
        putback = data[0]
        acquire = data[1]
        check(sorted(putback + acquire) == range(len(self.cards)))

        cards = self.cards
        putback = [cards[i] for i in putback]
        acquire = [cards[i] for i in acquire]

    except CheckFailed:
        return [self.cards, []]

    return [putback, acquire]
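# Hypothetical usage of the parse() above (the surrounding inputlet class is
# assumed): the two index lists must partition self.cards into cards put back
# on the deck and cards acquired; malformed input degrades to "put everything
# back, acquire nothing".
#
#   ilet.cards = [c0, c1, c2]
#   ilet.parse([[0, 2], [1]])  # -> [[c0, c2], [c1]]
#   ilet.parse([[0], [1]])     # -> [[c0, c1, c2], []]  (not a full partition)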
def tokens(self):
    """
    Breaks a call number into tokens, which are its atomic parts.
    A token will contain either letters or a number but not both.

    :return: a list of Token objects
    """
    tokens_list = []
    new_token = self.value[0]
    for i in range(len(self.value) - 1):
        c = self.value[i]
        d = self.value[i + 1]
        # d is part of token
        if check_type(c) == check_type(d):
            new_token += d
        # d is the beginning of a new token
        else:
            # Prevents adding a space as a token
            if check_type(new_token) != 2:
                tokens_list.append(Token(new_token))
            if (check_type(c) == 0) and (check_type(d) == 1) and (i > 1):
                # In call number sorting rules, the first number should be
                # treated as a whole number and all following numbers should
                # be treated as decimals. For example, M101 K78 would be
                # M, 101, K, 0.78
                d = '0.' + d
            new_token = d
    tokens_list.append(Token(new_token))
    return tokens_list
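# Hypothetical usage of tokens() (the enclosing CallNumber class and the
# Token wrapper are assumptions; here check_type() appears to map letters to
# 0, digits to 1, and spaces to 2). Per the decimal rule above, only the
# first number in the call number sorts as a whole number:
#
#   CallNumber('M101 K78').tokens()
#   # -> Token('M'), Token('101'), Token('K'), Token('0.78')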
def user_choose_cards_logic(input, act, target, categories=None):
    from utils import check, CheckFailed
    g = Game.getgame()

    try:
        check_type([[int, Ellipsis]] * 3, input)

        sid_list, cid_list, pid_list = input

        cards = g.deck.lookupcards(cid_list)
        check(len(cards) == len(cid_list))  # Invalid id

        cs = set(cards)
        check(len(cs) == len(cid_list))  # repeated ids

        if not categories:
            categories = [target.cards, target.showncards]

        if sid_list:
            check(all(cat.owner is target for cat in categories))
            check(all(c.resides_in.owner is target for c in cards))  # Cards belong to target?

            # associated_cards will be revealed here
            c = skill_wrap(target, sid_list, cards)
            check(c)
            cards = [c]
        else:
            check(all(c.resides_in in categories for c in cards))  # Cards in desired categories?

        if not getattr(act, 'no_reveal', False):
            g.players.exclude(target).reveal(cards)

        check(act.cond(cards))

        log.debug('user_choose_cards: %s %s %s', repr(act), target.__class__.__name__, repr(cards))
        return cards

    except CheckFailed:
        log.debug('user_choose_cards FAILED: %s %s', repr(act), target.__class__.__name__)
        return None
def fit(self, X, y, split_type: str = "extreme"):
    """Split multi-label y dataset into train and test subsets.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features).

    y : {array-like, sparse matrix} of shape (n_samples, n_labels).

    split_type : Splitting type of {naive, extreme, iterative}.

    Returns
    -------
    data partition : two lists of indices representing the resulted data split
    """
    if X is None:
        raise Exception("Please provide a dataset.")
    if y is None:
        raise Exception("Please provide labels for the dataset.")
    assert X.shape[0] == y.shape[0]

    check, X = check_type(X=X, return_list=False)
    if not check:
        tmp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
        raise Exception(tmp)

    check, y = check_type(X=y, return_list=False)
    if not check:
        tmp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
        raise Exception(tmp)

    num_examples, num_labels = y.shape

    # check whether data is singly labeled
    if num_labels == 1:
        # transform it to multi-label data
        classes = list(set([i[0] if i else 0 for i in y.data]))
        mlb = LabelBinarizer(labels=classes)
        y = mlb.transform(y)

    if not self.is_fit:
        desc = '\t>> Building Graph...'
        print(desc)

        # Construct graph
        if self.shuffle:
            sample_idx = custom_shuffle(num_examples=num_examples)
            X = X[sample_idx, :]
            y = y[sample_idx, :]

        P = lil_matrix(cosine_similarity(X=X))
        P = normalize_laplacian(A=P, sigma=self.sigma, return_adj=True, norm_adj=True)
        P = triu(P)
        D = y
        for epoch in range(self.num_epochs):
            D = self.alpha * P * D + (1 - self.alpha) * y
        idx = np.random.choice(a=list(range(num_examples)), size=self.num_subsamples, replace=True)
        self.community_labels = self.__graph_construction(D[idx])
        mlb = LabelBinarizer(labels=list(range(self.num_communities)))
        y = mlb.reassign_labels(y, mapping_labels=self.community_labels)
        self.is_fit = True

    # perform splitting
    if split_type == "extreme":
        st = ExtremeStratification(swap_probability=self.swap_probability,
                                   threshold_proportion=self.threshold_proportion,
                                   decay=self.decay,
                                   shuffle=self.shuffle,
                                   split_size=self.split_size,
                                   num_epochs=self.num_epochs,
                                   verbose=False)
        train_list, test_list = st.fit(X=X, y=y)
    elif split_type == "iterative":
        st = IterativeStratification(shuffle=self.shuffle,
                                     split_size=self.split_size,
                                     verbose=False)
        train_list, test_list = st.fit(y=y)
    else:
        st = NaiveStratification(shuffle=self.shuffle,
                                 split_size=self.split_size,
                                 batch_size=self.batch_size,
                                 num_jobs=self.num_jobs,
                                 verbose=False)
        train_list, test_list = st.fit(y=y)
    return train_list, test_list
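# Hypothetical usage of the fit() above (the splitter class name and its
# constructor arguments are assumptions; split_type selects among the naive,
# extreme, and iterative stratification backends shown in the body):
#
#   splitter = CommunitySplitter(split_size=0.8, shuffle=True, num_epochs=5)
#   train_idx, test_idx = splitter.fit(X=X, y=y, split_type="iterative")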
def __init__(self, input=None, eps=.001, diff_order=5, verbose=None):
    if not scipy_imported:
        raise ImportError, 'Scipy must be installed to use NormApprox and MAP.'

    Model.__init__(self, input, verbose=verbose)

    # Allocate memory for internal traces and get stochastic slices
    self._slices = {}
    self.len = 0
    self.stochastic_len = {}
    self.fitted = False

    self.stochastic_list = list(self.stochastics)
    self.N_stochastics = len(self.stochastic_list)
    self.stochastic_indices = []
    self.stochastic_types = []
    self.stochastic_type_dict = {}

    for i in xrange(len(self.stochastic_list)):
        stochastic = self.stochastic_list[i]

        # Check types of all stochastics.
        type_now = check_type(stochastic)[0]
        self.stochastic_type_dict[stochastic] = type_now

        if not type_now is float:
            print "Warning: Stochastic " + stochastic.__name__ + "'s value is neither numerical nor array with " + \
                "floating-point dtype. Recommend fitting method fmin (default)."

        # Inspect shapes of all stochastics and create stochastic slices.
        if isinstance(stochastic.value, ndarray):
            self.stochastic_len[stochastic] = len(ravel(stochastic.value))
        else:
            self.stochastic_len[stochastic] = 1

        self._slices[stochastic] = slice(self.len, self.len + self.stochastic_len[stochastic])
        self.len += self.stochastic_len[stochastic]

        # Record indices that correspond to each stochastic.
        for j in range(len(ravel(stochastic.value))):
            self.stochastic_indices.append((stochastic, j))
            self.stochastic_types.append(type_now)

    self.data_len = 0
    for datum in self.observed_stochastics:
        self.data_len += len(ravel(datum.value))

    # Unpack step
    self.eps = zeros(self.len, dtype=float)
    if isinstance(eps, dict):
        for stochastic in self.stochastics:
            self.eps[self._slices[stochastic]] = eps[stochastic]
    else:
        self.eps[:] = eps

    self.diff_order = diff_order

    self._len_range = arange(self.len)

    # Initialize gradient and Hessian matrix.
    self.grad = zeros(self.len, dtype=float)
    self.hess = asmatrix(zeros((self.len, self.len), dtype=float))

    self._mu = None

    # Initialize NormApproxMu object.
    self.mu = NormApproxMu(self)

    def func_for_diff(val, index):
        """
        The function that gets passed to the derivatives.
        """
        self[index] = val
        return self.i_logp(index)

    self.func_for_diff = func_for_diff
def test_check_types(self):
    i = ['42', 'c', 'test', '42.0']
    self.assertEqual(check_types(self.func1.params, i), [42, 'c', 'test', 42.0])
    i = ['c', 'c', 'test', '42.0']
    self.assertEqual(check_types(self.func1.params, i), None)
    i = ['42', '42', 'test', '42.0']
    self.assertEqual(check_types(self.func1.params, i), None)
    i = ['42', 'c', 23, '42.0']
    self.assertEqual(check_types(self.func1.params, i), [42, 'c', 23, 42.0])
    i = ['42', 'c', 'test', 'test']
    self.assertEqual(check_types(self.func1.params, i), None)

    i = ['42.0', '1.2,3.4,5.6', '1,2,3,4', '"lorem","ipsum","dolor,sit"']
    exp_out = [42.0, [1.2, 3.4, 5.6], [1, 2, 3, 4], ['lorem', 'ipsum', 'dolor,sit']]
    self.assertEqual(check_types(self.func2.params, i), exp_out)

    self.assertIsNone(check_type('float_array', '1.2a,2.3'))
    self.assertEqual(check_type('float_array', '12, 23.0'), [12.0, 23.0])
    self.assertIsNone(check_type('int_array', '1,2,3a'))
    self.assertIsNone(check_type('char_array', 'a,,b'))
    self.assertIsNone(check_type('string_array', '"asd",asd,"asd"'))
    self.assertEqual(check_type('string_array', r'"lorem\"ipsum","dolor"'),
                     ['lorem"ipsum', 'dolor'])
    self.assertEqual(check_type('string_array', '"lorem, ipsum","dolor","sit123","amet",'),
                     ['lorem, ipsum', 'dolor', 'sit123', 'amet'])
    self.assertIsNone(check_type('string_array', '"lorem",ipsum,"dolor"'))
    self.assertIsNone(check_type('string_array', '"lor"em,"ipsum"'))
    self.assertEqual(check_type('string_array', '"lorem ipsum"'), ['lorem ipsum'])
    self.assertIsNone(check_type('string_array', 'lorem,ipsum'))
def delete_razred(self, razred):
    check_type(razred, Razred)
    self.session.delete(razred)
def test_check_type__empty(self):
    """with empty value."""
    result = utils.check_type('item_name', None, params)
    self.assertIsNone(result)
def test_check_type__valid(self):
    """with valid value."""
    self.assertEqual(utils.check_type('loan_type', 'conf', params), 'conf')
    self.assertEqual(utils.check_type('rate_structure', 'Fixed', params), 'Fixed')
    self.assertEqual(utils.check_type('arm_type', '3-1', params), '3/1')
    self.assertEqual(utils.check_type('loan_term', '111', params), 111)
    self.assertEqual(utils.check_type('loan_term', 114, params), 114)
    self.assertEqual(utils.check_type('price', 20.10, params), 20.10)
    self.assertEqual(utils.check_type('price', '20.20', params), 20.20)
    self.assertEqual(utils.check_type('loan_amount', 19.99, params), 19.99)
    self.assertEqual(utils.check_type('loan_amount', '29.99', params), 29.99)
    self.assertEqual(utils.check_type('state', 'VA', params), 'VA')
    self.assertEqual(utils.check_type('state', 'va', params), 'VA')
    self.assertEqual(utils.check_type('state', 'Va', params), 'VA')
    self.assertEqual(utils.check_type('fico', 100, params), 100)
    self.assertEqual(utils.check_type('fico', '200', params), 200)
    self.assertEqual(utils.check_type('minfico', 300, params), 300)
    self.assertEqual(utils.check_type('minfico', '400', params), 400)
    self.assertEqual(utils.check_type('maxfico', 500, params), 500)
    self.assertEqual(utils.check_type('maxfico', '600', params), 600)
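# --- Illustrative sketch, not the tested utils module ---
# The behavior pinned by the three tests above is: coerce `value` to the type
# registered for the named parameter, normalize a few fields (state is
# uppercased, arm_type '3-1' becomes '3/1'), and return None when coercion or
# validation fails. The `params[name].choices` lookup below is an assumption
# about how allowed values are stored; only the input/output pairs come from
# the tests.

def check_type(name, value, params):
    if value is None:
        return None
    try:
        if name in ('loan_term', 'fico', 'minfico', 'maxfico'):
            return int(value)
        if name in ('price', 'loan_amount'):
            return float(value)
        # enum-like fields: normalize, then validate against allowed choices
        if name == 'state':
            value = str(value).upper()
        elif name == 'arm_type':
            value = str(value).replace('-', '/')
        return value if value in params[name].choices else None
    except (TypeError, ValueError):
        return None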
def build(cls, pred, use_lemma=True):
    check_type(pred, Predicate)
    word = pred.get_representation(use_lemma=use_lemma)
    return cls(word, pred.neg, pred.prt)
def __init__(self, pred_pointer, arguments):
    check_type(pred_pointer, RichTreePointer)
    self.pred_pointer = pred_pointer
    self.arguments = arguments
def add_razred(self, razred):
    check_type(razred, Razred)
    self.session.add(razred)
def get_mention(self, idx):
    assert 0 <= idx < self.num_mentions, \
        'Mention index {} out of range'.format(idx)
    result = self._mentions[idx]
    check_type(result, Mention)
    return result
def fit(self, X, y):
    """Split multi-label y dataset into train and test subsets.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_examples, n_features).

    y : {array-like, sparse matrix} of shape (n_examples, n_labels).

    Returns
    -------
    data partition : two lists of indices representing the resulted data split
    """
    if X is None:
        raise Exception("Please provide a dataset.")
    if y is None:
        raise Exception("Please provide labels for the dataset.")
    assert X.shape[0] == y.shape[0]

    check, X = check_type(X=X, return_list=False)
    if not check:
        temp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
        raise Exception(temp)

    check, y = check_type(X=y, return_list=False)
    if not check:
        temp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
        raise Exception(temp)

    # collect properties from data
    num_examples, num_features = X.shape
    self.num_labels = y.shape[1]

    # check whether data is singly labeled
    if self.num_labels == 1:
        # transform it to multi-label data
        classes = list(set([i[0] if i else 0 for i in y.data]))
        mlb = LabelBinarizer(labels=classes)
        y = mlb.transform(y)
        self.num_labels = y.shape[1]

    if not self.is_fit:
        print('\t>> Training to learn a model...')
        self.__init_variables(num_labels=self.num_labels, num_features=num_features)

        old_cost = np.inf
        optimal_init = self.__optimal_learning_rate(alpha=self.lr)
        n_epochs = self.num_epochs + 1
        timeref = time.time()

        for epoch in np.arange(start=1, stop=n_epochs):
            desc = '\t {0:d})- Epoch count ({0:d}/{1:d})...'.format(epoch, n_epochs - 1)
            print(desc)

            # shuffle dataset
            if epoch == 1:
                example_idx = custom_shuffle(num_examples=num_examples)
                example_idx = list(example_idx)
                X = X[example_idx, :]
                y = y[example_idx, :]
            else:
                if self.calc_ads:
                    temp = [s for s in range(num_examples) if s not in example_idx]
                    sub_sampled_size = int(self.ads_percent * len(temp))
                    temp = list(np.random.choice(a=temp, size=sub_sampled_size, replace=False))
                    example_idx.extend(temp)

            # usual optimization technique
            learning_rate = 1.0 / (self.lr * (optimal_init + epoch - 1))

            # set epoch time
            start_epoch = time.time()

            self.__parallel_backward(X=X, y=y, learning_rate=learning_rate, examples_idx=example_idx)
            prob = self.__parallel_forward(X=X, y=y, example_idx=example_idx)
            H = self.__predictive_uncertainty(prob=prob, y=y[example_idx])
            if self.calc_ads:
                example_idx = self.__subsample_strategy(H=H, num_examples=num_examples)
                example_idx = list(example_idx)
                H = H[example_idx]

            end_epoch = time.time()
            self.is_fit = True

            # Save models parameters based on test frequencies
            if (epoch % self.display_interval) == 0 or epoch == n_epochs - 1:
                # compute loss
                new_cost = self.__parallel_cost(X=X[example_idx], y=y[example_idx])
                print('\t\t\t--> New cost: {0:.4f}; Old cost: {1:.4f}'.format(new_cost, old_cost))
                if old_cost >= new_cost or epoch == n_epochs - 1:
                    old_cost = new_cost

            print('\t\t\t--> Epoch {0} took {1} seconds...'.format(
                epoch, round(end_epoch - start_epoch, 3)))

        print('\t --> Training consumed %.2f minutes' % (round((time.time() - timeref) / 60., 3)))
    else:
        print('\t>> Estimating examples scores...')
        example_idx = list(range(num_examples))
        prob = self.__parallel_forward(X=X, y=y, example_idx=example_idx)
        H = self.__predictive_uncertainty(prob=prob, y=y[example_idx])
        if self.calc_ads:
            example_idx = self.__subsample_strategy(H=H, num_examples=num_examples)
            example_idx = list(example_idx)
            H = H[example_idx]
            X = X[example_idx]
            y = y[example_idx]
            example_idx = list(range(len(example_idx)))

    examples_scores = dict(list(zip(example_idx, H)))

    # perform calibrated splitting
    extreme = ExtremeStratification(swap_probability=self.swap_probability,
                                    threshold_proportion=self.threshold_proportion,
                                    decay=self.decay,
                                    shuffle=self.shuffle,
                                    split_size=self.split_size,
                                    num_epochs=self.num_epochs,
                                    verbose=False)
    train_list, test_list = extreme.fit(X=X, y=y, examples_scores=examples_scores)
    return train_list, test_list
def get_token(self, idx):
    assert 0 <= idx < self.num_tokens, \
        'Token idx {} out of range'.format(idx)
    result = self._tokens[idx]
    check_type(result, Token)
    return result
def add_dep(self, dep):
    check_type(dep, Dependency)
    self._deps.append(dep)
def set_embedding_model(self, embedding_model):
    check_type(embedding_model, Word2VecModel)
    self.logger.info('set embedding model: {}'.format(embedding_model.name))
    self.embedding_model = embedding_model
    self.embedding_model_name = embedding_model.name
def compute_coherence_score(self, event_comp_model, use_max_score=True,
                            missing_labels_mapping=None):
    assert len(self.all_rich_predicates) > 0

    if type(event_comp_model) == list:
        assert len(event_comp_model) == self.n_splits
        word2vec_model = event_comp_model[0].word2vec
    else:
        word2vec_model = event_comp_model.word2vec

    self.get_index(word2vec_model)

    context_input_list_mapping = \
        self.get_context_input_list_mapping(word2vec_model)

    exclude_pred_idx_list = []

    pbar = tqdm(total=len(self.all_rich_predicates), desc='Processed', ncols=100)

    for fold_idx in range(self.n_splits):
        for pred_idx in self.train_test_folds[fold_idx][1]:
            pbar.update(1)

            rich_predicate = self.all_rich_predicates[pred_idx]
            if len(rich_predicate.imp_args) == 0:
                continue

            for imp_arg in rich_predicate.imp_args:
                imp_arg.reset_coherence_score_list()

            if missing_labels_mapping is not None:
                missing_labels = missing_labels_mapping[str(
                    self.all_predicates[pred_idx].pred_pointer)]
            else:
                missing_labels = None

            if missing_labels is not None and len(missing_labels) == 0:
                continue

            context_input_list = \
                context_input_list_mapping[rich_predicate.fileid]
            num_context = len(context_input_list)

            if num_context == 0:
                exclude_pred_idx_list.append(pred_idx)
                continue

            if type(event_comp_model) == list:
                pair_composition_network = \
                    event_comp_model[fold_idx].pair_composition_network
            else:
                pair_composition_network = \
                    event_comp_model.pair_composition_network

            coherence_fn = pair_composition_network.coherence_fn
            use_salience = pair_composition_network.use_salience
            salience_features = pair_composition_network.salience_features

            pred_input_a = np.zeros(num_context, dtype=np.int32)
            subj_input_a = np.zeros(num_context, dtype=np.int32)
            obj_input_a = np.zeros(num_context, dtype=np.int32)
            pobj_input_a = np.zeros(num_context, dtype=np.int32)
            for context_idx, context_input in enumerate(context_input_list):
                check_type(context_input, IndexedEvent)
                pred_input_a[context_idx] = context_input.pred_input
                subj_input_a[context_idx] = context_input.subj_input
                obj_input_a[context_idx] = context_input.obj_input
                pobj_input_a[context_idx] = context_input.pobj_input

            eval_input_list_all = \
                rich_predicate.get_eval_input_list_all(
                    include_salience=True, missing_labels=missing_labels)

            num_candidates = rich_predicate.num_candidates

            coherence_score_list_all = []

            for label, arg_idx, eval_input_list in eval_input_list_all:
                coherence_score_list = []

                arg_idx_input = \
                    np.asarray([float(arg_idx)] * num_context).astype(np.float32)

                for eval_input, arg_salience in eval_input_list:
                    check_type(eval_input, IndexedEvent)

                    pred_input_b = np.asarray(
                        [eval_input.pred_input] * num_context).astype(np.int32)
                    subj_input_b = np.asarray(
                        [eval_input.subj_input] * num_context).astype(np.int32)
                    obj_input_b = np.asarray(
                        [eval_input.obj_input] * num_context).astype(np.int32)
                    pobj_input_b = np.asarray(
                        [eval_input.pobj_input] * num_context).astype(np.int32)

                    if use_salience:
                        if arg_salience is not None:
                            salience_feature = \
                                arg_salience.get_feature_list(salience_features)
                        else:
                            # NOBUG: this should never happen
                            log.warning('salience feature = None, filled with 0')
                            salience_feature = [0.0] * len(salience_features)

                        salience_input = np.tile(
                            salience_feature, [num_context, 1]).astype(np.float32)

                        coherence_output = coherence_fn(
                            pred_input_a, subj_input_a, obj_input_a, pobj_input_a,
                            pred_input_b, subj_input_b, obj_input_b, pobj_input_b,
                            arg_idx_input, salience_input)
                    else:
                        coherence_output = coherence_fn(
                            pred_input_a, subj_input_a, obj_input_a, pobj_input_a,
                            pred_input_b, subj_input_b, obj_input_b, pobj_input_b,
                            arg_idx_input)

                    if use_max_score:
                        coherence_score_list.append(coherence_output.max())
                    else:
                        coherence_score_list.append(coherence_output.sum())

                assert len(coherence_score_list) == num_candidates + 1
                coherence_score_list_all.append((label, coherence_score_list))

            num_label = len(eval_input_list_all)
            coherence_score_matrix = np.ndarray(shape=(num_label, num_candidates + 1))
            row_idx = 0
            for label, coherence_score_list in coherence_score_list_all:
                coherence_score_matrix[row_idx, :] = np.array(coherence_score_list)
                row_idx += 1

            for column_idx in range(1, num_candidates):
                max_coherence_score_idx = \
                    coherence_score_matrix[:, column_idx].argmax()
                for row_idx in range(num_label):
                    if row_idx != max_coherence_score_idx:
                        coherence_score_matrix[row_idx, column_idx] = -1.0

            '''
            max_coherence_score_idx_list = []
            for row_idx in range(num_label):
                max_coherence_score_idx_list.append(
                    coherence_score_matrix[row_idx, 1:].argmax())
            '''

            label_list = [label for label, _ in coherence_score_list_all]
            for imp_arg in rich_predicate.imp_args:
                if imp_arg.label in label_list:
                    row_idx = label_list.index(imp_arg.label)
                    imp_arg.set_coherence_score_list(
                        coherence_score_matrix[row_idx, :])

            '''
            for row_idx in range(num_label):
                assert coherence_score_list_all[row_idx][0] == \
                    rich_predicate.imp_args[row_idx].label
                rich_predicate.imp_args[row_idx].set_coherence_score_list(
                    coherence_score_matrix[row_idx, :])
            '''

    pbar.close()

    log.info('Predicates with no context events:')
    for pred_idx in exclude_pred_idx_list:
        rich_predicate = self.all_rich_predicates[pred_idx]
        log.info('Predicate #{}: {}, missing_imp_args = {}, '
                 'imp_args = {}'.format(
                     pred_idx,
                     rich_predicate.n_pred,
                     len(rich_predicate.imp_args),
                     len([imp_arg for imp_arg in rich_predicate.imp_args
                          if imp_arg.exist])))
def fit(self, X, y, examples_scores=None):
    """Split multi-label y dataset into train and test subsets.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features).

    y : {array-like, sparse matrix} of shape (n_samples, n_labels).

    examples_scores : a dictionary of shape (n_samples, 1) that contains
        an uncertainty score for each example.

    Returns
    -------
    data partition : two lists of indices representing the resulted data split
    """
    if X is None:
        raise Exception("Please provide a dataset.")
    if y is None:
        raise Exception("Please provide labels for the dataset.")
    assert X.shape[0] == y.shape[0]

    check, X = check_type(X=X, return_list=False)
    if not check:
        tmp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
        raise Exception(tmp)

    check, y = check_type(X=y, return_list=False)
    if not check:
        tmp = "The method only supports scipy.sparse, numpy.ndarray, and list type of data"
        raise Exception(tmp)

    # label properties come from y; the original read X.shape/X.data here,
    # which would make num_labels the feature count
    num_examples, num_labels = y.shape

    # check whether data is singly labeled
    if num_labels == 1:
        # transform it to multi-label data
        classes = list(set([i[0] if i else 0 for i in y.data]))
        mlb = LabelBinarizer(labels=classes)
        y = mlb.transform(y)

    if self.shuffle:
        sample_idx = custom_shuffle(num_examples=num_examples)
        X = X[sample_idx, :]
        y = y[sample_idx, :]

    # Keep track of how many instances have been swapped to train or test
    swap_counter = {'to_train': 0, 'to_test': 0}

    # 1. Create instances_dict to keep track of instance information:
    instances_dict = self.__create_instances_dict(X, y)

    # 2. Get average number of labels per instance
    labels_per_instance = [
        len(instance_dict['labels'])
        for idx, instance_dict in instances_dict.items()
    ]
    average_labels_per_instance = sum(labels_per_instance) / len(labels_per_instance)

    # 3. Create labels_dict to keep track of label information:
    labels_dict = self.__create_labels_dict(instances_dict)

    # 4. Calculate the label score for each label in labels_dict
    #    Positive score if too much of the label is in the test set
    #    Negative score if too much of the label is in the train set
    self.__score_labels(labels_dict, average_labels_per_instance)

    # 5. Calculate the instance score for each instance in instances_dict
    #    A high score means the instance is a good candidate for swapping
    self.__score_instances(instances_dict, labels_dict, examples_scores=examples_scores)

    # 6. Calculate the total score
    #    The higher the score, the more 'imbalanced' the distribution of
    #    labels between train and test sets
    total_score = self.__calculate_total_score(instances_dict)

    desc = '\t>> Perform splitting (extreme)...'
    print(desc)
    print('\t\t--> Starting score: {0}'.format(round(total_score)))

    # Main loop to create stratified train-test split
    for epoch in range(self.num_epochs):
        # 1. Calculate the threshold score for swapping
        threshold_score = self.__calculte_threshold_score(
            instances_dict=instances_dict,
            average_labels_per_instance=average_labels_per_instance,
            epoch=epoch)

        # 2. Swap the instances with instance_score that is greater than the
        #    threshold score; probability of swapping an instance is
        #    swap_probability
        self.__swap_instances(
            instances_dict=instances_dict,
            threshold_score=threshold_score,
            swap_counter=swap_counter,
            average_labels_per_instance=average_labels_per_instance,
            epoch=epoch)

        # 3. Recreate labels_dict with updated train-test split
        labels_dict = self.__create_labels_dict(instances_dict=instances_dict)

        # 4. Recalculate the label score for each label in labels_dict
        self.__score_labels(
            labels_dict=labels_dict,
            average_labels_per_instance=average_labels_per_instance)

        # 5. Recalculate the instance score for each instance in instances_dict
        self.__score_instances(instances_dict=instances_dict,
                               labels_dict=labels_dict,
                               examples_scores=examples_scores)

        # 6. Recalculate the total score
        total_score = self.__calculate_total_score(instances_dict=instances_dict)
        desc = '\t\t--> Splitting progress: {0:.2f}%; score: {1:.2f}'.format(
            ((epoch + 1) / self.num_epochs * 100), total_score)
        if epoch + 1 == self.num_epochs:
            print(desc)
        else:
            print(desc, end="\r")

    # Prepare train_list, test_list
    train_list = []
    test_list = []
    for idx, instance_dict in instances_dict.items():
        if instance_dict['train_or_test'] == 'train':
            train_list.append(idx)
        elif instance_dict['train_or_test'] == 'test':
            test_list.append(idx)
        else:
            print(f'Something went wrong: {idx}')
    return sorted(train_list), sorted(test_list)
def from_doc(cls, doc):
    check_type(doc, document.Document)

    script = cls(doc.doc_name)

    # add all entities from document
    for coref in doc.corefs:
        entity = Entity.from_coref(coref)
        script.add_entity(entity)
    if not script.has_entities():
        log.warning('script {} has no entities'.format(doc.doc_name))

    # add all events from document
    for sent in doc.sents:
        # iterate through all tokens
        for pred_token in sent.tokens:
            if pred_token.pos.startswith('VB'):
                # exclude "be" verbs
                if pred_token.lemma == 'be':
                    continue
                # exclude modifying verbs
                if sent.dep_graph.lookup_label('head', pred_token.token_idx, 'xcomp'):
                    continue
                # TODO: exclude verbs in quotes

                # NOBUG: do not exclude stop verbs now
                # both negation and particle need to be counted in
                # detecting a stop verb, we will remove stop verbs
                # in constructing RichScript

                # find whether the verb has negation
                neg = False
                if sent.dep_graph.lookup_label('head', pred_token.token_idx, 'neg'):
                    neg = True

                # find whether the verb has particle
                prt = ''
                prt_tokens = sent.lookup_label('head', pred_token.token_idx, 'compound:prt')
                if prt_tokens:
                    if len(prt_tokens) > 1:
                        log.warning('Predicate {} contains {} particles'.format(
                            pred_token.pretty_print(), len(prt_tokens)))
                    prt = prt_tokens[0].lemma

                subj_list = sent.get_subj_list(pred_token.token_idx)
                dobj_list = sent.get_dobj_list(pred_token.token_idx)
                pobj_list = sent.get_pobj_list(pred_token.token_idx)

                if (not subj_list) and (not dobj_list):
                    continue
                if not subj_list:
                    subj_list.append(None)
                if not dobj_list:
                    dobj_list.append(None)

                for arg_tuple in product(subj_list, dobj_list):
                    event = Event.from_tokens(pred_token, arg_tuple[0], arg_tuple[1],
                                              pobj_list, neg=neg, prt=prt)
                    script.add_event(event)

    if not script.has_events():
        log.warning('script {} has no events'.format(doc.doc_name))

    return script
def get_index(self, model, include_type=True, use_unk=True):
    check_type(model, Word2VecModel)
    self.core_wv = self.core.get_index(
        model, self.arg_type if include_type else '', use_unk=use_unk)