def transform(self, X):
    """Build a (len(X), 2) feature matrix of dependency-tree depths.

    Column 0 is the minimum parse-tree depth of any hedging word and
    column 1 the minimum depth of any refuting word, taken over all
    sentences of each row's article. Rows with no parse data or no
    matching word keep the sentinel depth 100.

    :param X: pandas DataFrame with an ``articleId`` column.
    :return: numpy array of shape (len(X), 2).
    """
    stanparse_depths = get_stanparse_depths()
    stanparse_data = get_stanparse_data()

    mat = np.zeros((len(X), 2))
    for i, (_, s) in enumerate(X.iterrows()):
        # Initialise BEFORE the try: the article lookup below may raise
        # KeyError, and the original code left these names unbound in
        # that case, crashing with NameError at the matrix assignment.
        min_hedge_depth = min_refute_depth = 100
        try:
            sp_data = stanparse_data[s.articleId]
            sp_depths = stanparse_depths[s.articleId]
            for j, sentence in enumerate(sp_data['sentences']):
                grph, grph_labels, grph_depths = sp_depths[j]
                # 1-based positions so they line up with Stanford token indices.
                lemmas = list(enumerate(
                    [d[1]['Lemma'].lower() for d in sentence['words']],
                    start=1))
                hedge_match = self._find_matching(lemmas, _hedging_words)
                refute_match = self._find_matching(lemmas, _refuting_words)
                hedge_depths = [grph_depths[d] for d in hedge_match if d > 0]
                refute_depths = [grph_depths[d] for d in refute_match if d > 0]
                # Seed with the running minimum so min() never sees an
                # empty list and the minimum carries across sentences.
                hedge_depths.append(min_hedge_depth)
                refute_depths.append(min_refute_depth)
                min_hedge_depth = min(hedge_depths)
                min_refute_depth = min(refute_depths)
        except KeyError:
            # No parse for this article (or a token index missing from
            # the depth map): keep the sentinel depths for this row.
            pass
        mat[i, 0] = min_hedge_depth
        mat[i, 1] = min_refute_depth
    return mat
def transform(self, X):
    """Build a (len(X), 1) binary feature: 1 when any sentence of the
    row's article contains a negated refuting word.

    A refuting word counts as negated when it heads a ``neg`` dependency,
    or an ``nn`` dependency whose dependent token is literally "not".
    Articles without parse data are left at 0.

    :param X: pandas DataFrame with an ``articleId`` column.
    :return: numpy array of shape (len(X), 1).
    """
    mat = np.zeros((len(X), 1))
    stanparse_data = get_stanparse_data()

    def _token_text(tok):
        # Drop the "-<index>" suffix Stanford appends to each token.
        return tok[:tok.rfind('-')]

    for row, (_, s) in enumerate(X.iterrows()):
        try:
            for sentence in stanparse_data[s.articleId]['sentences']:
                for rel, head, dependent in sentence['dependencies']:
                    is_negation = (
                        rel == 'neg'
                        or (rel == 'nn'
                            and _token_text(dependent).lower() == 'not'))
                    if is_negation and _token_text(head).lower() in _refuting_words:
                        mat[row, 0] = 1
        except KeyError:
            # No parse for this article: feature stays 0.
            pass
    return mat
def transform(self, X):
    """Build a (len(X), 2) feature matrix of dependency-tree depths.

    Column 0 is the minimum parse-tree depth of any hedging word and
    column 1 the minimum depth of any refuting word, taken over all
    sentences of each row's article. Rows with no parse data or no
    matching word keep the sentinel depth 100.

    :param X: pandas DataFrame with an ``articleId`` column.
    :return: numpy array of shape (len(X), 2).
    """
    stanparse_depths = get_stanparse_depths()
    stanparse_data = get_stanparse_data()

    mat = np.zeros((len(X), 2))
    for i, (_, s) in enumerate(X.iterrows()):
        # Reset per row, BEFORE the try: the article lookup below may
        # raise KeyError, and the original only initialised these once
        # outside the loop, so a failing row could silently reuse the
        # previous row's depths (and the inner re-init was redundant).
        min_hedge_depth = min_refute_depth = 100
        try:
            sp_data = stanparse_data[s.articleId]
            sp_depths = stanparse_depths[s.articleId]
            for j, sentence in enumerate(sp_data['sentences']):
                grph, grph_labels, grph_depths = sp_depths[j]
                # 1-based positions so they line up with Stanford token indices.
                lemmas = list(enumerate(
                    [d[1]['Lemma'].lower() for d in sentence['words']],
                    start=1))
                hedge_match = self._find_matching(lemmas, _hedging_words)
                refute_match = self._find_matching(lemmas, _refuting_words)
                hedge_depths = [grph_depths[d] for d in hedge_match if d > 0]
                refute_depths = [grph_depths[d] for d in refute_match if d > 0]
                # Seed with the running minimum so min() never sees an
                # empty list and the minimum carries across sentences.
                hedge_depths.append(min_hedge_depth)
                refute_depths.append(min_refute_depth)
                min_hedge_depth = min(hedge_depths)
                min_refute_depth = min(refute_depths)
        except KeyError:
            # No parse for this article (or a token index missing from
            # the depth map): keep the sentinel depths for this row.
            pass
        mat[i, 0] = min_hedge_depth
        mat[i, 1] = min_refute_depth
    return mat
# NOTE(review): the next five statements are the tail of a function whose
# `def` line lies above this chunk (it appears to accumulate a dependency
# graph keyed by Stanford token indices). Indentation reconstructed from a
# flattened source line — confirm against the full file.
        _, head_idx = get_stanford_idx(head)
        _, dep_idx = get_stanford_idx(dep)
        dep_graph.setdefault(head_idx, set()).add(dep_idx)
        dep_graph_labels[(head_idx, dep_idx)] = rel
    return dep_graph, dep_graph_labels


def _calc_depths(grph, n=0, d=0, depths=None):
    """Return a dict mapping each node reachable from *n* in *grph* to its
    depth (edge count from *n*).

    :param grph: adjacency dict, node -> set of child nodes.
    :param n: start node (defaults to 0, the Stanford ROOT index).
    :param d: depth assigned to *n* itself.
    :param depths: accumulator used by the recursive calls; leave as None.
    """
    if depths is None:
        depths = {n: d}  # fresh accumulator per top-level call (no mutable default)
    sx = grph.get(n)
    if sx:
        for s in sx:
            depths[s] = d + 1
            _calc_depths(grph, s, d + 1, depths)  # shared accumulator, result via side effect
    return depths


if __name__ == "__main__":
    # Precompute a dependency graph + per-node depths for every sentence of
    # every parsed article, and pickle the result for the feature extractors.
    dep_parse_data = get_stanparse_data()

    data = {}
    for id, dep_parse in dep_parse_data.items():
        for i, s in enumerate(dep_parse['sentences']):
            grph, grph_labels = _build_dep_graph(s['dependencies'])
            grph_depths = _calc_depths(grph)
            # data[article id][sentence index] = (graph, edge labels, depths)
            d = data.setdefault(id, {})
            d[i] = grph, grph_labels, grph_depths

    with open(os.path.join('..', 'data', 'pickled', 'stanparse-depths.pickle'), 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
def _get_word_in_sentence_at_pos(id, s_num, pos):
    """Return the lowercased lemma of the token at 1-based position *pos*
    in sentence *s_num* of the parsed article *id*."""
    words = get_stanparse_data()[id]['sentences'][s_num]['words']
    # pos is 1-based (Stanford convention); the word list is 0-based.
    return words[pos - 1][1]['Lemma'].lower()
def _get_word_in_sentence_at_pos(id, s_num, pos):
    """Look up the lemma (lowercased) for the 1-based token position *pos*
    within sentence *s_num* of article *id*'s Stanford parse."""
    parse = get_stanparse_data()[id]
    token = parse['sentences'][s_num]['words'][pos - 1]
    # token is (surface form, attribute dict); attributes hold the lemma.
    return token[1]['Lemma'].lower()