def bind_more_if_same():
    """Try to join "word + adjective" ET "word [+ adjective]" around index ``i``.

    Works on names taken from the surrounding scope: ``words`` (the word
    list), ``i`` (index of the conjunction being examined), ``M`` (upper
    bound for valid indices -- presumably ``len(words)``; confirm against
    the caller), ``visited`` and ``ao_loc`` (collections updated via
    ``.add``).

    The pattern is accepted when:
      * ``words[i-1]`` has a first item whose ``pos`` is ``'adj'`` and
        ``same(words[i-2], words[i-1], pos_check=False)`` holds, and
      * ``same(words[i-2], words[i+1])`` holds.
    If ``words[i+2]`` exists, is an adjective and agrees with
    ``words[i+1]`` in the same way, it is attached to the right-hand word.

    On success the adjectives are attached as modifiers, both head words
    are put into a new ``AndOr(u'et')`` which replaces ``words[i-2]``, the
    consumed indices are added to ``visited`` and ``i-2`` is added to
    ``ao_loc``.

    Returns:
        True if the words were bound together, False otherwise (in which
        case nothing is modified).
    """
    left_head = words[i-2]
    left_adj = words[i-1]

    # The word directly before the conjunction must be an adjective ...
    left_is_adj = bool(left_adj.items) and left_adj.items[0].pos == 'adj'
    if not left_is_adj:
        return False
    # ... that agrees with the word preceding it (part of speech ignored).
    if not same(left_head, left_adj, pos_check=False):
        return False

    right_head = words[i+1]

    # Optional adjective following the right-hand word.
    right_adj = None
    if i+2 < M:
        candidate = words[i+2]
        if candidate.items and candidate.items[0].pos == 'adj':
            if same(right_head, candidate, pos_check=False):
                right_adj = candidate

    if not same(left_head, right_head):
        return False

    # Parenthesised single argument: prints the same text under the
    # Python 2 print statement.
    print("// #%d #%d ET #%d (#%d): %s == %s" % (i-2, i-1, i+1, i+2, left_head.surface_utf8(), right_head.surface_utf8()))

    joined = AndOr(u'et')

    # Left side: attach the adjective before handing the head to the group.
    left_head.add_modifier(left_adj)
    joined.add([left_head])
    for consumed in (i-2, i-1):
        visited.add(consumed)

    # Right side: the adjective is only attached when one was found.
    if right_adj is not None:
        right_head.add_modifier(right_adj)
        visited.add(i+2)
    joined.add([right_head])
    visited.add(i+1)
    # NOTE(review): index i (the conjunction itself) is not added to
    # `visited` here, unlike in bind_two_if_same -- confirm this is intended.

    words[i-2] = joined
    ao_loc.add(i-2)
    return True
def bind_two_if_same():
    """Try to join the two words directly around the conjunction at ``i``.

    Uses ``words``, ``i``, ``visited`` and ``ao_loc`` from the surrounding
    scope.  When ``same(words[i-1], words[i+1])`` holds, both words are put
    into a new ``AndOr(u'et')`` that replaces ``words[i-1]``; indices
    ``i-1``, ``i`` and ``i+1`` are added to ``visited`` and ``i-1`` is
    added to ``ao_loc``.

    Returns:
        True if the pair was bound, False otherwise (nothing is modified).
    """
    before = words[i-1]
    after = words[i+1]

    if not same(before, after):
        return False

    # Parenthesised single argument: prints the same text under the
    # Python 2 print statement.
    print("// #%d ET #%d: %s == %s" % (i-1, i+1, before.surface_utf8(), after.surface_utf8()))

    pair = AndOr(u'et')
    for member in (before, after):
        pair.add([member])

    # Mark both words and the conjunction between them as consumed.
    for consumed in (i-1, i, i+1):
        visited.add(consumed)

    words[i-1] = pair
    ao_loc.add(i-1)
    return True
def detect(and_or_word):
    """Collect repeated-conjunction constructions ("X ... X ... ") per group.

    Uses ``groups``, ``words``, ``visited`` and ``ao_loc`` from the
    surrounding scope.  For every group in which ``and_or_word`` occurs at
    least twice (as reported by ``word_indices_in_group``), an
    ``AndOr(and_or_word)`` is built from the word runs that follow each
    occurrence, stored at the index of the first occurrence in ``words``,
    and that index is added to ``ao_loc``.  All indices covered by the
    construction are added to ``visited``.

    Args:
        and_or_word: the conjunction to look for; passed unchanged to
            ``word_indices_in_group`` and ``AndOr``.

    Returns:
        None.  ``words``, ``visited`` and ``ao_loc`` are updated in place.
    """
    ao_indices = [word_indices_in_group(and_or_word, group) for group in groups]
    for i, aos in enumerate(ao_indices):
        num_of_ao = len(aos)
        if num_of_ao < 2:
            continue

        cl = AndOr(and_or_word)

        # Segments whose extent is already fixed (everything except the one
        # after the last conjunction) are added first.
        for j in range(num_of_ao - 1):
            this_et_idx = aos[j]
            next_et_idx = aos[j+1]
            cl.add(words[this_et_idx+1:next_et_idx])

        # The last segment is added word by word while checking where it ends.
        last_et_idx = aos[num_of_ao-1]
        # presumably groups[i] holds word indices and its last entry is the
        # final word of the group -- confirm against the caller.
        end_idx = groups[i][-1] + 1

        # Exclusive end of the construction.  Tracked explicitly instead of
        # reading the loop variable afterwards: when the last conjunction is
        # the final word of the group the loop below never runs, and the
        # loop variable would be unbound (or stale from a previous group).
        stop = last_et_idx + 1
        ws = []
        for idx in range(last_et_idx + 1, end_idx):
            word = words[idx]
            first_item = word.items[0]
            if cl.pos == 'noun':
                # Only words that inflect for case are accepted.
                if first_item._ is None:
                    break
                if non_genitive(first_item._):
                    cases = [x[0] for x in first_item._]
                    # Stop if no case can agree with what was collected so
                    # far.  any() gives the same result as the original
                    # "if not filter(...)" did on Python 2.
                    if not any(case in cases for case in cl.cases):
                        break
            ws.append(word)
            stop = idx + 1

        if ws:
            cl.add(ws)
        # else: nothing compatible followed the last conjunction
        # (the original marked this case "ERROR: type mismatch").

        for j in range(aos[0], stop):
            visited.add(j)
        cl.restrict()
        words[aos[0]] = cl
        ao_loc.add(aos[0])