def validate_nodes(sentence1, sentence2):
    """
    Compare the sequence of tag and text nodes of two sentences.

    Each sentence is split into node strings using the spans produced by
    ``get_tags_positions`` and ``_split_text_tags``; empty spans are
    dropped. The sentences are considered valid when they have the same
    number of nodes and, position by position, the nodes have the same
    type (``tag`` when the module-level ``patt`` matches the node string,
    ``text`` otherwise). Node contents are not compared, only their types.

    NOTE(review): the first value returned by ``_split_text_tags`` is
    assumed to be an iterable of ``(start, end)`` offsets into the
    sentence -- confirm against that helper.

    :param sentence1: first sentence (string)
    :param sentence2: second sentence (string)
    :return: is_valid (Boolean), reason (String, or None when valid)
    """
    # Extract tags position
    sent1_tags = get_tags_positions(sentence1)
    sent2_tags = get_tags_positions(sentence2)

    # Extract text and tags position (second return value is not needed)
    sent1_pos, _ = _split_text_tags(sentence1, sent1_tags)
    sent2_pos, _ = _split_text_tags(sentence2, sent2_tags)

    # Get list with separate node strings, skipping zero-length spans
    sent1_nodes = [sentence1[span[0]:span[1]]
                   for span in sent1_pos if span[0] != span[1]]
    sent2_nodes = [sentence2[span[0]:span[1]]
                   for span in sent2_pos if span[0] != span[1]]

    if len(sent1_nodes) != len(sent2_nodes):
        return False, u'Number of nodes don\'t match'

    # Builtin zip instead of itertools.izip: izip was removed in Python 3,
    # and zip behaves the same here on both Python 2 and 3 because the two
    # lists are already in memory and have equal length.
    for index, (node1, node2) in enumerate(zip(sent1_nodes, sent2_nodes)):
        node1_type = u'tag' if patt.match(node1) else u'text'
        node2_type = u'tag' if patt.match(node2) else u'text'
        if node1_type != node2_type:
            return False, (u'Nodes on index {} have different types. '
                           u'Sentence 1: {}, '
                           u'Sentence 2: {}').format(index,
                                                     node1_type,
                                                     node2_type)

    return True, None