class SentenceHolder(object):
    """Hold one or two ``id -> text`` item lists and enumerate id pairs.

    With one item list loaded, pairs are every unordered combination of two
    ids from that list. With two item lists loaded, pairs are the cross
    product of the ids of the first list with the ids of the second.
    """

    def __init__(self):
        # Loaded item lists (at most two); each maps an int id to the text
        # returned by the cleaner.
        self.item_dicts = []
        # Always equal to len(self.item_dicts); selects the pairing mode.
        self._dict_count = 0
        self.cleaner = StringCleaner()
        # Iterator of (left_id, right_id) tuples; populated by make_pairs().
        self.pairs = None

    def load_file(self, file_name, replace=None):
        """Load a tab-separated ``id<TAB>text`` file as an item list.

        Each non-blank line is split on tabs; the first field is converted to
        ``int`` and used as the key, the second field is passed through
        ``self.cleaner.clean_string`` and stored as the value.

        Existing ``self.pairs`` are not refreshed; call ``make_pairs()``
        again after loading.

        :param file_name: path of the file to read.
        :param replace: ``None`` (default) appends a new item list; ``0`` or
            ``1`` overwrites the already-loaded item list at that index.
        :raises RuntimeError: if two lists are already loaded and ``replace``
            is not given.
        :raises ValueError: if ``replace`` is given but is not 0 or 1.
        :raises IndexError: if ``replace`` names a slot that has not been
            loaded yet.
        """
        # Compare against None explicitly so that index 0 (which is falsy)
        # can be replaced. ``False`` keeps meaning "do not replace", as it
        # did when this was a plain truthiness test.
        replacing = replace is not None and replace is not False

        # Validate before touching the file so a bad call fails fast.
        if replacing:
            if replace not in (0, 1):
                raise ValueError("You can only replace item lists at 0 or 1.")
            if replace >= len(self.item_dicts):
                raise IndexError("list assignment index out of range")
        elif len(self.item_dicts) == 2:
            raise RuntimeError("Only two item lists may be loaded at a time.")

        loaded = {}
        with open(file_name) as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank lines (e.g. trailing newlines at EOF).
                    continue
                data = line.split("\t")
                loaded[int(data[0])] = self.cleaner.clean_string(data[1])

        if replacing:
            self.item_dicts[replace] = loaded
        else:
            self.item_dicts.append(loaded)
        # Derive the count from the list so replacing never inflates it.
        self._dict_count = len(self.item_dicts)

    def make_pairs(self):
        """Populate ``self.pairs`` according to how many lists are loaded.

        :raises RuntimeError: if no item list has been loaded.
        """
        if self._dict_count == 0:
            raise RuntimeError("You can only make pairs after loading at least one item list.")
        if self._dict_count == 1:
            self._make_single_pairs()
        elif self._dict_count == 2:
            self._make_cross_pairs()

    def _make_single_pairs(self):
        """Pair every id of the first list with every later id (sorted)."""
        self.pairs = combinations(sorted(self.item_dicts[0].keys()), 2)

    def _make_cross_pairs(self):
        """Pair every id of the first list with every id of the second."""
        self.pairs = product(sorted(self.item_dicts[0].keys()),
                             sorted(self.item_dicts[1].keys()))

    def get_pair_info(self, left_id, right_id):
        """Return ``((left_id, left_text), (right_id, right_text))``.

        The left text always comes from the first item list. The right text
        comes from the first list when only one list is loaded, otherwise
        from the second list.

        :raises KeyError: if an id is missing from the list it is looked up in.
        """
        right_source = self.item_dicts[0 if self._dict_count == 1 else 1]
        return ((left_id, self.item_dicts[0][left_id]),
                (right_id, right_source[right_id]))

    def pair_iterator(self):
        """Yield ``get_pair_info`` for each pair in ``self.pairs``.

        ``self.pairs`` is a one-shot iterator, so it is consumed by this
        generator; call ``make_pairs()`` again to iterate a second time.
        """
        for p in self.pairs:
            yield self.get_pair_info(*p)
def __init__(self):
    """Initialise an empty holder: no item lists, no pairs, a fresh cleaner."""
    # Start with no loaded item lists and a matching count of zero.
    self.item_dicts, self._dict_count = list(), 0
    # StringCleaner is defined outside this block.
    self.cleaner = StringCleaner()
    # No pairs have been generated yet.
    self.pairs = None