Пример #1
0
class SentenceHolder(object):
    """Hold up to two id -> text item lists and enumerate pairs of their ids.

    With one list loaded, pairs are the 2-element combinations of that
    list's ids.  With two lists loaded, pairs are the cross product: the
    left id comes from the first list and the right id from the second.
    ``get_pair_info`` resolves ids to text under the same convention.
    """

    def __init__(self):
        # Loaded item lists; each is a dict mapping int id -> cleaned text.
        self.item_dicts = []
        # Number of loaded lists; kept in step with len(self.item_dicts).
        self._dict_count = 0
        self.cleaner = StringCleaner()
        # Iterable of (left_id, right_id) tuples; None until make_pairs() runs.
        self.pairs = None

    def load_file(self, file_name, replace=None):
        """Load a tab-separated ``id<TAB>text`` file as an item list.

        Each non-blank line is split on tabs; the first field is converted
        with ``int`` and used as the key, the second field is passed through
        ``self.cleaner.clean_string`` and stored as the value.

        Args:
            file_name: Path of the file to read.
            replace: ``None`` (default) appends the file as a new item list.
                ``0`` or ``1`` overwrites the list already loaded at that
                position instead.

        Raises:
            RuntimeError: appending when two lists are already loaded.
            ValueError: ``replace`` is neither 0 nor 1.
            IndexError: ``replace`` names a position that has no list yet.
        """
        # Compare against None explicitly: 0 is a valid position but falsy,
        # so a plain truthiness test made slot 0 impossible to replace.
        # ``False`` keeps its historical meaning of "do not replace".
        replacing = replace is not None and replace is not False

        # Validate before touching the file so bad calls fail fast.
        if replacing:
            if replace not in (0, 1):
                raise ValueError("You can only replace item lists at 0 or 1.")
            if replace >= len(self.item_dicts):
                raise IndexError(
                    "There is no item list at position {} to replace.".format(replace)
                )
        elif len(self.item_dicts) == 2:
            raise RuntimeError("Only two item lists may be loaded at a time.")

        temp_item_dict = {}
        with open(file_name) as f:
            for line in f:
                # Skip blank lines (e.g. a trailing newline at end of file)
                # instead of crashing on int('').
                if not line.strip():
                    continue
                data = line.split("\t")
                temp_item_dict[int(data[0])] = self.cleaner.clean_string(data[1])

        if replacing:
            self.item_dicts[replace] = temp_item_dict
        else:
            self.item_dicts.append(temp_item_dict)
            self._dict_count += 1

    def make_pairs(self):
        """Populate ``self.pairs`` according to how many lists are loaded.

        One list -> pairs within that list; two lists -> pairs across them.

        Raises:
            RuntimeError: no item list has been loaded yet.
        """
        # The dispatch mirrors get_pair_info: with a single list both ids
        # index list 0, with two lists the right id indexes list 1.
        if self._dict_count == 1:
            self._make_single_pairs()
        elif self._dict_count == 2:
            self._make_cross_pairs()
        elif self._dict_count == 0:
            raise RuntimeError("You can only make pairs after loading at least one item list.")

    def _make_single_pairs(self):
        """Set ``self.pairs`` to all 2-combinations of the first list's sorted ids."""
        self.pairs = combinations(sorted(self.item_dicts[0]), 2)

    def _make_cross_pairs(self):
        """Set ``self.pairs`` to the product of the first and second lists' sorted ids."""
        self.pairs = product(sorted(self.item_dicts[0]), sorted(self.item_dicts[1]))

    def get_pair_info(self, left_id, right_id):
        """Return ``((left_id, left_text), (right_id, right_text))``.

        The left id is always looked up in the first list.  The right id is
        looked up in the first list when only one list is loaded, otherwise
        in the second list.  A missing id raises ``KeyError``.
        """
        left_items = self.item_dicts[0]
        right_items = left_items if self._dict_count == 1 else self.item_dicts[1]
        return (left_id, left_items[left_id]), (right_id, right_items[right_id])

    def pair_iterator(self):
        """Yield ``get_pair_info`` results for every pair in ``self.pairs``.

        ``make_pairs`` must have been called first; while ``self.pairs`` is
        still ``None`` iteration fails with ``TypeError``.
        """
        for p in self.pairs:
            yield self.get_pair_info(*p)
Пример #2
0
 def __init__(self):
     """Start with no item dicts loaded and no pairs generated."""
     # Helper built from StringCleaner (defined outside this snippet).
     self.cleaner = StringCleaner()
     # No pairs yet; stays None until something assigns it.
     self.pairs = None
     # Empty container for item dicts, with its counter starting at zero.
     self._dict_count = 0
     self.item_dicts = []