def load_data(self, path):
    """Load ATOMIC data, either from a cached pickle or from raw CSV splits.

    When `path` points at a pickle, the pre-built loader state is restored
    and True is returned. Otherwise each split's CSV is parsed into
    (subject, <relation>, object) triples accumulated under
    self.data[split]["total"], and False is returned.
    """
    if ".pickle" in path:
        print("Loading data from: {}".format(path))
        data_utils.load_existing_data_loader(self, path)
        return True

    for split in self.data:
        csv_name = "v4_atomic_{}.csv".format(map_name(split))
        frame = pandas.read_csv("{}/{}".format(path, csv_name), index_col=0)
        # The first nine columns hold JSON-encoded annotation lists;
        # decode them in place.
        frame.iloc[:, :9] = frame.iloc[:, :9].apply(
            lambda column: column.apply(json.loads))

        for category in self.categories:
            series = frame[category]
            relation_tokens = ["<{}>".format(category)] * len(series)
            self.data[split]["total"] += utils.zipped_flatten(
                zip(series.index, relation_tokens, series.values))

    # Optionally subsample the training set.
    if do_take_partial_dataset(self.opt.data):
        self.data["train"]["total"] = select_partial_dataset(
            self.opt.data, self.data["train"]["total"])

    return False
def load_data(self, path):
    """Load ATOMIC data, either from a cached pickle or from raw CSV splits.

    Three generation modes, chosen by instance flags:
      * self.comet     -- replicate original COMET: plain (s, r, o) triples.
      * self.pathcomet -- COMET triples, each prefixed with a random reverse
                          path sampled from the knowledge graph.
      * otherwise      -- free random walks over the knowledge graph.

    Returns True when a pre-built pickle was loaded, False otherwise.
    Raises KeyError if a split in self.data is not train/dev/test.
    """
    if ".pickle" in path:
        print("Loading data from: {}".format(path))
        data_utils.load_existing_data_loader(self, path)
        return True

    # Forward (non-inverse) relation categories only.
    forward_categories = [c for c in self.categories if 'Inverse' not in c]

    def extract_triples(df, category):
        # Flatten one relation column into (subject, <relation>, object) tuples.
        attr = df[category]
        return utils.zipped_flatten(
            zip(attr.index, ["<{}>".format(category)] * len(attr), attr.values))

    def build_graph(df):
        # Directed KG over all forward categories; every edge also gets an
        # explicit reversed edge whose relation token is tagged 'Inverse>'.
        G = nx.DiGraph()
        for category in forward_categories:
            for subj, rel, obj in extract_triples(df, category):
                G.add_node(subj, type='subj')
                G.add_node(obj, type='obj')
                G.add_edge(subj, obj, rel=rel)
                G.add_edge(obj, subj, rel=rel.replace('>', 'Inverse>'))
        return G

    def sample_walk(start, G, step_fn, prepend, exclude=None):
        # Sample one random walk from `start` using `step_fn`
        # (data_utils.single_step or single_step_reverse). Gives up after a
        # dead end or more than 10 failed update attempts.
        curr = start
        walk = data_utils.Path(curr)
        if exclude is not None:
            # Never revisit the held-out target object.
            walk.nodes.add(exclude)
        attempts = 0
        while len(walk.walk) < self.max_path_len:
            obj, relation, dead_end = step_fn(curr, G)
            if dead_end:
                attempts += 1
                break
            if walk.update(obj, relation, prepend=True) if prepend \
                    else walk.update(obj, relation):
                curr = obj
            else:
                attempts += 1
            if attempts > 10:
                break
        return walk

    for split in self.data:
        # Target example count per split; an unknown split name is a
        # configuration error, so fail loudly (KeyError) instead of silently
        # reusing a stale n_data from a previous iteration.
        n_data = {'train': self.n_train,
                  'dev': self.n_dev,
                  'test': self.n_test}[split]

        # Read & load ATOMIC dataset file.
        file_name = "v4_atomic_{}.csv".format(map_name(split))
        df = pandas.read_csv("{}/{}".format(path, file_name), index_col=0)
        # First nine columns hold JSON-encoded lists; decode in place.
        df.iloc[:, :9] = df.iloc[:, :9].apply(lambda col: col.apply(json.loads))

        if self.comet:
            # For replicating original COMET settings we don't need a graph.
            for category in forward_categories:
                self.data[split]["total"] += extract_triples(df, category)

        elif self.pathcomet:
            # Replicate original COMET, but prepend every (s, r) with a
            # reverse path sampled from the graph.
            triples = []
            for category in forward_categories:
                triples += extract_triples(df, category)
            triples = [list(item) for item in triples]

            G = build_graph(df)

            examples = []
            for base_subj, base_rel, base_obj in triples:
                unique_paths = set()  # de-duplicate paths per source triple
                for _ in range(self.n_per_node[split]):
                    walk = sample_walk(base_subj, G,
                                       data_utils.single_step_reverse,
                                       prepend=True, exclude=base_obj)
                    candidate = ' '.join(walk.walk + [base_rel, base_obj])
                    if candidate not in unique_paths:
                        assert walk.walk[-1] == base_subj
                        walk.walk.append(base_rel)
                        walk.walk.append(base_obj)
                        examples.append(walk.walk)
                        unique_paths.add(candidate)
                        if len(examples) % 500 == 0:
                            print("\nGenerated {} {} examples".format(
                                len(examples), split))
                            print(walk.walk)

            if self.add_orig[split]:
                # Keep the plain COMET triples alongside the path examples.
                self.data[split]["total"] += triples
                self.data[split]["total"] += examples
            else:
                self.data[split]["total"] = examples

        else:
            # Graph-based path data generation: free forward random walks.
            G = build_graph(df)

            examples = []
            all_nodes = list(G.nodes())
            random.shuffle(all_nodes)
            for node in all_nodes:
                unique_paths = set()  # filter duplicates from this start node
                for _ in range(self.n_per_node[split]):
                    walk = sample_walk(node, G, data_utils.single_step,
                                       prepend=False)
                    key = ' '.join(walk.walk)
                    if key not in unique_paths:
                        examples.append(walk.walk)
                        unique_paths.add(key)
                        if len(examples) % 500 == 0:
                            print("\nGenerated {} {} examples".format(
                                len(examples), split))
                            print(walk.walk)
                if len(examples) >= n_data:
                    break
            self.data[split]["total"] = examples[:n_data]

    # Optionally subsample the training set.
    if do_take_partial_dataset(self.opt.data):
        self.data["train"]["total"] = select_partial_dataset(
            self.opt.data, self.data["train"]["total"])

    return False