Пример #1
0
def generate_dag(optimal_indvidual, stage_name, num_nodes):
    """Build the DAG of one stage from its flat connection encoding.

    Nodes are named ``<stage_name>_1`` ... ``<stage_name>_<num_nodes>``.
    ``optimal_indvidual`` is cut into consecutive groups of sizes
    1, 2, ..., ``num_nodes - 1``; the group of size ``k - 1`` belongs to node
    ``k`` and its ``j``-th entry (1-based), when truthy, adds the edge
    ``<stage_name>_j -> <stage_name>_k``.  The encoding is therefore expected
    to hold ``num_nodes * (num_nodes - 1) / 2`` entries.

    Args:
        optimal_indvidual: flat array-like of connection flags (anything
            ``np.split`` accepts).
        stage_name: string prefix used for every node name.
        num_nodes: number of candidate nodes in the stage.

    Returns:
        A ``(dag, nodes)`` tuple: the ``DAG`` with every node that has neither
        predecessors nor downstream nodes removed, and a NumPy string array
        with the names of the nodes that remain.
    """
    def node_name(index):
        return ''.join([stage_name, "_", str(index)])

    # The builtin ``str`` replaces the ``np.str`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24 (AttributeError since then).
    nodes = np.array([node_name(n) for n in range(1, num_nodes + 1)],
                     dtype=str)

    # Initialize the directed acyclic graph (DAG) and add every node to it.
    dag = DAG()
    for node in nodes:
        dag.add_node(node)

    # Split the individual found by the GA into one group of flags per target
    # node.  The leading chunk produced by the split index 0 is always empty
    # (node 1 has no possible predecessor) and is dropped by ``[1:]``.
    groups = np.split(optimal_indvidual,
                      np.cumsum(range(num_nodes - 1)))[1:]
    for target, flags in enumerate(groups, start=2):
        for source, flag in enumerate(flags, start=1):
            if flag:
                dag.add_edge(node_name(source), node_name(target))

    # Delete nodes that are not connected to any other node, recording which
    # names survive so the returned array matches the graph.
    connected = []
    for node in nodes:
        isolated = not dag.predecessors(node) and not dag.downstream(node)
        if isolated:
            dag.delete_node(node)
        connected.append(not isolated)
    nodes = nodes[np.array(connected, dtype=bool)]

    return dag, nodes
Пример #2
0
def generate_dag(optimal_indvidual, stage_name, num_nodes):
    """Build the DAG of one stage from its binary connection string.

    ``optimal_indvidual`` is the binary string (flat array of flags) of this
    stage.  Nodes are named after the stage, e.g. for stage ``s1`` they are
    ``s1_1``, ``s1_2``, ...  The flags are cut into consecutive groups of
    sizes 1, 2, ..., ``num_nodes - 1``; the group of size ``k - 1`` belongs to
    node ``k`` and its ``j``-th entry (1-based), when truthy, adds the edge
    ``<stage_name>_j -> <stage_name>_k``.  The encoding is therefore expected
    to hold ``num_nodes * (num_nodes - 1) / 2`` entries.

    Args:
        optimal_indvidual: flat array-like of connection flags (anything
            ``np.split`` accepts).
        stage_name: string prefix used for every node name.
        num_nodes: number of candidate nodes in the stage.

    Returns:
        A ``(dag, nodes)`` tuple: the ``DAG`` with every isolated node
        removed, and a NumPy string array with the names of the nodes that
        remain.
    """
    def node_name(index):
        return ''.join([stage_name, "_", str(index)])

    # Create the node names for the graph.  The builtin ``str`` replaces the
    # ``np.str`` alias, which NumPy deprecated in 1.20 and removed in 1.24
    # (AttributeError since then).
    nodes = np.array([node_name(n) for n in range(1, num_nodes + 1)],
                     dtype=str)

    # Initialize the directed acyclic graph (DAG) and add all nodes to it.
    dag = DAG()
    for node in nodes:
        dag.add_node(node)

    # Split the best individual found by the GA into the connection flags of
    # each node.  ``cumsum`` is the running sum: with 4 nodes,
    # ``cumsum(range(3))`` is ``[0, 1, 3]``, so a string ``s`` of length 6 is
    # cut into ``s[:0]``, ``s[0:1]``, ``s[1:3]`` and ``s[3:6]``.  The first
    # chunk (``s[:0]``) is the empty data of node 1, which has nothing to
    # connect from, and is dropped by ``[1:]``.
    groups = np.split(optimal_indvidual,
                      np.cumsum(range(num_nodes - 1)))[1:]

    # Walk over the connection flags of every node; a truthy flag adds the
    # corresponding edge to the DAG.
    for target, flags in enumerate(groups, start=2):
        for source, flag in enumerate(flags, start=1):
            if flag:
                dag.add_edge(node_name(source), node_name(target))

    # Delete isolated nodes (no predecessors and nothing downstream),
    # recording which names survive so the returned array matches the graph.
    connected = []
    for node in nodes:
        isolated = not dag.predecessors(node) and not dag.downstream(node)
        if isolated:
            dag.delete_node(node)
        connected.append(not isolated)
    nodes = nodes[np.array(connected, dtype=bool)]

    return dag, nodes
Пример #3
0
class Pipeline:
    """A pipeline of stages described by a YAML configuration file.

    The configuration must contain a ``pipeline`` section with the keys
    ``owner``, ``basename``, ``version``, ``stages`` and ``images``, plus one
    section per stage name holding a ``depends-on`` list of parent stages.
    """

    def __init__(self, input_file):
        """Load the configuration, the stage classes and the dependency DAG.

        Args:
            input_file: path of the YAML file, or an object with a ``read``
                method yielding its content.
        """
        self.cfg = self._read(input_file)

        self.info = self.cfg['pipeline']

        self.owner = self.info['owner']
        self.basename = self.info['basename']
        self.version = self.info['version']
        self.dag = DAG()
        self.stages = {}

        # First pass: load every stage and register it as a node, so the
        # edges added below can reference stages in any listing order.
        for name in self.info['stages']:
            self.stages[name] = self.load_stage(name)
            self.dag.add_node(name)

        # Second pass: one edge per declared dependency, parent -> stage.
        for name in self.info['stages']:
            stage_info = self.cfg[name]
            for parent in stage_info['depends-on']:
                self.dag.add_edge(parent, name)

    def _make_in_images(self, target=None):
        """Run ``make`` (or ``make <target>``) in every images directory."""
        command = "make" if target is None else "make {}".format(target)
        for dirname in self.info['images']:
            # NOTE(review): dirname is interpolated unquoted into a shell
            # command and the exit status is ignored; this is only acceptable
            # while the pipeline file is trusted.
            os.system("cd {}; {}".format(dirname, command))

    def build(self):
        """Run ``make`` in every images directory."""
        self._make_in_images()

    def push(self):
        """Run ``make push`` in every images directory."""
        self._make_in_images("push")

    def pull(self):
        """Run ``make pull`` in every images directory."""
        self._make_in_images("pull")

    def load_stage(self, name):
        """Locate the stage called ``name`` and return its ``Stage`` object.

        The stage is the first ``<images dir>/<name>`` directory that
        contains both a ``Dockerfile`` and a ``run.py``; ``run.py`` is loaded
        as a module and its ``Stage`` attribute is returned.

        Raises:
            PipelineError: if no images directory holds such a stage.
        """
        for dirname in self.info['images']:
            dirpath = os.path.join(dirname, name)
            dockerfile_path = os.path.join(dirpath, "Dockerfile")
            run_path = os.path.join(dirpath, "run.py")
            if (os.path.isdir(dirpath)
                    and os.path.isfile(dockerfile_path)
                    and os.path.isfile(run_path)):
                path = run_path
                break
        else:
            raise PipelineError("""No Stage called {} was found - needs to be in one
                of the images directories and contain Dockerfile, run.py""".format(name))

        # We want to load a module from a Python source file.  The way to do
        # this keeps changing between Python versions; load_module() is
        # deprecated but works back in Python 3.4, which is what CentOS 7 can
        # provide.
        loader = importlib.machinery.SourceFileLoader(name, path)
        module = loader.load_module()
        return module.Stage

    def input_tags(self):
        """Return the set of input tags required by the pipeline and not
        generated inside it."""
        pipeline_inputs = set()
        # Find all the inputs expected by the pipeline.
        for stage in self.stages.values():
            pipeline_inputs.update(stage.inputs.keys())
        # Remove any tags that are generated by a stage of the pipeline.
        for stage in self.stages.values():
            pipeline_inputs.difference_update(stage.outputs.keys())

        return pipeline_inputs

    def output_filenames(self):
        """Return the set of all output file names reported by the stages."""
        outputs = set()
        # Collect the outputs generated by every stage.  ``set`` has no
        # ``+=`` operator (the previous code raised TypeError), so merge with
        # update(), which accepts any iterable.
        for stage in self.stages.values():
            outputs.update(stage.get_output_filenames())

        return outputs

    def _read(self, input_file):
        """Read a YAML file representing a pipeline.

        Args:
            input_file: path of the file, or an object with a ``read``
                method.  An object passed in by the caller is left open; a
                file opened here is closed before returning.

        Returns:
            The parsed YAML document.
        """
        # NOTE(review): this used to be yaml.load() without a Loader, which
        # can construct arbitrary Python objects on old PyYAML and raises
        # TypeError on PyYAML >= 6.  safe_load() handles plain
        # mappings/lists/scalars; confirm no pipeline file relies on
        # Python-specific YAML tags.
        if hasattr(input_file, 'read'):
            return yaml.safe_load(input_file)
        with open(input_file) as stream:
            return yaml.safe_load(stream)

    def image_name(self, name):
        """Return the expected image name for a given stage, based on the
        information in the pipeline file."""
        return '{}/{}-{}:{}'.format(self.owner, self.basename, name, self.version)

    def sequence(self):
        """Return an acceptable serial ordering of the pipeline elements as a
        list of ``(name, stage)`` pairs."""
        order = self.dag.topological_sort()
        return [(name, self.stages[name]) for name in order]

    def dependencies(self, name):
        """Return the predecessors of stage ``name`` in the dependency DAG."""
        return self.dag.predecessors(name)