예제 #1
0
class PythonSourceToFile(Module):
    """
    Provide a Python code snippet as a file for use with Hadoop Streaming.

    If a file is connected to 'Input File' it is forwarded unchanged.
    Otherwise the URL-quoted text on the 'source' port is decoded and
    written to a new file obtained from the interpreter's file pool. The
    code itself is not passed around, only the file holding it.

    """
    _settings = ModuleSettings(
        namespace='hadoop',
        configure_widget=PythonSourceToFileConfigurationWidget)

    _input_ports = [
        IPort('Input File', File),
        IPort('source', String, optional=True)
    ]

    _output_ports = [OPort('Temporary File', File)]

    def compute(self):
        """Set 'Temporary File' to the input file or to a file with 'source'."""
        inputFile = self.force_get_input('Input File')

        if inputFile is not None:
            # A connected file takes precedence over the inline source.
            tempFile = inputFile
        else:
            # The source is stored URL-quoted; an unset port yields ''.
            source = urllib.unquote(self.force_get_input('source', ''))
            tempFile = self.interpreter.filePool.create_file()
            # The context manager closes the handle even if write() raises.
            with open(tempFile.name, 'w') as f:
                f.write(source)
        self.set_output('Temporary File', tempFile)
예제 #2
0
class vtkPolyDataInspector(vtkDataSetInspector):
    """Inspector module for a VTK object given by connection or directly.

    The object is taken from the producer behind 'SetInputConnection0'
    when that port is set, otherwise from 'SetInput'. The resolved object
    is handed to ``auto_set_results``.
    """

    _settings = ModuleSettings(abstract=False, signature=vtk_hasher)
    _input_ports = [
        ('SetInputConnection0', 'vtkAlgorithmOutput'),
        ('SetInput', 'vtkDataSet'),
    ]
    _output_ports = [
        ('GetVerts', 'vtkCellArray'),
        ('GetLines', 'vtkCellArray'),
        ('GetPolys', 'vtkCellArray'),
        ('GetStrips', 'vtkCellArray'),
        ('GetPoints', 'vtkPoints'),
        ('GetNumberOfVerts', [Integer]),
        ('GetNumberOfLines', [Integer]),
        ('GetNumberOfPolys', [Integer]),
        ('GetNumberOfStrips', [Integer]),
    ]

    def compute(self):
        """Resolve the object to inspect and publish its results.

        Raises ModuleError when the producer behind the connection has no
        ``GetOutput`` method.
        """
        if self.has_input("SetInputConnection0"):
            connection = self.get_input("SetInputConnection0")
            # Unwrap objects that carry the real VTK instance as an attribute.
            if hasattr(connection, "vtkInstance"):
                connection = connection.vtkInstance
            source = connection.GetProducer()
            try:
                target = source.GetOutput()
            except AttributeError:
                raise ModuleError(
                    self, "expected a module that supports GetOutput")
        elif self.has_input("SetInput"):
            target = self.get_input("SetInput")
            if hasattr(target, "vtkInstance"):
                target = target.vtkInstance
        else:
            target = None

        # Nothing connected (or a falsy object): leave the outputs unset.
        if not target:
            return
        self.auto_set_results(target)
예제 #3
0
class RichTextOutput(FileOutput):
    """File-based output module that takes a 'File' on its 'value' port.

    Declares three output modes: HtmlToFileMode, FileToStdoutMode and
    IPythonHtmlMode.
    """
    _settings = ModuleSettings(configure_widget=(
        "vistrails.gui.modules.output_configuration:OutputModuleConfigurationWidget"
    ))
    _input_ports = [
        ('value', 'File'),
    ]
    # NOTE(review): the 50 paired with FileToStdoutMode is presumably a
    # priority value -- confirm against OutputModule.
    _output_modes = [
        HtmlToFileMode,
        (FileToStdoutMode, 50),
        IPythonHtmlMode,
    ]
예제 #4
0
class RichTextOutput(OutputModule):
    """Output module that only sets the output configuration widget.

    No input ports or output modes are declared on this class.
    """
    # TODO: a spreadsheet-specific rich text mode is needed here.
    _settings = ModuleSettings(configure_widget=(
        "vistrails.gui.modules.output_configuration:OutputModuleConfigurationWidget"
    ))
예제 #5
0
파일: bases.py 프로젝트: hjanime/VisTrails
class MplFigureOutput(OutputModule):
    """Output module that takes an 'MplFigure' on its 'value' port.

    Declares two output modes: MplFigureToFile and MplIPythonMode.
    """
    _settings = ModuleSettings(configure_widget=(
        "vistrails.gui.modules.output_configuration:OutputModuleConfigurationWidget"
    ))
    _input_ports = [
        ('value', 'MplFigure'),
    ]
    _output_modes = [
        MplFigureToFile,
        MplIPythonMode,
    ]
예제 #6
0
class vtkDataSetInspector(vtkBaseInspector):
    """Inspector module for a VTK object given by connection or directly.

    The object is taken from the producer behind 'SetInputConnection0'
    when that port is set, otherwise from 'SetInput'. The resolved object
    is handed to ``auto_set_results``.
    """

    _settings = ModuleSettings(abstract=False, signature=vtk_hasher)
    _input_ports = [
        ('SetInputConnection0', 'vtkAlgorithmOutput'),
        ('SetInput', 'vtkDataSet'),
    ]
    _output_ports = [
        ('GetBounds', [Float] * 6),
        ('GetScalarRange', [Float] * 2),
        ('GetLength', [Float]),
        ('GetCenter', [Float] * 3),
        ('GetNumberOfPoints', [Integer]),
        ('GetNumberOfCells', [Integer]),
        ('GetPointData', 'vtkPointData'),
        ('GetCellData', 'vtkCellData'),
    ]

    def compute(self):
        """Resolve the object to inspect and publish its results.

        Raises ModuleError when the producer behind the connection has no
        ``GetOutput`` method.
        """
        if self.has_input("SetInputConnection0"):
            connection = self.get_input("SetInputConnection0")
            # Unwrap objects that carry the real VTK instance as an attribute.
            if hasattr(connection, "vtkInstance"):
                connection = connection.vtkInstance
            source = connection.GetProducer()
            try:
                dataset = source.GetOutput()
            except AttributeError:
                raise ModuleError(
                    self, "expected a module that supports GetOutput")
        elif self.has_input("SetInput"):
            dataset = self.get_input("SetInput")
            if hasattr(dataset, "vtkInstance"):
                dataset = dataset.vtkInstance
        else:
            dataset = None

        # Nothing connected (or a falsy object): leave the outputs unset.
        if not dataset:
            return
        self.auto_set_results(dataset)
예제 #7
0
class URICreator(HadoopBaseModule):
    """
    The class for caching HDFS file onto the TaskNode local drive

    The 'URI' output has the form ``<uri>#<symlink>``; a URI without a
    scheme ('://') is first completed with ``add_prefix`` for the machine.
    The result is stored in the job monitor cache under the module
    signature and reused when a cached entry already exists.

    """
    _settings = ModuleSettings(namespace='hadoop')
    _input_ports = [IPort('HDFS File/URI', String),
                    IPort('Symlink',       String),
                    IPort('Machine',
                          '(org.vistrails.vistrails.remoteq:Machine)')]

    _output_ports = [OPort('Machine',
                           '(org.vistrails.vistrails.remoteq:Machine)'),
                     OPort('URI', String)]

    def compute(self):
        """Build (or fetch from the cache) the URI and set the outputs.

        Raises ModuleError when 'HDFS File/URI' or 'Symlink' is not set.
        """
        machine = self.get_machine()
        jm = self.job_monitor()
        signature = self.signature
        job = jm.getCache(signature)
        if not job:
            uri = self.force_get_input('HDFS File/URI')
            symlink = self.force_get_input('Symlink')
            if uri is None or symlink is None:
                raise ModuleError(self,
                                "Missing 'HDFS File/URI' or 'Symlink' values")
            if '://' not in uri:
                uri = self.add_prefix(uri, machine)
            uri += '#' + symlink
            d = {'uri': uri}
            self.set_job_machine(d, machine)
            jm.setCache(signature, d, self.job_name())
            # Read the entry back so both paths use the cached job object.
            job = jm.getCache(signature)
        self.set_output('URI', job.parameters['uri'])
        self.set_output('Machine', machine)
예제 #8
0
class TrainTestSplit(Module):
    """Randomly split data and target into training and test parts."""
    _settings = ModuleSettings(namespace="cross-validation")
    _input_ports = [
        ("data", "basic:List", {'shape': 'circle'}),
        ("target", "basic:List", {'shape': 'circle'}),
        ("test_size", "basic:Float", {"defaults": [.25]}),
    ]
    _output_ports = [
        ("training_data", "basic:List", {'shape': 'circle'}),
        ("training_target", "basic:List", {'shape': 'circle'}),
        ("test_data", "basic:List", {'shape': 'circle'}),
        ("test_target", "basic:List", {'shape': 'circle'}),
    ]

    def compute(self):
        """Run ``train_test_split`` on the inputs and set the four outputs."""
        data = self.get_input("data")
        target = self.get_input("target")
        test_size = try_convert(self.get_input("test_size"))

        # train_test_split returns the pieces in this order.
        X_train, X_test, y_train, y_test = train_test_split(
            data, target, test_size=test_size)

        results = (
            ("training_data", X_train),
            ("training_target", y_train),
            ("test_data", X_test),
            ("test_target", y_test),
        )
        for port, value in results:
            self.set_output(port, value)
예제 #9
0
class HDFSEnsureNew(HadoopBaseModule):
    """
    Remove the named HDFS entry if it exists, so the name is free to use.

    """
    _settings = ModuleSettings(namespace='hadoop')
    _input_ports = [
        IPort('Name', String),
        IPort('Machine', '(org.vistrails.vistrails.remoteq:Machine)'),
    ]

    _output_ports = [
        OPort('Machine', '(org.vistrails.vistrails.remoteq:Machine)'),
        OPort('Name', String),
    ]

    def __init__(self):
        HadoopBaseModule.__init__(self)

    def compute(self):
        """Delete the entry unless a cached job exists, then set outputs."""
        machine = self.get_machine()
        monitor = self.job_monitor()
        key = self.signature
        job = monitor.getCache(key)
        if not job:
            entry_name = self.get_input('Name')
            if '://' not in entry_name:
                entry_name = self.add_prefix(entry_name, machine)

            # 'dfs -test -e' exits with 0 when the entry exists; the exit
            # status is echoed so it can be parsed from the command output.
            test_command = 'dfs -test -e ' + entry_name + '; echo $?'
            exit_status = int(self.call_hdfs(test_command, machine))
            if not exit_status:
                # -rmr is deprecated ('-rm -r' is the replacement) but is
                # what is used here.
                self.call_hdfs('dfs -rmr ' + entry_name, machine)

            params = {'entry_name': entry_name}
            self.set_job_machine(params, machine)
            monitor.setCache(key, params, self.job_name())
            job = monitor.getCache(key)
        self.set_output('Name', job.parameters['entry_name'])
        self.set_output('Machine', machine)
예제 #10
0
파일: common.py 프로젝트: licode/VisTrails
class TableOutput(OutputModule):
    """Output module that takes a 'Table' on its 'value' port.

    Declares a single output mode, TableToFileMode.
    """
    _settings = ModuleSettings(configure_widget=(
        "vistrails.gui.modules.output_configuration:OutputModuleConfigurationWidget"
    ))
    _input_ports = [
        ('value', 'Table'),
    ]
    _output_modes = [
        TableToFileMode,
    ]
예제 #11
0
파일: hdfs.py 프로젝트: hjanime/VisTrails
class HDFSGet(HadoopBaseModule):
    """
    Getting a file from the Hadoop DFS
    Then getting it from the server

    The entry is first fetched into a temporary path on the remote
    machine, then synced to the local path and the temporary path is
    removed. Results are cached in the job monitor under the module
    signature.

    """
    _settings = ModuleSettings(namespace='hadoop')
    _input_ports = [
        IPort('Local File', Path),
        IPort('Remote Location', String),
        IPort('Override', Boolean),
        IPort('Machine', '(org.vistrails.vistrails.remoteq:Machine)')
    ]

    _output_ports = [
        OPort('Machine', '(org.vistrails.vistrails.remoteq:Machine)'),
        OPort('Local File', File)
    ]

    def __init__(self):
        HadoopBaseModule.__init__(self)

    def compute(self):
        """Download the remote entry unless a cached job exists.

        Raises ModuleError when the local path already exists and
        'Override' is not set.
        """
        machine = self.get_machine()
        jm = self.job_monitor()
        signature = self.signature
        job = jm.getCache(signature)
        if not job:
            remote = self.get_input('Remote Location')
            local = self.get_input('Local File')
            override = self.force_get_input('Override', False)
            if '://' not in remote:
                remote = self.add_prefix(remote, machine)
            if os.path.exists(local.name):
                if not override:
                    raise ModuleError(self, 'Output already exists')
                # Overriding: clear whatever is at the destination first.
                if os.path.isdir(local.name):
                    shutil.rmtree(local.name)
                else:
                    os.unlink(local.name)

            # 'mktemp -d -u' only prints a name; 'dfs -get' is given it as
            # the destination path.
            remote_tmp = machine.remote.send_command('mktemp -d -u').strip()
            self.call_hdfs('dfs -get %s %s' % (remote, remote_tmp), machine)
            machine.local.send_command('mkdir %s' % local.name)
            # Copying file by file is too slow with many files, so the
            # transfer is done with tar.
            machine.sync(local.name,
                         remote_tmp,
                         mode=machine.MODE_REMOTE_LOCAL,
                         use_tar=True)
            machine.remote.rm(remote_tmp, force=True, recursively=True)
            d = {'remote': remote, 'local': local.name}
            self.set_job_machine(d, machine)
            jm.setCache(signature, d, self.job_name())
            job = jm.getCache(signature)
        self.set_output('Local File', PathObject(job.parameters['local']))
        self.set_output('Machine', machine)
예제 #12
0
class FileOutput(OutputModule):
    """Output module that takes a 'File' on its 'value' port.

    Declares FileToStdoutMode and FileToFileMode, each paired with a number.
    """
    _settings = ModuleSettings(configure_widget=(
        "vistrails.gui.modules.output_configuration:OutputModuleConfigurationWidget"
    ))
    _input_ports = [
        ('value', 'File'),
    ]
    # Stdout is given low priority, which is probably a bad plan.
    _output_modes = [
        (FileToStdoutMode, 50),
        (FileToFileMode, 200),
    ]
예제 #13
0
class ImageOutput(FileOutput):
    """File output module for images, taking a 'File' on its 'value' port."""
    _settings = ModuleSettings(configure_widget=(
        "vistrails.gui.modules.output_configuration:OutputModuleConfigurationWidget"
    ))
    _input_ports = [
        ('value', 'File'),
    ]
    # Image data is definitely binary, so FileToStdoutMode is disabled
    # (registered with -1).
    _output_modes = [
        FileToFileMode,
        (FileToStdoutMode, -1),
    ]
예제 #14
0
class FileOutput(OutputModule):
    """Output module that takes a 'File' on its 'value' port.

    Declares two output modes: FileToStdoutMode and FileToFileMode.
    """
    _settings = ModuleSettings(configure_widget=(
        "vistrails.gui.modules.output_configuration:OutputModuleConfigurationWidget"
    ))
    _input_ports = [
        ('value', 'File'),
    ]
    # TODO: the file mode should be given a higher priority here.
    _output_modes = [
        FileToStdoutMode,
        FileToFileMode,
    ]
예제 #15
0
class Abstraction(Group):
    """Module registered as "SubWorkflow", with its descriptor hidden."""
    # Abstraction subclasses Group so that the module hierarchy is right.
    # Its pipeline comes from somewhere else, though, so transfer_attrs()
    # is overridden to call Module's version directly.
    _settings = ModuleSettings(name="SubWorkflow", hide_descriptor=True)

    def transfer_attrs(self, module):
        """Transfer attributes using Module's implementation only."""
        Module.transfer_attrs(self, module)
예제 #16
0
class SubmitShellJob(BaseSubmitJob):
    """Submits a shell script given on the 'source' port as a job.
    """
    _settings = ModuleSettings(
        configure_widget=('%s.widgets' % this_pkg,
                          'ShellSourceConfigurationWidget'))
    _input_ports = [
        ('source', '(basic:String)'),
    ]
    _output_ports = [
        ('stderr', '(basic:File)'),
        ('stdout', '(basic:File)'),
    ]

    def job_start(self, params):
        """Write the source to a temporary 'start.sh' and submit it.

        If the queue already knows the job id, nothing is submitted. The
        given params are returned unchanged either way.
        """
        queue = QueueCache.get(
            params['destination'],
            params['queue'],
            params.get('setup_runtime') or None,
            params.get('need_runtime') or None)

        # Ask the queue about the job; only the "not found" style errors
        # mean that a new submission is required.
        try:
            with ServerLogger.hide_output():
                queue.status(params['job_id'])
        except (tej.JobNotFound, tej.QueueDoesntExist):
            already_submitted = False
        else:
            already_submitted = True
        if already_submitted:
            return params

        job_directory = self.interpreter.filePool.create_directory(
            prefix='vt_tmp_shelljob_').name
        source = urllib.unquote(self.get_input('source'))

        # io.open() is used because the script may be written on Windows
        # before being uploaded to a POSIX server: text is written with
        # '\n' line endings, bytes are written as they are.
        if isinstance(source, bytes):
            open_args = {'mode': 'wb'}
        else:
            open_args = {'mode': 'w', 'newline': '\n'}
        script_path = os.path.join(job_directory, 'start.sh')
        with io.open(script_path, **open_args) as script:
            script.write(source)

        queue.submit(params['job_id'], job_directory)

        return params

    def job_set_results(self, params):
        """Download the job's '_stderr' and '_stdout' and set the outputs.
        """
        super(SubmitShellJob, self).job_set_results(params)

        download_dir = self.interpreter.filePool.create_directory(
            prefix='vt_tmp_shelljobout_').name
        queue = QueueCache.get(
            params['destination'],
            params['queue'],
            params.get('setup_runtime') or None,
            params.get('need_runtime') or None)
        queue.download(params['job_id'], ['_stderr', '_stdout'],
                       directory=download_dir)

        for port, filename in (('stderr', '_stderr'), ('stdout', '_stdout')):
            self.set_output(
                port, PathObject(os.path.join(download_dir, filename)))
예제 #17
0
class TFOperation(Module):
    """Abstract base for TensorFlow operations run by Run as part of the graph.
    """
    _settings = ModuleSettings(abstract=True)
    _output_ports = [
        ('output', '(org.vistrails.vistrails.tensorflow:TFOperation)'),
    ]

    def compute(self):
        """Not implemented on this abstract class."""
        raise NotImplementedError
예제 #18
0
파일: common.py 프로젝트: licode/VisTrails
class BuildTable(Module):
    """Assembles one table from the columns supplied on the input ports.

    Each input is either a list, which becomes a single column named after
    its port, or a whole table, whose columns are all added. Columns of a
    table without names are named "<port> col <index>".
    """
    _settings = ModuleSettings(
        configure_widget='vistrails.packages.tabledata.widgets:BuildTableWidget'
    )
    _output_ports = [
        ('value', Table),
    ]

    def __init__(self):
        Module.__init__(self)
        # Names of the input ports, in the order columns are to be added.
        self.input_ports_order = []

    def transfer_attrs(self, module):
        """Record the order of the input port specs of the given module."""
        Module.transfer_attrs(self, module)
        self.input_ports_order = [
            spec.name for spec in module.input_port_specs]

    def compute(self):
        """Collect all columns and set 'value' to the resulting table.

        Raises ModuleError when there are no inputs or when the inputs do
        not all have the same number of rows.
        """
        items = None
        if self.input_ports_order:  # pragma: no branch
            items = [(port, self.get_input(port))
                     for port in self.input_ports_order]
        if not items:
            raise ModuleError(self, "No inputs were provided")

        nb_rows = None
        cols = []
        names = []
        for portname, item in items:
            is_table = isinstance(item, TableObject)
            row_count = item.rows if is_table else len(item)

            # The first input fixes the row count; later ones must match.
            if nb_rows is None:
                nb_rows = row_count
            elif row_count != nb_rows:
                raise ModuleError(
                    self, "Different row counts: %d != %d" %
                    (row_count, nb_rows))

            if not is_table:
                cols.append(item)
                names.append(portname)
                continue

            cols.extend(item.get_column(c) for c in xrange(item.columns))
            if item.names is not None:
                names.extend(item.names)
            else:
                # One generated name per column that was just added.
                unnamed = len(cols) - len(names)
                names.extend("%s col %d" % (portname, i)
                             for i in xrange(unnamed))

        self.set_output('value', TableObject(cols, nb_rows, names))
예제 #19
0
class SubmitJob(AssembleDirectoryMixin, BaseSubmitJob):
    """Submits a generic job, i.e. a directory containing a start script.
    """
    _settings = ModuleSettings(
        configure_widget=('%s.widgets' % this_pkg,
                          'DirectoryConfigurationWidget'))
    _input_ports = [
        ('job', '(basic:Directory)', {'optional': True}),
        ('script', '(basic:String)', {'optional': True,
                                      'defaults': "['start.sh']"}),
    ]

    def __init__(self):
        AssembleDirectoryMixin.__init__(self)
        Module.__init__(self)

    def job_start(self, params):
        """Assemble the job directory and submit it to the queue.

        If the queue already knows the job id, nothing is submitted. The
        given params are returned unchanged either way. Raises ModuleError
        when the 'job' directory or the script inside the assembled
        directory does not exist.
        """
        queue = QueueCache.get(
            params['destination'],
            params['queue'],
            params.get('setup_runtime') or None,
            params.get('need_runtime') or None)

        # Ask the queue about the job; only the "not found" style errors
        # mean that a new submission is required.
        try:
            with ServerLogger.hide_output():
                queue.status(params['job_id'])
        except (tej.JobNotFound, tej.QueueDoesntExist):
            already_submitted = False
        else:
            already_submitted = True
        if already_submitted:
            return params

        base_dir = None
        if self.has_input('job'):
            base_dir = self.get_input('job')
            if not os.path.exists(base_dir.name):
                raise ModuleError(self, "Directory doesn't exist")

        # AssembleDirectoryMixin adds the files coming from the port specs.
        job_dir = self.assemble_directory(base_dir, False)

        script = self.get_input('script')
        script_path = os.path.join(job_dir.name, script)
        if not os.path.exists(script_path):
            raise ModuleError(self, "Script does not exist")

        queue.submit(params['job_id'], job_dir.name, script)
        return params
예제 #20
0
class UnsupervisedEstimator(Estimator):
    """Abstract base class for all sklearn transformer modules."""
    _settings = ModuleSettings(abstract=True)

    def compute(self):
        """Build the estimator from the input ports and output it as 'model'.

        Every input port other than the training ports is passed to the
        estimator class as a keyword argument. The estimator is fitted only
        when 'training_data' is among the input ports.
        """
        data_ports = ("training_data", "training_target")
        params = {}
        for port in self.inputPorts:
            if port in data_ports:
                continue
            params[port] = try_convert(self.get_input(port))

        estimator = self._estimator_class(**params)
        if "training_data" in self.inputPorts:
            estimator.fit(np.vstack(self.get_input("training_data")))
        self.set_output("model", estimator)
예제 #21
0
class ManifoldLearner(Module):
    """Abstract base class for all sklearn manifold modules.
    """
    _settings = ModuleSettings(abstract=True)
    _output_ports = [
        ("transformed_data", "basic:List", {'shape': 'circle'}),
    ]

    def compute(self):
        """Fit the estimator on 'training_data' and output the transform.

        Every input port other than 'training_data' is passed to the
        estimator class as a keyword argument.
        """
        params = {}
        for port in self.inputPorts:
            if port == "training_data":
                continue
            params[port] = try_convert(self.get_input(port))

        learner = self._estimator_class(**params)
        stacked = np.vstack(self.get_input("training_data"))
        self.set_output("transformed_data", learner.fit_transform(stacked))
예제 #22
0
파일: init.py 프로젝트: hjanime/VisTrails
class vtkRendererOutput(OutputModule):
    """Output module for 'vtkRenderer' values.

    The file and IPython modes are always declared; the spreadsheet mode
    is appended at class creation time only if the registry has the
    spreadsheet package's SpreadsheetCell module.
    """
    _settings = ModuleSettings(
        configure_widget="vistrails.gui.modules."
        "output_configuration:OutputModuleConfigurationWidget")
    _input_ports = [
        ('value', 'vtkRenderer', {'depth': 1}),
        ('interactorStyle', 'vtkInteractorStyle'),
        ('picker', 'vtkAbstractPicker'),
    ]
    _output_modes = [
        vtkRendererToFile,
        vtkRendererToIPythonMode,
    ]
    # The import is kept inside the check so that vtkcell is only loaded
    # when the spreadsheet module is registered.
    if registry.has_module('org.vistrails.vistrails.spreadsheet',
                           'SpreadsheetCell'):
        from .vtkcell import vtkRendererToSpreadsheet
        _output_modes.append(vtkRendererToSpreadsheet)
예제 #23
0
class Iris(Module):
    """Example dataset: iris, as loaded by ``datasets.load_iris()``.
    """
    _settings = ModuleSettings(namespace="datasets")
    _output_ports = [
        ("data", "basic:List", {'shape': 'circle'}),
        ("target", "basic:List", {'shape': 'circle'}),
    ]

    def compute(self):
        """Load the dataset and output its 'data' and 'target' attributes."""
        iris = datasets.load_iris()
        for port in ("data", "target"):
            self.set_output(port, getattr(iris, port))
예제 #24
0
class MakeDirectory(AssembleDirectoryMixin, Module):
    """Creates a temporary directory and puts the given files in it.

    The directory is built by ``assemble_directory`` from
    AssembleDirectoryMixin.
    """
    _settings = ModuleSettings(
        configure_widget=('%s.widgets' % this_pkg,
                          'DirectoryConfigurationWidget'))
    _output_ports = [
        ('directory', '(basic:Directory)'),
    ]

    def __init__(self):
        AssembleDirectoryMixin.__init__(self)
        Module.__init__(self)

    def compute(self):
        """Assemble the directory and set it on the 'directory' port."""
        self.set_output('directory', self.assemble_directory())
예제 #25
0
class vtkDataArrayInspector(vtkBaseInspector):
    """Inspector module for the VTK object given on 'SetInput'.

    The object is handed to ``auto_set_results``.
    """

    _settings = ModuleSettings(abstract=False, signature=vtk_hasher)
    _input_ports = [
        ('SetInput', 'vtkDataArray'),
    ]
    _output_ports = [
        ('GetMaxNorm', [Float]),
        ('GetRange', [Float] * 2),
    ]

    def compute(self):
        """Publish the results for the input object, if there is one."""
        if not self.has_input("SetInput"):
            return
        array = self.get_input("SetInput")
        # Unwrap objects that carry the real VTK instance as an attribute.
        if hasattr(array, "vtkInstance"):
            array = array.vtkInstance
        if array:
            self.auto_set_results(array)
예제 #26
0
class minimize(TFOperation):
    # The documentation shown for this module is the one of the TensorFlow
    # Optimizer class.
    __doc__ = tensorflow.train.Optimizer.__doc__

    _settings = ModuleSettings(namespace='train|optimizer')
    _input_ports = [
        ('optimizer', Optimizer),
        ('loss', TFOperation),
        ('global_step', Variable, {'optional': True}),
        ('var_list', Variable, {'depth': 1, 'optional': True}),
        ('gate_gradients', '(basic:String)', {
            'optional': True,
            'entry_types': '["enum"]',
            'values': '[["GATE_NONE", "GATE_OP", "GATE_GRAPH"]]',
        }),
        ('name', '(basic:String)', {'optional': True}),
    ]

    # Maps the names offered on the 'gate_gradients' port to the constants
    # defined on the TensorFlow Optimizer class.
    _GATE_GRADIENTS = {
        'GATE_NONE': tensorflow.train.Optimizer.GATE_NONE,
        'GATE_OP': tensorflow.train.Optimizer.GATE_OP,
        'GATE_GRAPH': tensorflow.train.Optimizer.GATE_GRAPH,
    }

    def compute(self):
        """Output an Op that calls ``optimizer.minimize`` with the inputs.

        'gate_gradients' is only passed on when the port is set; the
        optional 'global_step' and 'var_list' ports likewise.
        """
        gate_gradients = None
        if self.has_input('gate_gradients'):
            choice = self.get_input('gate_gradients')
            gate_gradients = self._GATE_GRADIENTS[choice]
        name = self.force_get_input('name')

        def output(optimizer, loss, **kwargs):
            # Called by the Op with the values given in its argument dict.
            kw = {'loss': loss, 'name': name}
            if gate_gradients is not None:
                kw['gate_gradients'] = gate_gradients
            kw.update(kwargs)
            return optimizer.minimize(**kw)

        op_args = {
            'optimizer': self.get_input('optimizer'),
            'loss': self.get_input('loss'),
        }
        for port in ('global_step', 'var_list'):
            if self.has_input(port):
                op_args[port] = self.get_input(port)
        self.set_output('output', Op(output, op_args))
예제 #27
0
class vtkRendererOutput(OutputModule):
    """Output module that takes a 'vtkRenderer' on its 'value' port.

    Declares a single output mode, vtkRendererToFile.
    """
    # DAK: no render view here, use a separate module for this...
    _settings = ModuleSettings(configure_widget=(
        "vistrails.gui.modules.output_configuration:OutputModuleConfigurationWidget"
    ))
    # DK: the following ports can be enabled, I think; they just have to
    # be loaded without the spreadsheet being enabled:
    #   ('interactionHandler', 'vtkInteractionHandler'),
    #   ('interactorStyle', 'vtkInteractorStyle'),
    #   ('picker', 'vtkAbstractPicker')
    _input_ports = [
        ('value', 'vtkRenderer'),
    ]
    _output_modes = [
        vtkRendererToFile,
    ]
예제 #28
0
def gen_class_module(spec, lib, klasses, **module_settings):
    """Create a module class from a python class specification

    Parameters
    ----------
    spec : ClassSpec
        A class to module specification
    lib
        Stored on the new class as ``_lib``
    klasses : dict
        Classes generated so far, keyed by module name. It is used to look
        up the superclass (BaseClassModule when not found) and the new
        class is added to it.
    **module_settings
        Settings for the module; entries returned by
        ``spec.get_module_settings()`` override them.

    Returns
    -------
    The newly created class.
    """
    module_settings.update(spec.get_module_settings())
    settings = ModuleSettings(**module_settings)

    # Turn the input/output specs into VT port objects.
    input_ports = []
    for ispec in spec.input_port_specs:
        input_ports.append(CIPort(ispec.name, ispec.get_port_type(),
                                  **ispec.get_port_attrs()))
    output_ports = []
    for ospec in spec.output_port_specs:
        output_ports.append(COPort(ospec.name, ospec.get_port_type(),
                                   **ospec.get_port_attrs()))
    # The instance output port always comes first.
    output_ports.insert(0, COPort('Instance', spec.module_name))

    # Lookup tables from port name to port spec.
    input_spec_table = {ps.name: ps for ps in spec.input_port_specs}
    output_spec_table = {ps.name: ps for ps in spec.output_port_specs}

    namespace = {
        '__module__': __name__,
        '_settings': settings,
        '__doc__': spec.docstring,
        '__name__': spec.name or spec.module_name,
        '_input_ports': input_ports,
        '_output_ports': output_ports,
        '_input_spec_table': input_spec_table,
        '_output_spec_table': output_spec_table,
        '_module_spec': spec,
        'is_cacheable': lambda self: spec.cacheable,
        '_lib': lib,
    }

    base = klasses.get(spec.superklass, BaseClassModule)
    generated = type(str(spec.module_name), (base,), namespace)
    klasses[spec.module_name] = generated
    return generated
예제 #29
0
파일: hdfs.py 프로젝트: hjanime/VisTrails
class HDFSPut(HadoopBaseModule):
    """
    Putting a local file to the Hadoop DFS
    First copying it to the server

    The file is sent to a temporary path on the remote machine, put into
    HDFS from there, and the temporary path is removed. Results are cached
    in the job monitor under the module signature.
    """
    _settings = ModuleSettings(namespace='hadoop')
    _input_ports = [
        IPort('Local File', File),
        IPort('Remote Location', String),
        IPort('Override', Boolean),
        IPort('Machine', '(org.vistrails.vistrails.remoteq:Machine)')
    ]

    _output_ports = [
        OPort('Machine', '(org.vistrails.vistrails.remoteq:Machine)'),
        OPort('Remote Location', String)
    ]

    def __init__(self):
        HadoopBaseModule.__init__(self)

    def compute(self):
        """Upload the local file unless a cached job exists.

        Raises ModuleError when the remote entry already exists and
        'Override' is not set.
        """
        machine = self.get_machine()
        jm = self.job_monitor()
        signature = self.signature
        job = jm.getCache(signature)
        if not job:
            remote = self.get_input('Remote Location')
            local = self.get_input('Local File')
            override = self.force_get_input('Override', False)
            if '://' not in remote:
                remote = self.add_prefix(remote, machine)
            # 'dfs -test -e' exits with 0 when the entry exists; the exit
            # status is echoed so it can be parsed from the command output.
            if not int(
                    self.call_hdfs('dfs -test -e ' + remote + '; echo $?',
                                   machine)):
                if override:
                    self.call_hdfs('dfs -rm -r ' + remote, machine)
                else:
                    raise ModuleError(self, 'Remote entry already exists')
            # 'mktemp -u' only prints a name; the file is sent to that path.
            remote_tmp = machine.remote.send_command('mktemp -u').strip()
            machine.sendfile(local.name, remote_tmp)
            self.call_hdfs('dfs -put %s %s' % (remote_tmp, remote), machine)
            machine.remote.rm(remote_tmp, force=True, recursively=True)
            d = {'remote': remote, 'local': local.name}
            self.set_job_machine(d, machine)
            jm.setCache(signature, d, self.job_name())
            # Read the entry back with getCache(), matching the lookup
            # above and the other Hadoop modules (this used getJob()).
            job = jm.getCache(signature)
        self.set_output('Remote Location', job.parameters['remote'])
        self.set_output('Machine', machine)
예제 #30
0
class PersistedDir(PersistedPath):
    """Records a directory in the file store.

    Only the port types, the configuration widget and the path type check
    are defined on this class.
    """

    _input_ports = [
        IPort('path', Directory),
        IPort('metadata', Metadata, optional=True),
    ]
    _output_ports = [
        OPort('path', Directory),
    ]
    _settings = ModuleSettings(
        configure_widget=
        'vistrails.packages.persistent_archive.widgets:SetMetadataWidget')

    def check_path_type(self, path):
        """Raise ModuleError unless the given path is a directory."""
        if os.path.isdir(path):
            return
        raise ModuleError(self, "Path is not a directory")