Пример #1
0
def toDatasets(dsstrs, delim=r'\s', eqdelim='='):
    """Parse dataset description strings into a list of Dataset objects.

    Each string is split into tokens on any character in ``delim``.  A
    token containing ``eqdelim`` is an identifier assignment of the form
    ``name<eqdelim>value``; a token without it names the dataset type.
    Identifier values that look like integers are stored as ints, all
    others are kept as strings.

    :param dsstrs:  iterable of dataset description strings.
    :param delim:   characters that separate tokens; inserted into a
                    regular-expression character class (``[...]``).
    :param eqdelim: string separating an identifier name from its value.
    :return: list with one Dataset per input string.  The dataset type is
             "unknown" when the string names no type; the parsed
             identifiers are stored as a dict in the ``ids`` attribute.

    If a string names more than one type, ``fail()`` is called with a
    message.  NOTE(review): whether ``fail()`` raises is not visible
    here; if it returns, the first type named is used.
    """
    out = []
    splitter = re.compile(r'[%s]' % delim)
    for dsstr in dsstrs:
        # Leading, trailing or repeated delimiters make re.split() yield
        # empty strings; drop them so they are not mistaken for types.
        args = [tok for tok in splitter.split(dsstr) if tok]

        # A list (not filter()) so that len() also works on Python 3.
        typenames = [tok for tok in args if eqdelim not in tok]
        if len(typenames) > 1:
            fail("Dataset with multiple types specified: %s", dsstr)
        if not typenames:
            typenames = ["unknown"]

        ds = Dataset(typenames[0])
        ds.ids = {}
        for arg in args:
            if eqdelim not in arg:
                continue

            name, value = arg.split(eqdelim, 1)
            try:
                value = int(value)
            except ValueError:
                # not an integer: keep the value as a string
                pass
            ds.ids[name] = value
        out.append(ds)

    return out
Пример #2
0
def toDatasets(dsstrs, delim=r'\s', eqdelim='='):
    """Turn dataset description strings into Dataset objects.

    Every string is tokenized on the characters given by ``delim``.
    Tokens that contain ``eqdelim`` are read as ``name<eqdelim>value``
    identifier assignments; the remaining token, if any, is the dataset
    type.  Values are converted to int when possible and otherwise left
    as strings.

    :param dsstrs:  iterable of dataset description strings.
    :param delim:   token-separating characters, placed inside a
                    regular-expression character class (``[...]``).
    :param eqdelim: separator between an identifier name and its value.
    :return: list of Dataset objects, one per input string, each with its
             identifiers in the ``ids`` dict; the type is "unknown" when
             no type token is present.

    More than one type token triggers a call to ``fail()``.
    NOTE(review): ``fail()`` is defined elsewhere; if it does not raise,
    processing continues with the first type token.
    """
    out = []
    token_re = re.compile(r'[%s]' % delim)
    for dsstr in dsstrs:
        # Discard the empty strings re.split() produces for leading,
        # trailing or consecutive delimiters; they are not type names.
        tokens = [t for t in token_re.split(dsstr) if t]

        # Built as a list so len() is valid on Python 3 as well as 2.
        found_types = [t for t in tokens if eqdelim not in t]
        if len(found_types) > 1:
            fail("Dataset with multiple types specified: %s", dsstr)
        if not found_types:
            found_types = ["unknown"]

        ds = Dataset(found_types[0])
        ds.ids = {}
        for token in tokens:
            if eqdelim not in token:
                continue

            key, val = token.split(eqdelim, 1)
            try:
                val = int(val)
            except ValueError:
                # leave non-integer values as strings
                pass
            ds.ids[key] = val
        out.append(ds)

    return out
Пример #3
0
def toDatasets(lines, ctrl, intids=None):
    """Convert dataset description lines into a list of datasets.

    :param lines:  a list of description lines; any value that is not a
                   list is treated as a single line.
    :param ctrl:   mapping of parsing controls.  The keys read here are
                   "intids" (used when ``intids`` is None), "format",
                   "iddelim" and "eqdelim".
    :param intids: names of identifiers whose values must be integers;
                   defaults to ``ctrl["intids"]``.
    :return: list with one entry per line.  When ``ctrl["format"]`` is
             set, each entry is whatever its ``parse(line)`` returns (and
             ``intids`` is not applied).  Otherwise each line is split on
             ``ctrl["iddelim"]`` (or on whitespace when that is unset)
             into tokens: a token containing ``ctrl["eqdelim"]`` is a
             ``name<eqdelim>value`` identifier, and the one remaining
             token, if any, is the dataset type ("unknown" if absent).
    :raises ValueError: if a line names more than one dataset type, or if
             an identifier listed in ``intids`` has a non-integer value.
    """
    if not isinstance(lines, list):
        lines = [lines]
    if intids is None:
        intids = ctrl["intids"]

    if ctrl["format"]:
        # a configured format object does all of the parsing itself
        return [ctrl["format"].parse(line) for line in lines]

    out = []
    for line in lines:
        eqdelim = ctrl["eqdelim"]
        if ctrl["iddelim"]:
            # An explicit delimiter yields empty strings for leading,
            # trailing or repeated delimiters; drop them so they are not
            # mistaken for dataset types.
            args = [tok for tok in line.split(ctrl["iddelim"]) if tok]
        else:
            args = line.split()

        # A list (not filter()) so that len() also works on Python 3.
        typenames = [tok for tok in args if eqdelim not in tok]
        if len(typenames) > 1:
            raise ValueError("Multiple dataset types given: " +
                             " ".join(typenames))
        if not typenames:
            typenames = ["unknown"]

        ds = Dataset(typenames[0])
        ds.ids = {}
        for arg in args:
            if eqdelim not in arg:
                continue
            name, value = arg.split(eqdelim, 1)
            ds.ids[name] = value

        # convert the values of the selected ids to integers
        for idname in intids or ():
            if idname in ds.ids:
                try:
                    ds.ids[idname] = int(ds.ids[idname])
                except ValueError:
                    raise ValueError("ID %s value is not an int: %s" %
                                     (idname, ds.ids[idname]))

        out.append(ds)

    # the original built this list but never handed it back to the caller
    return out
Пример #4
0
    def _determineJobIdentity(self, outputs, inputs=None):
        # return an identifier for the job implied by the outputs and inputs.
        # this identifier is returned in the form of a Dataset type (even
        # though, semantically, it represents a job.
        
        if inputs is None:  inputs = []
        
        if self.jobIdConf:
            # determine our template dataset for our identity
            template = None
            if self.jobIdConf.exists("templateType"):
                # find first dataset (in output, then input) matching
                # this dataset type.
                type = self.jobIdConf.getString("templateType")
                selecttype = lambda d: d.type == type
                template = filter(selecttype, outputs)
                if len(template) == 0: template = filter(selecttype, inputs)
                if len(template) > 0: template = template[0]
            if not template:
                # default to the first output (then input) dataset
                template = len(outputs) > 0 and outputs[0] or inputs[0]

            out = Dataset(template.type)
            if self.jobIdConf.exists("type"):
                out.type = self.jobIdConf.getString("type")
            if self.jobIdConf.exists("id"):
                out.ids = {}
                for id in self.jobIdConf.getStringArray("id"):
                    out.ids[id] = template.ids[id]

            # the identity dataset is complete
            return out

        elif len(outputs) > 0:
            return outputs[0]
        elif len(inputs) > 0:
            return inputs[0]
        else:
            return Dataset("unknown")
Пример #5
0
    def parse(self, line):
        """Parse one line into a Dataset using this object's pattern.

        The named groups of the match returned by ``self.search(line)``
        supply the data.  The "type" group, when the pattern has one,
        becomes the dataset type (default "unknown"); all remaining
        groups become the dataset's ids.  An id whose entry in
        ``self.ids`` is 'i' or 'f' is converted to int or float
        respectively; when conversion fails, ``warn()`` is called and the
        original value is kept.

        NOTE(review): ``self.search()`` presumably returns None when the
        line does not match, which would raise AttributeError below --
        confirm against its definition.
        NOTE(review): no ``return`` statement is visible in this excerpt;
        confirm that the dataset is returned to the caller.
        """
        m = self.search(line)
        data = m.groupdict()

        # pop() rather than "del": a pattern without a "type" group must
        # fall back to "unknown" instead of raising KeyError
        tp = data.pop("type", "unknown")
        out = Dataset(tp)
        out.ids = data

        # iterate over a snapshot of the keys since values are reassigned
        for key in list(out.ids.keys()):
            if self.ids[key] == 'i':
                try:
                    out.ids[key] = int(out.ids[key])
                except ValueError:
                    warn("Value is not an integer: %s", out.ids[key])
            elif self.ids[key] == 'f':
                try:
                    out.ids[key] = float(out.ids[key])
                except ValueError:
                    warn("Value is not a floating-point number: %s",
                         out.ids[key])