Example #1
    def build_iter(self):
        """
        Iteratively build model, yielding any problems as :class:`ValidationError` instances.

        For debugging, the unified model at :attr:`model` may contain intermediate results at any time,
        even if construction has failed.  Check the :attr:`_errored` flag if necessary.
        """

        steps = [
            (self._prevalidate, 'prevalidate'),
            (self._merge,       'merge'),
            (self._validate,    'validate'),
            (self._curate,      'curate'),
        ]
        self._errored = False
        self.model = None

        for (i, (step, step_name)) in enumerate(steps, start=1):
            try:
                for err in step():
                    yield err
            except ValidationError as ex:
                self._errored = True
                yield ex

            except Exception as ex:
                self._errored = True

                nex = ValidationError(
                    'Model step-%i(%s) failed due to: %s' % (i, step_name, ex))
                nex.cause = ex

                yield nex

            if self._errored:
                yield ValidationError('Gave-up building model after step %i.%s (out of %i).' % (i, step_name, len(steps)))
                break
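A runnable sketch of how a caller might drive such a generator end to end; the `Builder` class and its two steps below are toy stand-ins, not classes from the original project:

from jsonschema import ValidationError

class Builder:
    """Toy stand-in exposing the same build_iter() contract as above."""

    def _prevalidate(self):
        yield ValidationError("demo: 'name' is missing")   # fabricated problem

    def _merge(self):
        raise RuntimeError("demo merge failure")

    def build_iter(self):
        self._errored = False
        steps = [(self._prevalidate, 'prevalidate'), (self._merge, 'merge')]
        for i, (step, step_name) in enumerate(steps, start=1):
            try:
                for err in step():
                    yield err
            except Exception as ex:
                self._errored = True
                nex = ValidationError('Model step-%i(%s) failed due to: %s' % (i, step_name, ex))
                nex.cause = ex
                yield nex
            if self._errored:
                yield ValidationError('Gave-up building model after step %i.%s (out of %i).' % (i, step_name, len(steps)))
                break

for problem in Builder().build_iter():
    print(problem.message)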
Example #2
    def _rule_properties_draft3(self, properties, instance, schema):
        if not self.is_type(instance, "object"):
            return

        for prop, subschema in self._iter_iprop_pairs(properties):
            if self._is_iprop_in(instance, prop):
                for error in self.descend(
                    self._get_iprop(instance, prop),
                    subschema,
                    path=prop,
                    schema_path=prop,
                ):
                    yield error
            elif subschema.get("required", False):
                error = ValidationError("%r is a required prop" % prop)
                error._set(
                    validator="required",
                    validator_value=subschema["required"],
                    instance=instance,
                    schema=schema,
                )
                error.path.appendleft(prop)
                error.schema_path.extend([prop, "required"])
                yield error
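The rule above re-implements draft-3 semantics, where `required` lives inside each property's subschema; the stock jsonschema Draft3Validator behaves the same way, as this small self-contained check shows (assuming jsonschema is installed):

from jsonschema import Draft3Validator

# Draft-3 puts "required" inside the property subschema rather than at object level.
schema = {"properties": {"name": {"type": "string", "required": True}}}
for error in Draft3Validator(schema).iter_errors({}):
    print(error.message)   # 'name' is a required property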
Example #3
def convert_duke_haase(schema,
                       encoding,
                       input_file,
                       verbose=True,
                       output=True,
                       output_file=None,
                       config={},
                       enforce_validation=True,
                       reactor=None):

    DEFAULT_BEAD_MODEL = "SpheroTech URCP-38-2K"
    DEFAULT_BEAD_BATCH = "AJ02"

    duke_cytometer_configuration = """{
        "channels": [{
                "emission_filter": {
                    "center": 488,
                    "type": "bandpass",
                    "width": 10
                },
                "excitation_wavelength": 488,
                "name": "FSC-A"
            },
            {
                "emission_filter": {
                    "center": 488,
                    "type": "bandpass",
                    "width": 10
                },
                "excitation_wavelength": 488,
                "name": "SSC-A"
            },
            {
                "emission_filter": {
                    "center": 530,
                    "type": "bandpass",
                    "width": 30
                },
                "excitation_wavelength": 488,
                "name": "BL1-A"
            },
            {
                "emission_filter": {
                    "center": 590,
                    "type": "bandpass",
                    "width": 40
                },
                "excitation_wavelength": 488,
                "name": "BL2-A"
            },
            {
                "emission_filter": {
                    "center": 695,
                    "type": "bandpass",
                    "width": 40
                },
                "excitation_wavelength": 488,
                "name": "BL3-A"
            },
            {
                "emission_filter": {
                    "center": 780,
                    "type": "bandpass",
                    "width": 60
                },
                "excitation_wavelength": 561,
                "name": "YL4-A"
            },
            {
                "emission_filter": {
                    "center": 695,
                    "type": "bandpass",
                    "width": 40
                },
                "excitation_wavelength": 561,
                "name": "YL3-A"
            },
            {
                "emission_filter": {
                    "center": 620,
                    "type": "bandpass",
                    "width": 15
                },
                "excitation_wavelength": 561,
                "name": "YL2-A"
            },
            {
                "emission_filter": {
                    "center": 585,
                    "type": "bandpass",
                    "width": 16
                },
                "excitation_wavelength": 561,
                "name": "YL1-A"
            },
            {
                "emission_filter": {
                    "center": 488,
                    "type": "bandpass",
                    "width": 10
                },
                "excitation_wavelength": 488,
                "name": "FSC-H"
            },
            {
                "emission_filter": {
                    "center": 488,
                    "type": "bandpass",
                    "width": 10
                },
                "excitation_wavelength": 488,
                "name": "SSC-H"
            },
            {
                "emission_filter": {
                    "center": 530,
                    "type": "bandpass",
                    "width": 30
                },
                "excitation_wavelength": 488,
                "name": "BL1-H"
            },
            {
                "emission_filter": {
                    "center": 590,
                    "type": "bandpass",
                    "width": 40
                },
                "excitation_wavelength": 488,
                "name": "BL2-H"
            },
            {
                "emission_filter": {
                    "center": 695,
                    "type": "bandpass",
                    "width": 40
                },
                "excitation_wavelength": 488,
                "name": "BL3-H"
            },
            {
                "emission_filter": {
                    "center": 780,
                    "type": "bandpass",
                    "width": 60
                },
                "excitation_wavelength": 561,
                "name": "YL4-H"
            },
            {
                "emission_filter": {
                    "center": 695,
                    "type": "bandpass",
                    "width": 40
                },
                "excitation_wavelength": 561,
                "name": "YL3-H"
            },
            {
                "emission_filter": {
                    "center": 620,
                    "type": "bandpass",
                    "width": 15
                },
                "excitation_wavelength": 561,
                "name": "YL2-H"
            },
            {
                "emission_filter": {
                    "center": 585,
                    "type": "bandpass",
                    "width": 16
                },
                "excitation_wavelength": 561,
                "name": "YL1-H"
            },
            {
                "emission_filter": {
                    "center": 488,
                    "type": "bandpass",
                    "width": 10
                },
                "excitation_wavelength": 488,
                "name": "FSC-W"
            },
            {
                "emission_filter": {
                    "center": 488,
                    "type": "bandpass",
                    "width": 10
                },
                "excitation_wavelength": 488,
                "name": "SSC-W"
            },
            {
                "emission_filter": {
                    "center": 530,
                    "type": "bandpass",
                    "width": 30
                },
                "excitation_wavelength": 488,
                "name": "BL1-W"
            },
            {
                "emission_filter": {
                    "center": 590,
                    "type": "bandpass",
                    "width": 40
                },
                "excitation_wavelength": 488,
                "name": "BL2-W"
            },
            {
                "emission_filter": {
                    "center": 695,
                    "type": "bandpass",
                    "width": 40
                },
                "excitation_wavelength": 488,
                "name": "BL3-W"
            },
            {
                "emission_filter": {
                    "center": 780,
                    "type": "bandpass",
                    "width": 60
                },
                "excitation_wavelength": 561,
                "name": "YL4-W"
            },
            {
                "emission_filter": {
                    "center": 695,
                    "type": "bandpass",
                    "width": 40
                },
                "excitation_wavelength": 561,
                "name": "YL3-W"
            },
            {
                "emission_filter": {
                    "center": 620,
                    "type": "bandpass",
                    "width": 15
                },
                "excitation_wavelength": 561,
                "name": "YL2-W"
            },
            {
                "emission_filter": {
                    "center": 585,
                    "type": "bandpass",
                    "width": 16
                },
                "excitation_wavelength": 561,
                "name": "YL1-W"
            }
        ]
    }"""

    duke_cytometer_configuration_object = json.loads(
        duke_cytometer_configuration)

    cytometer_channels = []
    for channel in duke_cytometer_configuration_object['channels']:
        if channel['name'].endswith("-A"):
            cytometer_channels.append(channel['name'])

    if reactor is not None:
        helper = AgaveHelper(reactor.client)
        print("Helper loaded")
    else:
        print("Helper not loaded")

    # for SBH Librarian Mapping
    sbh_query = SynBioHubQuery(SD2Constants.SD2_SERVER)
    sbh_query.login(config["sbh"]["user"], config["sbh"]["password"])

    input_fp_csvreader = csv.reader(open(input_file))

    output_doc = {}

    lab = SampleConstants.LAB_DUKE_HAASE

    output_doc[SampleConstants.LAB] = lab
    output_doc[SampleConstants.SAMPLES] = []

    headers = None
    is_cfu = False
    doe_format = "%Y%m%d"

    # This converter reads both CFU and FCS formatted metadata from Duke. They have different sets of fields
    # We key on the presence of a CFU column to determine which we are parsing
    # CFU Fields
    # 0 strain
    # 1 replicate
    # 2 treatment
    # 3 treatment_concentration
    # 4 treatment_concentration_unit
    # 5 treatment_time
    # 6 treatment_time_unit
    # 7 CFU
    # 8 culture_cells/ml
    # 9 date_of_experiment
    # 10 experiment_reference_url
    # 11 experiment_reference
    # 12 experiment_id
    # 13 parent_id
    # 14 estimated_cells_plated
    # 15 estimated_cells/ml
    # 16 percent_killed
    # 17 strain_class
    # 18 control_type
    # 19 sample_id
    #
    # FCS Fields
    # 0 strain
    # 1 replicate
    # 2 treatment
    # 3 treatment_concentration
    # 4 treatment_concentration_unit
    # 5 treatment_time
    # 6 treatment_time_unit
    # 7 culture_cells/ml
    # 8 date_of_experiment
    # 9 experiment_reference_url
    # 10 experiment_reference
    # 11 experiment_id
    # 12 parent_id
    # 13 strain_class
    # 14 control_type
    # 15 fcs_filename
    # 16 sytox_color
    # 17 sytox_concentration
    # 18 sytox_concentration_unit
    # 19 sample_id

    header_map = {}

    for row in input_fp_csvreader:
        if row[0] == "strain":
            headers = row

            for header_index, header in enumerate(headers):
                header_map[header] = header_index

            if "CFU" in header_map:
                is_cfu = True
            continue
        else:

            # Lookup experiment id, separate by measurement type
            if SampleConstants.EXPERIMENT_REFERENCE not in output_doc:

                if is_cfu:
                    mt = SampleConstants.MT_CFU
                else:
                    mt = SampleConstants.MT_FLOW

                output_doc[SampleConstants.EXPERIMENT_REFERENCE_URL] = row[
                    header_map["experiment_reference_url"]]

                # without measurement type - for filenames
                experiment_id_bak = row[header_map["experiment_id"]]
                output_doc[
                    SampleConstants.EXPERIMENT_ID] = namespace_experiment_id(
                        experiment_id_bak + "_" + mt, lab)

                map_experiment_reference(config, output_doc)
                experiment_id = output_doc.get(SampleConstants.EXPERIMENT_ID)

            sample_doc = {}
            contents = []
            strain = row[header_map["strain"]]
            replicate = row[header_map["replicate"]]
            treatment = row[header_map["treatment"]]

            sample_doc[SampleConstants.SAMPLE_ID] = namespace_sample_id(
                row[header_map["sample_id"]], lab, output_doc)

            sample_doc[
                SampleConstants.REFERENCE_SAMPLE_ID] = namespace_sample_id(
                    row[header_map["parent_id"]], lab, output_doc)

            sample_doc[SampleConstants.STRAIN] = create_mapped_name(
                experiment_id, strain, strain, lab, sbh_query, strain=True)

            sample_doc[SampleConstants.REPLICATE] = int(float(replicate))

            m_time = None

            if len(treatment) > 0:

                treatment_concentration = row[
                    header_map["treatment_concentration"]]
                treatment_concentration_unit = row[
                    header_map["treatment_concentration_unit"]]

                if treatment == "heat":
                    treatment_concentration_unit = treatment_concentration_unit.strip(
                    )
                    if treatment_concentration_unit in ["C", "celsius"]:
                        sample_doc[
                            SampleConstants.TEMPERATURE] = create_value_unit(
                                treatment_concentration + ":celsius")
                    else:
                        raise ValueError("Unknown temperature {}".format(
                            treatment_concentration_unit))
                else:
                    contents_append_value = create_media_component(
                        experiment_id, treatment, treatment, lab, sbh_query,
                        treatment_concentration + ":" +
                        treatment_concentration_unit)
                    contents.append(contents_append_value)

            treatment_time = row[header_map["treatment_time"]]
            treatment_time_unit = row[header_map["treatment_time_unit"]]

            # normalize to hours
            if treatment_time_unit in ["minute", "minutes"]:
                treatment_time = float(treatment_time) / 60.0
                treatment_time_unit = "hour"

            if len(treatment_time_unit) > 0:
                m_time = create_value_unit(
                    str(treatment_time) + ":" + treatment_time_unit)

            # controls
            strain_class = row[header_map["strain_class"]]
            control_type = row[header_map["control_type"]]

            if strain_class == "Control" and control_type == "Negative":
                sample_doc[SampleConstants.
                           CONTROL_TYPE] = SampleConstants.CONTROL_EMPTY_VECTOR
            if strain_class == "Process":
                if control_type == SampleConstants.STANDARD_BEAD_FLUORESCENCE:
                    sample_doc[
                        SampleConstants.
                        STANDARD_TYPE] = SampleConstants.STANDARD_BEAD_FLUORESCENCE
                    sample_doc[SampleConstants.STANDARD_ATTRIBUTES] = {}
                    sample_doc[SampleConstants.STANDARD_ATTRIBUTES][
                        SampleConstants.BEAD_MODEL] = DEFAULT_BEAD_MODEL
                    sample_doc[SampleConstants.STANDARD_ATTRIBUTES][
                        SampleConstants.BEAD_BATCH] = DEFAULT_BEAD_BATCH
                elif control_type == SampleConstants.STANDARD_BEAD_SIZE:
                    sample_doc[
                        SampleConstants.
                        STANDARD_TYPE] = SampleConstants.STANDARD_BEAD_SIZE

            # Sytox
            if not is_cfu:
                sytox_color = row[header_map["sytox_color"]]
                if len(sytox_color) > 0:
                    # concentration
                    contents.append(
                        create_media_component(
                            experiment_id, "Sytox", "Sytox", lab, sbh_query,
                            row[header_map["sytox_concentration"]] + ":" +
                            row[header_map["sytox_concentration_unit"]]))

                    #color
                    sytox_color_content = create_media_component(
                        experiment_id, "Sytox_color", "Sytox_color", lab,
                        sbh_query)
                    sytox_color_content["value"] = sytox_color
                    contents.append(sytox_color_content)

            # Default Media
            yepd_media = create_media_component(experiment_id, "Media",
                                                "Media", lab, sbh_query)
            yepd_media["value"] = "YEPD"
            contents.append(yepd_media)

            if len(contents) > 0:
                sample_doc[SampleConstants.CONTENTS] = contents

            if SampleConstants.TEMPERATURE not in sample_doc:
                # default if not specified
                sample_doc[SampleConstants.TEMPERATURE] = create_value_unit(
                    "22:celsius")

            measurement_doc = {}
            measurement_doc[SampleConstants.FILES] = []
            if is_cfu:
                measurement_doc[
                    SampleConstants.MEASUREMENT_TYPE] = SampleConstants.MT_CFU
            else:
                measurement_doc[
                    SampleConstants.MEASUREMENT_TYPE] = SampleConstants.MT_FLOW
                measurement_doc[
                    SampleConstants.M_CHANNELS] = cytometer_channels
                # add default duke cytometer configuration
                if SampleConstants.CYTOMETER_CONFIG not in output_doc:
                    output_doc[
                        SampleConstants.
                        CYTOMETER_CONFIG] = duke_cytometer_configuration_object

            measurement_doc[
                SampleConstants.MEASUREMENT_ID] = namespace_measurement_id(
                    1, lab, sample_doc, output_doc)
            measurement_doc[
                SampleConstants.
                MEASUREMENT_GROUP_ID] = namespace_measurement_id(
                    measurement_doc[SampleConstants.MEASUREMENT_TYPE] + "_1",
                    lab, sample_doc, output_doc)

            if m_time is not None:
                measurement_doc[SampleConstants.TIMEPOINT] = m_time

            #CFU 305
            #culture_cells_ml 2.33E+07
            #estimated_cells_plated 583
            #estimated_cells/ml 1.22E+07
            #percent_killed 47.60%
            #date_of_experiment 6/10/20
            cfu_data = {}

            doe = row[header_map["date_of_experiment"]]
            # excel trailing zeroes on strings: 20210430.0
            if type(doe) == float:
                doe = str(int(doe))
            if type(doe) == str and doe.endswith(".0"):
                doe = str(int(float(doe)))

            if is_cfu:
                if len(row[header_map["CFU"]]) > 0:
                    cfu_data[headers[header_map["CFU"]]] = int(
                        float(row[header_map["CFU"]]))
                cfu_data[headers[header_map["culture_cells/ml"]]] = int(
                    float(row[header_map["culture_cells/ml"]]))
                cfu_data[headers[header_map["estimated_cells_plated"]]] = int(
                    row[header_map["estimated_cells_plated"]])
                cfu_data[headers[header_map["estimated_cells/ml"]]] = int(
                    float(row[header_map["estimated_cells/ml"]]))
                cfu_data[headers[header_map["percent_killed"]]] = float(
                    row[header_map["percent_killed"]])
                cfu_data[headers[header_map[
                    "date_of_experiment"]]] = datetime.datetime.strptime(
                        doe, doe_format).strftime(doe_format)
            else:
                #culture_cells/ml
                #date_of_experiment
                if len(row[header_map["culture_cells/ml"]]) > 0:
                    cfu_data[headers[header_map["culture_cells/ml"]]] = int(
                        float(row[header_map["culture_cells/ml"]]))
                cfu_data[headers[header_map[
                    "date_of_experiment"]]] = datetime.datetime.strptime(
                        doe, doe_format).strftime(doe_format)

            measurement_doc["cfu_data"] = cfu_data

            file_id = namespace_file_id(1, lab, measurement_doc, output_doc)
            if is_cfu:
                file_type = SampleConstants.infer_file_type(input_file)
                measurement_doc[SampleConstants.FILES].append({
                    SampleConstants.M_NAME:
                    experiment_id_bak + "__cfu_and_meta.csv",
                    SampleConstants.M_TYPE:
                    file_type,
                    SampleConstants.M_LAB_LABEL:
                    [SampleConstants.M_LAB_LABEL_RAW],
                    SampleConstants.FILE_ID:
                    file_id,
                    SampleConstants.FILE_LEVEL:
                    SampleConstants.F_LEVEL_0
                })
            else:
                filename = row[header_map["fcs_filename"]]
                file_type = SampleConstants.infer_file_type(filename)
                measurement_doc[SampleConstants.FILES].append({
                    SampleConstants.M_NAME:
                    filename,
                    SampleConstants.M_TYPE:
                    file_type,
                    SampleConstants.M_LAB_LABEL:
                    [SampleConstants.M_LAB_LABEL_RAW],
                    SampleConstants.FILE_ID:
                    file_id,
                    SampleConstants.FILE_LEVEL:
                    SampleConstants.F_LEVEL_0
                })

            if SampleConstants.MEASUREMENTS not in sample_doc:
                sample_doc[SampleConstants.MEASUREMENTS] = []
            sample_doc[SampleConstants.MEASUREMENTS].append(measurement_doc)

            output_doc[SampleConstants.SAMPLES].append(sample_doc)

    try:
        validate(output_doc, schema)

        if output is True or output_file is not None:
            if output_file is None:
                path = os.path.join("output/duke_haase",
                                    os.path.basename(input_file))
            else:
                path = output_file

            if path.endswith(".csv"):
                path = path[:-4] + ".json"

            with open(path, 'w') as outfile:
                json.dump(output_doc, outfile, indent=4)
        return True
    except ValidationError as err:
        if verbose:
            print("Schema Validation Error: {0}\n".format(err))
        if enforce_validation:
            raise ValidationError("Schema Validation Error", err)
        return False
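A hedged invocation sketch for the converter above; the schema path, CSV name and SBH credentials are placeholders, not values from the original project:

import json

schema = json.load(open("sample_schema.json"))                  # placeholder schema file
ok = convert_duke_haase(schema,
                        "utf-8",
                        "duke_haase_cfu_and_meta.csv",          # placeholder metadata CSV
                        config={"sbh": {"user": "me", "password": "secret"}},
                        output_file="duke_haase_samples.json")
print("validated and written" if ok else "schema validation failed")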
Example #4
def format_expose(instance):
    if isinstance(instance, six.string_types):
        if not re.match(VALID_EXPOSE_FORMAT, instance):
            raise ValidationError("should be of the format 'PORT[/PROTOCOL]'")

    return True
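`VALID_EXPOSE_FORMAT` and `six` come from the surrounding module; a stand-in pattern that accepts 'PORT[/PROTOCOL]' strings could look like the following (the regex is an assumption, not the project's exact definition):

import re

VALID_EXPOSE_FORMAT = r'^\d+(-\d+)?(/[a-zA-Z]+)?$'   # assumed stand-in pattern

print(bool(re.match(VALID_EXPOSE_FORMAT, "8000/tcp")))    # True
print(bool(re.match(VALID_EXPOSE_FORMAT, "tcp/8000")))    # False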
Example #5
def _check_higher_from_n_min_drive_set(n, n_min_drive_set):
    if n < n_min_drive_set:
        raise ValidationError(
            f"Must be higher than `{m.n_min_drive_set}`({n_min_drive_set})!")
Example #6
def checkPlusAttributeConsistency(inDict):
    if 'confidentiality' in inDict.get('plus', {}).get('attribute', {}):
        if 'confidentiality' not in inDict.get('attribute', {}):
            raise ValidationError(
                "plus.attribute.confidentiality present but confidentiality is not an affected attribute."
            )
Example #7
import json, sys, io
from jsonschema import validate, ValidationError

schema = json.load(io.open('schema.json', encoding='utf-8'))
seen_ids = set()

for file in sys.argv[1:]:
    source = json.load(io.open(file, encoding='utf-8'))
    try:
        validate(source, schema)
        id = source['properties']['id']
        if id in seen_ids:
            raise ValidationError('Id %s used multiple times' % id)
        seen_ids.add(id)
        sys.stdout.write('.')
        sys.stdout.flush()
    except ValidationError as e:
        print(file)
        raise

print('')
Example #8
def checkSocialIntegrity(inDict):
  if 'social' in inDict['action']:
    if 'Alter behavior' not in inDict.get('attribute',{}).get('integrity',{}).get('variety',[]):
      raise ValidationError("acton.social present, but Alter behavior not in attribute.integrity.variety")
  return True
Example #9
    if not os.path.exists(filename):
        logger.debug("{} does not exist, skip".format(filename))
        continue

    try:

        ## dict_raise_on_duplicates raises error on duplicate keys in geojson
        source = json.load(io.open(filename, encoding='utf-8'),
                           object_pairs_hook=dict_raise_on_duplicates)

        ## jsonschema validate
        validator.validate(source, schema)
        sourceid = source['properties']['id']
        if sourceid in seen_ids:
            raise ValidationError('Id %s used multiple times' % sourceid)
        seen_ids.add(sourceid)

        ## {z} instead of {zoom}
        if '{z}' in source['properties']['url']:
            raise ValidationError('{z} found instead of {zoom} in tile url')

        ## Check for license url. Too many missing to mark as required in schema.
        if 'license_url' not in source['properties']:
            logger.debug("{} has no license_url".format(filename))

        if 'attribution' not in source['properties']:
            logger.debug("{} has no attribution".format(filename))

        ## Check for big fat embedded icons
        if 'icon' in source['properties']:
Example #10
    def validate_params(self):
        if not self.event.get('body'):
            raise ValidationError('Request parameter is required')
        validate(self.params, self.get_schema())
Example #11
def _check_harvested(record):
    """Harvested document cannot be linked to an order line."""
    related_document = record.document
    if related_document and related_document.harvested:
        msg = _('Cannot link to a harvested document')
        raise ValidationError(msg)
Example #12
def validate(json_to_validate, schema):
    validator = Draft7Validator(schema, format_checker=format_checker)
    errors = list(validator.iter_errors(json_to_validate))
    if errors:
        raise ValidationError(build_error_message(errors))
    return json_to_validate
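`build_error_message` and `format_checker` come from the surrounding module; a minimal, self-contained usage of the same Draft7Validator pattern, with a toy schema and plain joined messages instead of build_error_message:

from jsonschema import Draft7Validator, FormatChecker

schema = {"type": "object", "required": ["email"]}
errors = list(Draft7Validator(schema, format_checker=FormatChecker()).iter_errors({}))
if errors:
    print("; ".join(error.message for error in errors))   # 'email' is a required property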
Example #13
def _maybe_validate_schema(instance: Dict[str, Any], schema: Dict[str, Any], validate_schema: bool) -> None:
    if validate_schema:
        try:
            jsonschema.validate(instance, schema)
        except TypeError:
            raise ValidationError("Invalid schema")
Example #14
def validate_schema_postage(instance):
    if isinstance(instance, str):
        if instance not in ["first", "second"]:
            raise ValidationError("invalid. It must be either first or second.")
    return True
Example #15
def validate_content_type(swagger: Mapping, content_type: str):
    consumes = swagger.get('consumes')
    if consumes and not any(content_type == consume for consume in consumes):
        raise ValidationError(
            message='Unsupported content type: {}'.format(content_type))
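A quick usage sketch of the check above; the swagger fragment is fabricated for illustration:

swagger = {"consumes": ["application/json"]}
validate_content_type(swagger, "application/json")        # accepted, returns None
try:
    validate_content_type(swagger, "text/plain")
except ValidationError as exc:
    print(exc)                                             # Unsupported content type: text/plain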
Example #16
    'memory_swap': 'memswap_limit',
    'port': 'ports',
    'privilege': 'privileged',
    'priviliged': 'privileged',
    'privilige': 'privileged',
    'volume': 'volumes',
    'workdir': 'working_dir',
}

VALID_NAME_CHARS = r'[a-zA-Z0-9\._\-]'


@FormatChecker.cls_checks(
    format="ports",
    raises=ValidationError(
        "Invalid port formatting, it should be '[[remote_ip:]remote_port:]port[/protocol]'"
    ))
def format_ports(instance):
    try:
        split_port(instance)
    except ValueError:
        return False
    return True


def validate_service_names(func):
    @wraps(func)
    def func_wrapper(config):
        for service_name in config.keys():
            if type(service_name) is int:
                raise ConfigurationError(
Example #17
def checkSecurityIncident(inDict):
  if inDict['security_incident'] == "Confirmed":
    if 'attribute' not in inDict:
      raise ValidationError("security_incident Confirmed but attribute section not present")
  return True
Example #18
def checkMisuseActor(inDict):
    if 'misuse' in inDict['action'] and 'internal' not in inDict[
            'actor'] and 'partner' not in inDict['actor']:
        yield ValidationError(
            "Misuse in action, but no internal or partner actor defined.  Per VERIS issue #229, there should always be an internal or partner actor if there is a misuse action."
        )
Example #19
def checkMalwareIntegrity(inDict):
    if 'malware' in inDict['action']:
        if 'Software installation' not in inDict.get('attribute',{}).get('integrity',{}).get('variety',[]):
          raise ValidationError("Malware present, but no Software installation in attribute.integrity.variety")
    return True
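Checks in this style raise on the first problem, so a caller typically wraps each one; a short driver sketch over a fabricated incident dict, assuming the jsonschema ValidationError these checks raise:

from jsonschema import ValidationError

incident = {
    "security_incident": "Confirmed",
    "action": {"malware": {}},
    "attribute": {},            # no integrity.variety, so checkMalwareIntegrity should complain
}

problems = []
for check in (checkSecurityIncident, checkMalwareIntegrity):
    try:
        check(incident)
    except ValidationError as err:
        problems.append(err.message)
print(problems)                 # one complaint about missing Software installation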
Example #20
def checkYear(inDict):
    if inDict.get('plus', {}).get('dbir_year', None):
        dbir_year = inDict['plus']['dbir_year']
        nyear = inDict.get('plus', {}).get('timeline',
                                           {}).get('notification',
                                                   {}).get('year', None)
        nmonth = inDict.get('plus', {}).get('timeline',
                                            {}).get('notification',
                                                    {}).get('month', None)
        nday = inDict.get('plus', {}).get('timeline',
                                          {}).get('notification',
                                                  {}).get('day', None)
        iyear = inDict.get('timeline', {}).get('incident',
                                               {}).get('year', None)
        imonth = inDict.get('timeline', {}).get('incident',
                                                {}).get('month', None)
        iday = inDict.get('timeline', {}).get('incident', {}).get('day', None)
        discovered = inDict.get('timeline',
                                {}).get('discovered',
                                        {}).get('unit', "(no discovery unit)")
        if nyear is not None:
            source = "notification"
            tyear = nyear
            tmonth = nmonth
        else:
            tyear = iyear
            tmonth = imonth
            source = "incident"
        if tyear >= dbir_year:
            yield ValidationError(
                "DBIR year of {0} from {5} runs from Nov 1, {1} to Oct 31, {2}. Incident year {3} and month {4} is too late to be in this DBIR year."
                .format(dbir_year, dbir_year - 2, dbir_year - 1, tyear, tmonth,
                        source))
        if tyear == dbir_year - 1:
            if tmonth is not None and tmonth > 10:
                yield ValidationError(
                    "DBIR year of {0} from {5} runs from Nov 1, {1} to Oct 31, {2}. Incident year {3} and month {4} is too late to be in this DBIR year."
                    .format(dbir_year, dbir_year - 2, dbir_year - 1, tyear,
                            tmonth, source))
        elif tyear == dbir_year - 2:
            if tmonth is not None and tmonth < 11:
                if discovered in ["Months", "Years"]:
                    yield ValidationError(
                        "DBIR year of {0} from {5} runs from Nov 1, {1} to Oct 31, {2}. Incident year {3}, month {4}, and discovery unit {6} is before this range."
                        .format(dbir_year, dbir_year - 2, dbir_year - 1, tyear,
                                tmonth, source, discovered))
        else:
            if discovered != "Years":
                yield ValidationError(
                    "DBIR year of {0} from {4} runs from Nov 1, {1} to Oct 31, {2}. Incident year {3} and discovery unit {5} is before this range."
                    .format(dbir_year, dbir_year - 2, dbir_year - 1, tyear,
                            source, discovered))
        # check if incident or notification dates are in future
        if nyear is not None:
            ndate = None
            try:
                ndate = date(*[x if x else 1 for x in [nyear, nmonth, nday]])
            except ValueError as e:
                yield ValidationError(
                    "Problem with notification date: {0}".format(e))
            if ndate is not None and ndate > date.today():
                yield ValidationError(
                    "Notification date {0} is greater than today's date {1}.".
                    format(ndate, date.today()))
        idate = None
        try:
            idate = date(*[x if x else 1 for x in [iyear, imonth, iday]])
        except ValueError as e:
            yield ValidationError("Problem with incident date: {0}".format(e))
        if idate is not None and idate > date.today():
            yield ValidationError(
                "Incident date {0} is greater than today's date {1}.".format(
                    idate, date.today()))
        if nyear is not None and ndate is not None and idate is not None and idate > ndate:
            yield ValidationError(
                "Notification date {0} appears to be earlier than incident date {1}. This may be due to incomplete dates."
                .format(ndate, idate))
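checkMisuseActor and checkYear are generator-style checks that yield rather than raise, so a caller drains them; a brief sketch over a fabricated incident:

incident = {
    "action": {"misuse": {}},
    "actor": {"external": {}},
    "plus": {"dbir_year": 2021},
    "timeline": {"incident": {"year": 2018, "month": 5}},
}

findings = list(checkMisuseActor(incident)) + list(checkYear(incident))
for finding in findings:
    print(finding.message)      # one misuse/actor complaint, one DBIR-year complaint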
Example #21
    cycle = cycler.PhaseMarker().add_phase_markers(cb.cycle, cb.V, cb.A)
    n_stops = (cycle["stop"].astype(int).diff() < 0).sum(None)
    n_decels = (cycle["decel"].astype(int).diff() < 0).sum(None)
    n_stopdecels = (cycle["stopdecel"].astype(int).diff() < 0).sum(None)
    assert n_stopdecels < n_decels
    ##  The initial stop has no deceleration before it BUT no diff >0 either!
    assert n_stopdecels == n_stops


@pytest.mark.parametrize(
    "wltc_class, t_cold_end, err",
    zip(
        range(4),
        (800, 150),
        (
            ValidationError("before the 1st cycle-part"),
            ValidationError("on a cycle stop"),
        ),
    ),
)
def test_validate_t_start(wltc_class, t_cold_end, err):
    """
    .. TODO:: move `t_cold_end` check in validations pipeline.
    """
    V = datamodel.get_class_v_cycle(wltc_class)
    wltc_parts = datamodel.get_class_parts_limits(wltc_class)

    cb = CycleBuilder(V)
    cb.cycle = cycler.PhaseMarker().add_phase_markers(cb.cycle, cb.V, cb.A)
    with pytest.raises(type(err), match=str(err)):
        for err in cb.validate_nims_t_cold_end(t_cold_end, wltc_parts):
Example #22
def convert_tulane(schema,
                   encoding,
                   input_file,
                   verbose=True,
                   output=True,
                   output_file=None,
                   config={},
                   enforce_validation=True,
                   reactor=None):

    if reactor is not None:
        helper = AgaveHelper(reactor.client)
        print("Helper loaded")
    else:
        print("Helper not loaded")

    # for SBH Librarian Mapping
    sbh_query = SynBioHubQuery(SD2Constants.SD2_SERVER)
    sbh_query.login(config["sbh"]["user"], config["sbh"]["password"])

    tulane_doc = json.load(open(input_file, encoding=encoding))

    output_doc = {}
    lab = SampleConstants.LAB_TULANE

    original_experiment_id = tulane_doc[SampleConstants.EXPERIMENT_ID]
    output_doc[SampleConstants.EXPERIMENT_ID] = namespace_experiment_id(
        original_experiment_id, lab)

    output_doc[SampleConstants.CHALLENGE_PROBLEM] = tulane_doc[
        SampleConstants.CHALLENGE_PROBLEM]
    output_doc[SampleConstants.EXPERIMENT_REFERENCE_URL] = tulane_doc[
        SampleConstants.EXPERIMENT_REFERENCE]

    map_experiment_reference(config, output_doc)

    output_doc[SampleConstants.LAB] = lab
    output_doc[SampleConstants.SAMPLES] = []
    samples_w_data = 0
    cytometer_channels = []
    if SampleConstants.CYTOMETER_CONFIG in tulane_doc:
        output_doc[SampleConstants.CYTOMETER_CONFIG] = tulane_doc[
            SampleConstants.CYTOMETER_CONFIG]
        for channel in output_doc[
                SampleConstants.CYTOMETER_CONFIG]['channels']:
            cytometer_channels.append(channel['name'])

    for tulane_sample in tulane_doc["tulane_samples"]:
        sample_doc = {}

        sample_id = tulane_sample["sample_id"]

        sample_doc[SampleConstants.SAMPLE_ID] = namespace_sample_id(
            sample_id, lab, output_doc)
        sample_doc[SampleConstants.LAB_SAMPLE_ID] = namespace_sample_id(
            sample_id, lab, None)

        if SampleConstants.STRAIN in tulane_sample:
            strain = tulane_sample[SampleConstants.STRAIN]
            sample_doc[SampleConstants.STRAIN] = create_mapped_name(
                original_experiment_id,
                strain,
                strain,
                lab,
                sbh_query,
                strain=False)

        if SampleConstants.CONTROL_TYPE in tulane_sample:
            sample_doc[SampleConstants.CONTROL_TYPE] = tulane_sample[
                SampleConstants.CONTROL_TYPE]

        if SampleConstants.CONTROL_CHANNEL in tulane_sample:
            sample_doc[SampleConstants.CONTROL_CHANNEL] = tulane_sample[
                SampleConstants.CONTROL_CHANNEL]

        measurement_counter = 1

        for file in tulane_sample[SampleConstants.FILES]:
            measurement_doc = {}

            measurement_doc[SampleConstants.FILES] = []

            measurement_type = file[SampleConstants.M_TYPE]

            file_name = file[SampleConstants.M_NAME]
            # same logic as uploads manager
            file_name = safen_filename(file_name)

            measurement_doc[
                SampleConstants.MEASUREMENT_TYPE] = measurement_type

            # apply channels, if nothing mapped
            if measurement_type == SampleConstants.MT_FLOW:
                if SampleConstants.M_CHANNELS not in measurement_doc:
                    measurement_doc[
                        SampleConstants.M_CHANNELS] = cytometer_channels

            # append the type so we have a distinct id per actual grouped measurement
            typed_measurement_id = '.'.join(
                [str(measurement_counter), measurement_type])

            # generate a measurement id unique to this sample
            measurement_doc[
                SampleConstants.MEASUREMENT_ID] = namespace_measurement_id(
                    str(measurement_counter), output_doc[SampleConstants.LAB],
                    sample_doc, output_doc)

            # record a measurement grouping id to find other linked samples and files
            measurement_doc[SampleConstants.
                            MEASUREMENT_GROUP_ID] = namespace_measurement_id(
                                typed_measurement_id,
                                output_doc[SampleConstants.LAB], sample_doc,
                                output_doc)

            file_type = SampleConstants.infer_file_type(file_name)
            file_name_final = file_name

            if file_name.startswith('s3') or file_name.count("/") >= 2:
                file_name_final = file_name.split(original_experiment_id)[-1]

            if file_name_final.startswith("/"):
                file_name_final = file_name_final[1:]

            measurement_doc[SampleConstants.FILES].append({
                SampleConstants.M_NAME:
                file_name_final,
                SampleConstants.M_TYPE:
                file_type,
                SampleConstants.M_LAB_LABEL: [SampleConstants.M_LAB_LABEL_RAW],
                # measurements and files here are 1:1
                SampleConstants.FILE_ID:
                namespace_file_id("1", output_doc[SampleConstants.LAB],
                                  measurement_doc, output_doc),
                SampleConstants.FILE_LEVEL:
                SampleConstants.F_LEVEL_0
            })

            if SampleConstants.MEASUREMENTS not in sample_doc:
                sample_doc[SampleConstants.MEASUREMENTS] = []
            sample_doc[SampleConstants.MEASUREMENTS].append(measurement_doc)
            samples_w_data = samples_w_data + 1
            #print('sample {} / measurement {} contains {} files'.format(sample_doc[SampleConstants.SAMPLE_ID], file_name, len(measurement_doc[SampleConstants.FILES])))

            measurement_counter = measurement_counter + 1

        if SampleConstants.MEASUREMENTS not in sample_doc:
            sample_doc[SampleConstants.MEASUREMENTS] = []
        output_doc[SampleConstants.SAMPLES].append(sample_doc)

    print('Samples in file: {}'.format(len(tulane_doc["tulane_samples"])))
    print('Samples with data: {}'.format(samples_w_data))

    try:
        validate(output_doc, schema)
        # if verbose:
        # print(json.dumps(output_doc, indent=4))
        if output is True or output_file is not None:
            if output_file is None:
                path = os.path.join("output/tulane",
                                    os.path.basename(input_file))
            else:
                path = output_file
            with open(path, 'w') as outfile:
                json.dump(output_doc, outfile, indent=4)
        return True
    except ValidationError as err:
        if verbose:
            print("Schema Validation Error: {0}\n".format(err))
        if enforce_validation:
            raise ValidationError("Schema Validation Error", err)
        return False
Example #23
        print(engine.parse_wot(wot))


@pytest.mark.parametrize(
    "wot, n_idle, n_rated, p_rated, err",
    [
        # ([[1, 2], [3, 4]], None, None, None, ValueError("Too few points in wot")),
        (
            {
                "p": _P,
                "n": _N
            },
            None,
            None,
            92,
            ValidationError(
                re.escape("`p_wot_max`(78) much lower than p_rated(92)!")),
        ),
        (
            {
                "p": _P,
                "n": _N
            },
            None,
            None,
            22,
            ValidationError(
                re.escape("`p_wot_max`(78) much bigger than p_rated(22)!")),
        ),
    ],
)
def test_validate_wot_errors(mdl, wot, n_idle, n_rated, p_rated, err):
Example #24
borkenbuild = False
spacesave = 0

for filename in arguments.path:
    try:

        ## dict_raise_on_duplicates raises error on duplicate keys in geojson
        source = json.load(io.open(filename, encoding='utf-8'),
                           object_pairs_hook=dict_raise_on_duplicates)

        ## jsonschema validate
        validator.validate(source, schema)
        sourceid = source['properties']['id']
        if sourceid in seen_ids:
            raise ValidationError('Id %s used multiple times' % sourceid)
        seen_ids.add(sourceid)

        ## {z} instead of {zoom}
        if '{z}' in source['properties']['url']:
            raise ValidationError('{z} found instead of {zoom} in tile url')
        if 'license' in source['properties']:
            license = source['properties']['license']
            if not spdx_lookup.by_id(license):
                raise ValidationError('Unknown license %s' % license)
        else:
            logger.debug("{} has no license property".format(filename))

        ## Check for license url. Too many missing to mark as required in schema.
        if 'license_url' not in source['properties']:
            logger.debug("{} has no license_url".format(filename))
Example #25
    if not filename.lower()[-8:] == ".geojson":
        logger.debug("{} is not a geojson file, skip".format(filename))
        continue

    if not os.path.exists(filename):
        logger.debug("{} does not exist, skip".format(filename))
        continue

    try:
        ## dict_raise_on_duplicates raises error on duplicate keys in geojson
        source = json.load(io.open(filename, encoding="utf-8"),
                           object_pairs_hook=dict_raise_on_duplicates)
    except Exception as e:
        logger.exception(f"Could not parse file: {filename}: {e}")
        raise ValidationError(f"Could not parse file: {filename}: {e}")

    try:

        ## dict_raise_on_duplicates raises error on duplicate keys in geojson
        source = json.load(io.open(filename, encoding="utf-8"),
                           object_pairs_hook=dict_raise_on_duplicates)

        ## jsonschema validate
        validator.validate(source, schema)
        sourceid = source["properties"]["id"]
        if sourceid in seen_ids:
            raise ValidationError("Id %s used multiple times" % sourceid)
        seen_ids.add(sourceid)

        ## {z} instead of {zoom}
Example #26
def main():

    rx = Reactor()
    m = AttrDict(rx.context.message_dict)

    if m == {}:
        try:
            jsonmsg = json.loads(rx.context.raw_message)
            m = jsonmsg
        except Exception:
            pass

    #    ['event', 'agavejobs', 'create', 'delete']
    action = "emptypost"
    try:
        for a in ["aloejobs", "event", "agavejobs"]:
            try:
                rx.logger.info("Testing against {} schema".format(a))
                rx.validate_message(m,
                                    messageschema="/schemas/" + a +
                                    ".jsonschema",
                                    permissive=False)
                action = a
                break
            except Exception as exc:
                print("Validation error: {}".format(exc))
        if action == "emptypost":
            pprint(m)
            raise ValidationError("Message did not match a known schema")
    except Exception as vexc:
        rx.on_failure("Failed to process message", vexc)

    # rx.logger.debug("SCHEMA DETECTED: {}".format(action))

    # store = PipelineJobStore(mongodb=rx.settings.mongodb)
    # Process the event

    # Get URL params from Abaco context
    #
    # These can be overridden by the event body or custom
    # code implemented to process the message. This has a
    # side effect of allowing the manager to process empty
    # POST bodies so long as the right values are presented
    # as URL params.
    #
    # cb_* variables are always overridden by the contents of
    #   the POST body
    #
    cb_event_name = rx.context.get("event", None)
    cb_job_uuid = rx.context.get("uuid", None)
    cb_token = rx.context.get("token", "null")
    # Accept a 'note' as a URL parameter
    # TODO - urldecode the contents of 'note'
    cb_note = rx.context.get("note", "Event had no JSON payload")
    # NOTE - contents of cb_data will be overridden in create, event, aloejob
    cb_data = {"note": cb_note}
    # Accept 'status', the Aloe-centric name for job.state
    # as well as 'state'
    cb_agave_status = rx.context.get("status", rx.context.get("state", None))

    # Prepare template PipelineJobsEvent
    event_dict = {
        "uuid": cb_job_uuid,
        "name": cb_event_name,
        "token": cb_token,
        "data": cb_data,
    }

    # This is the default message schema 'event'
    if action == "event":
        # Filter message and override values in event_dict with its contents
        for k in ["uuid", "name", "token", "data"]:
            event_dict[k] = m.get(k, event_dict.get(k))

    # AgaveJobs can update the status of an existing job but cannot
    # create one. To do so, an Agave job must be launched
    # using the PipelineJobsAgaveProxy resource.
    if action == "agavejobs":
        rx.on_failure("Agave job callbacks are no longer supported")
    elif action == "aloejobs":
        try:
            # Aloe jobs POST their current JSON representation to
            # callback URL targets. The POST body contains a 'status' key.
            # If for some reason it doesn't, job status is determined by
            # the 'state' or 'status' URL parameter.
            if cb_agave_status is None:
                cb_agave_status = m.get("status", None)
            # Agave job message bodies include 'id' which is the jobId
            mes_agave_job_id = m.get("id", None)
            rx.logger.debug("aloe_status: {}".format(cb_agave_status))
            if cb_agave_status is not None:
                cb_agave_status = cb_agave_status.upper()
        except Exception as exc:
            rx.on_failure(
                "Aloe callback POST and associated URL parameters were missing some required fields",
                exc,
            )

        # If the job status is 'RUNNING' then use a subset of the POST for
        # event.data. Otherwise, create an event.data from the most recent
        # entry in the Agave job history. One small detail to note is that
        # callbacks are sent at the beginning of event processing in the
        # Agave jobs service and so a handful of fields in the job record
        # that are late bound are not yet populated when the event is sent.
        if cb_agave_status == "RUNNING":
            cb_data = minify_job_dict(dict(m))
        else:
            cb_data = {"status": cb_agave_status}
            # Fetch latest history entry to put in event.data
            try:
                # Is there a better way than grabbing entire history that can
                # be implemented in a pure Agave call? Alternatively, we could
                # cache last offset for this job in rx.state but that will
                # limit our scaling to one worker
                #
                agave_job_latest_history = rx.client.jobs.getHistory(
                    jobId=mes_agave_job_id,
                    limit=100)[-1].get("description", None)
                if agave_job_latest_history is not None:
                    cb_data["description"] = agave_job_latest_history
            except Exception as agexc:
                rx.logger.warning("Failed to get history for {}: {}".format(
                    mes_agave_job_id, agexc))

        # Map the Agave job status to an PipelineJobsEvent name
        if cb_event_name is None and cb_agave_status is not None:
            cb_event_name = AgaveEvents.agavejobs.get(cb_agave_status,
                                                      "update")
            rx.logger.debug("Status: {} => Event: {}".format(
                cb_agave_status, cb_event_name))

        # Event name and data can be updated as part of processing an Agave POST
        # so apply the current values to event_dict here
        event_dict["name"] = cb_event_name
        event_dict["data"] = cb_data

    # Sanity check event_dict and token
    if event_dict["uuid"] is None or event_dict[
            "name"] is None or cb_token is None:
        rx.on_failure("No actionable event was received.")

    # Instantiate a job instance to leverage the MPJ framework
    store = ManagedPipelineJobInstance(rx.settings.mongodb,
                                       event_dict["uuid"],
                                       agave=rx.client)

    # Handle event...
    try:

        # First, proxy events. This code forwards index and indexed events to the jobs-indexer
        # Proxy 'index'
        if event_dict["name"] == "index":
            rx.logger.info("Forwarding 'index'")
            index_mes = {
                "name": "index",
                "uuid": event_dict["uuid"],
                "token": event_dict["token"],
            }
            rx.send_message(rx.settings.pipelines.job_indexer_id,
                            index_mes,
                            retryMaxAttempts=10)
            # Disable this since it should be picked up via events-manager subscription
            # message_control_annotator(up_job, ["INDEXING"], rx)

        # Proxy 'indexed'
        elif event_dict["name"] == "indexed":
            rx.logger.info("Forwarding 'indexed'")
            index_mes = {
                "name": "indexed",
                "uuid": event_dict["uuid"],
                "token": event_dict["token"],
            }
            rx.send_message(rx.settings.pipelines.job_indexer_id,
                            index_mes,
                            retryMaxAttempts=10)
            # Disable this since it should be picked up via events-manager subscription
            # message_control_annotator(up_job, ["FINISHED"], rx)

        # Handle all other events
        else:
            rx.logger.info("Handling '{}'".format(event_dict["name"]))
            # Get the current state of the MPJ. We use this to detect if
            # handling the event has resulted in a change of state
            store_state = store.state
            last_event = store.last_event

            # Send event at the beginning of state change so subscribers can pick
            # up, for instance, a case where the job receives an index event and
            # is in the FINISHED state.
            if rx.settings.state_enter:
                forward_event(event_dict["uuid"], event_dict['name'],
                              store_state, {'last_event': last_event}, rx)

            up_job = store.handle(event_dict, cb_token)
            if rx.settings.state_exit:
                forward_event(up_job["uuid"], event_dict['name'],
                              up_job["state"],
                              {"last_event": up_job["last_event"]}, rx)

    except Exception as exc:
        rx.on_failure("Event not processed", exc)

    rx.on_success("Processed event in {} usec".format(rx.elapsed()))
Example #27
def format_ports(instance):
    try:
        split_port(instance)
    except ValueError as e:
        raise ValidationError(six.text_type(e))
    return True
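Either variant of format_ports is normally registered on a jsonschema FormatChecker and picked up during validation, as the decorator in Example #16 shows; a minimal self-contained sketch of that wiring (the port check below is a toy stand-in for split_port, not compose's real logic):

from jsonschema import Draft7Validator, FormatChecker, ValidationError

checker = FormatChecker()

@checker.checks("ports", raises=ValidationError)
def check_ports(instance):
    host, _, port = str(instance).rpartition(":")          # toy stand-in for split_port
    if not port.isdigit():
        raise ValidationError("invalid port %r" % instance)
    return True

schema = {"type": "string", "format": "ports"}
validator = Draft7Validator(schema, format_checker=checker)
print([e.message for e in validator.iter_errors("8080:80")])     # []
print([e.message for e in validator.iter_errors("no-port")])     # one format error message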
Example #28
File: base.py  Project: ri0t/isomer
def provisionList(items,
                  database_name,
                  overwrite=False,
                  clear=False,
                  skip_user_check=False):
    """Provisions a list of items according to their schema

    :param items: A list of provisionable items.
    :param database_name:
    :param overwrite: Causes existing items to be overwritten
    :param clear: Clears the collection first (Danger!)
    :param skip_user_check: Skips checking if a system user is existing already
           (for user provisioning)
    :return:
    """

    log("Provisioning", items, database_name, lvl=debug)

    def get_system_user():
        """Retrieves the node local system user"""

        user = objectmodels["user"].find_one({"name": "System"})

        try:
            log("System user uuid: ", user.uuid, lvl=verbose)
            return user.uuid
        except AttributeError as system_user_error:
            log("No system user found:", system_user_error, lvl=warn)
            log(
                "Please install the user provision to setup a system user or "
                "check your database configuration",
                lvl=error,
            )
            return False

    # TODO: Do not check this on specific objects but on the model (i.e. once)
    def needs_owner(obj):
        """Determines whether a basic object has an ownership field"""
        for privilege in obj._fields.get("perms", {}):
            if "owner" in obj._fields["perms"][privilege]:
                return True

        return False

    import pymongo
    from isomer.database import objectmodels, dbhost, dbport, dbname

    database_object = objectmodels[database_name]

    log(dbhost, dbname)
    # TODO: Fix this to make use of the dbhost

    client = pymongo.MongoClient(dbhost, dbport)
    db = client[dbname]

    if not skip_user_check:
        system_user = get_system_user()

        if not system_user:
            return
    else:
        # TODO: Evaluate what to do instead of using a hardcoded UUID
        # This is usually only here for provisioning the system user
        # One way to avoid this, is to create (instead of provision)
        # this one upon system installation.
        system_user = "******"

    col_name = database_object.collection_name()

    if clear is True:
        log("Clearing collection for", col_name, lvl=warn)
        db.drop_collection(col_name)
    counter = 0

    for no, item in enumerate(items):
        new_object = None
        item_uuid = item["uuid"]
        log("Validating object (%i/%i):" % (no + 1, len(items)),
            item_uuid,
            lvl=debug)

        if database_object.count({"uuid": item_uuid}) > 0:
            log("Object already present", lvl=warn)
            if overwrite is False:
                log("Not updating item", item, lvl=warn)
            else:
                log("Overwriting item: ", item_uuid, lvl=warn)
                new_object = database_object.find_one({"uuid": item_uuid})
                new_object._fields.update(item)
        else:
            new_object = database_object(item)

        if new_object is not None:
            try:
                if needs_owner(new_object):
                    if not hasattr(new_object, "owner"):
                        log("Adding system owner to object.", lvl=verbose)
                        new_object.owner = system_user
            except Exception as e:
                log("Error during ownership test:",
                    e,
                    type(e),
                    exc=True,
                    lvl=error)
            try:
                new_object.validate()
                new_object.save()
                counter += 1
            except ValidationError as e:
                raise ValidationError(
                    "Could not provision object: " + str(item_uuid), e)

    log("Provisioned %i out of %i items successfully." % (counter, len(items)))
Example #29
0
def validate_n_rated_above_n_idle(n_idle_R, n_rated_R):
    if n_rated_R <= n_idle_R:
        raise ValidationError(
            f"{m.n_rated}({n_rated_R}) must be higher than {m.n_idle}({n_idle_R}!"
        )
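
Illustrative calls with invented engine speeds (the rpm values are made up): a rated speed above idle passes silently, the reverse raises ValidationError.

validate_n_rated_above_n_idle(n_idle_R=850, n_rated_R=4000)    # passes
# validate_n_rated_above_n_idle(n_idle_R=4000, n_rated_R=850)  # would raise ValidationError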
Example #30
0
def checkSQLiRepurpose(inDict):
    if 'SQLi' in inDict.get('action', {}).get('hacking', {}).get('variety', []):
        if 'Repurpose' not in inDict.get('attribute', {}).get('integrity', {}).get('variety', []):
            raise ValidationError("'SQLi' in action.hacking.variety but 'Repurpose' not in attribute.integrity.variety")
    return True
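
An illustrative input for the check above (the incident record is invented): 'SQLi' in action.hacking.variety is only accepted when 'Repurpose' also appears in attribute.integrity.variety.

incident = {
    "action": {"hacking": {"variety": ["SQLi"]}},
    "attribute": {"integrity": {"variety": ["Repurpose"]}},
}
checkSQLiRepurpose(incident)  # returns True; dropping "Repurpose" would raise ValidationError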
Example #31
0
def convert_caltech(schema,
                    encoding,
                    input_file,
                    verbose=True,
                    output=True,
                    output_file=None,
                    config={},
                    enforce_validation=True,
                    reactor=None):

    if reactor is not None:
        helper = AgaveHelper(reactor.client)
        print("Helper loaded")
    else:
        print("Helper not loaded")

    # for SBH Librarian Mapping
    sbh_query = SynBioHubQuery(SD2Constants.SD2_SERVER)
    sbh_query.login(config["sbh"]["user"], config["sbh"]["password"])

    # TODO sheet name may change?
    caltech_df = pandas.read_excel(input_file, sheet_name='IDs')

    output_doc = {}

    lab = SampleConstants.LAB_CALTECH

    output_doc[SampleConstants.LAB] = lab
    output_doc[SampleConstants.SAMPLES] = []

    # We don't natively know which experiment contains which columns - they can all be different
    # Idea: build up a map that relates column names to mapping functions

    # columns for exp
    exp_columns = {}
    # column functions
    exp_column_functions = {}
    # exp measurement type
    exp_mt = {}
    # exp measurement key
    exp_mk = {}
    # exp relative path to files
    exp_rel_path = {}
    # exp column units
    exp_column_units = {}
    # time
    exp_time = {}
    # temp
    exp_temp = {}
    # flow cytometer channels, configuration and controls
    exp_cytometer_channels = {}
    exp_cytometer_configuration = {}
    exp_negative_controls = {}
    exp_positive_controls = {}

    flow_1 = "20181009-top-4-A-B-cell-variants-A-B-sampling-exp-1"
    exp_columns[flow_1] = ["well", "a", "b", "ba ratio", "atc", "iptg"]
    exp_column_functions[flow_1] = [
        SampleConstants.SAMPLE_ID, SampleConstants.STRAIN_CONCENTRATION,
        SampleConstants.STRAIN_CONCENTRATION, None,
        SampleConstants.REAGENT_CONCENTRATION,
        SampleConstants.REAGENT_CONCENTRATION
    ]
    exp_mt[flow_1] = [SampleConstants.MT_FLOW]
    exp_mk[flow_1] = ["0_flow"]
    exp_rel_path[flow_1] = ["0"]
    exp_time[flow_1] = ["0:hour"]
    exp_temp[flow_1] = ["37:celsius"]

    exp_cytometer_channels[flow_1] = [
        "FSC-A", "SSC-A", "CFP/VioBlue-A", "GFP/FITC-A"
    ]
    exp_cytometer_configuration[
        flow_1] = "agave://data-sd2e-projects.sd2e-project-21/ReedM-index/A_eq_B/20190214_A_eq_B_mar_1/20190214-A-B-mar-1-cc.json"
    exp_negative_controls[flow_1] = ["0/blank-RDM2019-02-14.0001.fcs"]
    exp_positive_controls[flow_1] = {}
    exp_positive_controls[flow_1]["CFP/VioBlue-A"] = [
        "0/bfp-RDM2019-02-14.0001.fcs"
    ]
    exp_positive_controls[flow_1]["GFP/FITC-A"] = [
        "0/yfp-RDM2019-02-14.0002.fcs"
    ]

    flow_2 = "20190214-A-B-mar-1"
    exp_columns[flow_2] = ["well", "iptg", "sal", "a", "b"]
    exp_column_functions[flow_2] = [
        SampleConstants.SAMPLE_ID, SampleConstants.REAGENT_CONCENTRATION,
        SampleConstants.REAGENT_CONCENTRATION,
        SampleConstants.STRAIN_CONCENTRATION,
        SampleConstants.STRAIN_CONCENTRATION
    ]
    exp_mt[flow_2] = [SampleConstants.MT_FLOW, SampleConstants.MT_FLOW]
    exp_mk[flow_2] = ["0_flow", "18_flow"]
    exp_rel_path[flow_2] = ["0_flow", "18_flow"]
    exp_column_units[flow_2] = [None, "micromole", "micromole", None, None]
    exp_time[flow_2] = ["0:hour", "18:hour"]
    exp_temp[flow_2] = ["37:celsius", "37:celsius"]

    exp_cytometer_channels[flow_2] = [
        "FSC-A", "SSC-A", "CFP/VioBlue-A", "GFP/FITC-A"
    ]
    exp_cytometer_configuration[
        flow_2] = "agave://data-sd2e-projects.sd2e-project-21/ReedM-index/A_eq_B/20190214_A_eq_B_mar_1/20190214-A-B-mar-1-cc.json"
    exp_negative_controls[flow_2] = ["0_flow/blank-RDM2019-02-14.0001.fcs"]
    exp_positive_controls[flow_2] = {}
    exp_positive_controls[flow_2]["CFP/VioBlue-A"] = ["0_flow/A5.csv"]
    exp_positive_controls[flow_2]["GFP/FITC-A"] = [
        "0_flow/yfp-RDM2019-02-14.0002.fcs"
    ]

    matched_exp_key = None
    matched_exp_cols = None
    matched_exp_functions = None
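    # Match this spreadsheet against a known experiment by checking that every
    # expected column name appears in the sheet's header row.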
    header_row_values = list(caltech_df.columns.values)
    for exp_key in exp_columns:
        exp_col_list = exp_columns[exp_key]
        match_header = all(
            [header in header_row_values for header in exp_col_list])
        if match_header:
            matched_exp_key = exp_key
            matched_exp_cols = exp_col_list
            matched_exp_functions = exp_column_functions[exp_key]
            break
    if matched_exp_key is None:
        raise ValueError(
            "Could not match caltech experiment headers {}".format(input_file))

    # use the matched_exp_key as the reference
    output_doc[SampleConstants.EXPERIMENT_REFERENCE] = matched_exp_key
    map_experiment_reference(config, output_doc)

    # use matching exp key, e.g. 20181009-top-4-A-B-cell-variants-A-B-sampling-exp-1
    output_doc[SampleConstants.EXPERIMENT_ID] = namespace_experiment_id(
        matched_exp_key, lab)

    replicate_count = {}
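    # Replicates are numbered per unique combination of strain/reagent values:
    # samples whose concatenated value string matches get indices 0, 1, 2, ...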

    for caltech_index, caltech_sample in caltech_df.iterrows():

        measurement_key = exp_mk[matched_exp_key]

        for measurement_key_index, measurement_key_value in enumerate(
                measurement_key):

            # skip if this is a control
            skip = False

            sample_doc = {}
            contents = []
            well_id = None

            value_string = ""

            for index, column_name in enumerate(matched_exp_cols):
                value = caltech_sample[column_name]
                function = matched_exp_functions[index]

                if function == SampleConstants.SAMPLE_ID:
                    # 1:1 sample measurements
                    sample_doc[
                        SampleConstants.SAMPLE_ID] = namespace_sample_id(
                            value + "_" + str(measurement_key_index), lab,
                            output_doc)
                    well_id = value
                elif function == SampleConstants.STRAIN_CONCENTRATION:
                    # add as reagent with concentration value
                    # 'x' = not present/0
                    if value == 'x':
                        value = 0

                    contents.append(
                        create_media_component(
                            output_doc.get(SampleConstants.EXPERIMENT_ID),
                            column_name, column_name, lab, sbh_query, value))
                    # build up a string of values that define this sample
                    value_string = value_string + str(value)
                elif function == SampleConstants.REAGENT_CONCENTRATION:
                    if matched_exp_key in exp_column_units:
                        unit = exp_column_units[matched_exp_key][index]
                        value_unit = str(value) + ":" + str(unit)
                        contents.append(
                            create_media_component(
                                output_doc.get(SampleConstants.EXPERIMENT_ID),
                                column_name, column_name, lab, sbh_query,
                                value_unit))
                    else:
                        contents.append(
                            create_media_component(
                                output_doc.get(SampleConstants.EXPERIMENT_ID),
                                column_name, column_name, lab, sbh_query,
                                value))

                    value_string = value_string + str(value)
                elif function is None:
                    # skip
                    continue
                else:
                    raise ValueError("Unknown function {}".format(function))

            # have we seen this value before?
            if value_string not in replicate_count:
                replicate_count[value_string] = 0
                sample_doc[SampleConstants.REPLICATE] = 0
            else:
                replicate = replicate_count[value_string]
                replicate = replicate + 1
                replicate_count[value_string] = replicate

                sample_doc[SampleConstants.REPLICATE] = replicate

            if len(contents) > 0:
                sample_doc[SampleConstants.CONTENTS] = contents

            measurement_doc = {}
            measurement_doc[SampleConstants.FILES] = []
            measurement_doc[SampleConstants.MEASUREMENT_TYPE] = exp_mt[
                matched_exp_key][measurement_key_index]
            measurement_doc[
                SampleConstants.MEASUREMENT_NAME] = measurement_key_value

            # Fill in Flow information, if known
            if measurement_doc[SampleConstants.
                               MEASUREMENT_TYPE] == SampleConstants.MT_FLOW:
                if matched_exp_key in exp_cytometer_channels:
                    measurement_doc[
                        SampleConstants.
                        M_CHANNELS] = exp_cytometer_channels[matched_exp_key]
                if matched_exp_key in exp_cytometer_configuration:
                    measurement_doc[
                        SampleConstants.
                        M_INSTRUMENT_CONFIGURATION] = exp_cytometer_configuration[
                            matched_exp_key]

            if matched_exp_key in exp_time:
                time = exp_time[matched_exp_key][measurement_key_index]
                measurement_doc[SampleConstants.TIMEPOINT] = create_value_unit(
                    time)

            if SampleConstants.TEMPERATURE not in sample_doc:
                if matched_exp_key in exp_temp:
                    temp = exp_temp[matched_exp_key][measurement_key_index]
                    sample_doc[
                        SampleConstants.TEMPERATURE] = create_value_unit(temp)

            # generate a measurement id unique to this sample
            measurement_doc[
                SampleConstants.MEASUREMENT_ID] = namespace_measurement_id(
                    str(measurement_key_index + 1),
                    output_doc[SampleConstants.LAB], sample_doc, output_doc)

            # record a measurement grouping id to find other linked samples and files
            measurement_doc[SampleConstants.
                            MEASUREMENT_GROUP_ID] = namespace_measurement_id(
                                measurement_key_value,
                                output_doc[SampleConstants.LAB], sample_doc,
                                output_doc)

            # sample id -> well name -> filename.csv?
            # TODO this may not hold
            fn_well = well_id + ".csv"

            if matched_exp_key in exp_negative_controls:
                for negative_control in exp_negative_controls[matched_exp_key]:
                    if negative_control.endswith(fn_well):
                        skip = True

            if matched_exp_key in exp_positive_controls:
                for positive_control_channel in exp_positive_controls[
                        matched_exp_key]:
                    for positive_control in exp_positive_controls[
                            matched_exp_key][positive_control_channel]:
                        if positive_control.endswith(fn_well):
                            skip = True
            if skip:
                continue

            filename = os.path.join(
                exp_rel_path[matched_exp_key][measurement_key_index], fn_well)
            file_id = namespace_file_id(str(1),
                                        output_doc[SampleConstants.LAB],
                                        measurement_doc, output_doc)
            file_type = SampleConstants.infer_file_type(filename)
            measurement_doc[SampleConstants.FILES].append({
                SampleConstants.M_NAME:
                filename,
                SampleConstants.M_TYPE:
                file_type,
                SampleConstants.M_LAB_LABEL: [SampleConstants.M_LAB_LABEL_RAW],
                SampleConstants.FILE_ID:
                file_id,
                SampleConstants.FILE_LEVEL:
                SampleConstants.F_LEVEL_0
            })

            if SampleConstants.MEASUREMENTS not in sample_doc:
                sample_doc[SampleConstants.MEASUREMENTS] = []
            sample_doc[SampleConstants.MEASUREMENTS].append(measurement_doc)

            output_doc[SampleConstants.SAMPLES].append(sample_doc)

    # Add flow controls, if known
    if matched_exp_key in exp_negative_controls:
        for negative_control in exp_negative_controls[matched_exp_key]:
            create_flow_control_sample(negative_control, "negative flow control", \
                exp_cytometer_channels[matched_exp_key], exp_cytometer_configuration[matched_exp_key], output_doc, \
                    True, False, None)

    if matched_exp_key in exp_positive_controls:
        for positive_control_channel in exp_positive_controls[matched_exp_key]:
            for positive_control in exp_positive_controls[matched_exp_key][
                    positive_control_channel]:
                create_flow_control_sample(positive_control, "positive flow control", \
                    exp_cytometer_channels[matched_exp_key], exp_cytometer_configuration[matched_exp_key], output_doc, \
                        False, True, positive_control_channel)

    try:
        validate(output_doc, schema)

        if output is True or output_file is not None:
            if output_file is None:
                path = os.path.join("output/caltech",
                                    os.path.basename(input_file))
            else:
                path = output_file

            if path.endswith(".xlsx"):
                path = path[:-5] + ".json"

            with open(path, 'w') as outfile:
                json.dump(output_doc, outfile, indent=4)
        return True
    except ValidationError as err:
        if verbose:
            print("Schema Validation Error: {0}\n".format(err))
        if enforce_validation:
            raise ValidationError("Schema Validation Error", err)
        return False
Example #32
0
def validate_schema_postage(instance):
    if isinstance(instance, str):
        if instance not in ["first", "second", "europe", "rest-of-world"]:
            raise ValidationError(
                "invalid. It must be first, second, europe or rest-of-world.")
    return True
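
Illustrative calls (values are made up): strings must be one of the four postage options, while non-string instances pass through unchecked.

validate_schema_postage("first")    # returns True
validate_schema_postage(2)          # returns True (non-string values are not checked here)
# validate_schema_postage("third")  # would raise ValidationError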