Example #1
def index_from_get_well_name(well_name, platesize):

    row_index = well_name_row_index(well_name)
    col_index = well_name_col_index(well_name)
    rows = get_rows(platesize)
    cols = get_cols(platesize)

    if row_index >= rows:
        raise ValidationError(
            key='well_name',
            msg='%r, row index: %d, is >= rows: %d, for platesize: %d' %
            (well_name, row_index, rows, platesize))
    if col_index >= cols:
        raise ValidationError(
            key='well_name',
            msg='%r, col index: %d, is >= cols: %d, for platesize: %d' %
            (well_name, col_index, cols, platesize))

    # Fill in cols by row then col
    # index = col_index * rows + row_index

    # Fill by cols just like the Screening lab does: col then row
    index = row_index * cols + col_index

    if index >= platesize:
        raise ValidationError(key='well_name',
                              msg='%r, index: %d, is >= platesize: %d' %
                              (well_name, index, platesize))
    return index
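
A quick worked illustration of the col-then-row fill order (a sketch only, assuming the standard 96-well geometry of 8 rows x 12 columns and zero-based helpers, so that well_name_row_index('B03') == 1 and well_name_col_index('B03') == 2):

# index = row_index * cols + col_index = 1 * 12 + 2 = 14
# index_from_get_well_name('B03', 96) == 14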
Example #2
def create_request_from_job(job_data, raw_data=''):

    if DEBUG_BACKGROUND is True:
        logger.info('create_request_from_job: %r', job_data)
    if raw_data:
        logger.info('raw_data: %r, %d', type(raw_data), len(raw_data))
        if len(raw_data) < 1000:
            logger.info('raw_data: %r', raw_data)
        else:
            logger.info('raw_data(trunc): %r', raw_data[:1000])
    request_factory = RequestFactory()

    path = job_data[JOB.URI]
    method = job_data[JOB.METHOD]
    encoding = job_data[JOB.ENCODING]
    content_type = job_data[JOB.CONTENT_TYPE]
    # For job processing, force response to JSON
    #     accept = urllib.unquote(job_data[JOB.HTTP_ACCEPT])
    accept = JSON_MIMETYPE
    comment = job_data[JOB.COMMENT]
    params = job_data[JOB.PARAMS]
    if params:
        params = json.loads(params)
    else:
        params = {}
    params[HEADER_APILOG_COMMENT_CLIENT] = comment
    params[HEADER_APILOG_COMMENT] = comment

    if raw_data:
        if DEBUG_BACKGROUND is True:
            logger.info('add raw data: %d', len(raw_data))
        if MULTIPART_MIMETYPE not in content_type:
            msg = 'content type must contain %r for raw data post: found: %r'\
                 % (MULTIPART_MIMETYPE, content_type)
            logger.error(msg)
            raise ValidationError(key=JOB.CONTENT_TYPE, msg=msg)
        if method != 'POST':
            errmsg = 'method %r is not %r, required for raw data post' % (
                method, 'POST')
            raise ValidationError(key=JOB.METHOD, msg=errmsg)
    else:
        if DEBUG_BACKGROUND is True:
            logger.info('no raw data to add')

    if DEBUG_BACKGROUND is True:
        logger.info('create_request_from_job content type: %r', content_type)
    request = request_factory.generic(method,
                                      path,
                                      data=raw_data,
                                      HTTP_ACCEPT=accept,
                                      content_type=content_type,
                                      **params)

    if DEBUG_BACKGROUND is True:
        logger.info('create_request_from_job: META: %r', request.META)
        logger.info('create_request_from_job: FILES: %r', request.FILES)

    return request
Example #3
def parse_wells_to_leave_empty(wells_to_leave_empty, plate_size):
    '''
    Parse the wells to leave empty field of the Cherry Pick Request.
    TODO: replace with parse_well_ranges
    '''

    logger.debug('raw wells_to_leave_empty: %r, plate_size: %r',
                 wells_to_leave_empty, plate_size)

    ncols = get_cols(plate_size)
    nrows = get_rows(plate_size)
    row_pattern = re.compile(r'row:\s*([a-zA-Z]{1,2})', flags=re.IGNORECASE)
    col_pattern = re.compile(r'col:\s*(\d{1,2})', flags=re.IGNORECASE)
    selections = re.split(r'\s*,\s*', wells_to_leave_empty)
    new_selections = []
    for selection in selections:
        colmatch = col_pattern.match(selection)
        if colmatch:
            col = int(colmatch.group(1))
            if col > ncols:
                raise ValidationError(key='wells_to_leave_empty',
                                      msg='column out of range: %d, %r' %
                                      (col, selection))
            new_selections.append('Col:%d' % col)
            continue
        rowmatch = row_pattern.match(selection)
        if rowmatch:
            row = letter_to_row_index(rowmatch.group(1))
            if row >= nrows:
                raise ValidationError(key='wells_to_leave_empty',
                                      msg='row out of range: %r, %r' %
                                      (rowmatch.group(1), selection))
            new_selections.append('Row:%s' % rowmatch.group(1).upper())
            continue
        wellmatch = WELL_NAME_PATTERN.match(selection)
        if wellmatch:
            new_selections.append(selection.upper())
            continue

        raise ValidationError(key='wells_to_leave_empty',
                              msg='unrecognized pattern: %r' % selection)
    logger.debug('new wells_to_leave_empty selections: %r', new_selections)

    decorated = []
    for wellname in new_selections:
        if 'Col:' in wellname:
            decorated.append((1, int(wellname.split(':')[1]), wellname))
        elif 'Row:' in wellname:
            decorated.append((2, wellname.split(':')[1], wellname))
        else:
            match = WELL_NAME_PATTERN.match(wellname)
            # Sort plain well names by row letter, then column number
            decorated.append((match.group(1), int(match.group(2)), wellname))
    new_wells_to_leave_empty = [x[2] for x in sorted(decorated)]
    logger.debug('wells_to_leave_empty: %r', new_wells_to_leave_empty)

    return new_wells_to_leave_empty
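
Illustrative usage (a sketch only, assuming a 96-well plate and the behavior of the helpers referenced above, letter_to_row_index and WELL_NAME_PATTERN):

# parse_wells_to_leave_empty('Col: 2, Row: B, A01', 96)
#   -> ['Col:2', 'Row:B', 'A01']
# Column and row selectors are normalized first and sort ahead of plain well names.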
Example #4
def assay_plate_available_wells(wells_to_leave_empty, plate_size):

    if plate_size not in ALLOWED_PLATE_SIZES:
        raise ValidationError(
            key='plate_size',
            msg=('plate_size: %d for assay_plate_available_wells, '
                 'not in allowed: %r' % (plate_size, ALLOWED_PLATE_SIZES)))
    available_wells = []

    # Parse and sanitize the wells_to_leave_empty (should be already done)
    wells_to_leave_empty_list = []
    if wells_to_leave_empty:
        wells_to_leave_empty_list = parse_wells_to_leave_empty(
            wells_to_leave_empty, plate_size)
    row_specifier = 'Row:%s'
    col_specifier = 'Col:%d'
    for i in range(0, plate_size):
        well_name = well_name_from_index(i, plate_size)
        wellmatch = WELL_NAME_PATTERN.match(well_name)
        row = wellmatch.group(1)
        col = int(wellmatch.group(2))
        if row_specifier % row in wells_to_leave_empty_list:
            continue
        if col_specifier % col in wells_to_leave_empty_list:
            continue
        if well_name in wells_to_leave_empty_list:
            continue
        available_wells.append(well_name)
    return available_wells
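
A worked count, assuming a standard 96-well plate (8 rows x 12 columns):

# 'Row:A' covers 12 wells and 'Col:1' covers 8 wells; A01 is in both, so 19
# distinct wells are left empty:
# len(assay_plate_available_wells('Row:A, Col:1', 96)) == 96 - 19 == 77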
Example #5
def get_value(member_name):
    logger.info('get_value: %r', member_name)
    if member_name.upper() in Collation.ordered_members:
        return Collation.ordered_members.index(member_name.upper())
    else:
        msg = 'must be one of %r' % Collation.ordered_members
        logger.warn('collation ' + msg)
        raise ValidationError(key='collation', msg=msg)
Example #6
def well_id_plate_number(well_id):
    ''' Get the plate_number from the well_id '''
    match = WELL_ID_PATTERN.match(well_id)
    if not match:
        raise ValidationError(key='well_id',
                              msg='%r Does not match pattern: %s' %
                              (well_id, WELL_ID_PATTERN.pattern))
    return int(match.group(1))
Example #7
def well_id_name(well_id):
    ''' Get the well name from the well_id '''
    match = WELL_ID_PATTERN.match(well_id)
    if not match:
        raise ValidationError(key='well_id',
                              msg='%r Does not match pattern: %s' %
                              (well_id, WELL_ID_PATTERN.pattern))
    wellrow = match.group(3).upper()
    wellcol = match.group(4)
    return '%s%s' % (wellrow, str(wellcol).zfill(2))
Example #8
def well_row_col(well_name):
    '''
    @return zero based (row_index,col_index)
    '''
    match = WELL_NAME_PATTERN.match(well_name)
    if not match:
        raise ValidationError(key='well_name',
                              msg='%r does not match pattern: %s' %
                              (well_name, WELL_NAME_PATTERN.pattern))
    return (letter_to_row_index(match.group(1)), int(match.group(2)) - 1)
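
letter_to_row_index is not shown in this example; a minimal sketch of the convention it is assumed to follow (zero-based rows, with double letters continuing past 'Z', e.g. 'A' -> 0, 'P' -> 15, 'AA' -> 26):

def letter_to_row_index_sketch(letter):
    # Hypothetical helper for illustration; the real implementation may differ.
    index = 0
    for ch in letter.upper():
        index = index * 26 + (ord(ch) - ord('A') + 1)
    return index - 1

# With that convention: well_row_col('B11') -> (1, 10)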
Example #9
def parse_val(value, key, data_type, options=None):
    """
    All values are read as strings from the input files, so this function 
    converts them as directed.
    TODO: validation
    """
    try:
        if (value is None or value == '' or value == 'None' or value == u'None'
                or value == 'null' or value == u'n/a'):
            if data_type == 'string':
                return ''
            elif data_type == 'list':
                return []
            else:
                return None
        if data_type == 'string':
            return value
        elif data_type == 'integer':
            # todo: this is a kludge, create an integer from values like "5.0"
            return int(float(value))
        elif data_type == 'date':
            return dateutil.parser.parse(value).date()
        elif data_type == 'datetime':
            return dateutil.parser.parse(value)
        elif data_type == 'boolean':
            if value is True or value is False:
                return value
            value = str(value)
            if (value.lower() == 'true' or value.lower() == 't'
                    or value == '1'):
                return True
            return False
        elif data_type == 'float':
            return float(value)
        elif data_type == 'decimal':
            if isinstance(value, float):
                logger.warn('converting float: %r to decimal: %r', value,
                            Decimal(str(value)))
                value = str(value)
            return Decimal(value)
        elif data_type == 'list':
            if isinstance(value, six.string_types):
                if value.strip():
                    return (value, )  # convert string to list
                else:
                    return []
            return value  # otherwise, better be a list
        else:
            raise Exception('unknown data type: %s: "%s"' % (key, data_type))
    except Exception, e:
        logger.exception('value not parsed %r:%r', key, value)
        raise ValidationError(key=key, msg='parse error: %r' % str(e))
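
Illustrative conversions (derived from the branches above):

# parse_val('5.0', 'count', 'integer')   -> 5
# parse_val('t', 'flag', 'boolean')      -> True
# parse_val(1.15, 'conc', 'decimal')     -> Decimal('1.15') (converted via str)
# parse_val('n/a', 'name', 'string')     -> ''
# parse_val(None, 'tags', 'list')        -> []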
Example #10
def plate_size_from_plate_type(plate_type):
    '''
    Get the plate size from the current plate_type vocabulary:
    eppendorf_384
    costar_96
    abgene_384
    genetix_384
    marsh_384
    nunc_96
    eppendorf_96
    Note: plate_type must end with the plate size integer for this to work:
    FIXME: plate size determined by magic value embedded in plate_types
    '''
    parts = plate_type.split('_')
    if len(parts) != 2:
        raise ValidationError(key='plate_type',
                              msg='not a recognized type: %r' % plate_type)
    plate_size = int(parts[1])
    if plate_size not in ALLOWED_PLATE_SIZES:
        raise ValidationError(
            key='plate_type',
            msg='plate_size: %d for plate_type: %r, not in allowed: %r' %
            (plate_size, plate_type, ALLOWED_PLATE_SIZES))
    return plate_size
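
For example, assuming ALLOWED_PLATE_SIZES contains 96 and 384:

# plate_size_from_plate_type('nunc_96')       -> 96
# plate_size_from_plate_type('eppendorf_384') -> 384
# plate_size_from_plate_type('96')            -> ValidationError (no '_' separator)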
Example #11
def parse_columns(columns):
    '''
    Parse the Screen Result input file Data Columns sheet into valid API 
        Data Columns input.
    '''
    parsed_cols = OrderedDict()
    errors = {}
    for i,column in enumerate(columns):
        parsed_col = {
            'is_derived': False,
            'is_follow_up_data': False,
            'ordinal': i
        }
        logger.debug('parsing column: %r', column['data_worksheet_column'])
        if column['data_worksheet_column'] in parsed_cols:
            raise ValidationError(
                key='data_worksheet_column', 
                msg='%r is listed more than once' 
                    % column['data_worksheet_column'])
        parsed_cols[column['data_worksheet_column']] = parsed_col
        for key,val in column.items():
            if key == 'is_follow_up_data':
                parsed_col[key] = bool(val and val.lower() == 'follow up')
            elif key == 'data_type':
                val = default_converter(val)
                # handle validation errors in the api
                if val not in DATA_TYPE_VALUES:
                    key = '%s:%s' % (column['data_worksheet_column'],'data_type')
                    errors[key] = 'val: %r must be one of %r' % (val,DATA_TYPE_VALUES)
                parsed_col[key] = val
            elif key == 'assay_readout_type':
                parsed_col[key] = default_converter(val)
            else:
                if key == 'how_derived':
                    parsed_col['is_derived'] = ( 
                        val is not None and val.strip() != '' )
                parsed_col[key] = val
        
        if parsed_col.get('decimal_places') is not None:
            try:
                key = '%s:%s' % (column['data_worksheet_column'],'decimal_places')
                column['decimal_places'] = parse_val(
                    column['decimal_places'],key,'integer')
            except ValidationError, e:
                errors.update(e.errors)
        logger.debug('parsed_col: %r', parsed_col)
Example #12
def parse_copywell_id(pattern):

    parts = pattern.split('/')

    if len(parts) < 3:
        raise ValidationError(key='copywell_id',
                              msg='Invalid pattern: must contain '
                              '"library_short_name/copy_name/well_id"')
    else:
        library_short_name = parts[0]
        copy_name = parts[1]
        _well_id = parts[2]
        plate_number = well_id_plate_number(_well_id)
        well_name = well_id_name(_well_id)

        return (copy_name, plate_number, well_id(plate_number,
                                                 well_name), well_name)
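
Illustrative usage (a sketch; it assumes the well_id() helper composes 'plate_number:well_name' with the plate number zero-filled to five digits, as in the other functions here):

# parse_copywell_id('libA/C/00312:B03')
#   -> ('C', 312, '00312:B03', 'B03')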
Example #13
def transform(input_matrices, counter, aps, lps):

    assert aps in ALLOWED_MATRIX_SIZES, \
        ('assay_plate_size must be one of %r' % ALLOWED_MATRIX_SIZES)
    assert lps in ALLOWED_MATRIX_SIZES, \
        ('library_plate_size must be one of %r' % ALLOWED_MATRIX_SIZES)

    if aps < lps:
        logger.info('convolute matrices')
        factor = lps / aps
        if factor != 4:
            msg = (
                'Convolute: library_plate_size/assay_plate_size != 4: %d/%d' %
                (lps, aps))
            raise ValidationError({
                'assay_plate_size': msg,
                'library_plate_size': msg
            })
        if len(input_matrices) % 4 != 0:
            msg = 'Convolute: input matrix array must contain a multiple of 4 members'
            raise ValidationError({
                'assay_plate_size': msg,
                'library_plate_size': msg
            })
        # Create an adjusted counter to match the input:
        # - add quadrant counter to the right of plate counter
        new_counter_hash = OrderedDict()
        for key, value in counter.counter_hash.items():
            new_counter_hash[key] = value
            if key == 'plate':
                new_counter_hash['quadrant'] = [0, 1, 2, 3]
        counter96 = Counter(new_counter_hash)
        logger.info('counter96: %r', counter96)
        if counter96.size() != len(input_matrices):
            raise ProgrammingError(
                'input_matrices length (%d) must match '
                'the counter length with 4 quadrants: (%d)' %
                (len(input_matrices), counter96.size()))

        # - Create blank output matrices
        convoluted_matrices = [
            lims_utils.create_blank_matrix(lps)
            for x in range(0,
                           len(input_matrices) / 4)
        ]

        # Iterate through output (384) matrices and find the 96 matrix values
        # NOTE: could also start by iterating through input matrices
        for index, matrix in enumerate(convoluted_matrices):
            readout = counter.get_readout(index)
            for rownum, row in enumerate(matrix):
                for colnum in range(0, len(row)):
                    input_quadrant = lims_utils.deconvolute_quadrant(
                        lps, aps, rownum, colnum)
                    readout96 = dict(readout, quadrant=input_quadrant)
                    logger.debug(
                        'index: %d, 384 readout: %r, quadrant: %d, 96: %r',
                        index, readout, input_quadrant, readout96)
                    logger.debug('counter96: %r' % counter96.counter_hash)
                    input_index = counter96.get_index(readout96)
                    input_row = lims_utils.deconvolute_row(
                        lps, aps, rownum, colnum)
                    input_col = lims_utils.deconvolute_col(
                        lps, aps, rownum, colnum)
                    logger.debug('find: index: %d, cell: [%d][%d]',
                                 input_index, input_row, input_col)
                    row[colnum] = input_matrices[input_index][input_row][
                        input_col]

        return convoluted_matrices

    elif lps < aps:
        logger.info('deconvolute matrices')
        factor = aps / lps
        if factor != 4:
            msg = (
                'Deconvolute: assay_plate_size/library_plate_size != 4: %d/%d'
                % (aps, lps))
            raise ValidationError({
                'assay_plate_size': msg,
                'library_plate_size': msg
            })
        # Create an adjusted counter to match the input
        plates = counter.counter_hash.get('plate')
        logger.info('plates: %r', plates)
        if len(plates) % 4 != 0:
            msg = 'Deconvolute: plate count must be a multiple of 4: %d' % len(
                plates)
            raise ValidationError({'plate_ranges': msg})

        plates_1536 = OrderedDict()
        for i, plate in enumerate(plates):
            plate_number_1536 = i / 4
            if plate_number_1536 not in plates_1536:
                plates_1536[plate_number_1536] = []
            plates_1536[plate_number_1536].append(plate)
        logger.info('plates_1536: %r', plates_1536)
        new_counter_hash = counter.counter_hash.copy()
        new_counter_hash['plate'] = plates_1536.keys()
        counter1536 = Counter(new_counter_hash)

        # Create blank output matrices
        deconvoluted_matrices = [
            None for x in range(0,
                                len(input_matrices) * 4)
        ]
        # Iterate through input (1536) matrices and find the output 384 matrix value
        for index, matrix in enumerate(input_matrices):
            readout1536 = counter1536.get_readout(index)
            plate1536 = readout1536['plate']

            # Convert each 1536 plate separately, and find the output matrix position
            output_384_matrices = lims_utils.deconvolute_matrices([matrix],
                                                                  aps, lps)

            for quadrant, matrix384 in enumerate(output_384_matrices):
                plate384 = plates_1536[plate1536][quadrant]
                readout384 = dict(readout1536, plate=plate384)
                index384 = counter.get_index(readout384)

                deconvoluted_matrices[index384] = matrix384

        return deconvoluted_matrices

    else:
        return input_matrices
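
The lims_utils deconvolute_* helpers are not shown; a minimal sketch of the interleaved-quadrant convention they are assumed to implement (the actual implementations may differ):

def deconvolute_quadrant_sketch(lps, aps, rownum, colnum):
    # Quadrant 0..3 chosen by the parity of the larger plate's row and column.
    return (rownum % 2) * 2 + (colnum % 2)

def deconvolute_row_sketch(lps, aps, rownum, colnum):
    # Row index within the smaller plate.
    return rownum // 2

def deconvolute_col_sketch(lps, aps, rownum, colnum):
    # Column index within the smaller plate.
    return colnum // 2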
Example #14
def parse_result_row(i,parsed_columns,result_row):    
    '''
    Parse the Screen Result input file format into a valid API input format:
    - Convert plate_number and well_name into a well_id
    - Convert the assay_well_control_type input:
        use the ASSAY_WELL_CONTROL_TYPES to map api schema assaywell.control_type
    - Convert the exclude column specifiers into known column letters:
        "all" is converted to a list of all column letters
    - Parse value columns according to the data_type specified:
        - Create default values for positive columns
        - (TODO: validation rules can be moved to API)
        - Verify that PARTITION_POSITIVE_MAPPING values are used
        - Verify that CONFIRMED_POSITIVE_MAPPING values are used
        - Verify that integer values are integers
        - Verify that decimal values can be parsed as float
    '''
    logger.debug(
        'parse result row: %d, %r:  %r', i, parsed_columns.keys(), result_row)
    
    meta_columns = RESULT_VALUE_FIELD_MAP.values()
    parsed_row = {}
    excluded_cols = []

    well_id_errors = []
    meta_key = 'plate_number'
    val = result_row[meta_key]
    logger.debug('plate value to parse: %r', val)
    plate_number = parse_val(val, meta_key, 'integer')
    if plate_number is None:
        well_id_errors.append('%s is required' % meta_key)
    meta_key = 'well_name'
    val = result_row[meta_key]
    if not val:
        well_id_errors.append('%s is required' % meta_key)
    elif WELL_NAME_PATTERN.match(val):
        wellname = val
    else:
        well_id_errors.append('Well_name val %r does not follow the pattern: %r'
            % (val, WELL_NAME_PATTERN.pattern))
    if well_id_errors:
        raise ParseError(errors={ 'row: %d'%i: well_id_errors })
    
    parsed_row['well_id'] = \
        '%s:%s' % (str(plate_number).zfill(5), wellname)
    
    meta_key = 'assay_well_control_type'
    val = result_row.get(meta_key)
    parsed_row[meta_key] = None
    if val is not None:
        if val.lower() in ASSAY_WELL_CONTROL_TYPES:
            parsed_row[meta_key] = \
                ASSAY_WELL_CONTROL_TYPES[val.lower()]
        else:
            msg = ('%s: val %r is not one of the choices: %r'
                % (meta_key, val, ASSAY_WELL_CONTROL_TYPES))
            logger.error(msg)
            raise ValidationError(key=parsed_row['well_id'], msg=msg)

    meta_key = 'exclude'
    val = result_row.get(meta_key)
    if val is not None:
        if val.lower() == 'all':
            excluded_cols = parsed_columns.keys()
        else:
            excluded_cols = [x.strip().upper() for x in val.split(',')]
            unknown_excluded_cols = (
                set(excluded_cols) - set(parsed_columns.keys()))
            if unknown_excluded_cols:
                raise ValidationError(
                    key = parsed_row['well_id'],
                    msg = 'unknown excluded cols: %r' % unknown_excluded_cols )
        parsed_row[meta_key] = excluded_cols
            
    for colname, raw_val in result_row.items():
        logger.debug('colname: %r, raw_val: %r', colname, raw_val)
        if colname in meta_columns:
            continue
        if colname not in parsed_columns:
            # NOTE: this is no longer an error, as the result value sheet may
            # contain extra columns (selected by user on output)
            logger.debug(
                'result value column %r is not in recognized columns: %r', 
                colname, parsed_columns.keys())
            parsed_row[colname] = raw_val
            continue
        column = parsed_columns[colname]
        if raw_val is None:
            # 20180315 - verified with DJW, default values for
            # positive indicator columns
            if column['data_type'] == DATA_TYPE.BOOLEAN_POSITIVE:
                raw_val = False
            elif column['data_type'] == DATA_TYPE.PARTITIONED_POSITIVE:
                raw_val = 'NP'
            elif column['data_type'] == DATA_TYPE.CONFIRMED_POSITIVE:
                raw_val = 'NT'
            else:
                continue
        
        key = '%s-%s' % (parsed_row['well_id'],colname)
        parsed_row[colname] = raw_val
        
        if column['data_type'] in DATA_TYPE.numeric_types:
            if column['decimal_places'] > 0:
                # parse, to validate only; use decimal for final parsing
                parse_val(raw_val, key, 'float')
            else:
                parsed_row[colname] = parse_val(raw_val, key, 'integer')
                
        elif column['data_type'] == DATA_TYPE.PARTITIONED_POSITIVE:
            val = raw_val.upper()
            if val not in PARTITION_POSITIVE_MAPPING:
                raise ValidationError(
                    key=key, 
                    msg='val: %r must be one of %r'
                        % (raw_val, PARTITION_POSITIVE_MAPPING.keys()))
            parsed_row[colname] = val
        elif column['data_type'] == DATA_TYPE.CONFIRMED_POSITIVE:
            val = raw_val.upper()
            if val not in CONFIRMED_POSITIVE_MAPPING:
                raise ValidationError(
                    key=key, 
                    msg='val: %r must be one of %r'
                        % (raw_val, CONFIRMED_POSITIVE_MAPPING.keys()))
            parsed_row[colname] = val
        elif column['data_type'] == DATA_TYPE.BOOLEAN_POSITIVE:
            val = parse_val(raw_val,key,'boolean')
            parsed_row[colname] = val
        logger.debug('parsed_row: %r', parsed_row)
    
    return parsed_row
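
A compact sketch of the row-level conversions described in the docstring:

# {'plate_number': '312', 'well_name': 'A01', 'exclude': 'all', ...}
#   -> parsed_row['well_id'] == '00312:A01'
#   -> parsed_row['exclude'] == all parsed data worksheet column letters
# Missing values default by data_type: boolean_positive -> False,
# partitioned_positive -> 'NP', confirmed_positive -> 'NT'.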
Example #15
            else:
                if key == 'how_derived':
                    parsed_col['is_derived'] = ( 
                        val is not None and val.strip() != '' )
                parsed_col[key] = val
        
        if parsed_col.get('decimal_places') is not None:
            try:
                key = '%s:%s' % (column['data_worksheet_column'],'decimal_places')
                column['decimal_places'] = parse_val(
                    column['decimal_places'],key,'integer')
            except ValidationError, e:
                errors.update(e.errors)
        logger.debug('parsed_col: %r', parsed_col)
    if errors:
        raise ValidationError(errors={'Data Columns': errors})
    
    logger.debug('parsed cols: %r', parsed_cols)
    return parsed_cols
        
def result_value_field_mapper(header_row, parsed_columns):
    '''
    Parse the Screen Result input file result sheet headers into the valid API 
        result value input headers using the RESULT_VALUE_FIELD_MAP
    '''
    if DEBUG_IMPORTER:
        logger.info('map result value header row... %r', parsed_columns.keys())
    mapped_row = []
    header_row = [x for x in header_row]
    for i,value in enumerate(header_row):
        if not value:
Example #16
def execute_from_python(job_id, sbatch=False):
    '''
    Utility method to invoke background job processing from the running server.

    @see settings.BACKGROUND_PROCESSOR

    @param job_id the id of the background job to run
    @param sbatch if true, requires "sbatch_settings" in the
    BACKGROUND_PROCESSOR settings
    '''
    logger.info('using settings.BACKGROUND_PROCESSOR: %r',
                settings.BACKGROUND_PROCESSOR)

    check_settings = set([
        'post_data_directory', 'job_output_directory', 'credential_file',
        'python_environ_script', 'background_process_script'
    ])

    if not check_settings.issubset(set(settings.BACKGROUND_PROCESSOR.keys())):
        raise ValidationError(
            key='settings.BACKGROUND_PROCESSOR',
            msg='missing required entries: %s' %
            (check_settings - set(settings.BACKGROUND_PROCESSOR.keys())))

    job_output_dir = settings.BACKGROUND_PROCESSOR['job_output_directory']
    if not os.path.exists(job_output_dir):
        os.makedirs(job_output_dir)
    credential_file = settings.BACKGROUND_PROCESSOR['credential_file']
    python_environ_script = settings.BACKGROUND_PROCESSOR[
        'python_environ_script']
    if not os.path.exists(python_environ_script):
        raise InformationError(key='python_environ_script',
                               msg='file does not exist: %r' %
                               python_environ_script)
    # background_process_script = settings.BACKGROUND_PROCESSOR['background_process_script']
    background_process_script = os.path.abspath(__file__)
    logger.info('this file: %r', background_process_script)

    output_stdout = '%d.stdout' % job_id
    output_stdout = os.path.abspath(os.path.join(job_output_dir,
                                                 output_stdout))
    output_stderr = '%d.stderr' % job_id
    output_stderr = os.path.abspath(os.path.join(job_output_dir,
                                                 output_stderr))

    run_sh_args = [
        python_environ_script, background_process_script, '--job_id',
        str(job_id), '--c', credential_file
    ]
    full_args = []

    if sbatch is True:
        os.putenv('USER', 'sde4')
        full_args.append('/usr/local/bin/sbatch')

        sbatch_settings = settings.BACKGROUND_PROCESSOR.get('sbatch_settings')
        if sbatch_settings is None:
            raise InformationError(
                key='sbatch_settings',
                msg='missing from the BACKGROUND_PROCESSOR settings')

        sbatch_settings['output'] = output_stdout
        sbatch_settings['error'] = output_stderr
        sbatch_settings['job-name'] = 'ss_{}'.format(job_id)
        sbatch_args = []
        for k, v in sbatch_settings.items():
            sbatch_args.extend(['--%s=%s' % (k, '%s' % str(v))])
        full_args.extend(sbatch_args)
        full_args.append('-vvv')

    full_args.extend(run_sh_args)

    logger.info('full args: %r', full_args)

    if sbatch is True:
        logger.info('sbatch specified, invoke sbatch and wait for output...')
        logger.info('full command %s: ', ' '.join(full_args))
        try:
            output = \
                subprocess.check_output(full_args, stderr=subprocess.STDOUT)
            logger.info('ran, output: %r', output)
            # TODO: parse the SLURM process ID from the output
            return output
        except subprocess.CalledProcessError, e:
            logger.error('subprocess.CalledProcessError: output: %r', e.output)
            raise
Example #17
def create_output_data(screen_facility_id, fields, result_values ):
    '''
    Translate Screen Result data into a data structure ready for Serialization:
    {
       'Screen Info': [ [ row1 ], [ row2 ]...],
       'Data Columns': [ [ row1 ], [ row2 ]...],
       'Data': [ [ row1 ], [ row2 ]...],
    }
    @param fields an iterable containing result_value data_column dicts and 
        field information dicts for the non-result value columns
    @param result_values an iterable containing result_value dicts
    '''
    logger.info('create screen result data structure for %r', screen_facility_id)
    control_type_mapping = {v:k for k,v in ASSAY_WELL_CONTROL_TYPES.items()}

    data = OrderedDict()
    
    data['Screen Info'] = { 'Screen Number': screen_facility_id }
    
    data_column_structure = []
    data['Data Columns'] = data_column_structure
    
    datacolumn_labels = DATA_COLUMN_FIELD_MAP.keys()
    data_columns = []
    data_column_names = []
    other_columns = []
    for key,field in fields.items(): 
        if ( field.get('is_datacolumn',False) 
            or field.get('data_worksheet_column', None)):
            data_columns.append(key)
            data_column_names.append(field['name'])
        elif ( key not in ['well_id', 'plate_number','well_name',
                           'screen_facility_id', 'assay_well_control_type']
               and key not in RESULT_VALUE_FIELD_MAP.keys() ):
            other_columns.append(key)
    data_columns = sorted(data_columns, key=lambda x: fields[x]['ordinal'])
    other_columns = sorted(other_columns, key=lambda x: fields[x]['ordinal'])
    data_column_names_to_col_letter = { 
        dc:xl_col_to_name(len(RESULT_VALUE_FIELD_MAP)+i) 
            for (i,dc) in enumerate(data_column_names) }
    logger.info('data columns: %r, other_columns: %r', data_columns, other_columns)
    
    # Transpose the field definitions into the output data_column sheet:
    # Row 0 - "Data" Worksheet Column
    # Row 1 - name
    # Row 2 - data_type
    # Row N - other data column fields
    # Column 0 - data column field label
    # Column 1-N data column values
    header_row = [datacolumn_labels[0]]
    header_row.extend([xl_col_to_name(len(RESULT_VALUE_FIELD_MAP)+i) 
        for i in range(len(data_columns))])
    logger.debug('header_row: %r', header_row)
    for i,(sheet_label,sheet_key) in enumerate(
            DATA_COLUMN_FIELD_MAP.items()[1:]):
        row = [sheet_label]
        for j,key in enumerate(data_columns):
            val = fields[key].get(sheet_key, None)
            if sheet_key == 'data_type':
                val = fields[key].get(
                    'assay_data_type',fields[key].get('data_type',None))
            if val:
                if sheet_key == 'is_follow_up_data':
                    if val == True:
                        val = 'Follow up'
                    elif val == False:
                        val = 'Primary'
                elif sheet_key == 'derived_from_columns':
                    if fields[key].get('screen_facility_id', None) == screen_facility_id:
                        logger.info('Translate derived_from_columns: %r', val)
                        if not set(data_column_names_to_col_letter.keys()).issuperset(set(val)):
                            raise ValidationError(
                                key='derived_from_columns', 
                                msg=('col: %r, values: %r are not in %r'
                                    %(key,val,data_column_names_to_col_letter.keys())))
                        val = ', '.join(
                            [data_column_names_to_col_letter[dc_name] for dc_name in val])
                    else:
                        # Manually serialize using commas
                        val = ', '.join(val)
                row.append(val)
            else:
                row.append(None)
                logger.debug(
                    'Note: sheet key not found in schema field: %r, %r', 
                    sheet_key, fields[key])
        logger.debug('data column row: %r', row)
        data_column_structure.append(OrderedDict(zip(header_row,row)))
        
    def result_value_generator(result_values):
        
        logger.info('Write the result values sheet')
        header_row = []
        header_row.extend(RESULT_VALUE_FIELD_MAP.keys())
        # TODO: allow column titles to be optional
        header_row.extend([fields[key].get('title', key) for key in data_columns])
        header_row.extend(other_columns)

        row_count = 0
        for result_value in result_values:
            row_count += 1
            row = []
            
            row.extend(result_value['well_id'].split(':'))
            if ( result_value.has_key('assay_well_control_type')
                 and result_value['assay_well_control_type'] ):
                control_type = default_converter(result_value['assay_well_control_type'])
                # note: "empty", "experimental", "buffer" are values that can be
                # found in this column, due to legacy data entry, but they are 
                # not valid
                if control_type in control_type_mapping:
                    row.append(control_type_mapping[control_type])
                else:
                    row.append(None)
            else:
                row.append(None)
            excluded_cols = []
            if result_value.has_key('exclude') and result_value['exclude']:
                temp = result_value['exclude']
                if hasattr(temp, 'split'):
                    temp = temp.split(LIST_DELIMITER_SQL_ARRAY)
                logger.debug('excluded data_columns: find %r, in %r', temp, data_columns)    
                for data_column_name in temp:
                    excluded_cols.append(get_column_letter(
                        len(RESULT_VALUE_FIELD_MAP)+1
                            +data_columns.index(data_column_name)))
                    excluded_cols = sorted(excluded_cols)
            row.append(','.join(excluded_cols))
            
            for j,key in enumerate(data_columns):
                if result_value.has_key(key):
                    row.append(result_value[key])
                else:
                    row.append(None)
            # append the non-result value columns to the end of the row
            for j,key in enumerate(other_columns):
                if result_value.has_key(key):
                    row.append(result_value[key])
            
            if row_count % 10000 == 0:
                logger.info('wrote %d rows', row_count)
            yield OrderedDict(zip(header_row,row))
    
    data['Data'] = result_value_generator(result_values)

    return data
Example #18
def parse_result_row(i,parsed_columns,result_row):    
    
    logger.debug('parse result row: %r', result_row)
    
    meta_columns = RESULT_VALUE_FIELD_MAP.values()
    parsed_row = {}
    excluded_cols = []
    
    meta_key = 'plate_number'
    val = result_row[meta_key]
    logger.debug('plate value to parse: %r', val)
    plate_number = parse_val(val, meta_key, 'integer')
    meta_key = 'well_name'
    val = result_row[meta_key]
    if WELLNAME_MATCHER.match(val):
        wellname = val
    else:
        raise ParseError(
            key=i, 
            msg=('well_name val %r does not follow the pattern: %r'
            % (val, WELLNAME_MATCHER.pattern))) 
    parsed_row['well_id'] = \
        '%s:%s' % (str(plate_number).zfill(5), wellname)
    
    meta_key = 'assay_well_control_type'
    val = result_row.get(meta_key, None)
    parsed_row[meta_key] = None
    if val:
        if val.lower() in ASSAY_WELL_CONTROL_TYPES:
            parsed_row[meta_key] = \
                ASSAY_WELL_CONTROL_TYPES[val.lower()]
        else:
            msg = ('%s: val %r is not one of the choices: %r'
                % (meta_key, val, ASSAY_WELL_CONTROL_TYPES))
            logger.error(msg)
            raise ValidationError(key=parsed_row['well_id'], msg=msg)

    meta_key = 'exclude'
    val = result_row.get(meta_key, None)
    if val:
        if val.lower() == 'all':
            excluded_cols = parsed_columns.keys()
        else:
            excluded_cols = [x.strip().upper() for x in val.split(',')]
            unknown_excluded_cols = (
                set(excluded_cols) - set(parsed_columns.keys()))
            if unknown_excluded_cols:
                raise ValidationError(
                    key = parsed_row['well_id'],
                    msg = 'unknown excluded cols: %r' % unknown_excluded_cols )
        parsed_row[meta_key] = excluded_cols
            
    for colname, raw_val in result_row.items():
        if colname in meta_columns:
            continue
        if colname not in parsed_columns:
            # NOTE: this is no longer an error, as the result value sheet may
            # contain extra columns (selected by user on output)
            logger.debug(
                'result value column %r is not in recognized columns: %r', 
                colname, parsed_columns.keys())
            parsed_row[colname] = raw_val
            continue
        column = parsed_columns[colname]
        if ( column['data_type'] == 'partition_positive_indicator'
            and not raw_val):
            raw_val = 'NP' 
        if ( column['data_type'] == 'confirmed_positive_indicator'
            and not raw_val):
            raw_val = 'NT' 
        if raw_val is None:
            continue
        
        key = '%s-%s' % (parsed_row['well_id'],colname)
        parsed_row[colname] = raw_val
        
        if column['data_type'] == 'numeric':
            if  column['decimal_places'] > 0:
                # parse, to validate
                parse_val(raw_val, key, 'float')
            else:
                parsed_row[colname] = parse_val(raw_val, key, 'integer')
        elif column['data_type'] == 'partition_positive_indicator':
            val = raw_val.upper()
            if val not in PARTITION_POSITIVE_MAPPING:
                raise ValidationError(
                    key=key, 
                    msg='val: %r must be one of %r'
                        % (raw_val, PARTITION_POSITIVE_MAPPING.keys()))
            parsed_row[colname] = val
        elif column['data_type'] == 'confirmed_positive_indicator':
            val = raw_val.upper()
            if val not in CONFIRMED_POSITIVE_MAPPING:
                raise ValidationError(
                    key=key, 
                    msg='val: %r must be one of %r'
                        % (raw_val, CONFIRMED_POSITIVE_MAPPING.keys()))
            parsed_row[colname] = val
        elif column['data_type'] == 'boolean_positive_indicator':
            val = parse_val(raw_val,key,'boolean')
            parsed_row[colname] = val
            
    return parsed_row