Exemplo n.º 1
0
    def test_create_small_molecule(self):
        """
        Tests that we can create a small molecule entity.

        Saves a SmallMolecule built with a known facility id, then fetches it
        again by primary key and checks that the facility id matches.
        """

        facilityId = "HMSL10001"
        sm = SmallMolecule(facility_id=facilityId)
        sm.save()

        # Assert before formatting the id: "%d" % None raises a TypeError,
        # which would hide the more helpful assertion message.
        self.assertTrue(sm.id is not None, "No ID was assigned")
        print("New ID: %d" % sm.id)

        sm2 = SmallMolecule.objects.get(pk=sm.id)
        self.assertTrue(sm2 is not None, "Couldn't find the object")
        # assertEqual reports both values on failure, unlike assertTrue(a == b)
        self.assertEqual(sm2.facility_id, facilityId)

        # Two spaces after the colon reproduce the output of the original
        # Python 2 statement `print "...: ", sm`.
        print("Created the Small Molecule:  %s" % (sm,))
Exemplo n.º 2
0
 def test_create_small_molecule(self):
     """
     Tests that we can create a small molecule entity.

     Saves a SmallMolecule built with a known facility id, then fetches it
     again by primary key and checks that the facility id matches.
     """

     facilityId = "HMSL10001"
     sm = SmallMolecule(facility_id=facilityId)
     sm.save()

     # Assert before formatting the id: "%d" % None raises a TypeError,
     # which would hide the more helpful assertion message.
     self.assertTrue(sm.id is not None, "No ID was assigned")
     print("New ID: %d" % sm.id)

     sm2 = SmallMolecule.objects.get(pk=sm.id)
     self.assertTrue(sm2 is not None, "Couldn't find the object")
     # assertEqual reports both values on failure, unlike assertTrue(a == b)
     self.assertEqual(sm2.facility_id, facilityId)

     # Two spaces after the colon reproduce the output of the original
     # Python 2 statement `print "...: ", sm`.
     print("Created the Small Molecule:  %s" % (sm,))
Exemplo n.º 3
0
 def test_create_multiple_small_molecules(self):
     """
     Tests that we can create a few small molecule objects in the db, and select a subset

     Creates 100 SmallMolecule rows, checks the total row count, then selects
     the rows whose id is below the id of the 50th created row and expects
     exactly 49 of them.
     """

     facilityIdBase = "HMSL"
     total = 100
     subsetSize = 49

     createdIds = []
     for x in range(total):
         sm = SmallMolecule(facility_id=facilityIdBase + str(10000 + x),
                            pub_date=timezone.now())
         sm.save()
         createdIds.append(sm.id)

     # Query the count once so the assertion and its message agree.
     actualTotal = SmallMolecule.objects.all().count()
     self.assertEqual(total, actualTotal,
                      "Should be 100 items, but was %d" % actualTotal)

     # read back a small sample
     # Derive the cut-off from the ids that were actually assigned instead of
     # hard-coding 50: primary keys are not guaranteed to start at 1 (database
     # sequences may not be reset between tests). When the ids are 1..100 the
     # cut-off is 50, exactly as before.
     createdIds.sort()
     cutoffId = createdIds[subsetSize]
     resultSet = SmallMolecule.objects.filter(id__lt=cutoffId)

     actualSubset = resultSet.count()
     self.assertEqual(actualSubset, subsetSize,
                      "Actual size of subquery return is %d" % actualSubset)

     for y in resultSet:
         print("Small Molecule: id: %d, facilityId: %s, pubDate: %s"
               % (y.id, y.facility_id, y.pub_date))
Exemplo n.º 4
0
 def test_create_small_molecule(self):
     """
     Tests that we can create a small molecule entity.

     Saves a SmallMolecule with a facility id and publication date, fetches
     it again by primary key, and checks both fields on the fetched object.
     """

     facilityId = "HMSL10001"
     pubDate = timezone.now()
     sm = SmallMolecule(facility_id=facilityId, pub_date=pubDate)
     sm.save()

     # Assert before formatting the id: "%d" % None raises a TypeError,
     # which would hide the more helpful assertion message.
     self.assertTrue(sm.id is not None, "No ID was assigned")
     print("New ID: %d" % sm.id)

     sm2 = SmallMolecule.objects.get(pk=sm.id)

     # Check the object read back from the database (sm2), not the in-memory
     # instance that was just saved; otherwise the round trip is never tested.
     self.assertTrue(sm2 is not None, "Couldn't find the object")
     self.assertEqual(sm2.facility_id, facilityId)
     # NOTE(review): this assumes the database backend stores the timestamp
     # without losing precision - confirm for the backend in use.
     self.assertEqual(sm2.pub_date, pubDate)

     print("Created the Small Molecule: id: %d, facilityId: %s, pubDate: %s"
           % (sm2.id, sm2.facility_id, sm2.pub_date))
Exemplo n.º 5
0
 def build_schema(self):
     """
     Return the parent resource's schema with its 'fields' entry replaced.

     The replacement value is whatever get_detail_schema() returns for a
     fresh SmallMolecule instance and the ['smallmolecule'] argument.
     """
     base_schema = super(SmallMoleculeResource, self).build_schema()
     detail_fields = get_detail_schema(SmallMolecule(), ['smallmolecule'])
     base_schema['fields'] = detail_fields
     return base_schema
def main(path):
    """
    Read in the sdf file

    The file at `path` is read, decoded with DEFAULT_ENCODING and parsed with
    s2p.parse_sdf. For every parsed record the labelled values are converted
    and validated according to the column definitions below, then a
    SmallMolecule is built from them and saved.

    Raises:
        Exception: when a required field has no value after conversion and
            default substitution; any error raised by a converter or by
            saving the SmallMolecule is logged and re-raised unchanged.
    """
    # map field labels to model fields
    properties = ('model_field','required','default','converter')
    get_primary_name = lambda x: x.split(';')[0].strip()
    # use a distinct name inside the comprehension so it does not shadow the
    # lambda's own parameter
    get_alternate_names = lambda x: ';'.join(
        [part.strip() for part in x.split(';')[1:]])

    labels = { s2p.MOLDATAKEY:('molfile',True),
              # NOTE: even though these db fields are not integers,
              # it is convenient to convert the read in values to INT to make sure they are not interpreted as float values
               'facility_reagent_id': ('facility_id',True,None, lambda x: util.convertdata(x[x.index('HMSL')+4:],int)),
               'salt_id': ('salt_id',True,None, lambda x: util.convertdata(x,int)),
               'lincs_id':('lincs_id',False), #None,lambda x:util.convertdata(x,int)),
               'chemical_name':('name',True),
               'alternative_names':'alternative_names',
               'pubchem_cid':'pubchem_cid',
               'chembl_id':'chembl_id',
               'chebi_id':'chebi_id',
               'inchi':'_inchi',
               'inchi_key':'_inchi_key',
               'smiles': ('_smiles',True),
               'molecular_mass':('_molecular_mass',False,None, lambda x: round(util.convertdata(x, float),2)),
               'molecular_formula':'_molecular_formula',
               'software':'software',
               # 'concentration':'concentration',
               #'well_type':('well_type',False,'experimental'),
               'is_restricted':('is_restricted',False,False,util.bool_converter)}
    # convert the labels to fleshed out dict's, with strategies for optional, default and converter
    labels = util.fill_in_column_definitions(properties,labels)

    assert typecheck.isstring(path)
    with open(path) as fh:
        data = fh.read().decode(DEFAULT_ENCODING)

    records = s2p.parse_sdf(data)
    logger.info(str(('read rows: ', len(records))))

    # number of records saved so far; also reported in error messages
    count = 0
    for record in records:
        logger.debug(str(('record', record)))
        initializer = {}
        # `column` (not `properties`) so the tuple of property names above is
        # not shadowed by the per-column definition
        for key, column in labels.items():
            logger.debug(str(('look for key: ', key, ', properties: ', column)))
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            value = record.get(key)

            # Todo, refactor to a method
            try:
                logger.debug(str(('raw value', value)))
                if converter is not None:
                    value = converter(value)
                if value is None and default is not None:
                    value = default
                # 'n/a' in the input is treated the same as a missing value
                if value == 'n/a':
                    value = None
                if value is None and required:
                    raise Exception(str(('Field is required: ', key, initializer, 'record:', count)))
                logger.debug(str(('model_field: ' , model_field, ', value: ', value)))
                initializer[model_field] = value
            except Exception as e:
                exc_type, _, exc_tb = sys.exc_info()
                fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
                logger.error(str((exc_type, fname, exc_tb.tb_lineno)))
                logger.error(str(('invalid input', e, 'count', count)))
                # bare raise keeps the original traceback (`raise e` would
                # restart it at this line under Python 2)
                raise
        # follows is a kludge, to split up the entered "chemical_name" field, on ';' - TODO: just have two fields that get entered
        if initializer['name']:
            initializer['alternative_names'] = get_alternate_names(initializer['name'])
            initializer['name'] = get_primary_name(initializer['name'])

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(str(('initializer: ', initializer)))
        try:
            sm = SmallMolecule(**initializer)
            sm.save()
            logger.info(str(('sm created:', sm)))
            count += 1
        except Exception as e:
            logger.error(str(('save failed for: ', initializer, 'error',e, 'count: ', count)))
            # bare raise keeps the original traceback
            raise
Exemplo n.º 7
0
    filename = path.basename(inputFile)
    attachedFile = AttachedFile(filename=filename,
                                facility_id_for=facilityId,
                                relative_path=relativePath,
                                is_restricted=args.isRestricted)
    # lookup the Entity

    if (facilityId <= 30000):  # SM or Screen
        logger.info('look for the small molecule for:' + str(facilityId))
        saltId = util.int_converter(args.saltId)
        if (saltId is not None):
            logger.info('look for the small molecule for saltId ' +
                        str(saltId))
            try:
                sm = SmallMolecule(facility_id=facilityId, salt_id=saltId)
                attachedFile.salt_id_for = saltId
                batchId = util.int_converter(args.batchId)
                if (batchId is not None):
                    logger.info('look for the batch Id: ' + str(batchId))
                    attachedFile.batch_id_for = batchId
                    try:
                        smb = SmallMoleculeBatch(smallmolecule=sm,
                                                 facility_batch_id=batchId)
                    except ObjectDoesNotExist, e:
                        logger.error(
                            str(('No such SmallMoleculeBatch found',
                                 facilityId, saltId, batchId, e)))
                        raise e
            except ObjectDoesNotExist, e:
                logger.error(
Exemplo n.º 8
0
def main(path):
    """
    Load small molecules from the SDF file at `path`.

    The file is read, decoded with DEFAULT_ENCODING and parsed with
    s2p.parse_sdf. For every parsed record the labelled values are converted
    and validated according to the column definitions below; a SmallMolecule
    is then built from them and saved, followed by a SmallMoleculeBatch with
    batch_id 0 for that molecule.

    Raises:
        Exception: when a required field has no value after conversion and
            default substitution; any error raised by a converter or while
            saving is logged and re-raised unchanged.
    """

    properties = ('model_field','required','default','converter')
    get_primary_name = lambda x: x.split(';')[0].strip()
    # use a distinct name inside the comprehension so it does not shadow the
    # lambda's own parameter
    get_alternate_names = (
        lambda x: '; '.join([part.strip() for part in x.split(';')[1:]]))

    labels = { s2p.MOLDATAKEY:('molfile',True),
        'facility_reagent_id': (
            'facility_id',True,None,
            lambda x: util.convertdata(x[x.index('HMSL')+4:],int)),
        'salt_id': ('salt_id',True,None, lambda x: util.convertdata(x,int)),
        'lincs_id':('lincs_id',False),
        'chemical_name':('name',True),
        'alternative_names':'alternative_names',
        'pubchem_cid':'pubchem_cid',
        'chembl_id':'chembl_id',
        'chebi_id':'chebi_id',
        'inchi':'_inchi',
        'inchi_key':'_inchi_key',
        'smiles': ('_smiles',False),
        'molecular_mass':(
            '_molecular_mass',False,None,
            lambda x: round(util.convertdata(x, float),2)),
        'relevant_citations': '_relevant_citations',
        'molecular_formula':'_molecular_formula',
        'software':'software',
        'date_data_received':('date_data_received',False,None,
                              util.date_converter),
        'date_loaded': ('date_loaded',False,None,util.date_converter),
        'date_publicly_available': ('date_publicly_available',False,None,
                                    util.date_converter),
        'date_updated': ('date_updated',False,None,util.date_converter),
        'is_restricted':('is_restricted',False,False,util.bool_converter)
    }
    # expand the short-hand definitions above into full per-column dicts
    labels = util.fill_in_column_definitions(properties,labels)

    assert typecheck.isstring(path)
    with open(path) as fh:
        data = fh.read().decode(DEFAULT_ENCODING)

    records = s2p.parse_sdf(data)
    logger.info('rows read: %d ', len(records))

    # number of records saved so far; also reported as the row in errors
    count = 0
    for record in records:
        initializer = {}
        # `column` (not `properties`) so the tuple of property names above is
        # not shadowed by the per-column definition
        for key, column in labels.items():
            required = column['required']
            default = column['default']
            converter = column['converter']
            model_field = column['model_field']

            value = record.get(key)

            try:
                if converter is not None:
                    value = converter(value)
                if value is None and default is not None:
                    value = default
                # 'n/a' in the input is treated the same as a missing value
                if value == 'n/a':
                    value = None
                if value is None and required:
                    raise Exception(
                        'Field is required: %r, values: %r, row: %d'
                        % (key,initializer,count))
                initializer[model_field] = value
            except Exception:
                logger.exception('invalid input, row: %d', count)
                # bare raise keeps the original traceback (`raise e` would
                # restart it at this line under Python 2)
                raise
        # follows is a kludge, to split up the entered "chemical_name" field,
        # on ';' - TODO: just have two fields that get entered
        if initializer['name']:
            initializer['alternative_names'] = get_alternate_names(initializer['name'])
            initializer['name'] = get_primary_name(initializer['name'])

        try:
            sm = SmallMolecule(**initializer)
            sm.save()
            count += 1

            # create a default batch - 0
            SmallMoleculeBatch.objects.create(reagent=sm,batch_id=0)

        except Exception:
            logger.exception('save failed for: %r, row: %d', initializer, count)
            raise