Exemplo n.º 1
0
 def map(self, mol, source):
     """Import molecule into MESS.DB.

     When ``self.check()`` reports that the molecule is not yet present and
     valid, the molecule's files (.inchi, .png, .log, .notes, .sources.tsv)
     are created in its directory. The source is then recorded and the
     queries that register the molecule are yielded.

     Args:
         mol: Molecule object; its ``title`` and ``write()`` are used.
         source: Source object; its ``update_source_tsv()``,
             ``update_molecule_source_query()`` and ``dirname`` are used.

     Yields:
         The results of ``source.update_molecule_source_query()`` and
         ``self.insert_molecule_query()``, followed by the (query, values)
         pairs from ``self.get_insert_moldata_queries()`` and
         ``self.get_openbabel_property_queries()``.
     """
     # setup local variables
     self.inchikey = mol.write('inchikey').rstrip()
     inchikey_dir = get_inchikey_dir(self.inchikey)
     inchikey_basename = os.path.join(inchikey_dir, self.inchikey)
     # must be read before the title is blanked below
     identifier = unicode(mol.title, 'utf-8', 'replace')
     # setup directory
     setup_dir(inchikey_dir)
     if not self.check():
         mol.title = b''
         mol.write('inchi',
                   (inchikey_basename + '.inchi'),
                   overwrite=True)
         if not os.path.exists(inchikey_basename + '.png'):
             mol.write('_png2',
                       (inchikey_basename + '.png'))
         touch(inchikey_basename + '.log')
         touch(inchikey_basename + '.notes')
         # inchikey_basename already includes inchikey_dir; joining the
         # directory onto it again only worked for absolute directories
         touch(inchikey_basename + '.sources.tsv')
         self.log_all.info('%s molecule directory initialized',
                           self.inchikey)
     source.update_source_tsv(self.inchikey, identifier)
     yield source.update_molecule_source_query(self.inchikey, identifier)
     yield self.insert_molecule_query(self.inchikey, mol)
     for query, values in self.get_insert_moldata_queries(
             self.inchikey,
             mol,
             description='molecule data from %s input' % source.dirname):
         yield query, values
     for query, values in self.get_openbabel_property_queries(self.inchikey,
                                                              mol):
         yield query, values
Exemplo n.º 2
0
Arquivo: log.py Projeto: vamin/MESS.DB
 def _to_all(self, inchikey=None):
     """Log to console, central log, and molecule log. Use to report things
     that change the database.

     The first handler of the 'mess' logger whose file name contains
     'molecules/' or '/dev/null' is treated as the molecule log. It is
     replaced by a handler for the given molecule's log file, or by a
     '/dev/null' handler when no inchikey is given.

     Args:
         inchikey: InChIKey of the molecule whose log should receive the
             records, or None to discard molecule-level records.

     Returns:
         The 'mess.<context>' logger when self.context is set, otherwise
         the 'mess' logger.

     Raises:
         SystemExit: If inchikey is given but is not a valid InChIKey.
     """
     if inchikey is not None and not is_inchikey(inchikey):
         sys.exit('invalid inchikey passed to logger')
     logger = logging.getLogger('mess')
     for handler in logger.handlers:
         filename = getattr(handler, 'baseFilename', None)
         if filename is None:
             # handlers without a baseFilename are not file-based; skip
             continue
         is_null = '/dev/null' in filename
         if not (is_null or 'molecules/' in filename):
             continue
         if inchikey is not None:
             mol_log = '%s/%s.log' % (get_inchikey_dir(inchikey), inchikey)
             replacement = logging.FileHandler(mol_log)
         elif not is_null:
             replacement = logging.FileHandler('/dev/null')
         else:
             # already discarding molecule records; keep looking
             continue
         logger.removeHandler(handler)
         # removeHandler() does not release the underlying file, so close
         # the old handler explicitly to avoid leaking a file handle
         handler.close()
         logger.addHandler(replacement)
         break
     if self.context is not None:
         return logging.getLogger('mess.%s' % self.context.lower())
     else:
         return logging.getLogger('mess')
Exemplo n.º 3
0
 def execute(self, args):
     """Remove specified elements.

     For every line of ``args.inchikeys`` the first whitespace-separated
     token is taken as an InChIKey. Its molecule directory is deleted, empty
     parent directories are pruned, and its rows are deleted from the
     molecule tables. Blank lines are skipped.

     Args:
         args: Parsed arguments; ``args.inchikeys`` is an iterable of lines.
     """
     db = MessDB()
     cur = db.cursor()
     delete_queries = (
         'DELETE from molecule WHERE inchikey=?',
         'DELETE from molecule_synonym WHERE inchikey=?',
         'DELETE from molecule_source WHERE inchikey=?',
         ('DELETE from molecule_state_method_property '
          'WHERE inchikey=?'),
     )
     for row in args.inchikeys:
         fields = row.split()
         if not fields:
             # a blank line carries no InChIKey
             continue
         inchikey = fields[0]
         # resolved outside the try blocks so the parent-directory cleanup
         # below can never see a stale or unbound value
         inchikey_dir = get_inchikey_dir(inchikey)
         try:
             shutil.rmtree(inchikey_dir)
             self.log_all.info('%s dir removed', inchikey)
         except OSError:
             self.log_console.info('%s did not have a directory', inchikey)
         try:
             parent = os.path.relpath(os.path.join(inchikey_dir, '../'))
             # prunes empty parent directories; a non-empty or missing
             # parent raises OSError, which is expected and ignored
             os.removedirs(parent)
         except OSError:
             pass
         records = 0
         for query in delete_queries:
             cur.execute(query, (inchikey,))
             records += cur.rowcount
         db.commit()
         self.log_all.info('%i %s records removed from db',
                           records, inchikey)
Exemplo n.º 4
0
 def map(self, mol, source):
     """Write the molecule's xyz structure into its method directory.

     Nothing is written when ``self.check()`` already succeeds for the
     molecule's InChIKey.

     Args:
         mol: Molecule object providing ``write()``.
         source: Source object; only its ``dirname`` is used, for logging.
     """
     self.inchikey = mol.write('inchikey').rstrip()
     if self.check():
         # check() is satisfied, so there is nothing to import
         return
     method_path = os.path.join(get_inchikey_dir(self.inchikey),
                                self.method_dir)
     setup_dir(method_path)
     xyz_name = '%s.xyz' % self.inchikey
     mol.write('xyz', os.path.join(method_path, xyz_name), overwrite=True)
     self.log_all.info('%s 3D structure from %s added',
                       self.inchikey, source.dirname)
Exemplo n.º 5
0
 def mapreduce_local(self, inchikeys, method):
     """Run a method's map and reduce functions locally.

     ``method.map()`` is called once per InChIKey with the key and its
     molecule directory. The (key, values) pairs it produces are grouped by
     key, and ``method.reduce()`` is then called once per distinct key with
     the list of collected values.

     Args:
         inchikeys: Iterable of InChIKey strings.
         method: Object providing ``map(inchikey, inchikey_dir)`` and
             ``reduce(key, values)``.

     Raises:
         SystemExit: If an entry is not a valid standard InChIKey.
     """
     grouped = {}
     for inchikey in inchikeys:
         if not is_inchikey(inchikey, enforce_standard=True):
             sys.exit('%s is not a valid InChIKey.' % inchikey)
         for key, values in method.map(inchikey,
                                       get_inchikey_dir(inchikey)):
             grouped.setdefault(key, []).append(values)
     # items() exists on both Python 2 and 3, unlike iteritems()
     for key, values in grouped.items():
         method.reduce(key, values)
Exemplo n.º 6
0
 def check(self):
     """Check that a consistent xyz geometry exists for self.inchikey.

     Returns:
         True if the xyz file in self.method_dir can be read and the
         molecule it holds converts back to self.inchikey. False if the
         file is missing, holds no molecule, or yields a different
         InChIKey.
     """
     xyz_file = os.path.join(get_inchikey_dir(self.inchikey),
                             self.method_dir,
                             '%s.xyz' % self.inchikey)
     try:
         # the builtin next() works on Python 2 and 3; the default keeps an
         # empty file from raising StopIteration out of this method
         mol = next(pybel.readfile('xyz', xyz_file), None)
     except IOError:
         return False
     if mol is None:
         return False
     decorate(mol, UnicodeDecorator)
     if mol.write('inchikey').rstrip() != self.inchikey:
         self.log_console.warning('inconsistent 3D geometry in %s (%s)',
                                  self.inchikey, self.method_dir)
         return False
     return True
Exemplo n.º 7
0
 def mapreduce_server(self, inchikeys, method):
     """Run a mapreduce server over the given InChIKeys.

     The server's datasource maps each InChIKey to its molecule directory
     and its password is ``method.hash``. The local hostname is written to
     a '<password>.host' file under the temp directory before the server is
     run.

     Args:
         inchikeys: Iterable of InChIKey strings.
         method: Object whose ``hash`` attribute is used as the password.
     """
     self.log_console.info('hostname is %s' % gethostname())
     dirs_by_inchikey = {}
     for inchikey in inchikeys:
         if is_inchikey(inchikey, enforce_standard=True):
             dirs_by_inchikey[inchikey] = get_inchikey_dir(inchikey)
         else:
             sys.exit('%s is not a valid InChIKey.' % inchikey)
     server = mapreduce.Server()
     server.datasource = dirs_by_inchikey
     server.password = method.hash
     host_path = os.path.join(os.path.dirname(__file__),
                              '../../temp/%s.host' % server.password)
     with open(host_path, 'w') as host_handle:
         host_handle.write(gethostname())
     server.run()
     self.log_console.info('all mappers and reducers have finished')
Exemplo n.º 8
0
 def update_source_tsv(self, inchikey, identifier):
     """Update the sources.tsv file.

     A (name, dirname, identifier, url) row is appended to the molecule's
     sources.tsv unless a row with the same dirname and identifier is
     already present.

     Args:
         inchikey: InChIKey of the molecule whose sources.tsv is updated.
         identifier: A source identifier (usually a catalog number).

     """
     inchikey_dir = get_inchikey_dir(inchikey)
     name = self.name.encode('ascii', 'replace')
     dirname = self.dirname.encode('ascii', 'replace')
     identifier = identifier.encode('ascii', 'replace')
     sources_tsv = os.path.join(inchikey_dir, '%s.sources.tsv' % inchikey)
     # check if source has been recorded; rows with fewer than three
     # columns cannot match and are ignored
     with codecs.open(sources_tsv, 'r', 'ascii') as sources_in:
         source_present = any(
             len(row) > 2 and row[1] == dirname and row[2] == identifier
             for row in csv.reader(sources_in, delimiter=b'\t'))
     if source_present:
         return
     if self.url_template:
         # the template has the form 'prefix[match, replace]suffix': the
         # bracketed regex substitution is applied to the identifier and
         # the result is placed between prefix and optional suffix
         url_split = re.split(r"\[|\]", self.url_template)
         (match, replace) = re.split(r",\s?", url_split[1])
         url_identifier = re.sub(match, replace, identifier)
         source_url = url_split[0] + url_identifier
         if 2 < len(url_split):
             source_url += url_split[2]
     else:
         source_url = ''
     # the file is opened for append only once a row has to be written
     with codecs.open(sources_tsv, 'a', 'ascii') as sources_out:
         csv.writer(sources_out, delimiter=b'\t').writerow(
             [name, dirname, identifier,
              source_url.encode('ascii', 'replace')])
     self.log.inchikey = inchikey
     try:
         self.log.info('%s added to %s sources', name, inchikey)
     finally:
         # always reset, so later records are not tied to this molecule
         self.log.inchikey = None
Exemplo n.º 9
0
 def check(self):
     """Check that a valid molecule folder was created and that there is
     a matching molecule in the database.

     The molecule is identified by self.inchikey. Its .inchi file must
     contain an InChI whose database row carries the same InChIKey, and its
     .log, .notes, .png and .sources.tsv files must be openable.

     Returns:
         True if everything is fine, False otherwise.
     """
     inchikey_dir = get_inchikey_dir(self.inchikey)
     inchi = os.path.join(inchikey_dir, '%s.inchi' % self.inchikey)
     required_files = [
         os.path.join(inchikey_dir, '%s.%s' % (self.inchikey, extension))
         for extension in ('log', 'notes', 'png', 'sources.tsv')]
     try:
         with codecs.open(inchi, encoding='utf-8') as file_:
             inchi_parts = file_.readline().split('=')
         if len(inchi_parts) < 2:
             # empty or malformed .inchi file: invalid rather than a crash
             return False
         inchi_str = inchi_parts[1].strip()
         query = 'SELECT inchikey FROM molecule WHERE inchi=?'
         row = self.db.execute(query, (inchi_str,)).fetchone()
         try:
             if row.inchikey != self.inchikey:
                 return False
         except AttributeError:
             # no matching row (fetchone() returned None)
             return False
         # the remaining files only need to exist and be readable
         for path in required_files:
             with codecs.open(path, encoding='utf-8'):
                 pass
         return True
     except IOError:
         return False
Exemplo n.º 10
0
 def execute(self, args):
     """Match molecules to SMARTS patterns, fingerprints, or Spectrophores.

     Exactly one of ``args.smarts``, ``args.fingerprint`` and
     ``args.spectrophore`` must be selected. Results are written to stdout
     as rows delimited by ``args.delimiter``. With a target, a row is
     written only when the similarity exceeds ``args.cutoff``; without one,
     the fingerprint itself is written.

     Args:
         args: Parsed command-line arguments. ``args.inchikeys`` is iterated
             line by line; the first whitespace-separated token of each
             line is used as the InChIKey and blank lines are skipped.

     Raises:
         SystemExit: If no input or no operation was given, if more than
             one operation was selected, or if a Spectrophore match was
             requested without ``--path``.
     """
     if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
         sys.exit('No input specified.')
     # parse args
     if not (args.smarts or args.fingerprint or args.spectrophore):
         sys.exit('No operations were selected, nothing to match.')
     if sum(bool(arg) for arg in (args.smarts,
                                  args.fingerprint,
                                  args.spectrophore)) > 1:
         sys.exit(('One thing at a time, please. The arguments --smarts, '
                   '--fingerprint, and --spectrophore are mutually '
                   'exclusive.'))
     if args.smarts and args.target:
         self.log_console.warning(('--target ignored, proceeding with '
                                   'SMARTS matching'))
     if args.spectrophore:
         if args.path is None:
             sys.exit(('Spectrophore calculation requires 3D geometry. '
                       'You must specify a 3D geometry with --path.'))
         else:
             path = MethodPath()
             path.set_path(args.path)
             method_dir = path.get_path_directory()
         sp_args = {'normalization': args.spectrophore_normalization,
                    'accuracy': args.spectrophore_accuracy,
                    'stereo': args.spectrophore_stereospecificity,
                    'resolution': args.spectrophore_resolution}
         # serialized once: the settings are identical for every molecule
         sp_settings = json.dumps(sp_args, sort_keys=True)
     # load target and target fingerprints
     target_mol = None
     target_fp = None
     target_sp = None
     if args.target:
         if os.path.exists(args.target):
             # the file extension doubles as the format name
             target_mol = next(pybel.readfile(args.target.split('.')[-1],
                                              args.target))
         else:
             target_mol = pybel.readstring('smi', args.target)
     if target_mol is not None:
         if args.fingerprint:
             target_fp = self.calculate_fingerprint(target_mol,
                                                    args.fingerprint)
         if args.spectrophore:
             target_sp = self.calculate_spectrophore(target_mol, sp_args)
     # match every input
     db = MessDB()
     inchi_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
     fp_query = ('SELECT fingerprint FROM molecule_fingerprint '
                 'WHERE inchikey = ? AND name = ? '
                 'AND settings = ? AND method_path_id = ?')
     writer = csv.writer(sys.stdout, delimiter=args.delimiter)
     for row in args.inchikeys:
         fields = row.split()
         if not fields:
             # a blank line carries no InChIKey
             continue
         inchikey = fields[0]
         if args.smarts or args.fingerprint:
             # NOTE(review): an InChIKey missing from the molecule table
             # makes fetchone() return None and raises TypeError here
             inchi = db.execute(inchi_query, (inchikey,)).fetchone()[0]
             mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
         if args.smarts:
             canonical = pybel.ob.OBOp.FindType(b"canonical")
             canonical.Do(mol.OBMol)
             for (smarts_obj,
                  smarts_str) in self.smarts_generator(args.smarts):
                 matches = list(smarts_obj.findall(mol))
                 if matches:
                     writer.writerow([inchikey, smarts_str] + matches)
         if args.fingerprint:
             try:
                 fp = db.execute(fp_query, (inchikey, args.fingerprint,
                                            '', '')).fetchone()[0]
             except TypeError:
                 # no stored fingerprint (fetchone() returned None)
                 fp = self.calculate_fingerprint(mol, args.fingerprint)
             if target_fp is not None:
                 similarity = self.calculate_similarity(target_fp, fp,
                                                        'tanimoto')
                 if similarity > args.cutoff:
                     writer.writerow([inchikey, args.fingerprint,
                                      args.target, similarity])
             else:
                 writer.writerow([inchikey, args.fingerprint] + fp)
         if args.spectrophore:
             try:
                 sp = db.execute(fp_query, (inchikey, 'Spectrophore',
                                            sp_settings,
                                            args.path)).fetchone()[0]
             except TypeError:
                 # no stored Spectrophore; compute it from the xyz geometry
                 xyz_file = os.path.join(get_inchikey_dir(inchikey),
                                         method_dir,
                                         '%s.xyz' % inchikey)
                 mol = next(pybel.readfile('xyz', xyz_file))
                 sp = Match.calculate_spectrophore(mol, sp_args)
             if target_sp is not None:
                 try:
                     similarity = self.calculate_similarity(target_sp, sp,
                                                            'cos')
                 except ValueError:
                     similarity = 0
                 if similarity > args.cutoff:
                     writer.writerow([inchikey, 'Spectrophore',
                                      args.target, similarity])
             else:
                 writer.writerow([inchikey, 'Spectrophore'] + sp)
Exemplo n.º 11
0
 def test_get_inchikey_dir(self):
     """get_inchikey_dir() maps an InChIKey to its nested molecules path."""
     inchikey = 'BQJCRHHNABKAKU-KBQPJGBKSA-N'
     here = os.path.dirname(__file__)
     molecules_root = os.path.abspath(os.path.join(here,
                                                   '../../molecules/'))
     # expected layout: first character / next two characters / remainder
     expected = os.path.join(molecules_root,
                             'B/QJ/CRHHNABKAKU-KBQPJGBKSA-N')
     self.assertEqual(utils.get_inchikey_dir(inchikey), expected)
Exemplo n.º 12
0
 def execute(self, args):
     """Annotate molecules with CIR names, fingerprints, or Spectrophores.

     For every InChIKey read from ``args.inchikeys`` the requested
     annotations are computed. A fingerprint that is not yet stored is
     inserted into molecule_fingerprint; one that is stored but differs
     from the newly computed value only triggers a warning.

     Args:
         args: Parsed command-line arguments. ``args.inchikeys`` is iterated
             line by line; the first whitespace-separated token of each
             line is used as the InChIKey and blank lines are skipped.

     Raises:
         SystemExit: If no input or no annotation was requested, or if a
             Spectrophore was requested without ``--path``.
     """
     if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
         sys.exit('No input specified.')
     if not (args.cir or args.fingerprint or args.spectrophore):
         sys.exit('You did not request any annotations.')
     if args.spectrophore:
         if args.path is None:
             sys.exit(('Spectrophore calculation requires 3D geometry. '
                       'You must specify a 3D geometry with --path.'))
         path = MethodPath()
         path.set_path(args.path)
         method_dir = path.get_path_directory()
         sp_args = {'normalization': args.spectrophore_normalization,
                    'accuracy': args.spectrophore_accuracy,
                    'stereo': args.spectrophore_stereospecificity,
                    'resolution': args.spectrophore_resolution}
         # serialized once: the settings are identical for every molecule
         json_sp_args = json.dumps(sp_args, sort_keys=True)
     self.db = MessDB()
     inchi_select_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
     fp_select_query = ('SELECT fingerprint FROM molecule_fingerprint '
                        'WHERE inchikey = ? '
                        'AND name = ? '
                        'AND settings = ? '
                        'AND method_path_id = ?')
     fp_insert_query = ('INSERT INTO molecule_fingerprint '
                        '(inchikey, name, settings, '
                        'fingerprint, method_path_id) '
                        'VALUES (?, ?, ?, ?, ?)')
     # NOTE(review): no commit is issued after the inserts below;
     # presumably MessDB commits on its own -- confirm
     for row in args.inchikeys:
         fields = row.split()
         if not fields:
             # a blank line carries no InChIKey
             continue
         self.inchikey = fields[0]
         if args.cir:
             self.update_iupac(self.inchikey)
             self.update_synonyms(self.inchikey)
         if args.fingerprint:
             inchi = self.db.execute(inchi_select_query,
                                     (self.inchikey,)).fetchone()[0]
             mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
             canonical = pybel.ob.OBOp.FindType(b'canonical')
             canonical.Do(mol.OBMol)
             fp = Match.calculate_fingerprint(mol, args.fingerprint)
             try:
                 db_fp = self.db.execute(fp_select_query,
                                         (self.inchikey,
                                          args.fingerprint,
                                          '',
                                          '')).fetchone()[0]
                 if not str(fp) == db_fp:
                     self.log_console.warning(('new %s fingerprint '
                                               'for %s did not match '
                                               'fingerprint in db, '
                                               'db not updated'),
                                              args.fingerprint,
                                              self.inchikey)
             except TypeError:
                 # nothing stored yet (fetchone() returned None)
                 self.db.execute(fp_insert_query, (self.inchikey,
                                                   args.fingerprint,
                                                   '',
                                                   str(fp),
                                                   ''))
                 self.log_all.info('%s fingerprint for %s added to db',
                                   args.fingerprint, self.inchikey)
         if args.spectrophore:
             xyz_file = os.path.join(get_inchikey_dir(self.inchikey),
                                     method_dir,
                                     '%s.xyz' % self.inchikey)
             mol = next(pybel.readfile('xyz', xyz_file))
             sp = Match.calculate_spectrophore(mol, sp_args)
             try:
                 db_sp = self.db.execute(fp_select_query,
                                         (self.inchikey,
                                          'Spectrophore',
                                          json_sp_args,
                                          args.path)).fetchone()[0]
                 if not str(sp) == db_sp:
                     self.log_console.warning(('new Spectrophore '
                                               'fingerprint for '
                                               '%s did not match '
                                               'fingerprint in db, '
                                               'db not updated'),
                                              self.inchikey)
             except TypeError:
                 # nothing stored yet (fetchone() returned None)
                 self.db.execute(fp_insert_query, (self.inchikey,
                                                   'Spectrophore',
                                                   json_sp_args,
                                                   str(sp),
                                                   args.path))
                 # %s rather than %i: the type of args.path is not fixed
                 # here, and %s renders an int identically
                 self.log_all.info(('Spectrophore fingerprint for %s '
                                    'with parameters %s and '
                                    'geometry from path %s '
                                    'added to db'),
                                   self.inchikey, json_sp_args, args.path)