Exemplo n.º 1
0
 def execute(self, args):
     """Run a select query and write the resulting table to stdout.

     Rows come from one of three sources: a custom SQL query
     (``args.query``, tried first as a path to a UTF-8 SQL file and then
     as literal SQL), a property filter built by ``self.property_query``,
     or every inchikey in the ``molecule`` table. The first result column
     must be named ``inchikey``.

     Rows are then narrowed, in order, by the inchikeys read from
     ``args.inchikeys`` (skipped when stdin is an interactive terminal),
     by ``args.regex_subset`` (case-insensitive match at the start of the
     inchikey), by the ``args.part``/``args.of`` subset, and by
     ``args.smarts``. Surviving rows are written with ``csv.writer`` using
     ``args.delimiter``; a header row is written when ``args.headers`` is
     set.

     Args:
         args: parsed command-line namespace providing the attributes
             named above plus ``property_name``, ``property_operator``,
             ``property_value`` and ``path``.

     Raises:
         SystemExit: on conflicting or invalid options, on SQL that can
             not be executed, or when the first result column is not
             ``inchikey``.
     """
     if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
         # Nothing was piped in, so rows are not filtered by input.
         filter_from = None
     else:
         try:
             # split() already discards surrounding whitespace.
             filter_from = set(row.split()[0] for row in args.inchikeys)
         except IndexError:
             # A blank input row has no first token; the run ends here
             # without opening the database or writing any output.
             return
     if args.query and (args.property_name
                        or args.property_operator
                        or args.property_value):
         sys.exit(('Custom SQL queries are mutually exclusive with '
                   'property filtering.'))
     if (args.part or args.of) and not (args.part and args.of):
         sys.exit(('If you specify a --part n, you must also specify --of '
                   'N (e.g. something like --part 1 --of 5).'))
     subset = None
     if args.part and args.of:
         if args.part > args.of:
             sys.exit('--part must be smaller than --of.')
         if args.part < 1:
             sys.exit('--part must be >=1.')
         alpha = string.ascii_uppercase
         alpha3 = [a + b + c for a in alpha
                             for b in alpha
                             for c in alpha]  # AAA to ZZZ
         if args.of > len(alpha3):
             sys.exit(('MESS.DB does not support subsetting into more than '
                       '%i parts.' % len(alpha3)))
         # Only the requested part is needed: every args.of-th prefix,
         # starting at offset part - 1. A frozenset makes the per-row
         # prefix test a constant-time lookup.
         subset = frozenset(alpha3[args.part - 1::args.of])
     # Compile the pattern once instead of resolving it for every row.
     regex = None
     if args.regex_subset:
         regex = re.compile(args.regex_subset, re.IGNORECASE)
     db = MessDB()
     try:
         cur = db.cursor()
         if args.query:
             # Prefer args.query as a path to a SQL file; if it can not be
             # opened, fall back to treating it as literal SQL.
             try:
                 with codecs.open(args.query, encoding='utf-8') as sql_file:
                     sql = sql_file.read()
             except IOError:
                 sql = None
             if sql is not None:
                 try:
                     cur.execute(sql)
                 except sqlite3.OperationalError:
                     sys.exit("'%s' does not contain valid sql."
                              % args.query)
             else:
                 try:
                     cur.execute(args.query)
                 except sqlite3.OperationalError:
                     sys.exit(("'%s' is neither valid sql nor a path "
                               'to a file containing valid sql.')
                              % args.query)
         elif (args.property_name and args.property_operator and
               args.property_value is not None):
             prop_query, prop_values = self.property_query(
                 args.property_name,
                 args.property_operator,
                 args.property_value,
                 args.path)
             cur.execute(prop_query, prop_values)
         else:
             cur.execute('SELECT inchikey FROM molecule')
         # check that sql returns inchikey in first column; description is
         # None when the statement produced no result set at all
         if (not cur.description
                 or cur.description[0][0].lower() != 'inchikey'):
             sys.exit('Query must return inchikey in first column.')
         # print table
         writer = csv.writer(sys.stdout, delimiter=args.delimiter)
         if args.headers:
             writer.writerow([h[0] for h in cur.description])
         inchi_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
         for result in cur:
             inchikey = result[0]
             if filter_from is not None and inchikey not in filter_from:
                 continue
             if regex is not None and not regex.match(inchikey):
                 continue
             # Every subset entry is exactly three letters, so comparing
             # the three-character prefix equals a startswith() scan.
             if subset is not None and inchikey[:3] not in subset:
                 continue
             if args.smarts:
                 inchi = db.execute(inchi_query, (inchikey,)).fetchone()[0]
                 mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
                 matches = 0
                 for (smarts_obj,
                      smarts_str) in Match.smarts_generator(args.smarts):
                     matches += len(smarts_obj.findall(mol))
                 if not matches:
                     continue
             writer.writerow([xstr(v).decode('utf-8') for v in result])
     finally:
         # must be closed manually to prevent db locking during pipe; the
         # finally clause also covers the sys.exit() paths above
         db.close()
Exemplo n.º 2
0
 def execute(self, args):
     """Store the requested annotations for each input molecule.

     Reads one inchikey per row from ``args.inchikeys`` (the first
     whitespace-separated token; blank rows are skipped) and, depending
     on the options set:

     * ``args.cir``: calls ``self.update_iupac`` and
       ``self.update_synonyms`` for the inchikey.
     * ``args.fingerprint``: computes that fingerprint from the stored
       InChI and inserts it into ``molecule_fingerprint`` if no matching
       row exists.
     * ``args.spectrophore``: computes a Spectrophore from the xyz file
       found under the method directory for ``args.path`` and inserts it
       if no matching row exists.

     A fingerprint already in the database is never overwritten; if the
     newly computed value differs, only a warning is logged.

     Args:
         args: parsed command-line namespace providing the attributes
             named above plus the ``spectrophore_*`` settings.

     Raises:
         SystemExit: when no input is piped in, when no annotation was
             requested, or when Spectrophores are requested without
             ``args.path``.
     """
     if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
         sys.exit('No input specified.')
     if not (args.cir or args.fingerprint or args.spectrophore):
         sys.exit('You did not request any annotations.')
     if args.spectrophore:
         if args.path is None:
             sys.exit(('Spectrophore calculation requires 3D geometry. '
                       'You must specify a 3D geometry with --path.'))
         path = MethodPath()
         path.set_path(args.path)
         method_dir = path.get_path_directory()
         sp_args = {'normalization': args.spectrophore_normalization,
                    'accuracy': args.spectrophore_accuracy,
                    'stereo': args.spectrophore_stereospecificity,
                    'resolution': args.spectrophore_resolution}
     self.db = MessDB()
     inchi_select_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
     fp_select_query = ('SELECT fingerprint FROM molecule_fingerprint '
                        'WHERE inchikey = ? '
                        'AND name = ? '
                        'AND settings = ? '
                        'AND method_path_id = ?')
     fp_insert_query = ('INSERT INTO molecule_fingerprint '
                        '(inchikey, name, settings, '
                        'fingerprint, method_path_id) '
                        'VALUES (?, ?, ?, ?, ?)')
     for row in args.inchikeys:
         fields = row.split()
         if not fields:
             # Blank row: there is no inchikey to annotate.
             continue
         self.inchikey = fields[0]
         if args.cir:
             self.update_iupac(self.inchikey)
             self.update_synonyms(self.inchikey)
         if args.fingerprint:
             inchi = self.db.execute(inchi_select_query,
                                     (self.inchikey,)).fetchone()[0]
             mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
             canonical = pybel.ob.OBOp.FindType(b'canonical')
             canonical.Do(mol.OBMol)
             fp = Match.calculate_fingerprint(mol, args.fingerprint)
             # Plain fingerprints are stored with empty settings and an
             # empty method path.
             stored = self.db.execute(fp_select_query,
                                      (self.inchikey,
                                       args.fingerprint,
                                       '',
                                       '')).fetchone()
             if stored is None:
                 # fetchone() returns None when no row matched.
                 self.db.execute(fp_insert_query, (self.inchikey,
                                                   args.fingerprint,
                                                   '',
                                                   str(fp),
                                                   ''))
                 self.log_all.info('%s fingerprint for %s added to db',
                                   args.fingerprint, self.inchikey)
             elif str(fp) != stored[0]:
                 self.log_console.warning(('new %s fingerprint '
                                           'for %s did not match '
                                           'fingerprint in db, '
                                           'db not updated'),
                                          args.fingerprint,
                                          self.inchikey)
         if args.spectrophore:
             xyz_file = os.path.join(get_inchikey_dir(self.inchikey),
                                     method_dir,
                                     '%s.xyz' % self.inchikey)
             # The builtin next() takes the first molecule from the
             # reader on both Python 2 and Python 3.
             mol = next(pybel.readfile('xyz', xyz_file))
             sp = Match.calculate_spectrophore(mol, sp_args)
             # Serialise the settings once; the same string keys both the
             # lookup and the insert.
             json_sp_args = json.dumps(sp_args, sort_keys=True)
             stored = self.db.execute(fp_select_query,
                                      (self.inchikey,
                                       'Spectrophore',
                                       json_sp_args,
                                       args.path)).fetchone()
             if stored is None:
                 self.db.execute(fp_insert_query, (self.inchikey,
                                                   'Spectrophore',
                                                   json_sp_args,
                                                   str(sp),
                                                   args.path))
                 self.log_all.info(('Spectrophore fingerprint for %s '
                                    'with parameters %s and '
                                    'geometry from path %i '
                                    'added to db'),
                                   self.inchikey, json_sp_args, args.path)
             elif str(sp) != stored[0]:
                 self.log_console.warning(('new Spectrophore '
                                           'fingerprint for '
                                           '%s did not match '
                                           'fingerprint in db, '
                                           'db not updated'),
                                          self.inchikey)