def execute(self, args):
    """Run the select query and write the resulting table to stdout.

    The query is chosen in this order: a custom SQL query (``args.query``,
    either a path to a UTF-8 file containing SQL or a literal SQL string),
    a property filter built by ``self.property_query``, or a plain
    ``SELECT inchikey FROM molecule``. Rows are then optionally filtered by
    the piped-in inchikeys, a case-insensitive regex, a ``--part``/``--of``
    prefix subset and SMARTS matching before being written as delimited
    text.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``inchikeys`` (file-like input whose first whitespace-separated
            token per row is used as a filter), ``query``,
            ``property_name``, ``property_operator``, ``property_value``,
            ``path``, ``part``, ``of``, ``regex_subset``, ``smarts``,
            ``headers`` and ``delimiter``.

    Returns:
        None. Returns early, writing nothing, if a piped-in row is blank.

    Raises:
        SystemExit: on mutually exclusive or incomplete options, invalid
            SQL, or a query whose first column is not ``inchikey``.
    """
    if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
        # Nothing was piped in, so the query result is not filtered.
        filter_from = None
    else:
        try:
            filter_from = set(row.split()[0].strip()
                              for row in args.inchikeys)
        except IndexError:
            # A blank input row aborts the selection without any output
            # (behavior kept from the original implementation).
            return

    if args.query and (args.property_name or args.property_operator or
                       args.property_value):
        sys.exit(('Custom SQL queries are mutually exclusive with '
                  'property filtering.'))
    if (args.part or args.of) and not (args.part and args.of):
        sys.exit(('If you specify a --part n, you must also specify --of '
                  'N (e.g. something like --part 1 --of 5).'))

    # Tuple of three-letter inchikey prefixes belonging to the requested
    # part, or None when no subsetting was requested.
    subset = None
    if args.part and args.of:
        if args.part > args.of:
            sys.exit('--part must be smaller than --of.')
        if args.part < 1:
            sys.exit('--part must be >=1.')
        alpha = string.ascii_uppercase
        # AAA to ZZZ
        alpha3 = [a + b + c for a in alpha for b in alpha for c in alpha]
        if args.of > len(alpha3):
            sys.exit(('MESS.DB does not support subsetting into more than '
                      '%i parts.' % len(alpha3)))
        # Every args.of-th prefix starting at the (1-based) part index;
        # only the requested part is built.
        subset = tuple(alpha3[args.part - 1::args.of])

    # Compile once instead of going through re.match for every row.
    regex = (re.compile(args.regex_subset, re.IGNORECASE)
             if args.regex_subset else None)

    db = MessDB()
    try:
        cur = db.cursor()
        if args.query:
            try:
                # 'with' guarantees the SQL file handle is released.
                with codecs.open(args.query, encoding='utf-8') as sql_file:
                    sql = sql_file.read()
            except IOError:
                # Not a readable file: treat the argument as literal SQL.
                try:
                    cur.execute(args.query)
                except sqlite3.OperationalError:
                    sys.exit(("'%s' is neither valid sql nor a path "
                              'to a file containing valid sql.')
                             % args.query)
            else:
                try:
                    cur.execute(sql)
                except sqlite3.OperationalError:
                    sys.exit("'%s' does not contain valid sql."
                             % args.query)
        elif (args.property_name and args.property_operator and
              args.property_value is not None):
            query, values = self.property_query(args.property_name,
                                                args.property_operator,
                                                args.property_value,
                                                args.path)
            cur.execute(query, values)
        else:
            cur.execute('SELECT inchikey FROM molecule')

        # check that sql returns inchikey in first column; description is
        # None for statements that do not produce a result set
        if (not cur.description or
                cur.description[0][0].lower() != 'inchikey'):
            sys.exit('Query must return inchikey in first column.')

        # print table
        writer = csv.writer(sys.stdout, delimiter=args.delimiter)
        if args.headers:
            writer.writerow([column[0] for column in cur.description])
        inchi_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
        for result in cur:
            inchikey = result[0]
            if filter_from is not None and inchikey not in filter_from:
                continue
            if regex is not None and not regex.match(inchikey):
                continue
            if subset is not None and not inchikey.startswith(subset):
                continue
            if args.smarts:
                inchi = db.execute(inchi_query, (inchikey,)).fetchone()[0]
                mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
                # Keep the row only if at least one pattern matches.
                matches = sum(
                    len(smarts_obj.findall(mol))
                    for smarts_obj, _ in Match.smarts_generator(args.smarts))
                if not matches:
                    continue
            writer.writerow([xstr(v).decode('utf-8') for v in result])
    finally:
        # must be closed manually to prevent db locking during pipe; the
        # finally clause also covers the error/exit paths above
        db.close()
def execute(self, args):
    """Annotate molecules with names, fingerprints and/or Spectrophores.

    For every inchikey read from ``args.inchikeys`` (first
    whitespace-separated token of each row; blank rows are skipped):

    * ``args.cir``: calls ``self.update_iupac`` and
      ``self.update_synonyms`` for the inchikey.
    * ``args.fingerprint``: computes the named fingerprint from the
      molecule's stored InChI and inserts it into ``molecule_fingerprint``
      if no matching row exists; if one exists and differs, a warning is
      logged and the db is left untouched.
    * ``args.spectrophore``: same store-or-warn logic for a Spectrophore
      computed from the xyz geometry under the method path ``args.path``.

    Args:
        args: parsed command-line namespace. Attributes read here:
            ``inchikeys``, ``cir``, ``fingerprint``, ``spectrophore``,
            ``path``, ``spectrophore_normalization``,
            ``spectrophore_accuracy``, ``spectrophore_stereospecificity``
            and ``spectrophore_resolution``.

    Raises:
        SystemExit: if no input is piped in, no annotation was requested,
            or a Spectrophore was requested without ``--path``.
    """
    if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
        sys.exit('No input specified.')
    if not (args.cir or args.fingerprint or args.spectrophore):
        sys.exit('You did not request any annotations.')
    if args.spectrophore:
        if args.path is None:
            sys.exit(('Spectrophore calculation requires 3D geometry. '
                      'You must specify a 3D geometry with --path.'))
        path = MethodPath()
        path.set_path(args.path)
        method_dir = path.get_path_directory()
        sp_args = {'normalization': args.spectrophore_normalization,
                   'accuracy': args.spectrophore_accuracy,
                   'stereo': args.spectrophore_stereospecificity,
                   'resolution': args.spectrophore_resolution}
        # Serialized once; used as the 'settings' key for both the lookup
        # and the insert so the two can never disagree.
        json_sp_args = json.dumps(sp_args, sort_keys=True)
    self.db = MessDB()
    inchi_select_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
    fp_select_query = ('SELECT fingerprint FROM molecule_fingerprint '
                       'WHERE inchikey = ? '
                       'AND name = ? '
                       'AND settings = ? '
                       'AND method_path_id = ?')
    fp_insert_query = ('INSERT INTO molecule_fingerprint '
                       '(inchikey, name, settings, '
                       'fingerprint, method_path_id) '
                       'VALUES (?, ?, ?, ?, ?)')
    for row in args.inchikeys:
        fields = row.split()
        if not fields:
            # Blank row: nothing to annotate (previously an IndexError).
            continue
        self.inchikey = fields[0]
        if args.cir:
            self.update_iupac(self.inchikey)
            self.update_synonyms(self.inchikey)
        if args.fingerprint:
            inchi = self.db.execute(inchi_select_query,
                                    (self.inchikey,)).fetchone()[0]
            mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
            canonical = pybel.ob.OBOp.FindType(b'canonical')
            canonical.Do(mol.OBMol)
            fp = Match.calculate_fingerprint(mol, args.fingerprint)
            # fetchone() returns None when no row exists; test for that
            # explicitly rather than catching TypeError around the lookup.
            stored = self.db.execute(fp_select_query,
                                     (self.inchikey, args.fingerprint,
                                      '', '')).fetchone()
            if stored is None:
                self.db.execute(fp_insert_query,
                                (self.inchikey, args.fingerprint,
                                 '', str(fp), ''))
                self.log_all.info('%s fingerprint for %s added to db',
                                  args.fingerprint, self.inchikey)
            elif str(fp) != stored[0]:
                self.log_console.warning(('new %s fingerprint '
                                          'for %s did not match '
                                          'fingerprint in db, '
                                          'db not updated'),
                                         args.fingerprint, self.inchikey)
        if args.spectrophore:
            xyz_file = os.path.join(get_inchikey_dir(self.inchikey),
                                    method_dir,
                                    '%s.xyz' % self.inchikey)
            # Only the first molecule of the xyz file is used; next()
            # works on both Python 2.6+ and Python 3 iterators.
            mol = next(pybel.readfile('xyz', xyz_file))
            sp = Match.calculate_spectrophore(mol, sp_args)
            stored = self.db.execute(fp_select_query,
                                     (self.inchikey, 'Spectrophore',
                                      json_sp_args, args.path)).fetchone()
            if stored is None:
                self.db.execute(fp_insert_query,
                                (self.inchikey, 'Spectrophore',
                                 json_sp_args, str(sp), args.path))
                self.log_all.info(('Spectrophore fingerprint for %s '
                                   'with parameters %s and '
                                   'geometry from path %i '
                                   'added to db'),
                                  self.inchikey, json_sp_args, args.path)
            elif str(sp) != stored[0]:
                self.log_console.warning(('new Spectrophore '
                                          'fingerprint for '
                                          '%s did not match '
                                          'fingerprint in db, '
                                          'db not updated'),
                                         self.inchikey)