Пример #1
0
 def setUp(self):
     """Create the scratch directory and a MethodPath backed by a test db."""
     scratch = './tmp'
     self.tmp_dir = scratch
     if not os.path.exists(scratch):
         os.mkdir(scratch)
     self.path = MethodPath()
     db_file = '%s/test.db' % scratch
     # creating the db emits a 'MESS.DB created' message; keep it quiet
     with suppress_stderr():
         self.path._db = MessDB(database=db_file)
     self.path._graph = DirectedGraph()
Пример #2
0
 def test_setup_path(self):
     """Check setup_path(1) on an empty path, then again on a second
     MethodPath that shares the db and has loaded the graph from it."""
     self.path.setup_path(1)
     # assertEqual replaces the deprecated assertEquals alias
     self.assertEqual(self.path._graph._node_count, 1)
     self.assertEqual(self.path._path, [1])
     self.assertEqual(self.path.get_length(), 0)
     self.assert_path_consistency()
     # check that new path isn't added for same method
     new_path = MethodPath()
     new_path._db = self.path._db
     new_path._graph = DirectedGraph()
     new_path._load_graph()
     new_path.setup_path(1)
     self.assertEqual(new_path._graph._node_count, 1)
     self.assertEqual(new_path._path, [1])
     self.assertEqual(new_path.get_length(), 0)
Пример #3
0
 def __init__(self):
     """Create db, path and loggers, then verify that the subclass
     declares every required attribute and check its dependencies."""
     self.db = MessDB()
     self.path = MethodPath()
     self.log_console = Log('console')
     self.log_all = Log('all')
     self.method_name = self.get_method_name()
     # geop is a flag: the method results in new xyz coordinates
     required = ('parameters', 'description', 'geop',
                 'prog_name', 'prog_version', 'prog_url')
     try:
         for attribute in required:
             getattr(self, attribute)
     except AttributeError as err:
         print(str(err) + '\n', file=sys.stderr)
         sys.exit('Each method class needs to define description, geop, '
                  'prog_name, prog_version, prog_url, '
                  'parameters as attributes.')
     self.check_dependencies()
Пример #4
0
 def execute(self, args):
     """Match molecules against SMARTS patterns, fingerprints, or
     Spectrophores and write the results to stdout.

     Exactly one of args.smarts, args.fingerprint and args.spectrophore
     must be set. Each line of args.inchikeys supplies one inchikey (its
     first whitespace-separated token); blank lines are skipped.

     Args:
         args: Parsed arguments. Attributes read here: inchikeys, smarts,
               fingerprint, spectrophore, the spectrophore_* settings,
               path, target, cutoff and delimiter.

     Raises:
         SystemExit: If no input is given, if no operation or more than
                     one operation is selected, or if --spectrophore is
                     used without --path.
     """
     if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
         sys.exit('No input specified.')
     # parse args
     operations = (args.smarts, args.fingerprint, args.spectrophore)
     if not any(operations):
         sys.exit('No operations were selected, nothing to match.')
     if sum(bool(operation) for operation in operations) > 1:
         sys.exit(('One thing at a time, please. The arguments --smarts, '
                   '--fingerprint, and --spectrophore are mutually '
                   'exclusive.'))
     if args.smarts and args.target:
         self.log_console.warning(('--target ignored, proceeding with '
                                   'SMARTS matching'))
     if args.spectrophore:
         if args.path is None:
             sys.exit(('Spectrophore calculation requires 3D geometry. '
                       'You must specify a 3D geometry with --path.'))
         path = MethodPath()
         path.set_path(args.path)
         method_dir = path.get_path_directory()
         sp_args = {'normalization': args.spectrophore_normalization,
                    'accuracy': args.spectrophore_accuracy,
                    'stereo': args.spectrophore_stereospecificity,
                    'resolution': args.spectrophore_resolution}
         # the settings key is the same for every molecule; build it once
         sp_settings = json.dumps(sp_args, sort_keys=True)
     # load target and target fingerprints
     target_mol = None
     target_fp = None
     target_sp = None
     if args.target:
         if os.path.exists(args.target):
             # target is a file; its extension names the format
             target_mol = next(pybel.readfile(args.target.split('.')[-1],
                                              args.target))
         else:
             # otherwise the target is read as a SMILES string
             target_mol = pybel.readstring('smi', args.target)
     if target_mol is not None:
         if args.fingerprint:
             target_fp = self.calculate_fingerprint(target_mol,
                                                    args.fingerprint)
         if args.spectrophore:
             target_sp = self.calculate_spectrophore(target_mol, sp_args)
     # match every input
     db = MessDB()
     inchi_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
     fp_query = ('SELECT fingerprint FROM molecule_fingerprint '
                 'WHERE inchikey = ? AND name = ? '
                 'AND settings = ? AND method_path_id = ?')
     writer = csv.writer(sys.stdout, delimiter=args.delimiter)
     for row in args.inchikeys:
         fields = row.split()
         if not fields:
             continue  # blank line, no inchikey to look up
         inchikey = fields[0]
         if args.smarts or args.fingerprint:
             inchi = db.execute(inchi_query, (inchikey,)).fetchone()[0]
             mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
         if args.smarts:
             canonical = pybel.ob.OBOp.FindType(b"canonical")
             canonical.Do(mol.OBMol)
             for (smarts_obj,
                  smarts_str) in self.smarts_generator(args.smarts):
                 matches = list(smarts_obj.findall(mol))
                 if matches:
                     writer.writerow([inchikey, smarts_str] + matches)
         if args.fingerprint:
             # presumably fetchone() yields None when nothing is stored,
             # so indexing raises TypeError and we calculate instead
             try:
                 fp = db.execute(fp_query, (inchikey, args.fingerprint,
                                            '', '')).fetchone()[0]
             except TypeError:
                 fp = self.calculate_fingerprint(mol, args.fingerprint)
             if target_fp is not None:
                 similarity = self.calculate_similarity(target_fp, fp,
                                                        'tanimoto')
                 if similarity > args.cutoff:
                     writer.writerow([inchikey, args.fingerprint,
                                      args.target, similarity])
             else:
                 writer.writerow([inchikey, args.fingerprint] + fp)
         if args.spectrophore:
             try:
                 sp = db.execute(fp_query, (inchikey, 'Spectrophore',
                                            sp_settings,
                                            args.path)).fetchone()[0]
             except TypeError:
                 # not stored: calculate from the xyz geometry on disk
                 xyz_file = os.path.join(get_inchikey_dir(inchikey),
                                         method_dir,
                                         '%s.xyz' % inchikey)
                 mol = next(pybel.readfile('xyz', xyz_file))
                 sp = self.calculate_spectrophore(mol, sp_args)
             if target_sp is not None:
                 try:
                     similarity = self.calculate_similarity(target_sp, sp,
                                                            'cos')
                 except ValueError:
                     similarity = 0
                 if similarity > args.cutoff:
                     writer.writerow([inchikey, 'Spectrophore',
                                      args.target, similarity])
             else:
                 writer.writerow([inchikey, 'Spectrophore'] + sp)
Пример #5
0
class AbstractMethod(object):
    """All methods must inherit from this class.

    Subclasses must define description, geop, prog_name, prog_version and
    prog_url as attributes (parameters defaults to an empty dict) and
    implement check_dependencies, check and map.

    Attributes:
        db (obj): A MessDB object
        path (obj): A MethodPath object
        log_console (obj): Log object created with 'console'
        log_all (obj): Log object created with 'all'
        method_name (str): The name of the method
        description (str): Description of method
        geop (bool): Whether the method generates a new geometry
        prog_name (str): Program name
        prog_version (str): Program version
        prog_url (str): Program url
        parameters (dict): Parameters that affect program execution
    """
    parameters = dict()
    shortdesc = None
    method_citation = None
    prog_citation = None
    _inchikey = None
    _path_id = None
    _parent_path_id = None
    _method_dir = None
    _parent_method_dir = None

    def __init__(self):
        """Set up db, check for attributes, dependencies, and setup."""
        self.db = MessDB()
        self.path = MethodPath()
        self.log_console = Log('console')
        self.log_all = Log('all')
        self.method_name = self.get_method_name()
        # touching each attribute raises AttributeError if the subclass
        # forgot to define it
        try:
            self.parameters
            self.description
            self.geop  # flag indicates method results in new xyz coordinates
            self.prog_name
            self.prog_version
            self.prog_url
        except AttributeError as err:
            print(''.join([str(err), '\n']), file=sys.stderr)
            sys.exit(('Each method class needs to define description, geop, '
                      'prog_name, prog_version, prog_url, '
                      'parameters as attributes.'))
        self.check_dependencies()

    def __hash__(self):
        """Hash based on method name and parameters.

        Returns:
            A hex string of the sha1 hash of self.method_name plus
            JSON-serialized self.parameters. Parameter keys and values are
            converted to lowercase strings and keys are sorted.
        """
        # NOTE(review): this returns a str rather than an int, so the
        # builtin hash() cannot be used on instances; kept as is because
        # the hash property exposes this value.
        normalized = {str(key).lower(): str(value).lower()
                      for key, value in self.parameters.items()}
        payload = self.method_name + json.dumps(normalized, sort_keys=True)
        # hashlib requires bytes on Python 3; the digest is unchanged on
        # Python 2 because the payload is ASCII JSON plus a class name
        return hashlib.sha1(payload.encode('utf-8')).hexdigest()

    @property
    def hash(self):
        """Get hash."""
        return self.__hash__()

    @property
    def method_id(self):
        """Get the object's method_id by looking up its hash in the db."""
        query = ('SELECT method_id FROM method '
                 'WHERE hash = ?;')
        row = self.db.execute(query, (self.hash,)).fetchone()
        return row.method_id

    @property
    def path_id(self):
        """Get the path id of the method."""
        # (re)build the path when it does not end at this method
        if not self.path.get_method_id() == self.method_id:
            self._setup_path()
        return self._path_id

    @property
    def method_dir(self):
        """Get the directory name of the method."""
        if not self.path.get_method_id() == self.method_id:
            self._setup_path()
        return self._method_dir

    @property
    def parent_method_dir(self):
        """Get the parent directory name of the method."""
        if not self.path.get_method_id() == self.method_id:
            self._setup_path()
        return self._parent_method_dir

    @property
    def inchikey(self):
        """Get inchikey."""
        return self._inchikey

    @inchikey.setter
    def inchikey(self, inchikey):
        """Set inchikey, and update inchikey of logger.

        Raises:
            RuntimeError: If inchikey is not None and fails is_inchikey.
        """
        if inchikey is not None and not is_inchikey(inchikey):
            raise RuntimeError('invalid inchikey: %s' % inchikey)
        self._inchikey = inchikey
        self.log_all.inchikey = inchikey

    @classmethod
    def get_method_name(cls):
        """Return the name of the method, derived from the subclass name
        by dropping underscores and lowercasing."""
        return cls.__name__.replace('_', '').lower()

    def _setup_path(self):
        """Setup path given current method id and parent path."""
        self.path.setup_path(self.method_id, self._parent_path_id)
        self._path_id = self.path.get_path_id()
        self._method_dir = self.path.get_path_directory()
        self._parent_method_dir = self.path.get_parent_path_directory()

    def _insert_method(self):
        """Insert the method (with its hash) into the db, linked to the
        program row matching prog_name and prog_version."""
        total_changes = self.db.total_changes
        query = ('INSERT OR IGNORE INTO method '
                 '(program_id, geop, name, shortdesc, citation, hash) '
                 'SELECT program.program_id, ?, ?, ?, ?, ? '
                 'FROM program '
                 'WHERE program.name=? AND program.version=?')
        self.db.execute(query, (self.geop, self.method_name, self.shortdesc,
                                self.method_citation, self.hash,
                                self.prog_name, self.prog_version))
        # only log when the insert was not ignored
        if self.db.total_changes - total_changes > 0:
            self.log_all.info('new %s method added to MESS.DB',
                              self.method_name)

    def _insert_program(self):
        """Adds row to program table in mess.db."""
        total_changes = self.db.total_changes
        query = ('INSERT OR IGNORE INTO program '
                 '(name, version, url, citation) '
                 'VALUES (?, ?, ?, ?)')
        self.db.execute(query,
                        (self.prog_name, self.prog_version, self.prog_url,
                         self.prog_citation))
        if self.db.total_changes - total_changes > 0:
            self.log_all.info('program %s %s added to MESS.DB',
                              self.prog_name, self.prog_version)

    def _insert_parameters(self):
        """Import the parameters dict to mess.db.

        Each key of self.parameters is inserted into the parameter table
        and each (method, parameter, setting) triple into
        method_parameter.
        """
        added_parameters = 0
        for name, setting in self.parameters.items():
            query = ('INSERT OR IGNORE INTO parameter (name) VALUES (?)')
            self.db.execute(query, (name, ))
            # snapshot after the parameter insert so that only
            # method_parameter rows are counted
            total_changes = self.db.total_changes
            # NOTE(review): 'program' appears in FROM but is not otherwise
            # used, which looks like an unintended cross join - confirm
            query = ('INSERT OR IGNORE INTO method_parameter '
                     '(method_id, parameter_id, setting) '
                     'SELECT ?, parameter.parameter_id, ? '
                     'FROM program, parameter '
                     'WHERE parameter.name=?')
            self.db.execute(query, (self.method_id, setting, name))
            added_parameters += (self.db.total_changes - total_changes)
        if added_parameters > 0:
            self.log_all.info('%i method parameters added to MESS.DB',
                              added_parameters)

    def get_insert_property_query(self, inchikey, name, description,
                                  format_, value, units=''):
        """Returns query to insert property value to mess.db.

        The row is tagged with self.path_id.

        Args:
            inchikey: The inchikey of a molecule in MESS.DB.
            name: The property name.
            description: A description of the property.
            format_: A description of the format the property is in.
            value: The calculated property.
            units: Units for the property value.

        Returns:
            A (query, values) tuple.
        """
        query = ('INSERT OR IGNORE INTO molecule_method_property_denorm '
                 'VALUES (?, ?, ?, ?, ?, ?, ?);')
        return (query, (inchikey, self.path_id, name, description,
                        format_, units, value))

    def get_insert_moldata_queries(self, inchikey, mol,
                                   description='', units=''):
        """Yields queries to insert molecule data values to mess.db.

        One (query, values) tuple is produced per item of mol.data; the
        format is the type name of the value.
        """
        for name, value in mol.data.items():
            yield self.get_insert_property_query(inchikey,
                                                 name,
                                                 description,
                                                 type(value).__name__,
                                                 value,
                                                 units)

    def get_timing_query(self, inchikey, start):
        """Get a query to insert execution time property into db.

        Args:
            inchikey: The inchikey of a molecule in MESS.DB.
            start: Start time; the runtime is time.time() - start.
        """
        return self.get_insert_property_query(inchikey, 'runtime',
                                              'execution time',
                                              type(start).__name__,
                                              time.time() - start, 's')

    def set_parent_path(self, parent_path):
        """Set the parent path (e.g., path to method containing input
        geometry.)

        None and non-positive values are ignored.
        """
        # explicit None check: None > 0 is a TypeError on Python 3
        if parent_path is not None and parent_path > 0:
            self._parent_path_id = parent_path

    def has_parent_path(self, inchikey):
        """Returns True if molecule has had entire parent path calculated,
        False otherwise."""
        query = ('SELECT inchikey FROM molecule_method_property WHERE '
                 'inchikey = ? AND method_path_id = ?')
        try:
            self.db.execute(query,
                            (inchikey, self._parent_path_id)).fetchone()[0]
            return True
        except TypeError:
            # presumably fetchone() gave None, i.e. no matching row
            return False

    def check_dependencies(self):
        """If check_dependencies is not implemented, raise error."""
        raise NotImplementedError(("every method needs a 'check_dependencies' "
                                   'method'))

    def check(self):
        """If check is not implemented, raise error."""
        # the check method should be called before a calculation (so
        # calculations are not repeated) and after (to verify success)
        raise NotImplementedError("every method needs a 'check' method")

    def map(self, inchikey, inchikey_dir):
        """Generally, maps molecule to calculation via method, emits
        query/value pairs.
        """
        raise NotImplementedError(("every method needs a 'map' method"))

    def reduce(self, query, values):
        """Run queries/values on the db and log how many rows changed."""
        total_changes = self.db.total_changes
        if query or values[0]:
            self.db.executemany(query, values)
            self.log_all.info('%i properties added to MESS.DB',
                              self.db.total_changes - total_changes)

    def setup(self):
        """Set up method: insert program, method, and parameters."""
        self._insert_program()
        self._insert_method()
        self._insert_parameters()
Пример #6
0
class TestMethodPath(unittest.TestCase):
    """Tests for MethodPath, run against a throwaway db in ./tmp."""

    def setUp(self):
        """Create the scratch directory and a MethodPath using a test db."""
        self.tmp_dir = './tmp'
        if not os.path.exists(self.tmp_dir):
            os.mkdir(self.tmp_dir)
        self.path = MethodPath()
        with suppress_stderr():  # silence 'MESS.DB created' message
            self.path._db = MessDB(database='%s/test.db' % self.tmp_dir)
        self.path._graph = DirectedGraph()

    def tearDown(self):
        """Remove the scratch directory and everything in it."""
        shutil.rmtree(self.tmp_dir)

    def assert_path_consistency(self):
        """Check that the cached _path_id agrees with get_path_id()."""
        self.assertEqual(self.path._path_id, self.path.get_path_id())

    def test_init(self):
        """A fresh path has an empty graph, empty path and no path id."""
        self.assertEqual(self.path._db.tries, 0)
        self.assertEqual(self.path._graph._node_count, 0)
        self.assertEqual(self.path._path, [])
        self.assertIsNone(self.path._path_id)
        self.assert_path_consistency()

    def test_load_graph(self):
        """Edges stored in method_edge are loaded as graph nodes."""
        insert_query = 'INSERT INTO method_edge VALUES (?, ?, ?)'
        self.path._db.executemany(insert_query, ((1, 1, 1),
                                                 (2, 1, 2),
                                                 (3, 2, 3)))
        self.path._load_graph()
        self.assertEqual(sorted(self.path._graph.get_node_ids()), [1, 2, 3])
        self.assert_path_consistency()

    def test_setup_path(self):
        """setup_path creates one node and is not duplicated on reload."""
        self.path.setup_path(1)
        self.assertEqual(self.path._graph._node_count, 1)
        self.assertEqual(self.path._path, [1])
        self.assertEqual(self.path.get_length(), 0)
        self.assert_path_consistency()
        # check that new path isn't added for same method
        new_path = MethodPath()
        new_path._db = self.path._db
        new_path._graph = DirectedGraph()
        new_path._load_graph()
        new_path.setup_path(1)
        self.assertEqual(new_path._graph._node_count, 1)
        self.assertEqual(new_path._path, [1])
        self.assertEqual(new_path.get_length(), 0)

    def test_extend_path(self):
        """Each extend_path call lengthens the path by one."""
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.assertEqual(self.path._graph._node_count, 2)
        self.assertEqual(self.path._path, [1, 2])
        self.assertEqual(self.path.get_length(), 1)
        self.path.extend_path(44)
        self.assertEqual(self.path._graph._node_count, 3)
        self.assertEqual(self.path._path, [1, 2, 5])
        self.assertEqual(self.path.get_length(), 2)
        # extending with an already-used method adds no new node
        self.path.extend_path(42)
        self.assertEqual(self.path._graph._node_count, 3)
        self.assertEqual(self.path._path, [1, 2, 5, 8])
        self.assertEqual(self.path.get_length(), 3)
        self.assert_path_consistency()

    def test_set_path(self):
        """set_path selects an existing path without changing the graph."""
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.path.extend_path(44)
        self.path.set_path(2)
        self.assertEqual(self.path._graph._node_count, 3)
        self.assertEqual(self.path._path, [1, 2])
        self.assertEqual(self.path.get_length(), 1)
        self.assert_path_consistency()

    def test_get_path_id(self):
        """get_path_id is None before setup, then tracks extensions."""
        self.assertIsNone(self.path.get_path_id())
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_path_id(), 3)

    def test_get_parent_path_id(self):
        """get_parent_path_id is None before setup, else the prior id."""
        self.assertIsNone(self.path.get_parent_path_id())
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_parent_path_id(), 2)

    def test_get_method_id(self):
        """get_method_id returns the most recently added method."""
        self.assertIsNone(self.path.get_method_id())
        self.path.setup_path(42)
        self.assertEqual(self.path.get_method_id(), 42)
        self.path.extend_path(43)
        self.assertEqual(self.path.get_method_id(), 43)

    def test_get_parent_method_id(self):
        """get_parent_method_id returns the method one step back."""
        self.assertIsNone(self.path.get_parent_method_id())
        self.path.setup_path(42)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(43)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_parent_method_id(), 43)

    def test_get_superparent_method_id(self):
        """get_superparent_method_id returns the method two steps back."""
        self.assertIsNone(self.path.get_superparent_method_id())
        # NOTE(review): the next two assertions call get_parent_method_id,
        # not get_superparent_method_id - possibly a copy-paste; confirm
        self.path.setup_path(42)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(43)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_superparent_method_id(), 42)
        self.path.extend_path(45)
        self.assertEqual(self.path.get_superparent_method_id(), 43)

    def test_get_directory(self):
        """_get_directory returns None on a path that was never set up."""
        self.assertIsNone(self.path._get_directory(1, 2, 3))

    def test_get_path_directory(self):
        """get_path_directory returns None before any path is set up."""
        self.assertIsNone(self.path.get_path_directory())

    def test_get_parent_path_directory(self):
        """get_parent_path_directory returns None before setup."""
        self.assertIsNone(self.path.get_parent_path_directory())
Пример #7
0
 def execute(self, args):
     """Annotate molecules with names, fingerprints, or Spectrophores.

     For every inchikey read from args.inchikeys (first
     whitespace-separated token of each line; blank lines are skipped):
     with args.cir, update the IUPAC name and synonyms; with
     args.fingerprint or args.spectrophore, calculate the fingerprint
     and insert it into molecule_fingerprint unless one is already
     stored, in which case a mismatch is only logged as a warning.

     Args:
         args: Parsed arguments. Attributes read here: inchikeys, cir,
               fingerprint, spectrophore, the spectrophore_* settings
               and path.

     Raises:
         SystemExit: If no input is given, no annotation is requested,
                     or --spectrophore is used without --path.
     """
     if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
         sys.exit('No input specified.')
     if not (args.cir or args.fingerprint or args.spectrophore):
         sys.exit('You did not request any annotations.')
     if args.spectrophore:
         if args.path is None:
             sys.exit(('Spectrophore calculation requires 3D geometry. '
                       'You must specify a 3D geometry with --path.'))
         path = MethodPath()
         path.set_path(args.path)
         method_dir = path.get_path_directory()
         sp_args = {'normalization': args.spectrophore_normalization,
                    'accuracy': args.spectrophore_accuracy,
                    'stereo': args.spectrophore_stereospecificity,
                    'resolution': args.spectrophore_resolution}
         # the settings key is the same for every molecule; build it once
         json_sp_args = json.dumps(sp_args, sort_keys=True)
     self.db = MessDB()
     inchi_select_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
     fp_select_query = ('SELECT fingerprint FROM molecule_fingerprint '
                        'WHERE inchikey = ? '
                        'AND name = ? '
                        'AND settings = ? '
                        'AND method_path_id = ?')
     fp_insert_query = ('INSERT INTO molecule_fingerprint '
                        '(inchikey, name, settings, '
                        'fingerprint, method_path_id) '
                        'VALUES (?, ?, ?, ?, ?)')
     for row in args.inchikeys:
         fields = row.split()
         if not fields:
             continue  # blank line, no inchikey to annotate
         self.inchikey = fields[0]
         if args.cir:
             self.update_iupac(self.inchikey)
             self.update_synonyms(self.inchikey)
         if args.fingerprint:
             inchi = self.db.execute(inchi_select_query,
                                     (self.inchikey,)).fetchone()[0]
             mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
             canonical = pybel.ob.OBOp.FindType(b'canonical')
             canonical.Do(mol.OBMol)
             fp = Match.calculate_fingerprint(mol, args.fingerprint)
             # presumably fetchone() yields None when nothing is stored,
             # so indexing raises TypeError and the new value is inserted
             try:
                 db_fp = self.db.execute(fp_select_query,
                                         (self.inchikey,
                                          args.fingerprint,
                                          '',
                                          '')).fetchone()[0]
                 if not str(fp) == db_fp:
                     self.log_console.warning(('new %s fingerprint '
                                               'for %s did not match '
                                               'fingerprint in db, '
                                               'db not updated'),
                                              args.fingerprint,
                                              self.inchikey)
             except TypeError:
                 self.db.execute(fp_insert_query, (self.inchikey,
                                                   args.fingerprint,
                                                   '',
                                                   str(fp),
                                                   ''))
                 self.log_all.info('%s fingerprint for %s added to db',
                                   args.fingerprint, self.inchikey)
         if args.spectrophore:
             xyz_file = os.path.join(get_inchikey_dir(self.inchikey),
                                     method_dir,
                                     '%s.xyz' % self.inchikey)
             mol = next(pybel.readfile('xyz', xyz_file))
             sp = Match.calculate_spectrophore(mol, sp_args)
             try:
                 db_sp = self.db.execute(fp_select_query,
                                         (self.inchikey,
                                          'Spectrophore',
                                          json_sp_args,
                                          args.path)).fetchone()[0]
                 if not str(sp) == db_sp:
                     self.log_console.warning(('new Spectrophore '
                                               'fingerprint for '
                                               '%s did not match '
                                               'fingerprint in db, '
                                               'db not updated'),
                                              self.inchikey)
             except TypeError:
                 self.db.execute(fp_insert_query, (self.inchikey,
                                                   'Spectrophore',
                                                   json_sp_args,
                                                   str(sp),
                                                   args.path))
                 self.log_all.info(('Spectrophore fingerprint for %s '
                                    'with parameters %s and '
                                    'geometry from path %i '
                                    'added to db'),
                                   self.inchikey, json_sp_args, args.path)