def _to_all(self, inchikey=None):
    """Log to console, central log, and molecule log.

    Use to report things that change the database.

    The first handler on the 'mess' logger whose ``baseFilename`` contains
    'molecules/' or '/dev/null' is treated as the molecule-log slot:

    * with an inchikey, that handler is replaced by a FileHandler writing
      to '<get_inchikey_dir(inchikey)>/<inchikey>.log';
    * without an inchikey, a 'molecules/' handler is replaced by a
      FileHandler on '/dev/null'; a handler already on '/dev/null' is left
      alone and the search continues with the next handler.

    The replaced handler is closed so that its file is not left open.

    Args:
        inchikey: InChIKey of the molecule whose log should receive
            messages, or None for no molecule log.

    Returns:
        The 'mess.<context>' logger (context lower-cased) when
        self.context is set, otherwise the 'mess' logger.

    Exits the process via sys.exit if inchikey is given but rejected by
    is_inchikey.
    """
    if inchikey is not None and not is_inchikey(inchikey):
        sys.exit('invalid inchikey passed to logger')
    logger = logging.getLogger('mess')
    # Iterate over a snapshot because the handler list is mutated below.
    for handler in list(logger.handlers):
        # Only file-based handlers carry baseFilename; skip the others.
        try:
            filename = handler.baseFilename
        except AttributeError:
            continue
        is_null = '/dev/null' in filename
        if not ('molecules/' in filename or is_null):
            continue
        if inchikey is not None:
            target = '%s/%s.log' % (get_inchikey_dir(inchikey), inchikey)
        elif not is_null:
            target = '/dev/null'
        else:
            # Already pointing at /dev/null and no molecule requested.
            continue
        # Open the replacement first so a failure to open it leaves the
        # logger's existing handlers untouched.
        replacement = logging.FileHandler(target)
        logger.removeHandler(handler)
        # removeHandler() does not close the handler; do it explicitly so
        # the underlying file is released.
        handler.close()
        logger.addHandler(replacement)
        break
    if self.context is not None:
        return logging.getLogger('mess.%s' % self.context.lower())
    return logging.getLogger('mess')
def mapreduce_local(self, inchikeys, method):
    """Run a method's map and reduce functions locally.

    Each inchikey is validated with is_inchikey(enforce_standard=True); the
    process exits via sys.exit on the first invalid one. The (key, values)
    pairs yielded by method.map(inchikey, get_inchikey_dir(inchikey)) are
    grouped by key, and method.reduce(key, grouped_values) is then called
    once per distinct key.
    """
    grouped = {}
    for inchikey in inchikeys:
        if not is_inchikey(inchikey, enforce_standard=True):
            sys.exit('%s is not a valid InChIKey.' % inchikey)
        mol_dir = get_inchikey_dir(inchikey)
        for key, values in method.map(inchikey, mol_dir):
            # Start a new bucket on first sight of a key, then append.
            grouped.setdefault(key, []).append(values)
    for key in grouped:
        method.reduce(key, grouped[key])
def execute(self, args):
    """Run self checks.

    Opens a MessDB, collects every inchikey from the molecule table that
    passes is_inchikey(enforce_standard=True) into self.db_inchikeys
    (warning on the console log about the rest), then runs the directory,
    database and concordance checks followed by the summary.
    """
    self.db = MessDB()
    rows = self.db.execute('SELECT inchikey FROM molecule')
    self.db_inchikeys = set()
    # check that inchikeys are all valid
    for row in rows:
        key = row.inchikey
        if not is_inchikey(key, enforce_standard=True):
            self.log_console.warning(
                '%s is not a valid standard InChiKey!', key)
            continue
        self.db_inchikeys.add(key)
    self.check_dir_structure()
    self.check_db_structure()
    self.check_db_dir_inchikey_concordance()
    self.summary()
def mapreduce_server(self, inchikeys, method):
    """Start a mapreduce server.

    Builds a datasource mapping each inchikey to get_inchikey_dir(inchikey)
    (exiting via sys.exit on the first key rejected by
    is_inchikey(enforce_standard=True)), configures a mapreduce.Server with
    that datasource and method.hash as its password, writes this machine's
    hostname to '../../temp/<password>.host' relative to this file, and
    runs the server.
    """
    self.log_console.info('hostname is %s' % gethostname())
    dirs_by_inchikey = {}
    for inchikey in inchikeys:
        if not is_inchikey(inchikey, enforce_standard=True):
            sys.exit('%s is not a valid InChIKey.' % inchikey)
        dirs_by_inchikey[inchikey] = get_inchikey_dir(inchikey)
    server = mapreduce.Server()
    server.datasource = dirs_by_inchikey
    server.password = method.hash
    here = os.path.dirname(__file__)
    relative_hostfile = '../../temp/%s.host' % server.password
    hostfile = os.path.join(here, relative_hostfile)
    with open(hostfile, 'w') as handle:
        handle.write(gethostname())
    server.run()
    self.log_console.info('all mappers and reducers have finished')
def check_dir_structure(self):
    """Check that the structure of the molecules dir is consistent.

    Walks three directory levels below '../../molecules' (relative to this
    file). Expected layout, as enforced by the checks below:

    * level 1: directories with one-character names (plain files are
      tolerated only if their name contains 'README' or starts with '.');
    * level 2: directories with two-letter alphabetic names;
    * level 3: directories whose name, appended to the two parent names,
      passes is_inchikey(enforce_standard=True).

    Anything else is reported as a warning on the console log. Every valid
    level-3 directory is handed to self.check_molecule_dir together with
    its reassembled inchikey.
    """
    moldir = os.path.join(os.path.dirname(__file__), '../../molecules')
    for top in os.listdir(moldir):
        top_path = os.path.join(moldir, top)
        if not os.path.isdir(top_path):
            if not ('README' in top or top.startswith('.')):
                self.log_console.warning(
                    'Unexpected file in molecules dir: %s', top)
            continue
        if len(top) != 1:
            self.log_console.warning(
                'Unexpected dir in molecules dir: %s', top)
            continue
        for mid in os.listdir(top_path):
            mid_path = os.path.join(moldir, top, mid)
            if not os.path.isdir(mid_path):
                self.log_console.warning(
                    'Unexpected file in molecules dir: %s/%s', top, mid)
                continue
            if len(mid) != 2 or not mid.isalpha():
                self.log_console.warning(
                    'Unexpected dir in molecules dir: %s/%s', top, mid)
                continue
            for leaf in os.listdir(mid_path):
                leaf_path = os.path.join(moldir, top, mid, leaf)
                if not os.path.isdir(leaf_path):
                    self.log_console.warning(
                        'Unexpected file in molecules dir: %s/%s/%s',
                        top, mid, leaf)
                    continue
                # The three path components concatenate to the inchikey.
                candidate = top + mid + leaf
                if not is_inchikey(candidate, enforce_standard=True):
                    self.log_console.warning(
                        'Unexpected dir in molecules dir: %s/%s/%s',
                        top, mid, leaf)
                    continue
                self.check_molecule_dir(candidate, leaf_path)
def inchikey(self, inchikey):
    """Set inchikey, and update inchikey of logger.

    None is accepted as-is; any other value must pass is_inchikey.

    Raises:
        RuntimeError: if inchikey is not None and is_inchikey rejects it.
    """
    if inchikey is not None:
        if not is_inchikey(inchikey):
            raise RuntimeError('invalid inchikey: %s' % inchikey)
    self._inchikey = inchikey
    # Keep the all-destinations logger pointed at the same molecule.
    self.log_all.inchikey = inchikey
def execute(self, args):
    """Run import method for every molecule in source.

    Reads every molecule from every file of the Source set up from
    args.source, keyed by its inchikey (later molecules with the same
    inchikey replace earlier ones). Unless args.skip_fragments is set,
    each '.'-separated fragment of a multi-fragment canonical SMILES is
    also registered under its own inchikey, carrying the parent's title.
    Afterwards Import0D.map is run for each molecule (and Import3D.map for
    molecules with dim == 3), and Import0D.reduce is called once per
    distinct query with the collected values.
    """
    source = Source()
    source.setup(args.source)
    self.log_console.info('reading molecules')
    # An ordered mapping is not required, but it is useful for debugging
    # crashing imports.
    molecules = OrderedDict()
    threedee = False
    pybel.ob.obErrorLog.SetOutputLevel(-1)
    for source_file in source.files():
        file_format = source_file.split('.')[-1]
        file_path = os.path.join(source.source_dir, source_file)
        for mol in pybel.readfile(file_format, file_path):
            # Remember whether any molecule read so far was 3D.
            threedee = threedee or mol.dim == 3
            try:
                decorate(mol, UnicodeDecorator)
            except IndexError:
                self.log_console.error('Unexpected error importing %s.',
                                       mol.title)
                continue
            inchikey = mol.write('inchikey').rstrip()
            if not is_inchikey(inchikey):
                self.log_console.info(
                    "'%s' is not an importable molecule.", mol.title)
                continue
            molecules[inchikey] = (mol, source)
            if args.skip_fragments:
                continue
            cansmi = mol.write('can').split()[0]
            if '.' not in cansmi:
                continue
            for fragment in cansmi.split('.'):
                fragmol = pybel.readstring('can', fragment)
                decorate(fragmol, UnicodeDecorator)
                fragment_key = fragmol.write('inchikey').rstrip()
                if is_inchikey(fragment_key):
                    fragmol.title = mol.title
                    molecules[fragment_key] = (fragmol, source)
                else:
                    self.log_console.info(
                        "'%s' fragment in %s is not an importable molecule.",
                        fragment, mol.title)
    import0d = Import0D()
    import0d.setup()
    if threedee:
        import3d = Import3D()
        import3d.shortdesc = source.dirname
        import3d.setup()
    self.log_console.info('setting up molecule dirs')
    queries = {}
    for mol, mol_source in molecules.values():
        for query, values in import0d.map(mol, mol_source):
            queries.setdefault(query, []).append(values)
        if mol.dim == 3:
            import3d.map(mol, mol_source)
    self.log_console.info('loading simple properties')
    for query in queries:
        import0d.reduce(query, queries[query])
def test_is_inchikey(self):
    """Check utils.is_inchikey on two well-formed keys and one bad string."""
    plain_result = utils.is_inchikey('BQJCRHHNABKAKU-KBQPJGBKSA-N')
    self.assertTrue(plain_result)
    # Second positional argument is presumably enforce_standard -- confirm
    # against utils.is_inchikey's signature.
    flagged_result = utils.is_inchikey('ADVPTQAUNPRNPO-UHFFFAOYSA-N', True)
    self.assertTrue(flagged_result)
    bogus_result = utils.is_inchikey('not an inchikey')
    self.assertFalse(bogus_result)