Exemplo n.º 1
0
  def _featurize_complexes(self, df, featurizer, parallel=True,
                           worker_pool=None):
    """Featurize (ligand, protein) complexes and store the result in `df`.

    The "ligand_pdb" and "protein_pdb" columns of `df` are paired up
    element-wise. Each pair is handed to `featurizer.featurize_complexes`
    as two one-element lists, and the collected results are written to a
    column of `df` named after the featurizer's class.

    Args:
      df: Table-like object providing "protein_pdb" and "ligand_pdb"
        columns. It is modified in place.
      featurizer: Object exposing `featurize_complexes(ligands, proteins)`.
      parallel: Not consulted by this method; kept so that existing callers
        passing it keep working.
      worker_pool: Optional pool exposing `map_sync(func, sequence)`. When
        None, the complexes are featurized one by one in this process.

    Returns:
      None. The features are stored in `df`.
    """
    protein_pdbs = list(df["protein_pdb"])
    ligand_pdbs = list(df["ligand_pdb"])
    # Materialize the pairs once: a list can be handed to a pool that needs
    # to know the length of its input, which a one-shot zip iterator cannot.
    complexes = list(zip(ligand_pdbs, protein_pdbs))

    def featurize_wrapper(ligand_protein_pdb_tuple):
      ligand_pdb, protein_pdb = ligand_protein_pdb_tuple
      # Wrap the slice in a 1-tuple so that a tuple-valued ligand_pdb is
      # formatted as a single value instead of being unpacked by `%`.
      print("Featurizing %s" % (ligand_pdb[0:2],))
      return featurizer.featurize_complexes([ligand_pdb], [protein_pdb])

    if worker_pool is None:
      features = [featurize_wrapper(pair) for pair in complexes]
    else:
      features = worker_pool.map_sync(featurize_wrapper, complexes)
    df[featurizer.__class__.__name__] = list(features)
Exemplo n.º 2
0
    def _featurize_complexes(self,
                             df,
                             featurizer,
                             parallel=True,
                             worker_pool=None):
        """Compute features for protein-ligand complexes and add them to `df`.

        Ligands and proteins are read from the "ligand_pdb" and
        "protein_pdb" columns of `df` and paired element-wise. Every pair
        is passed to `featurizer.featurize_complexes` as two single-item
        lists; the gathered results are stored in a column of `df` whose
        name is the featurizer's class name.

        Args:
            df: Table-like object with "protein_pdb" and "ligand_pdb"
                columns. Modified in place.
            featurizer: Object providing
                `featurize_complexes(ligands, proteins)`.
            parallel: Ignored by this method; retained for backward
                compatibility with existing call sites.
            worker_pool: Optional pool providing
                `map_sync(func, sequence)`. If None, featurization runs
                serially in the current process.

        Returns:
            None. Results are written into `df`.
        """
        protein_pdbs = list(df["protein_pdb"])
        ligand_pdbs = list(df["ligand_pdb"])
        # Build the pairs a single time as a list, so the same sequence can
        # be iterated locally or handed to a pool that needs a sized input.
        complexes = list(zip(ligand_pdbs, protein_pdbs))

        def featurize_wrapper(ligand_protein_pdb_tuple):
            ligand_pdb, protein_pdb = ligand_protein_pdb_tuple
            # The 1-tuple keeps `%` from unpacking a tuple-valued slice.
            print("Featurizing %s" % (ligand_pdb[0:2],))
            return featurizer.featurize_complexes([ligand_pdb],
                                                  [protein_pdb])

        if worker_pool is None:
            features = [featurize_wrapper(pair) for pair in complexes]
        else:
            features = worker_pool.map_sync(featurize_wrapper, complexes)
        df[featurizer.__class__.__name__] = list(features)
Exemplo n.º 3
0
    def _featurize_compounds(self,
                             df,
                             featurizer,
                             parallel=True,
                             worker_pool=None):
        """Featurize individual compounds.

        Reads SMILES strings from the "smiles" column of `df`, converts
        each one with `Chem.MolFromSmiles`, runs `featurizer.featurize` on
        the resulting single-molecule list, and stores the per-compound
        results in a column of `df` named after the featurizer's class.

        Args:
            df: Features dataframe; `df["smiles"]` must support
                `.tolist()`. Modified in place.
            featurizer: Object exposing `featurize(mols)`.
            parallel: Not consulted by this method; kept for backward
                compatibility with existing callers.
            worker_pool: Optional pool exposing
                `map_sync(func, sequence)`. When None, compounds are
                featurized serially and progress is logged every
                `self.log_every_n` samples.

        Returns:
            None. The features are stored in `df`.
        """
        sample_smiles = df["smiles"].tolist()

        if worker_pool is None:
            features = []
            for ind, smiles in enumerate(sample_smiles):
                if ind % self.log_every_n == 0:
                    log("Featurizing sample %d" % ind, self.verbose)
                mol = Chem.MolFromSmiles(smiles)
                features.append(featurizer.featurize([mol]))
        else:

            def featurize_wrapper(smiles, dilled_featurizer):
                print("Featurizing %s" % smiles)
                mol = Chem.MolFromSmiles(smiles)
                loaded_featurizer = dill.loads(dilled_featurizer)
                return loaded_featurizer.featurize([mol])

            # The pool calls its function with one item of the mapped
            # sequence, so the serialized featurizer has to be bound up
            # front. Mapping the bare two-argument wrapper would leave
            # `dilled_featurizer` unfilled on every call.
            featurize_wrapper_partial = partial(
                featurize_wrapper, dilled_featurizer=dill.dumps(featurizer))
            features = worker_pool.map_sync(featurize_wrapper_partial,
                                            sample_smiles)

        df[featurizer.__class__.__name__] = features
Exemplo n.º 4
0
  def _featurize_compounds(self, df, featurizer, parallel=True,
                           worker_pool=None):
    """Featurize individual compounds.

    Given a featurizer that operates on individual chemical compounds
    or macromolecules, compute features for each compound and add them to
    the features dataframe. SMILES strings come from `df["smiles"]`; each
    is converted with `Chem.MolFromSmiles` and passed to
    `featurizer.featurize` as a one-element list. Results are stored in a
    column of `df` named after the featurizer's class.

    Args:
      df: Features dataframe; `df["smiles"]` must support `.tolist()`.
        Modified in place.
      featurizer: Object exposing `featurize(mols)`.
      parallel: Ignored by this method; retained so existing call sites
        keep working.
      worker_pool: Optional pool exposing `map_sync(func, sequence)`. When
        None, compounds are processed serially, logging progress every
        `self.log_every_n` samples.

    Returns:
      None. The features are stored in `df`.
    """
    sample_smiles = df["smiles"].tolist()

    if worker_pool is None:
      features = []
      for ind, smiles in enumerate(sample_smiles):
        if ind % self.log_every_n == 0:
          log("Featurizing sample %d" % ind, self.verbose)
        mol = Chem.MolFromSmiles(smiles)
        features.append(featurizer.featurize([mol]))
    else:
      def featurize_wrapper(smiles, dilled_featurizer):
        print("Featurizing %s" % smiles)
        mol = Chem.MolFromSmiles(smiles)
        loaded_featurizer = dill.loads(dilled_featurizer)
        return loaded_featurizer.featurize([mol])

      # Bind the serialized featurizer before mapping: the pool supplies
      # only the SMILES string, so the bare two-argument wrapper would be
      # called without `dilled_featurizer`.
      featurize_wrapper_partial = partial(
          featurize_wrapper, dilled_featurizer=dill.dumps(featurizer))
      features = worker_pool.map_sync(featurize_wrapper_partial,
                                      sample_smiles)

    df[featurizer.__class__.__name__] = features