def make_map():
    """Build the SynergyMap for the DREAM Drug Combination Challenge data.

    Declares the representation, reduction, activity and synergy type
    descriptors, loads ``compounds.smiles`` and ``combinations.csv`` from
    ``DIRNAME`` and passes everything to ``SynergyMap``.

    The ``metadata`` values are HTML fragments assembled by implicit
    concatenation of adjacent string literals. Python inserts nothing
    between the pieces, so every piece that continues a sentence carries
    its own trailing space.

    Returns:
        SynergyMap: the map built from the compound and combination tables.
    """
    # representation types
    morgan_fp = RepresentationType(
        name='morg2',
        representation_func=skchemize(morg, radius=2, nBits=2048),
        metadata=(
            """Hashed Circular fingerprint generated by the Morgan algorithm, """
            """implemented in <a href="http://www.rdkit.org">RDKit</a>. <br/>"""
            """Parameters used: Radius = 2, Bit length = 2048"""))

    target_fp = RepresentationType(
        name='targets',
        representation_func=PIDGIN(),
        metadata=(
            """Bayes affinity fingerprint for 1080 human targets, produced """
            """using the <a href="https://github.com/lhm30/PIDGIN">PIDGIN (Prediction of targets IncluDinG INactives)</a> """
            """Target Prediction algorithm, implemented in <a href="https://github.com/richlewis42/scikit-chem">scikit-chem</a>."""))

    # Baseline representation: the molecule argument is ignored and a fresh
    # random vector is drawn on every call.
    random_fp = RepresentationType(
        name='random',
        representation_func=lambda m: pd.Series(np.random.random(100)),
        metadata=(
            """Uniformly distributed random feature vector of length 100 """
            """implemented using <a href="http://www.numpy.org">numpy</a> <a href="http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.random.html#numpy.random.random">random</a> module"""))

    representation_types = [morgan_fp, target_fp, random_fp]

    # reduction types
    pca = ReductionMethod(
        name='PCA',
        model=PCA(n_components=2),
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/Principal_component_analysis">Principal component analysis</a> implemented in <a href="http://scikit-learn.org/stable/" target="_blank">scikit-learn</a>\n"""
            """<br/>Default parameters used."""))

    mds = ReductionMethod(
        name='MDS',
        model=MDS(),
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/Multidimensional_scaling" target="_blank">Multidimensional Scaling</a> implemented in <a href="http://scikit-learn.org/stable/" target="_blank">scikit-learn</a>"""
            """<br/>Default parameters used."""))

    # NOTE(review): the description claims theta=0, but only `perplexity` is
    # passed to TSNE here - confirm the text matches the model's defaults.
    tsne = ReductionMethod(
        name='t-SNE',
        model=TSNE(perplexity=1),
        metadata=(
            """<a href="http://lvdmaaten.github.io/tsne/">Student's t-distributed stochastic neighbour embedding</a>, """
            """implemented according to <a href="http://lvdmaaten.github.io/publications/papers/JMLR_2008.pdf">van der Maaten et al. 2008</a>\n"""
            """<br/>Parameters used: Perplexity = 1, theta=0"""))

    reduction_types = [pca, mds, tsne]

    # activity types
    pic20 = ActivityType(
        name='pIC20',
        metadata=(
            """negative base-10 logarithm of the <a href="http://en.wikipedia.org/wiki/IC50">IC20</a>, the concentration of """
            """compound required for 20% inhibition of growth of Lymphoma cells"""))

    ic20 = ActivityType(
        name='IC20',
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/IC50">IC20</a>, the concentration of """
            """compound required for 20% inhibition of growth of Lymphoma cells"""))

    activity_types = [pic20, ic20]

    # synergy types
    excess_over_bliss = SynergyType(
        name='ExcessOverBliss',
        metadata=(
            """Difference in observed vs expected activity of the component compounds, """
            """each at the IC20 concentration (when known) assuming the <a href="http://doi.wiley.com/10.1111/j.1744-7348.1939.tb06990.x">Bliss Independence model</a>"""))

    synergy_types = [excess_over_bliss]

    # data
    compound_df = skc.read_smiles(os.path.join(DIRNAME, 'compounds.smiles'),
                                  name_column=1, title_line=True)
    # pIC20 is derived from the IC20 column rather than read from the file.
    compound_df['pIC20'] = -np.log10(compound_df['IC20'])

    combination_df = pd.read_csv(
        os.path.join(DIRNAME, 'combinations.csv')).set_index('id')

    return SynergyMap(compound_df=compound_df,
                      combination_df=combination_df,
                      representation_types=representation_types,
                      reduction_types=reduction_types,
                      activity_types=activity_types,
                      synergy_types=synergy_types,
                      metadata='DREAM Drug Combination Challenge Data')
def make_map():
    """Build the SynergyMap for the DREAM Drug Combination Challenge data.

    Declares the representation, reduction, activity and synergy type
    descriptors, loads ``compounds.smiles`` and ``combinations.csv`` from
    ``DIRNAME`` and passes everything to ``SynergyMap``.

    The ``metadata`` values are HTML fragments assembled by implicit
    concatenation of adjacent string literals. Python inserts nothing
    between the pieces, so every piece that continues a sentence carries
    its own trailing space.

    Returns:
        SynergyMap: the map built from the compound and combination tables.
    """
    # representation types
    morgan_fp = RepresentationType(
        name='morg2',
        representation_func=skchemize(morg, radius=2, nBits=2048),
        metadata=(
            """Hashed Circular fingerprint generated by the Morgan algorithm, """
            """implemented in <a href="http://www.rdkit.org">RDKit</a>. <br/>"""
            """Parameters used: Radius = 2, Bit length = 2048"""))

    target_fp = RepresentationType(
        name='targets',
        representation_func=PIDGIN(),
        metadata=(
            """Bayes affinity fingerprint for 1080 human targets, produced """
            """using the <a href="https://github.com/lhm30/PIDGIN">PIDGIN (Prediction of targets IncluDinG INactives)</a> """
            """Target Prediction algorithm, implemented in <a href="https://github.com/richlewis42/scikit-chem">scikit-chem</a>."""))

    # Baseline representation: the molecule argument is ignored and a fresh
    # random vector is drawn on every call.
    random_fp = RepresentationType(
        name='random',
        representation_func=lambda m: pd.Series(np.random.random(100)),
        metadata=(
            """Uniformly distributed random feature vector of length 100 """
            """implemented using <a href="http://www.numpy.org">numpy</a> <a href="http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.random.html#numpy.random.random">random</a> module"""))

    representation_types = [morgan_fp, target_fp, random_fp]

    # reduction types
    pca = ReductionMethod(
        name='PCA',
        model=PCA(n_components=2),
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/Principal_component_analysis">Principal component analysis</a> implemented in <a href="http://scikit-learn.org/stable/" target="_blank">scikit-learn</a>\n"""
            """<br/>Default parameters used."""))

    mds = ReductionMethod(
        name='MDS',
        model=MDS(),
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/Multidimensional_scaling" target="_blank">Multidimensional Scaling</a> implemented in <a href="http://scikit-learn.org/stable/" target="_blank">scikit-learn</a>"""
            """<br/>Default parameters used."""))

    # NOTE(review): the description claims theta=0, but only `perplexity` is
    # passed to TSNE here - confirm the text matches the model's defaults.
    tsne = ReductionMethod(
        name='t-SNE',
        model=TSNE(perplexity=1),
        metadata=(
            """<a href="http://lvdmaaten.github.io/tsne/">Student's t-distributed stochastic neighbour embedding</a>, """
            """implemented according to <a href="http://lvdmaaten.github.io/publications/papers/JMLR_2008.pdf">van der Maaten et al. 2008</a>\n"""
            """<br/>Parameters used: Perplexity = 1, theta=0"""))

    reduction_types = [pca, mds, tsne]

    # activity types
    pic20 = ActivityType(
        name='pIC20',
        metadata=(
            """negative base-10 logarithm of the <a href="http://en.wikipedia.org/wiki/IC50">IC20</a>, the concentration of """
            """compound required for 20% inhibition of growth of Lymphoma cells"""))

    ic20 = ActivityType(
        name='IC20',
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/IC50">IC20</a>, the concentration of """
            """compound required for 20% inhibition of growth of Lymphoma cells"""))

    activity_types = [pic20, ic20]

    # synergy types
    excess_over_bliss = SynergyType(
        name='ExcessOverBliss',
        metadata=(
            """Difference in observed vs expected activity of the component compounds, """
            """each at the IC20 concentration (when known) assuming the <a href="http://doi.wiley.com/10.1111/j.1744-7348.1939.tb06990.x">Bliss Independence model</a>"""))

    synergy_types = [excess_over_bliss]

    # data
    compound_df = skc.read_smiles(os.path.join(DIRNAME, 'compounds.smiles'),
                                  name_column=1, title_line=True)
    # pIC20 is derived from the IC20 column rather than read from the file.
    compound_df['pIC20'] = -np.log10(compound_df['IC20'])

    combination_df = pd.read_csv(
        os.path.join(DIRNAME, 'combinations.csv')).set_index('id')

    return SynergyMap(compound_df=compound_df,
                      combination_df=combination_df,
                      representation_types=representation_types,
                      reduction_types=reduction_types,
                      activity_types=activity_types,
                      synergy_types=synergy_types,
                      metadata='DREAM Drug Combination Challenge Data')
def to_dict(self):
    """Return a dict representation of this type.

    Args:
        None

    Returns:
        dict: A dictionary with the keys ``"name"`` and ``"metadata"``,
            holding the values of the instance attributes of the same names.
    """
    return {"name": self.name, "metadata": self.metadata}


# Representation registered under the name 'morg2'; radius=2 and nBits=2048
# are handed to skchemize together with `morg` (presumably forwarded to the
# fingerprint function - confirm against skchemize).
morg2 = RepresentationType(
    name='morg2',
    representation_func=skchemize(morg, radius=2, nBits=2048),
    metadata=
    """Hashed Circular fingerprint generated by the Morgan algorithm, """
    """implemented in <a href="http://www.rdkit.org">RDKit</a>. <br/>"""
    """Parameters used: Radius = 2, Bit length = 2048""")
# Representation registered under the name 'targets', computed by a PIDGIN()
# instance.
# NOTE(review): adjacent literals are concatenated without a separator, so
# the text renders as "...INactives)</a>Target Prediction algorithm" - a
# space appears to be missing after the closing </a>.
targets = RepresentationType(
    name='targets',
    representation_func=PIDGIN(),
    metadata="""Bayes affinity fingerprint for 1080 human targets, produced """
    """using the <a href="https://github.com/lhm30/PIDGIN">PIDGIN (Prediction of targets IncluDinG INactives)</a>"""
    """Target Prediction algorithm, implemented in <a href="https://github.com/richlewis42/scikit-chem">scikit-chem</a>."""
)
random = RepresentationType(
    name='random',
def make_map():
    """Build the SynergyMap for the Malaria NCATS dataset.

    Declares the representation, reduction, activity and synergy type
    descriptors, loads ``compounds.sdf`` and ``combinations.csv`` (both
    resolved relative to the current working directory) and passes
    everything to ``SynergyMap``.

    The ``metadata`` values are HTML fragments assembled by implicit
    concatenation of adjacent string literals. Python inserts nothing
    between the pieces, so every piece that continues a sentence carries
    its own trailing space.

    Returns:
        SynergyMap: the map built from the compound and combination tables.
    """
    # representation types
    morgan_fp = RepresentationType(
        name='morg2',
        representation_func=skchemize(morg, radius=2, nBits=2048),
        metadata=(
            """Hashed Circular fingerprint generated by the Morgan algorithm, """
            """implemented in <a href="http://www.rdkit.org">RDKit</a>. <br/>"""
            """Parameters used: Radius = 2, Bit length = 2048"""))

    target_fp = RepresentationType(
        name='targets',
        representation_func=PIDGIN(),
        metadata=(
            """Bayes affinity fingerprint for 1080 human targets, produced """
            """using the <a href="https://github.com/lhm30/PIDGIN">PIDGIN (Prediction of targets IncluDinG INactives)</a> """
            """Target Prediction algorithm, implemented in <a href="https://github.com/richlewis42/scikit-chem">scikit-chem</a>."""))

    # Baseline representation: the molecule argument is ignored and a fresh
    # random vector is drawn on every call.
    random_fp = RepresentationType(
        name='random',
        representation_func=lambda m: pd.Series(np.random.random(100)),
        metadata=(
            """Uniformly distributed random feature vector of length 100 """
            """implemented using <a href="http://www.numpy.org">numpy</a> <a href="http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.random.html#numpy.random.random">random</a> module"""))

    representation_types = [morgan_fp, target_fp, random_fp]

    # reduction types
    pca = ReductionMethod(
        name='PCA',
        model=PCA(n_components=2),
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/Principal_component_analysis">Principal component analysis</a> implemented in <a href="http://scikit-learn.org/stable/" target="_blank">scikit-learn</a>\n"""
            """<br/>Default parameters used."""))

    mds = ReductionMethod(
        name='MDS',
        model=MDS(),
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/Multidimensional_scaling" target="_blank">Multidimensional Scaling</a> implemented in <a href="http://scikit-learn.org/stable/" target="_blank">scikit-learn</a>"""
            """<br/>Default parameters used."""))

    # NOTE(review): the description claims theta=0, but only `perplexity` is
    # passed to TSNE here - confirm the text matches the model's defaults.
    tsne = ReductionMethod(
        name='t-SNE',
        model=TSNE(perplexity=10),
        metadata=(
            """<a href="http://lvdmaaten.github.io/tsne/">Student's t-distributed stochastic neighbour embedding</a>, """
            """implemented according to <a href="http://lvdmaaten.github.io/publications/papers/JMLR_2008.pdf">van der Maaten et al. 2008</a>\n"""
            """<br/>Parameters used: Perplexity = 10, theta=0"""))

    reduction_types = [pca, mds, tsne]

    # activity types
    # The activity is the pIC50 column, so the description states that it is
    # the negative logarithm of the IC50 rather than the IC50 itself.
    pic50 = ActivityType(
        name='pIC50',
        metadata=(
            """negative base-10 logarithm of the <a href="http://en.wikipedia.org/wiki/IC50">IC50</a>, the concentration of """
            """compound required for 50% inhibition of growth of Malarial cells"""))

    activity_types = [pic50]

    # synergy types (all without descriptive metadata); the sequence below is
    # the order in which they are handed to SynergyMap.
    synergy_names = [
        'pGamma', 'MedianExcess', 'NumExcess', 'LS3x3', 'DBSumPos',
        'DBSumNeg', '-ExcessHSA', '-ExcessCRX', 'pGamma_scrambled',
    ]
    synergy_types = [SynergyType(name=synergy_name, metadata="")
                     for synergy_name in synergy_names]

    # data
    compound_df = skc.read_sdf('compounds.sdf')

    # Expose the 'Name' column under the lower-case key 'name'.
    compound_df['name'] = compound_df.Name
    compound_df.drop('Name', axis=1, inplace=True)

    # Copy the existing index into an 'id' column, discard 'NCGC_ID' and
    # re-index on 'id' (net effect: same index values, now named 'id').
    compound_df['id'] = compound_df.index
    compound_df.drop('NCGC_ID', axis=1, inplace=True)
    compound_df.set_index('id', inplace=True)

    # Coerce the activity columns to floats (presumably they are read from
    # the SDF as strings - confirm against skc.read_sdf).
    compound_df['pIC50'] = compound_df['pIC50'].apply(float)
    compound_df['IC50'] = compound_df.IC50.apply(float)

    combination_df = pd.read_csv('combinations.csv').set_index('id')

    return SynergyMap(compound_df=compound_df,
                      combination_df=combination_df,
                      representation_types=representation_types,
                      reduction_types=reduction_types,
                      activity_types=activity_types,
                      synergy_types=synergy_types,
                      metadata="Malaria NCATS dataset")
def to_dict(self):
    """Return a dict representation of this type.

    Args:
        None

    Returns:
        dict: A dictionary with the keys ``"name"`` and ``"metadata"``,
            holding the values of the instance attributes of the same names.
    """
    return {"name": self.name, "metadata": self.metadata}


# Representation registered under the name "morg2"; radius=2 and nBits=2048
# are handed to skchemize together with `morg` (presumably forwarded to the
# fingerprint function - confirm against skchemize).
morg2 = RepresentationType(
    name="morg2",
    representation_func=skchemize(morg, radius=2, nBits=2048),
    metadata="""Hashed Circular fingerprint generated by the Morgan algorithm, """
    """implemented in <a href="http://www.rdkit.org">RDKit</a>. <br/>"""
    """Parameters used: Radius = 2, Bit length = 2048""",
)
# Representation registered under the name "targets", computed by a PIDGIN()
# instance.
# NOTE(review): adjacent literals are concatenated without a separator, so
# the text renders as "...INactives)</a>Target Prediction algorithm" - a
# space appears to be missing after the closing </a>.
targets = RepresentationType(
    name="targets",
    representation_func=PIDGIN(),
    metadata="""Bayes affinity fingerprint for 1080 human targets, produced """
    """using the <a href="https://github.com/lhm30/PIDGIN">PIDGIN (Prediction of targets IncluDinG INactives)</a>"""
    """Target Prediction algorithm, implemented in <a href="https://github.com/richlewis42/scikit-chem">scikit-chem</a>.""",
)
random = RepresentationType(
    name="random",
def make_map():
    """Build the SynergyMap for the Malaria NCATS dataset.

    Declares the representation, reduction, activity and synergy type
    descriptors, loads ``compounds.sdf`` and ``combinations.csv`` (both
    resolved relative to the current working directory) and passes
    everything to ``SynergyMap``.

    The ``metadata`` values are HTML fragments assembled by implicit
    concatenation of adjacent string literals. Python inserts nothing
    between the pieces, so every piece that continues a sentence carries
    its own trailing space.

    Returns:
        SynergyMap: the map built from the compound and combination tables.
    """
    # representation types
    morgan_fp = RepresentationType(
        name='morg2',
        representation_func=skchemize(morg, radius=2, nBits=2048),
        metadata=(
            """Hashed Circular fingerprint generated by the Morgan algorithm, """
            """implemented in <a href="http://www.rdkit.org">RDKit</a>. <br/>"""
            """Parameters used: Radius = 2, Bit length = 2048"""))

    target_fp = RepresentationType(
        name='targets',
        representation_func=PIDGIN(),
        metadata=(
            """Bayes affinity fingerprint for 1080 human targets, produced """
            """using the <a href="https://github.com/lhm30/PIDGIN">PIDGIN (Prediction of targets IncluDinG INactives)</a> """
            """Target Prediction algorithm, implemented in <a href="https://github.com/richlewis42/scikit-chem">scikit-chem</a>."""))

    # Baseline representation: the molecule argument is ignored and a fresh
    # random vector is drawn on every call.
    random_fp = RepresentationType(
        name='random',
        representation_func=lambda m: pd.Series(np.random.random(100)),
        metadata=(
            """Uniformly distributed random feature vector of length 100 """
            """implemented using <a href="http://www.numpy.org">numpy</a> <a href="http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.random.html#numpy.random.random">random</a> module"""))

    representation_types = [morgan_fp, target_fp, random_fp]

    # reduction types
    pca = ReductionMethod(
        name='PCA',
        model=PCA(n_components=2),
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/Principal_component_analysis">Principal component analysis</a> implemented in <a href="http://scikit-learn.org/stable/" target="_blank">scikit-learn</a>\n"""
            """<br/>Default parameters used."""))

    mds = ReductionMethod(
        name='MDS',
        model=MDS(),
        metadata=(
            """<a href="http://en.wikipedia.org/wiki/Multidimensional_scaling" target="_blank">Multidimensional Scaling</a> implemented in <a href="http://scikit-learn.org/stable/" target="_blank">scikit-learn</a>"""
            """<br/>Default parameters used."""))

    # NOTE(review): the description claims theta=0, but only `perplexity` is
    # passed to TSNE here - confirm the text matches the model's defaults.
    tsne = ReductionMethod(
        name='t-SNE',
        model=TSNE(perplexity=10),
        metadata=(
            """<a href="http://lvdmaaten.github.io/tsne/">Student's t-distributed stochastic neighbour embedding</a>, """
            """implemented according to <a href="http://lvdmaaten.github.io/publications/papers/JMLR_2008.pdf">van der Maaten et al. 2008</a>\n"""
            """<br/>Parameters used: Perplexity = 10, theta=0"""))

    reduction_types = [pca, mds, tsne]

    # activity types
    # The activity is the pIC50 column, so the description states that it is
    # the negative logarithm of the IC50 rather than the IC50 itself.
    pic50 = ActivityType(
        name='pIC50',
        metadata=(
            """negative base-10 logarithm of the <a href="http://en.wikipedia.org/wiki/IC50">IC50</a>, the concentration of """
            """compound required for 50% inhibition of growth of Malarial cells"""))

    activity_types = [pic50]

    # synergy types (all without descriptive metadata); the sequence below is
    # the order in which they are handed to SynergyMap.
    synergy_names = [
        'pGamma', 'MedianExcess', 'NumExcess', 'LS3x3', 'DBSumPos',
        'DBSumNeg', '-ExcessHSA', '-ExcessCRX', 'pGamma_scrambled',
    ]
    synergy_types = [SynergyType(name=synergy_name, metadata="")
                     for synergy_name in synergy_names]

    # data
    compound_df = skc.read_sdf('compounds.sdf')

    # Expose the 'Name' column under the lower-case key 'name'.
    compound_df['name'] = compound_df.Name
    compound_df.drop('Name', axis=1, inplace=True)

    # Copy the existing index into an 'id' column, discard 'NCGC_ID' and
    # re-index on 'id' (net effect: same index values, now named 'id').
    compound_df['id'] = compound_df.index
    compound_df.drop('NCGC_ID', axis=1, inplace=True)
    compound_df.set_index('id', inplace=True)

    # Coerce the activity columns to floats (presumably they are read from
    # the SDF as strings - confirm against skc.read_sdf).
    compound_df['pIC50'] = compound_df['pIC50'].apply(float)
    compound_df['IC50'] = compound_df.IC50.apply(float)

    combination_df = pd.read_csv('combinations.csv').set_index('id')

    return SynergyMap(compound_df=compound_df,
                      combination_df=combination_df,
                      representation_types=representation_types,
                      reduction_types=reduction_types,
                      activity_types=activity_types,
                      synergy_types=synergy_types,
                      metadata="Malaria NCATS dataset")