class GMFConfig_dbg:
    """Debug configuration pairing a GMF model with a synthetic dataset.

    A random bipartite graph is generated with networkx, expanded into a
    dense table holding one row per (virus, human) node pair, and wrapped in
    a ``ProteinInteractionGenerator``. A ``GMF`` model sized to that table is
    then created and moved to ``device``.
    """

    def __init__(self, device, n=150, m=150, prob=.5):
        """Build the synthetic data generator first, then the model.

        Args:
            device: Stored on the instance, placed in the generator config
                and used as the target of ``model.to``.
            n: Forwarded positionally as ``create_generator``'s ``m``
                (size of the second graph partition, the "human" side).
            m: Forwarded positionally as ``create_generator``'s ``n``
                (size of the first graph partition, the "virus" side).
            prob: Edge probability handed to ``nx.bipartite.random_graph``.
        """
        self.device = device
        # NOTE(review): the (n, m) names are swapped relative to
        # create_generator(m, n, prob). The call is left as-is so existing
        # callers keep their behaviour -- confirm which order is intended.
        self.create_generator(n, m, prob)
        self.create_model()

    def create_model(self):
        """Instantiate ``GMF`` from the generator's sizes and move it to the device.

        Reads ``self.n_v`` and ``self.n_h``, so ``create_generator`` must
        have run first. The result is stored in ``self.model``.
        """
        print('-' * 15, "Creating model", '-' * 15)

        latent_dim = 10
        config = {
            'num_virus': self.n_v,
            'num_human': self.n_h,
            'latent_dim': latent_dim,
            # set false for now because some optimizers dont work with sparse
            'sparse': False,
        }

        self.model = GMF(config)
        self.model.to(self.device)

        print(self.model)
        print('-' * 15, "Done with model", '-' * 15)
        print()

    @staticmethod
    def _label_pairs(observed, n, m, progress=None):
        """Label every (virus, human) node pair as an edge or a non-edge.

        Args:
            observed: Iterable of ``(virus_id, human_id)`` tuples that are
                edges. It is converted to a set once so that each lookup is
                constant-time.
            n: Number of virus nodes; their ids are ``0 .. n-1``.
            m: Number of human nodes; their ids are ``n .. n+m-1``.
            progress: Optional callable wrapping the outer iterable (for
                example ``tqdm``) to report progress.

        Returns:
            Three equally long lists ``(virus_ids, human_ids, labels)`` in
            row-major order, where ``labels`` holds ``1.0`` for an observed
            edge and ``0.0`` otherwise.
        """
        edge_set = set(observed)
        outer = range(n) if progress is None else progress(range(n))

        virus_ids = []
        human_ids = []
        labels = []
        for i in outer:
            for j in range(n, n + m):
                virus_ids.append(i)
                human_ids.append(j)
                labels.append(1.0 if (i, j) in edge_set else 0.0)
        return virus_ids, human_ids, labels

    def create_generator(self, m, n, prob):
        """Generate a random bipartite graph and wrap it in a data generator.

        Sets ``self.n_v``, ``self.n_h`` and ``self.gen``.

        Args:
            m: Size of the second partition (ids ``n .. n+m-1``), stored in
                the ``humanUprot`` column.
            n: Size of the first partition (ids ``0 .. n-1``), stored in the
                ``virusUprot`` column.
            prob: Edge probability passed to ``nx.bipartite.random_graph``.
        """
        ############################
        ##  generate bipartite
        ###########################
        print('-' * 15, "Generating graph", '-' * 15)
        G = nx.bipartite.random_graph(n, m, prob)

        # Membership is tested as (virus, human), exactly as before; this
        # presumably relies on networkx reporting each edge with the
        # first-partition node first -- verify if the graph source changes.
        # Only the outer loop reports progress; with set lookups the inner
        # loop is cheap and a bar per row would only flood the terminal.
        virusUprot, humanUprot, edges = self._label_pairs(
            G.edges(), n, m, progress=tqdm)

        M = pd.DataFrame({
            'virusUprot': virusUprot,
            'humanUprot': humanUprot,
            'edge': edges
        })

        # Map each distinct node id to a contiguous index.
        htoi = {v: k for k, v in enumerate(M['humanUprot'].unique())}
        vtoi = {v: k for k, v in enumerate(M['virusUprot'].unique())}
        print('-' * 15, "Dataframe created", '-' * 15)
        print()

        ############################
        ##   Prepare data (dataloader)
        ############################
        print('-' * 15, "Creating data loaders", '-' * 15)

        data_config = {
            'interactions': M,
            'htoi': htoi,
            'vtoi': vtoi,
            'pct_test': .10,
            'device': self.device
        }

        self.n_v = len(vtoi)
        self.n_h = len(htoi)
        self.gen = ProteinInteractionGenerator(data_config)

        print('-' * 15, "Generator done", '-' * 15)
        print()

    def get_generator(self):
        """Return the ``ProteinInteractionGenerator`` built by ``create_generator``."""
        return self.gen

    def get_model(self):
        """Return the ``GMF`` model built by ``create_model``."""
        return self.model
# Example #2
# 0
class GMFConfig:
    """Configuration that loads an interaction CSV and builds a GMF model for it."""

    def __init__(self, path, debug, device):
        """Create the data generator from ``path``, then the model.

        Args:
            path: Prefix prepended verbatim to ``full_train.csv``.
            debug: When truthy, train on a balanced, shuffled subset.
            device: Stored on the instance, placed in the generator config
                and used as the target of ``model.to``.
        """
        self.device = device
        self.create_generator(path, debug)
        self.create_model()

    def create_model(self):
        """Build ``self.model`` from the sizes recorded by ``create_generator``."""
        rule = '-' * 15
        print(rule, "Creating model", rule)

        model_settings = dict([
            ('num_virus', self.n_v),
            ('num_human', self.n_h),
            ('latent_dim', 2799),
            # set false for now because some optimizers dont work with sparse
            ('sparse', False),
        ])

        self.model = GMF(model_settings)
        self.model.to(self.device)

        print(self.model)
        print(rule, "Done with model", rule)
        print()

    def create_generator(self, path, debug):
        """Read the training CSV and wrap it in a ``ProteinInteractionGenerator``.

        Sets ``self.n_v``, ``self.n_h`` and ``self.gen``. The CSV is read
        through its ``edge``, ``humanUprot`` and ``virusUprot`` columns.

        Args:
            path: Prefix prepended verbatim to ``full_train.csv``.
            debug: When truthy, keep every row with ``edge > 0`` plus at
                most as many shuffled ``edge == 0`` rows, then shuffle again.
        """
        rule = '-' * 15

        # Location of the training matrix.
        train_csv = f'{path}full_train.csv'

        # Load the interaction table.
        print(rule, "Loading data", rule)
        print("loading traning matrix at: ", train_csv)
        interactions = pd.read_csv(train_csv)

        if debug:
            print("Making debug dataset.....")
            positives = interactions.loc[interactions['edge'] > 0].sample(frac=1)
            negatives = interactions.loc[interactions['edge'] == 0].sample(frac=1)
            balanced = pd.concat(
                [positives, negatives[:len(positives)]], ignore_index=True)
            interactions = balanced.sample(frac=1)

        # Map each distinct identifier to a contiguous index.
        human_index = {
            uprot: idx
            for idx, uprot in enumerate(interactions['humanUprot'].unique())
        }
        virus_index = {
            uprot: idx
            for idx, uprot in enumerate(interactions['virusUprot'].unique())
        }

        # Hand everything to the data generator.
        print(rule, "Creating data loaders", rule)

        self.n_v = len(virus_index)
        self.n_h = len(human_index)
        self.gen = ProteinInteractionGenerator({
            'interactions': interactions,
            'htoi': human_index,
            'vtoi': virus_index,
            'pct_test': .10,
            'device': self.device,
        })

        print(rule, "Generator done", rule)
        print()

    def get_generator(self):
        """Return the generator created by ``create_generator``."""
        return self.gen

    def get_model(self):
        """Return the model created by ``create_model``."""
        return self.model