Пример #1
0
    def _run(self):
        """
        Run the experiment.

        Loads the PPI network and the PPI matrices referenced by
        ``self.params``, calls ``sort_by_degree`` on the network, then
        processes every disease in ``self.diseases`` and collects one result
        per disease. When ``self.params.n_processes > 1`` the diseases are
        dispatched to a worker pool through ``process_disease_wrapper``;
        otherwise ``self.process_disease`` is called in-process.

        On completion ``self.results`` holds a ``pd.DataFrame`` built from
        the list of per-disease results.
        """
        logging.info("Loading Network...")
        self.ppi_networkx, self.ppi_adj, self.protein_to_node = load_network(
            self.params.ppi_network)

        logging.info("Loading PPI Matrices...")
        self.ppi_matrices = load_ppi_matrices(self.params.ppi_matrices)

        logging.info("Sorting Nodes by Degree...")
        self.nodes_sorted_by_deg, self.nodes_ranked_by_deg = sort_by_degree(
            self.ppi_networkx)

        logging.info("Running Experiment...")
        self.results = []

        with tqdm(total=len(self.diseases)) as t:
            if self.params.n_processes > 1:
                # NOTE(review): assumes Pool is multiprocessing.Pool (or an
                # API-compatible pool exposing terminate()/join()).
                pool = Pool(self.params.n_processes)
                try:
                    # imap yields results in submission order, so the rows
                    # line up with self.diseases.values().
                    for results in pool.imap(process_disease_wrapper,
                                             self.diseases.values()):
                        self.results.append(results)
                        t.update()
                finally:
                    # Always release the worker processes, even when a
                    # worker raises; previously the pool was never shut down.
                    pool.terminate()
                    pool.join()
            else:
                for disease in self.diseases.values():
                    results = self.process_disease(disease)
                    self.results.append(results)
                    t.update()
        self.results = pd.DataFrame(self.results)
Пример #2
0
    def _run(self):
        """
        Run the experiment.

        Loads the PPI network (and builds the inverse node -> protein map),
        loads the PPI matrices, builds degree buckets with
        ``build_degree_buckets`` and prints each bucket's size, then
        processes every disease in ``self.diseases``. Each processed disease
        contributes a sequence of index tuples and a sequence of result rows.
        When ``self.params.n_processes > 1`` the diseases are dispatched to a
        worker pool through ``process_disease_wrapper``; otherwise
        ``self.process_disease`` is called in-process.

        On completion ``self.results`` holds a ``pd.DataFrame`` indexed by a
        ``('disease', 'protein')`` MultiIndex built from ``self.indices``.
        """
        logging.info("Loading Network...")
        self.ppi_networkx, self.ppi_adj, self.protein_to_node = load_network(
            self.params.ppi_network)
        # Inverse lookup of protein_to_node.
        self.node_to_protein = {
            node: protein
            for protein, node in self.protein_to_node.items()
        }

        logging.info("Loading PPI Matrices...")
        self.ppi_matrices = load_ppi_matrices(self.params.ppi_matrices)

        logging.info("Building Degree Buckets...")
        self.degree_to_bucket = build_degree_buckets(
            self.ppi_networkx, min_len=self.params.min_bucket_len)
        for degree, bucket in self.degree_to_bucket.items():
            print("Degree: {}, Size: {}".format(degree, len(bucket)))

        logging.info("Running Experiment...")
        self.results = []
        self.indices = []

        with tqdm(total=len(self.diseases)) as t:
            if self.params.n_processes > 1:
                # NOTE(review): assumes Pool is multiprocessing.Pool (or an
                # API-compatible pool exposing terminate()/join()).
                pool = Pool(self.params.n_processes)
                try:
                    for indices, results in pool.imap(
                            process_disease_wrapper,
                            self.diseases.values()):
                        self.indices.extend(indices)
                        self.results.extend(results)
                        t.update()
                finally:
                    # Always release the worker processes, even when a
                    # worker raises; previously the pool was never shut down.
                    pool.terminate()
                    pool.join()
            else:
                for disease in self.diseases.values():
                    indices, results = self.process_disease(disease)
                    self.indices.extend(indices)
                    self.results.extend(results)
                    t.update()

        index = pd.MultiIndex.from_tuples(self.indices,
                                          names=['disease', 'protein'])
        self.results = pd.DataFrame(self.results, index=index)
Пример #3
0
    def __init__(self, dir):
        """Set up the co-disease probability experiment.

        Configures logging, loads the PPI network and the disease
        associations, loads every PPI matrix listed in the params, copies a
        handful of params onto the instance, and finally loads the
        co-disease matrix from disk when the params name one (building it
        otherwise).

        Args:
            dir (string): experiment directory, forwarded to the parent
                constructor.
        """
        super(CodiseaseProbExp, self).__init__(dir)

        # Configure logging to <self.dir>/experiment.log
        log_path = os.path.join(self.dir, 'experiment.log')
        set_logger(log_path, level=logging.INFO, console=True)

        # Title banner
        for banner_line in ("Co-disease probability in the PPI Network",
                            "Sabri Eyuboglu  -- SNAP Group",
                            "======================================"):
            logging.info(banner_line)

        logging.info("Loading Network...")
        network = load_network(self.params.ppi_network)
        self.ppi_networkx, self.ppi_adj, self.protein_to_node = network

        logging.info("Loading Disease Associations...")
        self.diseases = load_diseases(self.params.diseases_path,
                                      self.params.disease_subset)

        # One loaded array per named matrix file in the params.
        matrices = {}
        for matrix_name, matrix_path in self.params.ppi_matrices.items():
            matrices[matrix_name] = np.load(matrix_path)
        self.ppi_matrices = matrices

        # Mirror the remaining params as instance attributes.
        for attr in ('top_k', 'n_buckets', 'window_length', 'smooth',
                     'plots'):
            setattr(self, attr, getattr(self.params, attr))

        # Build the co-disease matrix only when no saved one is configured.
        if not hasattr(self.params, 'codisease_matrix'):
            logging.info("Building Codisease Matrix...")
            self.codisease_matrix = self.build_codisease_matrix()
        else:
            logging.info("Loading Codisease Matrix...")
            self.codisease_matrix = np.load(self.params.codisease_matrix)
Пример #4
0
    # Excerpt from the body of a larger function: `json_path` and `args` are
    # defined outside this excerpt.
    # NOTE(review): Params is constructed from json_path and then updated
    # from the same path; the second call looks redundant -- confirm against
    # the Params implementation.
    params = Params(json_path)
    params.update(json_path)

    # Set the logger (log file lives in the experiment directory)
    set_logger(os.path.join(args.experiment_dir, 'experiment.log'),
               level=logging.INFO,
               console=True)

    # Log Title
    logging.info("Complementarity Sharing in Disease Pathways")
    logging.info("Sabri Eyuboglu  -- SNAP Group")
    logging.info("======================================")

    # Load data from params file
    logging.info("Loading PPI Network...")
    # Only the third value returned by load_network is kept here.
    _, _, protein_to_node = load_network(params.ppi_network)

    logging.info("Loading Disease Associations...")
    diseases_dict = load_diseases(params.diseases_path, params.disease_subset)

    logging.info("Loading PPI Matrices...")
    # Map each configured matrix name to the array loaded from its path.
    ppi_matrices = {}
    for name, path in params.ppi_matrices.items():
        ppi_matrices[name] = np.load(path)

    # Run Experiment
    logging.info("Running Experiment...")

    # Compute mean and standard deviation
    # Both dicts start empty; the code that fills them is outside this
    # excerpt.
    means = {}
    stds = {}
Пример #5
0
    # Excerpt from the body of a larger function: `args`, `params`,
    # `MULTIPROCESS` and `N_PROCESSES` are defined outside this excerpt.
    # Set the logger (log file lives in the prediction directory)
    set_logger(os.path.join(args.prediction_dir, 'experiment.log'),
               level=logging.INFO,
               console=True)

    # Log Title
    logging.info("Disease protein Prediction")
    logging.info("Sabri Eyuboglu  -- SNAP Group -- Stanford University")
    logging.info("======================================")

    # Load Data
    logging.info("Loading Diseases...")
    diseases = load_diseases(params.diseases_path, params.disease_subset)
    logging.info("Loading PPI Network...")
    ppi_networkx, ppi_network_adj, protein_to_node = load_network(
        params.ppi_network)
    # Inverse lookup of protein_to_node.
    node_to_protein = {
        node: protein
        for protein, node in protein_to_node.items()
    }

    # Method-specific setup: `ppi_matrix` is bound only for the "ppi_matrix"
    # method and `gcn_method` only for the 'gcn' method; any other method
    # value binds neither name here.
    if (params.method == "ppi_matrix"):
        logging.info("Loading PPI Matrix...")
        ppi_matrix = np.load(params.ppi_matrix)

    elif (params.method == 'gcn'):
        gcn_method = GCN(params, ppi_network_adj)

    # Run Predictions
    if MULTIPROCESS:
        # NOTE(review): the pool's shutdown is not visible in this excerpt --
        # confirm it is closed/terminated and joined after use.
        p = Pool(N_PROCESSES)
Пример #6
0
    def __init__(self, dir):
        """Initialize the disease protein prediction experiment.

        Configures logging, loads the PPI network and the disease
        associations, then prepares whatever the configured
        ``self.params.method`` needs:

        * ``"ppi_matrix"``: loads ``self.ppi_matrix`` with ``np.load``,
          optionally normalizes it along axis 0 and zeroes its diagonal.
        * ``"lr"``: builds one feature matrix per entry of
          ``self.params.features`` into ``self.feature_matrices``.
        * ``"l2_rw"``, ``"pathway_expansion"``, ``"learned_cn"``, ``"gcn"``:
          instantiates the matching method object as ``self.method``.

        Any other method value leaves no method-specific attribute set.

        Args:
            dir (string): The directory where the experiment should be run.
        """
        super(DPPExperiment, self).__init__(dir)

        # Set the logger. The path is taken from the instance (self.dir, the
        # same attribute handed to LearnedCN below) rather than from a
        # module-level `args`, which is not defined in this method and would
        # only exist when the module is run as a script.
        set_logger(os.path.join(self.dir, 'experiment.log'),
                   level=logging.INFO,
                   console=True)

        # Log Title
        logging.info("Disease Protein Prediction in the PPI Network")
        logging.info("Sabri Eyuboglu  -- SNAP Group")
        logging.info("======================================")

        # Load data from params file
        logging.info("Loading PPI Network...")
        (self.ppi_networkx,
         self.ppi_network_adj,
         self.protein_to_node) = load_network(self.params.ppi_network)
        # Inverse lookup of protein_to_node.
        self.node_to_protein = {
            node: protein
            for protein, node in self.protein_to_node.items()
        }
        logging.info("Loading Disease Associations...")
        self.diseases_dict = load_diseases(self.params.diseases_path,
                                           self.params.disease_subset,
                                           ['none'])

        # Load method specific data
        # TODO: Build class for each method
        method = self.params.method
        if method == "ppi_matrix":
            logging.info("Loading PPI Matrix...")
            self.ppi_matrix = np.load(self.params.ppi_matrix)
            # normalize columns of ppi_matrix
            if self.params.normalize:
                # z-score is the default when the params carry no norm_type;
                # an unrecognized norm_type leaves the matrix unnormalized.
                norm_type = getattr(self.params, "norm_type", "zscore")
                # NOTE(review): a column whose sum (frac) or std (zscore) is
                # zero yields inf/NaN entries -- confirm inputs exclude that.
                if norm_type == "frac":
                    self.ppi_matrix = (self.ppi_matrix /
                                       np.sum(self.ppi_matrix, axis=0))
                elif norm_type == "zscore":
                    self.ppi_matrix = (
                        (self.ppi_matrix - np.mean(self.ppi_matrix, axis=0))
                        / np.std(self.ppi_matrix, axis=0))

            # zero out the diagonal
            np.fill_diagonal(self.ppi_matrix, 0)

        elif method == 'lr':
            logging.info("Loading Feature Matrices...")
            self.feature_matrices = [
                build_embedding_feature_matrix(self.protein_to_node,
                                               features_filename)
                for features_filename in self.params.features
            ]

        elif method == 'l2_rw':
            self.method = L2RandomWalk(self.params)

        elif method == 'pathway_expansion':
            self.method = PathwayExpansion(self.params,
                                           self.ppi_networkx,
                                           self.ppi_network_adj)

        elif method == "learned_cn":
            self.method = LearnedCN(self.dir,
                                    self.params,
                                    self.ppi_network_adj,
                                    self.diseases_dict,
                                    self.protein_to_node)

        elif method == 'gcn':
            self.method = GCN(self.params, self.ppi_network_adj)