def read_lgn(filepath):
    """
    Read the LGN edges from a CSV file.

    The first row is treated as a header and skipped. For every following
    row the first two columns are passed through ``helper.swap`` and the
    resulting pair is stored as one edge.

    Returns a tuple ``(genes_in_lgn, edges_in_lgn)``: ``edges_in_lgn`` is a
    list of ``[gene_1, gene_2]`` lists and ``genes_in_lgn`` is whatever
    ``helper.genes_from_edges`` derives from those edges.

    Raises IndexError if a data row has fewer than two columns.
    """
    edges_in_lgn = []
    with open(filepath) as lgn_file:
        lgn_reader = csv.reader(lgn_file)
        # The next() builtin works on Python 2.6+ and Python 3 (the reader's
        # .next() method exists only on Python 2); the default value keeps
        # an empty file from raising StopIteration.
        next(lgn_reader, None)  # skip the header row

        for row in lgn_reader:
            # NOTE(review): helper.swap presumably normalises the order of
            # the two genes - confirm against the helper module.
            gene_1, gene_2 = helper.swap(row[0], row[1])
            edges_in_lgn.append([gene_1, gene_2])

    genes_in_lgn = helper.genes_from_edges(edges_in_lgn)

    return genes_in_lgn, edges_in_lgn
    def __connection_in_extra_genes_step_0(self, block):
        """
        Compute c[0] for every 'extra gene' of a block.

        An extra gene is a gene returned by ``helper.genes_from_edges(block)``
        that is not in ``self.genes_in_lgn``. The result maps each extra gene
        to a list of ``len(self.genes_in_lgn) + 2`` counters in which slot 0
        is 1 (the gene appears in this block) and all other slots are 0.
        """
        slot_count = len(self.genes_in_lgn) + 2

        block_genes = helper.genes_from_edges(block)
        extra_genes = set(block_genes) - set(self.genes_in_lgn)

        # How many times an extra gene appears in the block is irrelevant;
        # only whether it appears at all is recorded.
        control = {}
        for gene in extra_genes:
            counters = [0] * slot_count
            counters[0] = 1
            # c[1] (number of connections with the LGN) and c[2] (number of
            # connections with the subLGN) are calculated later.
            control[gene] = counters

        return control
# Example #3
0
import os
import csv
import pdb
import logging

import helper

def read_lgn(filepath):
    """
    Read the LGN edges from a ';'-delimited CSV file.

    The first row is treated as a header and skipped. For every following
    row the first two columns are passed through ``helper.swap`` and the
    resulting pair is stored as one edge. Rows that raise IndexError (for
    example rows with fewer than two columns) are reported with a logging
    warning and skipped.

    Returns a tuple ``(genes_in_lgn, edges_in_lgn)``: ``edges_in_lgn`` is a
    list of ``[gene_1, gene_2]`` lists and ``genes_in_lgn`` is whatever
    ``helper.genes_from_edges`` derives from those edges.
    """
    edges_in_lgn = []
    with open(filepath) as lgn_file:
        lgn_reader = csv.reader(lgn_file, delimiter=';')
        # The next() builtin works on Python 2.6+ and Python 3 (the reader's
        # .next() method exists only on Python 2); the default value keeps
        # an empty file from raising StopIteration.
        next(lgn_reader, None)  # skip the header row

        for row in lgn_reader:
            try:
                # NOTE(review): helper.swap presumably normalises the order
                # of the two genes - confirm against the helper module.
                gene_1, gene_2 = helper.swap(row[0], row[1])
            except IndexError:
                # Malformed row: warn and carry on with the next one.
                logging.warning("%s is not in the correct format", row)
                continue
            edges_in_lgn.append([gene_1, gene_2])

    genes_in_lgn = helper.genes_from_edges(edges_in_lgn)

    return genes_in_lgn, edges_in_lgn