Пример #1
0
def read_similarity_graph(similarity_graph_file, fasta_file, read_length=400):
    """Build a test dataset from the candidate read pairs of a similarity graph.

    Each non-blank row of the CSV file names two reads (columns 0 and 1).
    Both reads are looked up in the FASTA dictionary, brought to exactly
    ``read_length`` characters (right-padded with 'A' or truncated), and
    handed to ``binarize_pair``. Every pair receives the label 1.

    Args:
        similarity_graph_file: Path to a comma-delimited file whose first two
            columns are read ids.
        fasta_file: Path forwarded to ``FastaIO.read_fasta_file_as_dict``;
            the result is indexed by read id and each entry must expose a
            ``.seq`` attribute.
        read_length: Length every read is padded or cut to before
            binarization. Defaults to 400, the value that used to be
            hard-coded. NOTE(review): other values are only safe if
            ``binarize_pair`` and the consumer of the dataset accept them --
            confirm before changing.

    Returns:
        A ``Datasets`` namedtuple with a single field ``test`` holding a
        ``DataSet`` built from the pair array (with a trailing axis of size 1
        appended) and the all-ones label array.

    Raises:
        IndexError: If the file contains no usable rows (the stacked array
            then has no second/third axis to reshape), or if a non-blank row
            has fewer than two columns.
    """
    reads_by_id = FastaIO.read_fasta_file_as_dict(fasta_file)

    # Materialize the rows first so the file handle is released before the
    # per-pair work starts.
    with open(similarity_graph_file, 'r') as csvfile:
        rows = list(csv.reader(csvfile, delimiter=','))

    pairs = []
    for row in rows:
        if not row:
            # csv.reader yields an empty list for blank lines (e.g. a
            # trailing newline); there is no pair to read from them.
            continue
        read1 = _fit_read_to_length(reads_by_id[row[0]].seq, read_length)
        read2 = _fit_read_to_length(reads_by_id[row[1]].seq, read_length)
        pairs.append(binarize_pair(read1, read2))

    all_pairs = np.array(pairs)
    # Every candidate pair is labelled 1.
    all_labels = np.array([1] * len(pairs))

    # Append a trailing axis of size 1; this indexes three axes, so each
    # binarized pair is expected to be 2-D.
    all_pairs = all_pairs.reshape(all_pairs.shape[0], all_pairs.shape[1],
                                  all_pairs.shape[2], 1)

    test = DataSet(all_pairs, all_labels)
    ds = collections.namedtuple('Datasets', ['test'])

    return ds(test=test)


def _fit_read_to_length(read, length, pad_char='A'):
    """Return ``read`` right-padded with ``pad_char`` or cut to ``length``."""
    shortfall = length - len(read)
    if shortfall > 0:
        read += pad_char * shortfall
    return read[0:length]
Пример #2
0
import csv
import networkx
from DataOperations import graphOperations
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import scipy.io
import community
from DataOperations import FastaIO
from collections import Counter

# Load the same FASTA file in two forms: an iterable of records (each with an
# ``id``) and a lookup indexed by read id (each entry exposing ``seq``).
dataset = FastaIO.read_fasta_file('../files/isoseq_flnc1.fasta')
dataset_dict = FastaIO.read_fasta_file_as_dict('../files/isoseq_flnc1.fasta')

# id_dict maps read id -> position in the file, id_list is the inverse
# (position -> read id), and G gets one node per read id.
id_dict, id_list = {}, []
G = networkx.Graph()

# Debug output: the sequence of one specific read, looked up by id.
print(dataset_dict[
    'm150803_002149_42161_c100745121910000001823165807071563_s1_p0/14/1140_57_CCS'
].seq)

ind = 0
for ind, item in enumerate(dataset):
    id_dict[item.id] = ind
    id_list.append(item.id)
    G.add_node(item.id)
# Leave ``ind`` holding the number of reads seen, as the manual counter did,
# in case later code reads it.
ind = len(id_list)

print(id_list)
# NOTE(review): despite its name this starts out as a list; kept as-is because
# code further down the file may depend on it.
my_dict = []
"""
mat = scipy.io.loadmat('Data50.mat')