Пример #1
0
def main():
    """Count read IDs in the rpoB FASTA and pass the twice-seen ones on.

    Steps:
      1. Scan the hard-coded input FASTA for header lines matching the
         ``M00704:49:000000000-AFW6D...`` read-ID pattern.
      2. Save the per-ID occurrence counts to ``read_counts.csv``.
      3. Append the ``_2:N:0:5`` tag to every ID that occurs exactly twice
         and call ``Subsampling.exclude`` with the re-opened FASTA, that
         tagged list and the ``ICW_rpoB_nopaired.fasta`` output handle.

    If the input FASTA cannot be opened, "no such file!" is printed and the
    function returns without creating or truncating any output file.
    """
    fasta_path = "/Users/Xin/Desktop/IC_project/output/rpoB_output/ICW_rpoB.fasta"
    counts_path = "read_counts.csv"
    output_path = "ICW_rpoB_nopaired.fasta"
    reads_tag = '_2:N:0:5'
    # Raw string: same pattern as before, without invalid-escape warnings.
    header_re = re.compile(r"^\>(M00704:49:000000000-AFW6D[\d\:]+)")

    try:
        fasta = open(fasta_path, 'r')
    except IOError:
        print("no such file!")
        return  # nothing to process; do not fall through to an unbound handle
    print("Open success!")

    with fasta:
        matches = (header_re.search(line) for line in fasta)
        readslist = [m.group(1) for m in matches if m]
    print("Number of the reads in the list is ", len(readslist))

    df_reads = pd.DataFrame(readslist, columns=['read'])
    read_counts = df_reads['read'].value_counts()
    read_counts.to_csv(counts_path)

    # Select from the in-memory counts instead of re-parsing the CSV: the
    # file was written to a relative path but read back from an absolute
    # one, and a header row in it would break the integer conversion.
    twice_seen = read_counts[read_counts == 2].index
    readslist2 = [read + reads_tag for read in twice_seen]

    # Second pass over the FASTA; both handles are closed when done.
    with open(fasta_path, 'r') as fasta, open(output_path, 'w') as output:
        subset = Subsampling()
        subset.exclude(fasta, readslist2, output)
def main():
    """Prompt for three paths and run ``Subsampling.exclude`` on them.

    The user is asked for the reads collection file, the filter CSV and the
    output file. The first column (``reads``) of every row of the filter
    CSV is collected into a list, which is passed to ``Subsampling.exclude``
    together with the open reads file and the open output file.

    If the reads collection cannot be opened, "no such file!" is printed
    and the function returns without creating or truncating the output
    file. A missing filter CSV still raises, as before.
    """
    input1dir = input('Reads collections: ')
    input2dir = input('Reads filter: ')
    outputdir = input('Output file: ')

    try:
        all_reads = open(input1dir, 'r')
    except IOError:
        print("no such file!")
        return  # stop here instead of crashing on an unbound handle later

    with all_reads:
        with open(input2dir, 'r') as inputfile:
            rows = csv.DictReader(inputfile, delimiter=",",
                                  fieldnames=['reads', 'genus'])
            readslist = [row['reads'] for row in rows]  # all the reads
        print("total reads in filter list:", len(readslist))

        # Open the output only once every input is known to be readable.
        with open(outputdir, 'w') as output:
            subset = Subsampling()
            subset.exclude(all_reads, readslist, output)
def main(argv):
    """Run ``Subsampling.exclude`` using paths taken from the command line.

    Args:
        argv: Argument vector in ``sys.argv`` layout; ``argv[1]`` is the
            reads collection file, ``argv[2]`` the filter CSV and
            ``argv[3]`` the output file.

    The first column (``reads``) of every row of the filter CSV is
    collected into a list, which is passed to ``Subsampling.exclude``
    together with the open reads file and the open output file.

    If the argument count is wrong or the reads collection cannot be
    opened, a message is printed and the function returns without creating
    or truncating the output file. A missing filter CSV still raises.
    """
    if len(argv[1:]) != 3:
        print("Three arguements are needed!!")
        return  # the paths are undefined; continuing would only crash
    input1dir, input2dir, outputdir = argv[1:]

    try:
        all_reads = open(input1dir, 'r')
    except IOError:
        print("no such file!")
        return

    with all_reads:
        with open(input2dir, 'r') as inputfile:
            rows = csv.DictReader(inputfile, delimiter=",",
                                  fieldnames=['reads', 'genus'])
            readslist = [row['reads'] for row in rows]  # all the reads
        print("total reads in filter list:", len(readslist))

        # Open the output only once every input is known to be readable.
        with open(outputdir, 'w') as output:
            subset = Subsampling()
            subset.exclude(all_reads, readslist, output)