Exemplo n.º 1
0
import argparse
import os

import IsolateClone
import ParseTable
import ClusterClone
import translator
import AnnotateProtein
import WriteFast
import ReadIgBlastn
# Command-line interface. Three options are declared: -d (input directory),
# -s (species) and -c (chain); only args.directory is read in this section.
parser = argparse.ArgumentParser(
    prog='cat all.xls files',
    description="python PostAnalysis.py -d path -s species -c chain",
    epilog='',
)
# NOTE(review): the default points at one developer's home directory, so the
# script only works without -d on that machine -- confirm whether -d should be
# required instead.
parser.add_argument(
    '-d', '--directory',
    action='store',
    default='/home/zhaiqi1/NGS/mycode/Ab_NGS_4/test/results',
    help='input file directory',
)
parser.add_argument(
    '-s', '--species',
    default="mouse",
    help='mouse, rabbit or human',
)
# NOTE(review): the help text "folder" looks like a copy-paste leftover for an
# option named --chain whose default is "H" -- confirm the intended wording.
parser.add_argument(
    '-c', '--chain',
    default="H",
    help="folder",
)

# Parsed at module level: running (or importing) this file reads sys.argv.
args = parser.parse_args()

############### read the table from the Fastq2fastA ################
# ParseTable.ParseTable(directory) returns a pair. The first item is the
# collection of retained records (only its len() is used here); the second is
# reported below as the total number of sequences in the table.
raw_AbDict, count_seq = ParseTable.ParseTable(args.directory)

# A single-argument print(...) produces the same output as the Python 2 print
# statement and also parses on Python 3, matching the call form already used
# further down in this script.
print("There are total %s sequences in the table." % str(count_seq))

# Build the "kept" line once so the console and Summary.txt cannot drift apart.
# The text carries its own "\n", so print() leaves a blank line after it.
kept_line = "Total number of sequences meets the keywords requirement\t:%s\n" % str(len(raw_AbDict))
print(kept_line)

# Summary.txt is created (or truncated) inside the input directory. The handle
# is intentionally left open here because the clustering step below writes to
# it as well.
Outfile_summary = open(os.path.join(args.directory, "Summary.txt"), 'w')
Outfile_summary.write(kept_line)
############################## cluster the clone based on the keywords_3, and then correct the pcr error ########
# Record fields passed to IsolateClone.identifyClone as the grouping keys.
# NOTE(review): the PCR-error correction mentioned in the banner is not
# performed in this section -- presumably it happens further down; confirm.
keywords_3 = ['CDR3-PRO', 'RID', 'DNAlen']
groupDict = IsolateClone.identifyClone(raw_AbDict, keywords_3)

# Write the grouping to uniqueclone.txt in the input directory.
Outfile_keywords3 = os.path.join(args.directory, "uniqueclone.txt")
IsolateClone.writeCount(groupDict, Outfile_keywords3, keywords_3)  # this output has not been corrected

# Derive the reported key list from keywords_3 so the message always names the
# fields actually used for grouping (the previous hard-coded text listed
# CDR3-DNA and GERMLINE-V, which are not in keywords_3). The same line goes to
# the summary file and the console; it ends right after "\n" so the next
# summary line is not indented by a stray space.
clone_line = "There are  DNA sequences by same %s : %s \n" % (
    ", ".join(keywords_3),
    str(len(groupDict)),
)
Outfile_summary.write(clone_line)
print(clone_line)