def MakeReference(self):
    """Write the reference file from the barcode, target and reference lists.

    Nothing is done when ``self.strRefFile`` already exists.  Otherwise the
    three input files are read, row ``i`` of each file is combined into one
    record, and the records are written to ``self.strRefFile``.  A record is
    two lines: ``>BARCODE:TARGET`` followed by the reference sequence, all
    upper-cased.

    The output file is only created after every row has been validated.  If
    it were opened first, a failed check would leave an empty file behind and
    the existence test above would silently skip regeneration on later runs.

    Raises:
        AssertionError: the three inputs do not have the same number of rows.
    """
    if os.path.isfile(self.strRefFile):
        return

    with open(self.strBarcodeFile) as Barcode, \
            open(self.strTargetSeqFile) as Target, \
            open(self.strReferenceSeqFile) as Ref:
        listBarcode = Helper.RemoveNullAndBadKeyword(Barcode)
        listTarget = Helper.RemoveNullAndBadKeyword(Target)
        listRef = Helper.RemoveNullAndBadKeyword(Ref)

    ## defensive: raised explicitly so the check survives ``python -O``;
    ## the exception type is kept for callers that relied on the assert.
    if not (len(listBarcode) == len(listTarget) == len(listRef)):
        raise AssertionError('Barcode, Target and Reference must be a same row number.')

    listRecord = []
    for strBar, strTar, strRow in zip(listBarcode, listTarget, listRef):
        strBar = strBar.replace('\n', '').replace('\r', '').strip().upper()
        strTar = strTar.replace('\n', '').replace('\r', '').strip().upper()
        strRow = strRow.replace('\r', '').strip().upper()

        ## defensive: each sequence is checked against the file it came from.
        ## NOTE(review): assumes the first argument only identifies the input
        ## in the failure report -- confirm against Helper.CheckIntegrity.
        Helper.CheckIntegrity(self.strBarcodeFile, strBar)
        Helper.CheckIntegrity(self.strTargetSeqFile, strTar)

        listRecord.append('>' + strBar + ':' + strTar + '\n' + strRow + '\n')

    with open(self.strRefFile, 'w') as Output:
        Output.writelines(listRecord)
def MakeReference(self):
    """Join barcode rows and reference rows by sample name.

    Both input files are read as rows of the form ``SAMPLE:SEQUENCE``
    (upper-cased, line endings removed).  For every reference row whose
    sample name also appears in the barcode file, one tab-separated line
    ``SAMPLE, BARCODE, REFERENCE`` is written to ``self.strRefFile``.
    Reference rows without a matching barcode are reported through
    ``logging.error`` and skipped.

    Raises:
        AssertionError: the two inputs do not have the same number of rows.
        IndexError: a row contains no ``:`` separator.
    """
    with open(self.strBarcodeFile) as Barcode, \
            open(self.strReferenceSeqFile) as Ref, \
            open(self.strRefFile, 'w') as Output:

        listBarcode = Helper.RemoveNullAndBadKeyword(Barcode)
        listRef = Helper.RemoveNullAndBadKeyword(Ref)

        ## defensive: raised explicitly so the check survives ``python -O``;
        ## the exception type is kept for callers that relied on the assert.
        if len(listBarcode) != len(listRef):
            raise AssertionError('Barcode and Reference must be a same row number.')

        # sample name -> barcode sequence
        dictBarcode = {}
        for strBarcodeRow in listBarcode:
            strBarcodeRow = strBarcodeRow.replace('\n', '').replace('\r', '').upper()
            Helper.CheckIntegrity(self.strBarcodeFile, strBarcodeRow)  ## defensive

            listBarcodeField = strBarcodeRow.split(':')
            dictBarcode[listBarcodeField[0]] = listBarcodeField[1]

        for strRefRow in listRef:
            strRefRow = strRefRow.replace('\n', '').replace('\r', '').upper()
            ## defensive
            ## NOTE(review): assumes the first argument only identifies the
            ## input in the failure report -- confirm against Helper.CheckIntegrity.
            Helper.CheckIntegrity(self.strReferenceSeqFile, strRefRow)

            listRefField = strRefRow.split(':')
            strRefSample = listRefField[0]
            strRef = listRefField[1]

            # Only the dictionary lookup can raise KeyError; keep the write
            # outside the try so unrelated errors are not misreported.
            try:
                sBarcode = dictBarcode[strRefSample]
            except KeyError:
                logging.error('no matching')
                # The first positional argument of logging.error is a format
                # string; passing the sample name there dropped the record.
                logging.error('%s %s', strRefSample, strRef)
            else:
                Output.write('%s\t%s\t%s\n' % (strRefSample, sBarcode, strRef))
def Main():
    """Run the BaseEdit frequency analysis for every sample of a project.

    Steps, in order: parse the command-line options, create the default,
    input and output folders, configure logging (log file plus stdout), read
    the sample list from the project file, and run the per-sample pipeline.
    Start and end times are printed to stdout.
    """
    print('BaseEdit program start: %s' % datetime.now())

    sCmd = (
        "BaseEdit frequency analyzer\n\n./Run_BaseEdit_freq.py -t 15 -w 16-48 --indel_check_pos 39-40 --target_ref_alt A,T --PAM_seq NGG --PAM_pos 43-45 --Guide_pos 23-42"
        " --gap_open -10 --gap_extend 1\n\n"
        "The sequence position is the one base position (start:1)\n"
        "1: Barcode\n"
        "2: Base target window (end pos = PAM pos +3)\n"
        "3: Indel check pos\n"
        "4: PAM pos\n"
        "5: Guide pos (without PAM)\n\n"
        "TATCTCTATCAGCACACAAGCATGCAATCACCTTGGGTCCAAAGGTCC\n"
        "<------1------><----------------2--------------->\n"
        " <3> <4> \n"
        " <---------5--------> \n\n")

    # (option strings, keyword attributes), registered in this order.
    listOptionSpec = [
        (("-t", "--thread"),
         dict(default="1", type="int", dest="multicore",
              help="multiprocessing number")),
        (('--gap_open',),
         dict(default='-10', type='float', dest='gap_open',
              help='gap open: -100~0')),
        (('--gap_extend',),
         dict(default='1', type='float', dest='gap_extend',
              help='gap extend: 1~100')),
        (("-w", "--target_window"),
         dict(type="str", dest="target_window",
              help="a window size for target sequence : 20-48")),
        (("--indel_check_pos",),
         dict(type="str", dest="indel_check_pos",
              help="indel check position to filter : 39-40; insertion 39, deletion 39 & 40")),
        (("--target_ref_alt",),
         dict(type="str", dest="target_ref_alt",
              help="Ref 'A' is changed to Alt 'T': A,T")),
        (("--PAM_seq",),
         dict(type="str", dest="PAM_seq",
              help="PAM sequence: NGG, NGC ...")),
        (("--PAM_pos",),
         dict(type="str", dest="PAM_pos",
              help="PAM position range in the reference seqeunce : 43-45")),
        (("--Guide_pos",),
         dict(type="str", dest="Guide_pos",
              help="Guide position range in the reference seqeunce : 23-42")),
        (('--python',),
         dict(dest='python',
              help='The python path including the CRISPResso2')),
        (('--user',),
         dict(dest='user_name',
              help='The user name with no space')),
        (('--project',),
         dict(dest='project_name',
              help='The project name with no space')),
    ]

    parser = OptionParser(sCmd)
    for tupFlags, dictAttr in listOptionSpec:
        parser.add_option(*tupFlags, **dictAttr)

    options, args = parser.parse_args()

    # Folder layout for this user/project, named after this script.
    InstInitFolder = InitialFolder(options.user_name,
                                   options.project_name,
                                   os.path.basename(__file__))
    InstInitFolder.MakeDefaultFolder()
    InstInitFolder.MakeInputFolder()
    InstInitFolder.MakeOutputFolder()

    # Log to the project log file and mirror every record on stdout.
    logging.basicConfig(
        format='%(process)d %(levelname)s %(asctime)s : %(message)s',
        level=logging.DEBUG,
        filename=InstInitFolder.strLogPath,
        filemode='a')
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    logging.info('Program start')
    if options.multicore > 15:
        logging.warning('Optimal treads <= 15')
    logging.info(str(options))

    with open(InstInitFolder.strProjectFile) as Sample_list:
        listSamples = Helper.RemoveNullAndBadKeyword(Sample_list)

    strInputProject = './Input/{user}/Query/{project}'.format(
        user=options.user_name, project=options.project_name)

    @CheckProcessedFiles
    def RunPipeline(**kwargs):
        """Process each usable row of the sample list, one sample at a time."""
        for strLine in listSamples:
            # Rows starting with '#' are skipped.
            if strLine[0] == '#':
                continue

            tupSampleInfo = Helper.SplitSampleInfo(strLine)
            if not tupSampleInfo:
                continue
            strSample, strRef, strExpCtrl = tupSampleInfo

            InstBaseEdit = clsBaseEditRunner(strSample, strRef, options,
                                             InstInitFolder)
            InstBaseEdit.MakeReference()
            listCmd = InstBaseEdit.MakeIndelSearcherCmd()
            RunMulticore(listCmd, options.multicore)  ## from CoreSystem.py
            InstBaseEdit.MakeMergeTarget()
            InstBaseEdit.CopyToAllResultFolder()

    RunPipeline(InstInitFolder=InstInitFolder,
                strInputProject=strInputProject,
                listSamples=listSamples,
                logging=logging)

    print('BaseEdit program end: %s' % datetime.now())
def Main():
    """Run the indel search pipeline for every sample of a project.

    Steps, in order: parse the command-line options, create the default,
    input and output folders, configure logging (log file plus stdout), read
    the sample list from the project file, and run the per-sample pipeline.
    After all samples are processed, the collected group labels decide
    whether indel normalization is run.
    """
    strUsage = 'Indel search program for CRISPR CAS9 & CPF1\n<All default option> python2.7 Run_indel_searcher.py --pam_type Cas9 --pam_pos Forward'
    parser = OptionParser(strUsage)

    parser.add_option(
        '-t', '--thread', default='1', type='int', dest='multicore',
        help='multiprocessing number, recommendation:t<16')
    parser.add_option(
        '-c', '--chunk_number', default='400000', type='int', dest='chunk_number',
        help='split FASTQ, must be multiples of 4. file size < 1G recommendation:40000, size > 1G recommendation:400000')
    parser.add_option(
        '-q', '--base_quality', default='20', dest='base_quality',
        help='NGS read base quality')
    parser.add_option(
        '--gap_open', default='-10', type='float', dest='gap_open',
        help='gap open: -100~0')
    parser.add_option(
        '--gap_extend', default='1', type='float', dest='gap_extend',
        help='gap extend: 1~100')
    parser.add_option(
        '-i', '--insertion_window', default='4', type='int', dest='insertion_window',
        help='a window size for insertions')
    parser.add_option(
        '-d', '--deletion_window', default='4', type='int', dest='deletion_window',
        help='a window size for deletions')
    parser.add_option(
        '--pam_type', dest='pam_type',
        help='PAM type: Cas9 Cpf1')
    parser.add_option(
        '--pam_pos', dest='pam_pos',
        help='PAM position: Forward Reverse')
    parser.add_option(
        '--python', dest='python',
        help='The python path including the CRISPResso2')
    parser.add_option(
        '--user', dest='user_name',
        help='The user name with no space')
    parser.add_option(
        '--project', dest='project_name',
        help='The project name with no space')
    parser.add_option(
        '--pickle', dest='pickle', default='False',
        help='Dont remove the pickles in the tmp folder : True, False')
    parser.add_option(
        '--split', dest='split', default='False',
        help='Dont remove the split files in the input folder : True, False')
    parser.add_option(
        '--classfied_FASTQ', dest='class_fastq', default='True',
        help='Dont remove the ClassfiedFASTQ in the tmp folder : True, False')
    parser.add_option(
        '--ednafull', dest='ednafull',
        help='The nucleotide alignment matrix')

    options, args = parser.parse_args()

    # Folder layout for this user/project, named after this script.
    InstInitFolder = InitialFolder(options.user_name,
                                   options.project_name,
                                   os.path.basename(__file__))
    InstInitFolder.MakeDefaultFolder()
    InstInitFolder.MakeInputFolder()
    InstInitFolder.MakeOutputFolder()

    # Log to the project log file and mirror every record on stdout.
    logging.basicConfig(
        format='%(process)d %(levelname)s %(asctime)s : %(message)s',
        level=logging.DEBUG,
        filename=InstInitFolder.strLogPath,
        filemode='a')
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    logging.info('Program start')
    if options.multicore > 15:
        logging.warning('Optimal treads <= 15')
    logging.info(str(options))

    # Example of InstInitFolder.strProjectFile:
    #   ./User/Nahye/2019_Nahye_Cas9D7_samples.txt
    # with rows such as
    #   190819_Nahye_12K_D4_D0_1-Cas9D7        Cas9D7  Ctrl
    #   190819_Nahye_12K_D4_eCas9_Rep1-Cas9D7  Cas9D7  Exp
    #   190819_Nahye_12K_D4_eCas9_Rep2-Cas9D7  Cas9D7  Exp
    #   190819_Nahye_12K_D4_evo_Rep1-Cas9D7    Cas9D7  Exp
    with open(InstInitFolder.strProjectFile) as Sample_list:
        listSamples = Helper.RemoveNullAndBadKeyword(Sample_list)

    intProjectNumInTxt = len(listSamples)
    strInputProject = './Input/{user}/FASTQ/{project}'.format(
        user=options.user_name, project=options.project_name)

    @CheckProcessedFiles
    def RunPipeline(**kwargs):
        """Process every sample row, then normalize if both groups are present."""
        # Group labels (third column of the sample list) seen so far.
        setGroup = set()

        for strLine in listSamples:
            # Expected tuple: (sample name, reference name, Ctrl/Exp/"").
            tupSampleInfo = Helper.SplitSampleInfo(strLine)
            if not tupSampleInfo:
                continue

            strSample, strRef, strExpCtrl = tupSampleInfo
            setGroup.add(strExpCtrl)

            # options carries, among others, the PAM type and PAM position.
            InstRunner = clsIndelSearcherRunner(strSample, strRef, options,
                                                InstInitFolder)

            logging.info('SplitFile')
            InstRunner.SplitFile()

            logging.info('MakeReference')
            InstRunner.MakeReference()

            # Commands for Indel_searcher_crispresso_hash.py
            logging.info('MakeIndelSearcherCmd')
            listCmd = InstRunner.MakeIndelSearcherCmd()

            logging.info('RunMulticore')
            RunMulticore(listCmd, options.multicore)  ## from CoreSystem.py

            logging.info('MakeOutput')
            InstRunner.MakeOutput()

            logging.info('RunIndelFreqCalculator')
            InstRunner.RunIndelFreqCalculator()

        # Evaluated once, after the loop: setGroup accumulates across samples.
        if setGroup == {'EXP', 'CTRL'}:
            InstRunner.IndelNormalization()
            return

        # No label, or only blank labels: nothing to normalize.
        if setGroup in (set(), {''}, {' '}):
            return

        logging.error('The group category is not appropriate. : %s' % setGroup)
        logging.error('Please make sure your project file is correct.')
        logging.error('The group category must be Exp or Ctrl')
        raise Exception

    RunPipeline(InstInitFolder=InstInitFolder,
                strInputProject=strInputProject,
                intProjectNumInTxt=intProjectNumInTxt,
                listSamples=listSamples,
                logging=logging)

    logging.info('Program end')