Exemplo n.º 1
0
    def init_analysis(self):
        """Populate ``self.par`` with the inputs required by the analysis.

        Steps, in order:
          1. If ``self.par`` has a ``file_annotation`` entry, load the
             annotation table (``annot_df``) and the protein-peptide
             associations (``dict_pro_pep``); for annotation paths that
             contain ``'VirScan'`` also store ``dependent_pep``.
          2. Call ``build_bowtie_index()`` on the aligner.
          3. Replace ``self.par`` with the result of ``export_sample_info()``
             and, when ``group1`` has an ``'NC'`` entry, store ``NC_samples``
             and ``phip_samples`` (all sample names minus the negative
             controls).
          4. Read the reference FASTA file into ``self.par['ref_dict']``.
        """
        #1: read annotation file
        if 'file_annotation' in self.par:
            annotation_path = self.par['file_annotation']
            self.par['annot_df'] = myDataframe.basic().annot_df(
                annotation_path)
            #genome annotation: associations of protein-peptides
            self.par['dict_pro_pep'] = myCommon.basic(
                self.par).protein_peptides()
            #virus only
            if 'VirScan' in annotation_path:
                #dependent peptides: two peptides sharing at least 7 aa
                self.par['dependent_pep'] = myCommon.basic(
                    self.par).taxon_dependent_peptides()

        #2: check bowtie or build bowtie index
        myAlign.alignment(self.par).build_bowtie_index()

        #3: sample info
        self.par = myParallel.samples(self.par).export_sample_info()
        #samples of negative controls
        group1 = self.par['group1']
        if 'NC' in group1:
            nc_samples = group1['NC'].split(',')
            self.par['NC_samples'] = nc_samples
            # Same members as set(sample_names) - set(NC_samples), but kept in
            # the order of sample_names so the list is reproducible run to run.
            nc_lookup = set(nc_samples)
            self.par['phip_samples'] = [
                name for name in dict.fromkeys(self.par['sample_names'])
                if name not in nc_lookup
            ]
            print('\nNumber of negative Controls (Beads only): ',
                  len(nc_samples))
            # Report the PhIP samples only; the negative controls are
            # counted on the line above.
            print('Number of PhIP samples: ',
                  len(self.par['phip_samples']))

        #read reference sequence file (*.fa)
        ref_dict, ref_ids = myGenome.genome(self.par['file_ref_fa']).read_fa()
        self.par['ref_dict'] = ref_dict
Exemplo n.º 2
0
        #current dir
        par['dir_bin'] = os.path.dirname(os.path.realpath(__file__)) + '/'
        par['dir_home'] = os.path.abspath(
            os.path.join(par['dir_bin'], os.pardir)) + '/'
        print('Home directory of phip pipsline: ', par['dir_home'])

        #libraries. default is human and virus
        for lib in par['ref_libs']:
            par['dir_result'] = myIO.dir_os(
                os.path.abspath(par['out'] + '_' + lib)).create_dir()
            if os.path.isdir(par['dir_result']):
                #1: sample_info.csv
                par['file_sample_info'] = par['dir_result'] + 'sample_info.csv'
                print('The sample information file: ', par['file_sample_info'])
                #read sample_info.csv
                myParallel.samples(par).export_sample_info()
                #2: copy template variables.txt into lib folder
                template_file = '{}variables_{}.txt'.format(
                    par['dir_bin'], lib)
                var_file = '{}variables.txt'.format(par['dir_result'])
                print('Save {} and then update it.'.format(var_file))
                shutil.copy(template_file, var_file)
                #update parameters of variables.txt
                refresh = {
                    'dir_home': par['dir_home'],
                    'dir_result': par['dir_result']
                }
                refresh['dir_raw_data'] = par['dir_raw_data'] if par[
                    'dir_raw'] == 'NA' else par['dir_raw']
                myIO.file_os(var_file, '=').line_replace(refresh)
            else:
Exemplo n.º 3
0
def par_command(argv):
    """Parse the command line of the FASTQ processing script.

    Args:
        argv: full argument vector; ``argv[0]`` is only used in the usage
            text, the options are read from ``argv[1:]``.

    Returns:
        dict of parameters. Paths default to ``'NA'``, ``ref_libs`` to the
        first two known libraries, and ``multiplexing_mode`` is incremented
        once for each ``-f``/``-i``/``-b`` option seen.

    Prints the usage text and exits with status 2 on ``-h``/``--help``, on an
    unknown option, or on a malformed ``-l``/``-t`` value.
    """
    phip_libs = ['human', 'virus', 'PE', 'allergome', 'LISH']
    #initiate parameters
    par = {
        'fq_file': 'NA',
        'barcode_file': 'NA',
        'index_file': 'NA',
        'I1_file': 'NA',
        'I2_file': 'NA',
        'dir_raw_data': 'NA',
        'dir_raw': 'NA',
        'dir_in': 'NA',
        'out': 'NA',
        'dir_result': 'NA',
        'multiplexing_mode': 0,
        'ref_libs': phip_libs[:2],
        'seq_start': 0,
        'seq_end': 0,
        'seq_min': 10,
        'seq_max': 0,
    }
    usage_out = 'Usage:\n' + argv[0] + ' [options] -o <raw data directory> ' + \
                '-f <fastq file> -i <index file> -b <barcode file>\n'

    def int_pair(text):
        """Parse '<int>:<int>' into two non-negative ints, or exit(2)."""
        try:
            first, second = text.split(':')
            return abs(int(first)), abs(int(second))
        except ValueError:
            print(usage_out)
            sys.exit(2)

    # getopt only passes a value to a long option declared with a trailing
    # '='. Both the documented spellings (--raw_data, --fixed_len,
    # --all_raw_data) and the previously declared ones (--dir_raw_data,
    # --fixed_end5, --dir_raw) are accepted for the same short option.
    long_opts = [
        'help', 'fastq_file=', 'index_file=', 'barcode_file=',
        'raw_data=', 'dir_raw_data=', 'trim_len=',
        'fixed_len=', 'fixed_end5=', 'dir_in=', 'out=',
        'I1_file=', 'I2_file=', 'all_raw_data=', 'dir_raw=',
        'ref_library=',
    ]
    try:
        opts, _ = getopt.getopt(argv[1:], "hf:i:b:o:t:l:x:y:m:n:z:c:",
                                long_opts)
    except getopt.GetoptError:
        print(usage_out)
        sys.exit(2)

    #get parameters
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(usage_out)
            #common usage
            # python Process_FASTQ.py -f * -i * -b * -o * -y *"
            print("-h --help\tUsage information of this script.")
            print(
                "-t --trim_len\tTrim sequences from the 5'-end or 3'-end of reads (Optional)"
            )
            print(
                "-f --fastq_file\tFastq file determined by a sequencing analyzer."
            )
            print("-i --index_file\tIndex file matched with the fastq file.")
            print(
                "-b --barcode_file\tBarcode file matched with the index file.")
            print(
                "-o --raw_data\tDirectory storing demulitplexed *fastq files.")
            print(
                "-y --out\tDirectory storing sample_info.csv and variables.txt."
            )
            print(
                "-c --ref_library\tReference libraries can be one of {}, default is {}."
                .format(phip_libs, phip_libs[:2]))
            sys.exit(2)
        elif opt in ("-f", "--fastq_file"):
            par['fq_file'] = os.path.abspath(arg)
            par['multiplexing_mode'] += 1
        elif opt in ("-i", "--index_file"):
            par['index_file'] = os.path.abspath(arg)
            par['multiplexing_mode'] += 1
        elif opt in ("-b", "--barcode_file"):
            par['barcode_file'] = os.path.abspath(arg)
            par['multiplexing_mode'] += 1
        elif opt in ("-o", "--raw_data", "--dir_raw_data"):
            par['dir_raw_data'] = myIO.dir_os(
                os.path.abspath(arg)).create_dir()
        elif opt in ("-z", "--all_raw_data", "--dir_raw"):
            # only for one more sets of fastq splits
            par['dir_raw'] = myIO.dir_os(os.path.abspath(arg)).create_dir()
        elif opt in ('-x', "--dir_in"):
            par['dir_in'] = os.path.abspath(arg)
            par['fq_files'] = myParallel.samples({}).seek_fq(par['dir_in'])
        elif opt in ('-y', "--out"):
            par['out'] = arg
        elif opt in ("-l", "--fixed_len", "--fixed_end5"):
            # value format is <min>:<max>
            par['seq_min'], par['seq_max'] = int_pair(arg)
        elif opt in ("-t", "--trim_len"):
            # value format is <5'-trim>:<3'-trim>; the 3' trim is stored as
            # a negative offset
            trim_end5, trim_end3 = int_pair(arg)
            par['seq_start'] = trim_end5
            par['seq_end'] = -trim_end3
        elif opt in ("-m", "--I1_file"):
            par['I1_file'] = os.path.abspath(arg)
        elif opt in ("-n", "--I2_file"):
            par['I2_file'] = os.path.abspath(arg)
        elif opt in ("-c", "--ref_library"):
            # names that are not known libraries are dropped
            par['ref_libs'] = [x for x in arg.split(',') if x in phip_libs]
    # a positive maximum length overrides the 3'-trim offset
    if par['seq_max'] > 0:
        par['seq_end'] = par['seq_max']
    #
    myDict.basic(par).print_dict()
    return par
def par_command(argv):
    """Parse the command line of the FASTQ processing script.

    Args:
        argv: full argument vector; ``argv[0]`` is only used in the usage
            text, the options are read from ``argv[1:]``.

    Returns:
        dict of parameters. Paths default to ``'NA'``, ``ref_libs`` to the
        first two known libraries, ``seq_start`` to 0, ``seq_min`` to 10 and
        ``seq_end`` to ``None``. When ``-r`` is given, ``seq_len`` is stored
        and ``seq_end`` becomes ``seq_start + seq_len``.

    Prints the usage text and exits with status 2 on ``-h``/``--help``, on an
    unknown option, or on a non-integer ``-l``/``-t``/``-r`` value.
    """
    phip_libs = ['human', 'virus', 'allergome', 'provirome', 'toxome',
                 'mouse', 'PE', 'zika', 'arbo', 'LISH']
    #initiate parameter
    na_str = 'fq_file,barcode_file,index_file,I1_file,I2_file,dir_raw_data,dir_in,out,dir_result'
    par = dict.fromkeys(na_str.split(','), 'NA')
    par.update({'ref_libs': phip_libs[:2], 'seq_start': 0,
                'seq_end': None, 'seq_min': 10})
    usage_out = 'Usage:\n' + argv[0] + ' [options] -o <raw data directory> ' + \
                '-f <fastq file> -i <index file> -b <barcode file>\n'

    def non_negative_int(text):
        """Return abs(int(text)); print usage and exit(2) if not an int."""
        try:
            return abs(int(text))
        except ValueError:
            print(usage_out)
            sys.exit(2)

    # getopt only passes a value to a long option declared with a trailing
    # '='. Both the spellings tested by the handlers (--raw_data, --min_len,
    # --fixed_len) and the previously declared ones (--dir_raw_data,
    # --fixed_end5, --len_trim) are accepted for the same short option.
    long_opts = [
        'help', 'fastq_file=', 'index_file=', 'barcode_file=',
        'raw_data=', 'dir_raw_data=', 'trim_5end=',
        'fixed_len=', 'len_trim=', 'min_len=', 'fixed_end5=',
        'dir_in=', 'out=', 'I1_file=', 'I2_file=', 'ref_library=',
    ]
    try:
        opts, _ = getopt.getopt(argv[1:], "hf:i:b:o:t:r:l:x:y:m:n:c:",
                                long_opts)
    except getopt.GetoptError:
        print(usage_out)
        sys.exit(2)

    #get parameters
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            print(usage_out)
            #common usage
            # python Process_FASTQ.py -f * -i * -b * -o * -y *"
            print("-h --help\tUsage information of this script.")
            print("-t --trim_5end\tNumber of nt trimmed from the 5'-end of reads (Optional)")
            print("-r --fixed_len\tLength of nt kept after trimming (Optional)")
            print("-l --min_len\tMinimum read length; shorter reads are discarded (Optional)")
            print("-f --fastq_file\tFastq file determined by a sequencing analyzer.")
            print("-i --index_file\tIndex file matched with the fastq file.")
            print("-b --barcode_file\tBarcode file matched with the index file.")
            print("-o --raw_data\tDirectory storing demulitplexed *fastq files.")
            print("-y --out\tDirectory storing sample_info.csv and variables.txt.")
            print("-c --ref_library\tReference libraries can be any of {}, default is {}.".format(phip_libs, phip_libs[:2]))
            sys.exit(2)
        elif opt in ("-f", "--fastq_file"):
            par['fq_file'] = os.path.abspath(arg)
        elif opt in ("-i", "--index_file"):
            par['index_file'] = os.path.abspath(arg)
        elif opt in ("-b", "--barcode_file"):
            par['barcode_file'] = os.path.abspath(arg)
        elif opt in ("-o", "--raw_data", "--dir_raw_data"):
            par['dir_raw_data'] = myIO.dir_os(os.path.abspath(arg)).create_dir()
        elif opt in ('-x', "--dir_in"):
            par['dir_in'] = os.path.abspath(arg)
            par['fq_files'] = myParallel.samples({}).seek_fq(par['dir_in'])
        elif opt in ('-y', "--out"):
            par['out'] = arg
        elif opt in ("-l", "--min_len", "--fixed_end5"):
            # discard shorter reads due to poor sequencing
            par['seq_min'] = non_negative_int(arg)
        elif opt in ("-t", "--trim_5end"):
            #trim_end5: length of nt from the 5-end
            par['seq_start'] = non_negative_int(arg)
        elif opt in ("-r", "--fixed_len", "--len_trim"):
            #len_trim: length of nt after trimming 5-end and 3-end
            par['seq_len'] = non_negative_int(arg)
        elif opt in ("-m", "--I1_file"):
            par['I1_file'] = os.path.abspath(arg)
        elif opt in ("-n", "--I2_file"):
            par['I2_file'] = os.path.abspath(arg)
        elif opt in ("-c", "--ref_library"):
            # names that are not known libraries are dropped
            par['ref_libs'] = [x for x in arg.split(',') if x in phip_libs]
    # Derive the end position only after every option has been read, so the
    # result does not depend on whether -t appears before or after -r.
    if 'seq_len' in par:
        par['seq_end'] = par['seq_start'] + par['seq_len']
    #
    myDict.basic(par).print_dict()
    return par