def __call__(self, parser, namespace, values, option_string=None):
    """Process an option's ``values``, branching on ``len(values)``.

    The signature matches ``argparse.Action.__call__``; ``parser``,
    ``namespace`` and ``option_string`` are accepted but not used here.

    * Two values: ``int(values[1])`` is stored in the module-level
      ``snprun`` mapping under the key ``values[0]``.
    * One value: ``int(values)`` is evaluated.
    * Any other length: ``funcs.read_chrstr(values)`` is evaluated.
    """
    global snprun

    count = len(values)

    if count == 2:
        snprun[values[0]] = int(values[1])
        return

    if count == 1:
        # NOTE(review): the converted number is bound to a local name only,
        # so it is discarded when the method returns. int() is also applied
        # to ``values`` itself rather than ``values[0]`` -- confirm intent.
        _number = int(values)
        return

    # NOTE(review): the parsed chromosome list is likewise kept in a local
    # only (no ``global`` declaration covers it) -- confirm intent.
    _chromosomes = funcs.read_chrstr(values)
def update_chrlist(fixed, linechrs, chrlist):
    """Merge the requested chromosomes with those already stored for a run.

    Args:
        fixed: When true, chromosomes that appear both in ``linechrs`` and
            in ``chrlist`` stay in ``chrlist``. Otherwise each of them is
            reported with a warning and dropped from ``chrlist``.
        linechrs: Chromosome description stored for the run; it is parsed
            with ``funcs.read_chrstr`` (presumably into a list of
            chromosome numbers -- the warning formats them with ``%d``).
        chrlist: List of chromosomes requested for the current invocation.
            It is modified in place.

    Returns:
        ``funcs.make_chrstr`` applied to the sorted union of the stored and
        the originally requested chromosomes.

    Raises:
        exceptions.WrongValueError: If ``chrlist`` is empty once the already
            analysed chromosomes have been dropped.
    """
    analysed = funcs.read_chrstr(linechrs)
    requested = set(chrlist)

    if not fixed:
        # Only chromosomes present in BOTH inputs count as "already
        # analysed". A value repeated inside a single input must neither
        # trigger the warning nor make list.remove() raise ValueError.
        skipped = set()
        for key in analysed:
            if key in requested and key not in skipped:
                skipped.add(key)
                print(
                    "WARNING: chromosome %d has already been analysed in this run, so it was omitted. "
                    % key +
                    "If you want to analyse it anyway, please add '-fixed' attribute"
                )
        if skipped:
            # Slice assignment keeps the caller's list object and removes
            # every occurrence, not just the first one.
            chrlist[:] = [c for c in chrlist if c not in skipped]

    if not chrlist:
        raise exceptions.WrongValueError(
            'chrlist', chrlist, 'There are no chromosomes to analyze!!!')

    # The stored description covers everything analysed so far plus the
    # original request (dropped chromosomes are in ``analysed`` anyway).
    return funcs.make_chrstr(sorted(set(analysed) | requested))
'%d\t%s\t%s\t%s\t%d\n' % (runs[setname], setname, ', '.join([ k for k in dataset.keys() if k != setname ]), funcs.make_chrstr(chrlist), shared_snps)) return shared_snps dataset = OrderedDict() chrlist = [i for i in range(1, 24)] fixed = False run = None for q in range(len(sys.argv)): if sys.argv[q] == '-dataset': if sys.argv[q + 2][0] in ['.', '~', '/']: dataset[sys.argv[q + 1]] = sys.argv[q + 2] else: raise exceptions.NoParameterError( 'directory', 'After name of data set should appear a directory to folder with it.' ) if sys.argv[q] == '-chr': chrlist = funcs.read_chrstr(sys.argv[q + 1]) if sys.argv[q] == '-run': run = int(sys.argv[q + 1]) if sys.argv[q] == '-fixed': fixed = True found = find_shared(dataset, chrlist, fixed, run) print('%d shared SNPs found!' % found)
def _parse_run_numbers(field, sets_order):
    """Map data set names to run numbers from a '+'-separated field.

    Returns ``None`` when the field is the placeholder ``'-'``.
    """
    if field == '-':
        return None
    return OrderedDict(zip(sets_order, map(int, field.split('+'))))


def read_boruta_params(chrlist, continuation, dataset, fixed, outdir, pat, run):
    """Load the parameters recorded for boruta run ``run``.

    The row is looked up in ``<outdir>boruta_runs.txt`` (tab-separated) by
    its leading run number. Columns read from the matching row: 1 = data set
    names joined by ``+``, 3 = patient subset name or ``None``, 4 = patient
    subset run numbers or ``-``, 5 = SNP subset name or ``None``, 6 = SNP
    subset run numbers or ``-``, 7 = test size, 8 = comma-separated integers,
    second to last = ``r``, last = chromosome description.

    Args:
        chrlist: Requested chromosomes. When ``continuation`` is true it is
            passed to ``update_chrlist``; otherwise it is replaced by the
            chromosomes recorded for the run.
        continuation: Whether an existing run is being continued. If so, the
            whole file content (with the matching row rebuilt) is collected
            into the returned ``towrite`` string.
        dataset: ``OrderedDict`` mapping data set name to its directory. It
            is reordered in place to match the order recorded for the run.
        fixed: Forwarded to ``update_chrlist``.
        outdir: Output directory prefix; it is concatenated directly with
            file names, so it must already end with a path separator.
        pat: Mapping of data set name to a number used as index offset
            (presumably the patient count of that data set).
        run: Number of the boruta run to read.

    Returns:
        Tuple ``(chrlist, dataset, patruns, perc, r, snpsubset, snpruns,
        testpat, testsize, towrite, trainpat)``.

    Raises:
        exceptions.WrongValueError: If the run is not present in the file or
            the given data sets differ from the ones recorded for the run.
    """
    runs_path = '%sboruta_runs.txt' % outdir
    funcs.correct_boruta_runs_file(runs_path)

    # Read everything up front inside a context manager so the handle is
    # released even when one of the validation errors below is raised.
    with open(runs_path, 'r') as run_file:
        lines = run_file.readlines()

    prefix = str(run) + '\t'
    rewritten = []
    occur = False

    for line in lines:
        if line.startswith(prefix):
            fields = line.strip().split('\t')
            sets_order = fields[1].strip().split('+')

            if list(dataset) != sets_order:
                if len(dataset) != len(sets_order):
                    raise exceptions.WrongValueError(
                        'dataset', dataset,
                        'Other data sets were used in the given boruta run')
                # Same number of sets: reorder ``dataset`` to the recorded
                # order; an unknown name means a different set was used.
                for s in sets_order:
                    try:
                        dataset.move_to_end(s)
                    except KeyError:
                        raise exceptions.WrongValueError(
                            'dataset', dataset,
                            'Data set named %s was not used in the given boruta run'
                            % s)

            patsubset = None if fields[3] == 'None' else fields[3].strip()
            patruns = _parse_run_numbers(fields[4], sets_order)
            snpsubset = None if fields[5] == 'None' else fields[5].strip()
            snpruns = _parse_run_numbers(fields[6], sets_order)

            testsize = float(fields[7])
            perc = list(map(int, fields[8].split(',')))
            r = int(fields[-2])

            if continuation:
                # NOTE(review): the whole field list is handed over and the
                # result is re-joined element by element. If
                # ``update_chrlist`` expects only the chromosome column and
                # returns a string, this should probably be
                # ``fields[-1] = update_chrlist(fixed, fields[-1], chrlist)``
                # -- confirm against the definition used by this module.
                fields = update_chrlist(fixed, fields, chrlist)
                # Every element is followed by a tab (including the last
                # one), exactly as the row was rebuilt before.
                line = ''.join(str(el) + '\t' for el in fields) + '\n'
            else:
                chrlist = funcs.read_chrstr(fields[-1])
            occur = True

        if continuation:
            rewritten.append(line)

    towrite = ''.join(rewritten)

    if not occur:
        raise exceptions.WrongValueError(
            '-run', str(run),
            'Boruta run number %d has not been conducted yet.' % run)

    if patsubset is not None:
        patients = set()
        done = 0
        for name in dataset:
            subset_path = '%s%s/%s_patients_%d.txt' % (
                dataset[name], patsubset, patsubset, patruns[name])
            with open(subset_path) as handle:
                selected = [int(row.strip()) for row in handle]
            # Indices are local to each data set; shift them by the totals
            # of the sets processed so far.
            patients.update(pp + done for pp in selected)
            done += pat[name]
    else:
        patients = set(range(sum(pat.values())))

    if testsize != 0:
        with open('%stestpat_%d.txt' % (outdir, run), 'r') as ts:
            testpat = {int(el.strip()) for el in ts}
        trainpat = patients - testpat
    else:
        testpat = set()
        trainpat = patients

    return (chrlist, dataset, patruns, perc, r, snpsubset, snpruns, testpat,
            testsize, towrite, trainpat)
def __call__(self, parser, namespace, values, option_string=None):
    """Replace the module-level ``chrlist`` with the parsed ``values``.

    The signature matches ``argparse.Action.__call__``; ``parser``,
    ``namespace`` and ``option_string`` are accepted but not used here.
    ``values`` is converted by ``funcs.read_chrstr`` and the result is
    published through the global ``chrlist``.
    """
    global chrlist

    parsed = funcs.read_chrstr(values)
    chrlist = parsed