def __call__(self, parser, namespace, values, option_string=None):
    """Handle the parsed option values for this argparse-style action.

    Behaviour depends on how many items ``values`` holds:

    * two items: ``values[0]`` is used as a key of the module-level
      ``snprun`` mapping and ``values[1]`` is stored under it as an int;
    * one item: the single item is converted to an int;
    * anything else: ``values`` is handed to ``funcs.read_chrstr``.

    Raises:
        ValueError: if the item to convert is not a valid integer literal.
    """
    global snprun
    if len(values) == 2:
        snprun[values[0]] = int(values[1])
    elif len(values) == 1:
        # Convert the single element, not the container: int() on a
        # one-element list raises TypeError, while indexing gives the same
        # result for both a one-element list and a one-character string.
        # NOTE(review): ``rr`` is only bound locally (no ``global``
        # declaration), so the value is discarded after the conversion --
        # confirm whether a module-level ``rr`` was intended.
        rr = int(values[0])
    else:
        # NOTE(review): ``chrlist`` is likewise local to this call and is
        # dropped on return -- confirm whether a ``global`` was intended.
        chrlist = funcs.read_chrstr(values)
def update_chrlist(fixed, linechrs, chrlist):
    """Merge already-recorded chromosomes with the ones requested now.

    ``linechrs`` is parsed with ``funcs.read_chrstr`` and concatenated with
    ``chrlist``.  Every chromosome that occurs more than once in that
    combined list is, unless ``fixed`` is truthy, reported on stdout and
    removed from ``chrlist`` (the caller's list is modified in place).

    Args:
        fixed: when truthy, repeated chromosomes are kept in ``chrlist``.
        linechrs: chromosome string accepted by ``funcs.read_chrstr``.
        chrlist: list of chromosomes requested for the current call.

    Returns:
        The result of ``funcs.make_chrstr`` applied to the sorted, unique
        union of both chromosome collections.

    Raises:
        exceptions.WrongValueError: if removing repeated chromosomes leaves
            ``chrlist`` empty.
    """
    combined = funcs.read_chrstr(linechrs) + chrlist
    for chrom, count in Counter(combined).items():
        # Nothing to do for unique chromosomes or when repeats are allowed.
        if fixed or count <= 1:
            continue
        print(
            "WARNING: chromosome %d has already been analysed in this run, so it was omitted. "
            % chrom +
            "If you want to analyse it anyway, please add '-fixed' attribute"
        )
        chrlist.remove(chrom)
        if not chrlist:
            raise exceptions.WrongValueError(
                'chrlist', chrlist,
                'There are no chromosomes to analyze!!!')
    # ``combined`` was built before any removal, so the returned string still
    # covers the chromosomes that were dropped from ``chrlist``.
    return funcs.make_chrstr(sorted(set(combined)))
            '%d\t%s\t%s\t%s\t%d\n' % (runs[setname], setname, ', '.join([
                k for k in dataset.keys() if k != setname
            ]), funcs.make_chrstr(chrlist), shared_snps))

    return shared_snps


# Defaults used when the corresponding command-line flag is absent.
dataset = OrderedDict()
chrlist = list(range(1, 24))
fixed = False
run = None

# Hand-rolled scan of the command line: every position is inspected, and the
# arguments following a recognised flag are read by offset.
for q, arg in enumerate(sys.argv):
    if arg == '-dataset':
        # "-dataset NAME DIR": DIR must be present and start with '.', '~'
        # or '/'.  The bounds check and startswith() make a missing or empty
        # DIR raise the intended NoParameterError instead of an IndexError.
        if (q + 2 < len(sys.argv)
                and sys.argv[q + 2].startswith(('.', '~', '/'))):
            dataset[sys.argv[q + 1]] = sys.argv[q + 2]
        else:
            raise exceptions.NoParameterError(
                'directory',
                'After name of data set should appear a directory to folder with it.'
            )
    elif arg == '-chr':
        chrlist = funcs.read_chrstr(sys.argv[q + 1])
    elif arg == '-run':
        run = int(sys.argv[q + 1])
    elif arg == '-fixed':
        fixed = True

found = find_shared(dataset, chrlist, fixed, run)
print('%d shared SNPs found!' % found)
# Example #4
# 0
def _parse_set_runs(field, sets_order):
    """Map data set names to run numbers from a '+'-separated field.

    Returns ``None`` when ``field`` is the ``'-'`` placeholder; otherwise an
    ``OrderedDict`` pairing each name in ``sets_order`` with the integer at
    the same position in ``field``.
    """
    if field == '-':
        return None
    numbers = [int(number) for number in field.split('+')]
    return OrderedDict(zip(sets_order, numbers))


def read_boruta_params(chrlist, continuation, dataset, fixed, outdir, pat,
                       run):
    """Load the parameters recorded for Boruta run ``run``.

    The file ``<outdir>boruta_runs.txt`` is first passed to
    ``funcs.correct_boruta_runs_file`` and then scanned for the line that
    starts with ``"<run>\\t"``.  That line is split on tabs and read as:

    * field 1: data set names joined with ``'+'``;
    * field 3: patient subset name, or ``'None'``;
    * field 4: per-set patient run numbers joined with ``'+'``, or ``'-'``;
    * field 5: SNP subset name, or ``'None'``;
    * field 6: per-set SNP run numbers joined with ``'+'``, or ``'-'``;
    * field 7: test size (float);
    * field 8: comma-separated integers (``perc``);
    * second to last field: integer ``r``;
    * last field: chromosome string.

    Args:
        chrlist: chromosomes requested by the caller.  With ``continuation``
            it is passed to ``update_chrlist`` (which may remove entries in
            place); otherwise it is replaced by the chromosomes of the run.
        continuation: when truthy, the run's chromosome field is merged with
            ``chrlist`` and the full file content is rebuilt in ``towrite``.
        dataset: ``OrderedDict`` of data set name -> directory; reordered in
            place to match the order recorded for the run.
        fixed: forwarded to ``update_chrlist``.
        outdir: path prefix of the output directory.
        pat: mapping of data set name -> number of patients.
        run: number of the Boruta run to look up.

    Returns:
        Tuple ``(chrlist, dataset, patruns, perc, r, snpsubset, snpruns,
        testpat, testsize, towrite, trainpat)``.

    Raises:
        exceptions.WrongValueError: if the run is not present in the file or
            was conducted with different data sets.
    """
    runs_path = '%sboruta_runs.txt' % outdir
    funcs.correct_boruta_runs_file(runs_path)
    # Context manager so the handle is released even if parsing fails.
    with open(runs_path, 'r') as run_file:
        lines = run_file.readlines()

    rewritten = []
    occur = False
    run_prefix = str(run) + '\t'
    for line in lines:
        if line.startswith(run_prefix):
            fields = line.strip().split('\t')
            sets_order = fields[1].strip().split('+')
            if list(dataset.keys()) != sets_order:
                if len(dataset) != len(sets_order):
                    raise exceptions.WrongValueError(
                        'dataset', dataset,
                        'Other data sets were used in the given boruta run')
                # Moving every recorded name to the end, in order, leaves
                # ``dataset`` sorted exactly like ``sets_order``.
                for s in sets_order:
                    try:
                        dataset.move_to_end(s)
                    except KeyError:
                        raise exceptions.WrongValueError(
                            'dataset', dataset,
                            'Data set named %s was not used in the given boruta run'
                            % s)

            patsubset = None if fields[3] == 'None' else fields[3].strip()
            patruns = _parse_set_runs(fields[4], sets_order)
            snpsubset = None if fields[5] == 'None' else fields[5].strip()
            snpruns = _parse_set_runs(fields[6], sets_order)

            testsize = float(fields[7])
            perc = [int(value) for value in fields[8].split(',')]
            r = int(fields[-2])
            if continuation:
                # Only the chromosome field is updated; previously the whole
                # field list was passed in and the returned chromosome string
                # replaced the entire record, losing every other column.
                fields[-1] = update_chrlist(fixed, fields[-1], chrlist)
                # Each field is followed by a tab, as before.
                line = ''.join(str(el) + '\t' for el in fields) + '\n'
            else:
                chrlist = funcs.read_chrstr(fields[-1])
            occur = True
        if continuation:
            rewritten.append(line)
    towrite = ''.join(rewritten)

    if not occur:
        raise exceptions.WrongValueError(
            '-run', str(run),
            'Boruta run number %d has not been conducted yet.' % run)

    if patsubset is not None:
        patients = set()
        # Patient indices are local to each data set; ``done`` shifts them by
        # the number of patients in the data sets processed so far.
        done = 0
        for name in dataset.keys():
            subset_path = '%s%s/%s_patients_%d.txt' % (
                dataset[name], patsubset, patsubset, patruns[name])
            with open(subset_path) as subset_file:
                selected = [int(row.strip()) for row in subset_file]
            patients.update(pp + done for pp in selected)
            done += pat[name]
    else:
        patients = set(range(sum(pat.values())))

    if testsize != 0:
        with open('%stestpat_%d.txt' % (outdir, run), 'r') as ts:
            testpat = {int(el.strip()) for el in ts}
        trainpat = patients - testpat
    else:
        testpat = set()
        trainpat = patients

    return chrlist, dataset, patruns, perc, r, snpsubset, snpruns, testpat, testsize, towrite, trainpat
 def __call__(self, parser, namespace, values, option_string=None):
     """Store the chromosomes parsed from ``values`` in the global ``chrlist``.

     ``values`` is passed unchanged to ``funcs.read_chrstr`` and the result
     replaces the module-level ``chrlist``.
     """
     global chrlist
     parsed_chromosomes = funcs.read_chrstr(values)
     chrlist = parsed_chromosomes