patients = patients.loc[pnames]

    for pname, patient in patients.iterrows():
        patient = Patient(patient)

        if VERBOSE >= 1:
            print 'Patient:', patient.name

        patient.discard_nonsequenced_samples()

        for fragment in fragments:
            if VERBOSE >= 1:
                print fragment

            # Check whether a reference exists at all
            ref_fn = patient.get_reference_filename(fragment)
            if not os.path.isfile(ref_fn):
                print 'ERROR: reference for fragment', fragment, 'not found!'
                continue
            elif VERBOSE >= 3:
                print 'OK: reference file found'
    
            refseq = SeqIO.read(ref_fn, 'fasta')

            # Check whether the consensus from the first sample is similar to
            # the reference
            for i, sample in enumerate(patient.itersamples()):
                if os.path.isfile(sample.get_consensus_filename(fragment)):
                    sample_init_seq = sample.get_consensus(fragment)
                    if (VERBOSE >= 1) and (i != 0):
                        print 'Consensus from initial sample missing, taking time point',
    # Unpack the parsed command-line options. NOTE(review): use_save and
    # use_force are read here but not used in the visible part of this routine;
    # presumably they are consumed further down -- confirm.
    VERBOSE = args.verbose
    pnames = args.patients
    use_save = args.save
    use_force = args.force

    # Load the patient table and, if patient names were given, keep only the
    # rows whose index is in that list (names missing from the table are
    # silently ignored by isin()).
    patients = load_patients()
    if pnames is not None:
        patients = patients.loc[patients.index.isin(pnames)]

    for pname, patient in patients.iterrows():
        # Wrap the table row into a Patient object; this rebinds the loop
        # variable.
        patient = Patient(patient)

        if VERBOSE:
            print 'Patient:', patient.name

        # Read the patient's genomewide reference as a single FASTA record.
        fn = patient.get_reference_filename('genomewide')
        refseq = SeqIO.read(fn, 'fasta', alphabet=ambiguous_dna)

        # Annotate the reference, passing the fragment edges as additional
        # edges. NOTE(review): annotate_sequence presumably adds the features
        # to refseq in place, since its return value is not used -- confirm.
        fragment_edges = get_edges_fragments(patient, VERBOSE=VERBOSE)
        annotate_sequence(refseq, VERBOSE=VERBOSE,
                          additional_edges={'fragment': fragment_edges})

        if VERBOSE >= 1:
            for feature in refseq.features:
                # Skip features whose id starts with 'F' (presumably the
                # fragment features -- TODO confirm).
                if feature.id[0] == 'F':
                    continue

                # Python 2 print with a trailing comma: stay on the same line
                # so the translation below follows the feature id.
                print feature.id, 
                # NOTE(review): 'genes' is imported on every iteration and is
                # not used in the visible code.
                from hivwholeseq.utils.genome_info import genes
                # For genes and proteins, print the translated sequence
                # of the feature extracted from the reference.
                if feature.type in ('gene', 'protein'):
                    print feature.extract(refseq).seq.translate()
    # Default to the fragment names 'F1' through 'F6' when none were given.
    if not fragments:
        fragments = ['F'+str(i) for i in xrange(1, 7)]
    if VERBOSE >= 2:
        print 'fragments', fragments

    # Recovery mode: for every patient and fragment, restore the reference
    # from its '_old.fasta' backup, then terminate the script.
    if use_recover:
        for pname, patient in patients.iterrows():
            print pname
            patient = Patient(patient)
            patient.discard_nonsequenced_samples()

            for fragment in fragments:
                if VERBOSE >= 1:
                    print fragment

                # The backup path is the reference path with '.fasta' replaced
                # by '_old.fasta'.
                fn = patient.get_reference_filename(fragment)
                fn_old = fn.replace('.fasta', '_old.fasta')
                if not os.path.isfile(fn_old):
                    print 'Old reference not found, skipping'
                    continue
                # Overwrite the current reference with the backup.
                shutil.copy(fn_old, fn)
                # Set both files to read for user/group/others plus write for
                # the user (mode 0644).
                os.chmod(fn, (stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | \
                              stat.S_IWUSR))
                os.chmod(fn_old, (stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | \
                                  stat.S_IWUSR))
                # The backup is no longer needed once it has been restored.
                os.remove(fn_old)
                if VERBOSE >= 1:
                    print 'Old reference recovered and protection removed'
                # NOTE(review): this continue is the last statement of the loop
                # body and has no effect.
                continue
        # Recovery is a standalone action: exit without running anything
        # that follows this block.
        sys.exit()
        patients = patients.loc[pnames]

    for pname, patient in patients.iterrows():
        patient = Patient(patient)

        if VERBOSE >= 1:
            print 'Patient:', patient.name

        patient.discard_nonsequenced_samples()

        for fragment in fragments:
            if VERBOSE >= 1:
                print fragment

            # Check whether a reference exists at all
            ref_fn = patient.get_reference_filename(fragment)
            if not os.path.isfile(ref_fn):
                print 'ERROR: reference for fragment', fragment, 'not found!'
                continue
            elif VERBOSE >= 3:
                print 'OK: reference file found'

            refseq = SeqIO.read(ref_fn, 'fasta')

            # Check whether the consensus from the first sample is similar to
            # the reference
            for i, sample in enumerate(patient.itersamples()):
                if os.path.isfile(sample.get_consensus_filename(fragment)):
                    sample_init_seq = sample.get_consensus(fragment)
                    if (VERBOSE >= 1) and (i != 0):
                        print 'Consensus from initial sample missing, taking time point',
示例#5
0
    VERBOSE = args.verbose
    pnames = args.patients
    use_save = args.save
    use_force = args.force

    patients = load_patients()
    if pnames is not None:
        patients = patients.loc[patients.index.isin(pnames)]

    for pname, patient in patients.iterrows():
        patient = Patient(patient)

        if VERBOSE:
            print 'Patient:', patient.name

        fn = patient.get_reference_filename('genomewide')
        refseq = SeqIO.read(fn, 'fasta', alphabet=ambiguous_dna)

        fragment_edges = get_edges_fragments(patient, VERBOSE=VERBOSE)
        annotate_sequence(refseq,
                          VERBOSE=VERBOSE,
                          additional_edges={'fragment': fragment_edges})

        if VERBOSE >= 1:
            for feature in refseq.features:
                if feature.id[0] == 'F':
                    continue

                print feature.id,
                from hivwholeseq.utils.genome_info import genes
                if feature.type in ('gene', 'protein'):