Exemplo n.º 1
0
def test_sequence_util():
    """Exercise the sequence helper functions on short DNA/RNA strings.

    ``transcribe``, ``reverse_complement`` and ``reverse_transcribe`` are only
    called and printed here (their return values are not asserted).
    ``is_nuc_acid``, ``RNA2DNA`` and ``DNA2RNA`` are checked with assertions,
    including the error raised by ``RNA2DNA`` for an invalid sequence.
    """
    sequence = 'ACTG'
    rna_sequence = 'ACUG'

    # Smoke calls: these must not raise; the results are printed for inspection.
    transcribed = transcribe(sequence)
    print(transcribed)

    reverse_comp = reverse_complement(sequence)
    print(reverse_comp)

    reverse_transcribed_seq = reverse_transcribe(rna_sequence)
    print(reverse_transcribed_seq)

    assert is_nuc_acid('A')
    assert not is_nuc_acid('L')

    assert not is_nuc_acid(None)

    assert RNA2DNA('U') == 'T'
    assert DNA2RNA('T') == 'U'

    try:
        RNA2DNA('L')
    except Exception as e:
        assert 'Input sequence L is not valid nucleic acid sequence' in str(e)
    else:
        # Without this branch the test would pass silently if no error were
        # raised, so the error-path check above could never fail.
        raise AssertionError(
            'RNA2DNA did not raise for the invalid sequence L')
Exemplo n.º 2
0
def test_is_nuc_acid():
    """Check ``sequence.is_nuc_acid`` on one accepted and two rejected inputs."""
    accepted = "GCCAat"
    assert sequence.is_nuc_acid(accepted)

    # A string containing 'X' and a non-string value must both be rejected.
    for rejected in ("AAAAXGG", 1234):
        assert not sequence.is_nuc_acid(rejected)
Exemplo n.º 3
0
def multiple_sanger_analysis(definition_file,
                             output_dir,
                             data_dir=None,
                             verbose=False,
                             single_line=None,
                             allprops=False):
    """
    Run ``single_sanger_analysis`` for the rows of a definition spreadsheet.

    Each row is read for its 'Label', 'Control File', 'Experiment File' and
    'Guide Sequence' columns, plus a 'Donor' column that is only used when
    present and accepted by ``is_nuc_acid``.  A row whose analysis raises is
    reported on stdout (message and traceback) and left out of the summary;
    it does not abort the batch.  A row whose analysis returns None is listed
    as 'Failed'.  When at least one row was analyzed, the summary is written
    to ``ice.results.<timestamp>.xlsx`` (sheets 'Results' and 'Metadata') and
    ``ice.results.<timestamp>.json`` inside ``output_dir``.

    :param definition_file: input excel file that defines sample/control/data associations
    :param output_dir: output directory
    :param data_dir: directory the control/experiment file names are joined
        to; when None the names from the definition file are used as given
    :param verbose: forwarded to ``single_sanger_analysis``
    :param single_line: when not None, only the row at this zero-based
        position in the definition file is analyzed
    :param allprops: forwarded to ``single_sanger_analysis``
    :return: list with one summary dict per analyzed row, or False when no
        row could be analyzed
    """
    import sys
    import traceback

    definitions = pd.read_excel(definition_file)

    jobs = []
    # enumerate() keeps the row counter in step with the spreadsheet for
    # skipped and failed rows alike; a hand-maintained counter was bypassed
    # by `continue`, so any single_line other than 0 never matched.
    for n, (_, experiment) in enumerate(definitions.iterrows()):
        # Filter first so that unselected rows produce no output or errors.
        if single_line is not None and n != single_line:
            continue

        label = experiment['Label']
        base_outputname = os.path.join(output_dir, '%s-%s' % (n, label))
        control_sequence_file = experiment['Control File']
        edit_sequence_file = experiment['Experiment File']
        guide = experiment['Guide Sequence']

        if 'Donor' in experiment and is_nuc_acid(experiment['Donor']):
            donor = experiment['Donor']
        else:
            donor = None

        print(donor)
        # Deliberately broad: one bad row must not stop the remaining rows.
        try:
            if pd.isnull(control_sequence_file):
                raise IOError(
                    "Control filepath not specified at line {} in definition file"
                    .format(n + 1))
            if pd.isnull(edit_sequence_file):
                raise IOError(
                    "Edit filepath not specified at line {} in definition file"
                    .format(n + 1))

            if data_dir is None:
                # os.path.join(None, ...) raises TypeError; fall back to the
                # paths exactly as written in the definition file.
                control_sequence_path = control_sequence_file
                edit_sequence_path = edit_sequence_file
            else:
                control_sequence_path = os.path.join(data_dir,
                                                     control_sequence_file)
                edit_sequence_path = os.path.join(data_dir,
                                                  edit_sequence_file)

            print("-" * 50, "analyzing", n, label, guide)

            result = single_sanger_analysis(control_sequence_path,
                                            edit_sequence_path,
                                            base_outputname,
                                            guide,
                                            verbose=verbose,
                                            allprops=allprops,
                                            donor=donor)
            jobs.append((experiment, result))

        except Exception as e:
            print("Single Sanger analysis failed", e)
            traceback.print_exc(file=sys.stdout)

    results = []
    for experiment, r in jobs:
        if r is not None:
            results.append([
                experiment['Label'], r['ice'], r['ice_d'], r['rsq'],
                r['hdr_pct'], r['guides'], r['notes']
            ])
        else:
            results.append([experiment['Label'], 'Failed', '', '', '', '', ''])

    if not results:
        print("None of the samples were able to be analyzed")
        return False

    header = [
        "sample_name", "ice", 'ice_d', "r_squared", "hdr_pct", "guides",
        "notes"
    ]
    results_df = pd.DataFrame(results, columns=header)

    timestamp = '{:%Y-%m-%d-%H%M%S}'.format(datetime.datetime.now())
    out_file = os.path.join(output_dir,
                            "ice.results.{}.xlsx".format(timestamp))
    # Built separately rather than via str.replace on out_file, which would
    # also rewrite a '.xlsx' occurring inside output_dir.
    json_file = os.path.join(output_dir,
                             "ice.results.{}.json".format(timestamp))

    # to json
    out_dict = [dict(zip(header, row)) for row in results]
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding is pinned instead of depending on the platform default.
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(out_dict, f, ensure_ascii=False)

    # The context manager saves and closes the workbook on exit; no explicit
    # writer.save() afterwards (that method no longer exists in pandas >= 2.0).
    with pd.ExcelWriter(out_file) as writer:
        results_df.to_excel(writer, sheet_name="Results")

        metadata = pd.DataFrame([{'version': __version__}])
        metadata.to_excel(writer, sheet_name='Metadata')

    return out_dict