def test_sequence_util():
    """Exercise the sequence helper functions on small fixed inputs.

    The transcribe / reverse_complement / reverse_transcribe results are only
    printed, not asserted. The nucleic-acid check and the single-base
    RNA<->DNA conversions are asserted, and RNA2DNA must raise with the
    expected message when given an invalid base.
    """
    sequence = 'ACTG'
    rna_sequence = 'ACUG'

    # Smoke checks: these only verify that the calls do not raise.
    transcribed = transcribe(sequence)
    print(transcribed)
    reverse_comp = reverse_complement(sequence)
    print(reverse_comp)
    reverse_transcribed_seq = reverse_transcribe(rna_sequence)
    print(reverse_transcribed_seq)

    assert is_nuc_acid('A')
    assert not is_nuc_acid('L')
    assert not is_nuc_acid(None)

    assert RNA2DNA('U') == 'T'
    assert DNA2RNA('T') == 'U'

    try:
        RNA2DNA('L')
    except Exception as e:
        assert 'Input sequence L is not valid nucleic acid sequence' in str(e)
    else:
        # Without this branch the test would pass silently if RNA2DNA ever
        # stopped rejecting invalid input.
        raise AssertionError(
            "RNA2DNA('L') did not raise for an invalid nucleic acid sequence")
def test_is_nuc_acid():
    """Check sequence.is_nuc_acid on an accepted string and two rejected inputs."""
    # Mixed-case sequence that is expected to be accepted.
    accepted = "GCCAat"
    assert sequence.is_nuc_acid(accepted)

    # A string containing 'X' and a non-string value are both expected to be
    # rejected.
    for rejected in ("AAAAXGG", 1234):
        assert not sequence.is_nuc_acid(rejected)
def multiple_sanger_analysis(definition_file, output_dir, data_dir=None,
                             verbose=False, single_line=None, allprops=False):
    '''
    Run single_sanger_analysis for each row of a definition spreadsheet and
    write the collected results to output_dir.

    Every row is read for the 'Label', 'Control File', 'Experiment File' and
    'Guide Sequence' columns. A 'Donor' column is optional and is only used
    when is_nuc_acid accepts its value. Rows whose analysis raises are reported
    on stdout (message plus traceback) and left out of the results.

    On success two files are written into output_dir:
    ice.results.<timestamp>.xlsx (sheets "Results" and "Metadata") and
    ice.results.<timestamp>.json.

    :param definition_file: input excel file that defines sample/control/data associations
    :param output_dir: output directory
    :param data_dir: directory that the control/experiment file names are
        joined onto; when None the file names are used as given
    :param verbose: forwarded to single_sanger_analysis
    :param single_line: when not None, only the row at this 0-based position
        is analyzed and all other rows are skipped
    :param allprops: forwarded to single_sanger_analysis
    :return: list of dicts with keys sample_name, ice, ice_d, r_squared,
        hdr_pct, guides and notes (one per analyzed row), or False when no row
        produced a result
    '''
    import sys
    import traceback

    input_df = pd.read_excel(definition_file)

    jobs = []
    # enumerate keeps the row counter in step with the rows even when a row
    # is skipped; a manual counter bumped at the end of the loop body would be
    # bypassed by `continue`.
    for n, (_, experiment) in enumerate(input_df.iterrows()):
        if single_line is not None and n != single_line:
            continue

        label = experiment['Label']
        base_outputname = os.path.join(output_dir, '%s-%s' % (n, label))
        control_sequence_file = experiment['Control File']
        edit_sequence_file = experiment['Experiment File']
        guide = experiment['Guide Sequence']
        if 'Donor' in experiment and is_nuc_acid(experiment['Donor']):
            donor = experiment['Donor']
        else:
            donor = None
        print(donor)

        try:
            if pd.isnull(control_sequence_file):
                raise IOError(
                    "Control filepath not specified at line {} in definition file"
                    .format(n + 1))
            if pd.isnull(edit_sequence_file):
                raise IOError(
                    "Edit filepath not specified at line {} in definition file"
                    .format(n + 1))

            if data_dir is None:
                # os.path.join(None, ...) raises TypeError, so fall back to
                # the file names exactly as written in the definition file.
                control_sequence_path = control_sequence_file
                edit_sequence_path = edit_sequence_file
            else:
                control_sequence_path = os.path.join(data_dir,
                                                     control_sequence_file)
                edit_sequence_path = os.path.join(data_dir,
                                                  edit_sequence_file)

            print("-" * 50, "analyzing", n, experiment['Label'], guide)

            job_args = (control_sequence_path, edit_sequence_path,
                        base_outputname, guide)
            job_kwargs = {
                'verbose': verbose,
                'allprops': allprops,
                'donor': donor
            }
            result = single_sanger_analysis(*job_args, **job_kwargs)
            jobs.append((experiment, result))
        except Exception as e:
            # Best effort: one bad sample must not abort the whole batch.
            print("Single Sanger analysis failed", e)
            traceback.print_exc(file=sys.stdout)

    results = []
    for experiment, r in jobs:
        if r is not None:
            row = [
                experiment['Label'], r['ice'], r['ice_d'], r['rsq'],
                r['hdr_pct'], r['guides'], r['notes']
            ]
        else:
            # single_sanger_analysis returned None instead of a result.
            row = [experiment['Label'], 'Failed', '', '', '', '', '']
        results.append(row)

    if not results:
        print("None of the samples were able to be analyzed")
        return False

    header = [
        "sample_name", "ice", 'ice_d', "r_squared", "hdr_pct", "guides",
        "notes"
    ]
    results_df = pd.DataFrame(results, columns=header)
    out_dict = [dict(zip(header, row)) for row in results]

    timestamp = '{:%Y-%m-%d-%H%M%S}'.format(datetime.datetime.now())
    # Build both paths from one base so that a '.xlsx' occurring inside
    # output_dir is never rewritten.
    out_base = os.path.join(output_dir, "ice.results.{}".format(timestamp))
    out_file = out_base + ".xlsx"
    json_file = out_base + ".json"

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be explicit rather than the platform default.
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(out_dict, f, ensure_ascii=False)

    # The context manager saves and closes the workbook on exit; an explicit
    # writer.save() is redundant and no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(out_file) as writer:
        results_df.to_excel(writer, sheet_name="Results")
        metadata = pd.DataFrame([{'version': __version__}])
        metadata.to_excel(writer, sheet_name='Metadata')

    return out_dict