def get_disruption_position_in_guide_donor_alignment(PAM, guide_with_full_PAM,
                                                     donor):
    '''
    Fit guide_with_full_PAM against the donor on both strands and report
    where the donor disrupts the guide.

    A fitting-alignment backtrack is built for the donor as given and for its
    reverse complement. The orientation with the strictly higher score is
    traced back; on a tie the reverse-complement orientation is used.

    returns (disruptions, query, subject, alignment). disruptions is whatever
    process_disruption_positions_for_snps_indels derives from the SNP and
    indel positions of the chosen alignment. Per the original notes, 0 denotes
    a PAM disruption and positive integers the distance of a SNP/indel from
    the PAM; only an indel can be at 0, because a SNP at position 0 does not
    disrupt the NGG.

    NOTE(review): PAM is accepted but never read inside this function.
    '''
    fwd_i, fwd_j, fwd_trace, fwd_score = build_backtrack_for_fitting_alignment(
        donor, guide_with_full_PAM)
    rc_donor = bedgraph_computation.rev_comp(donor)
    rev_i, rev_j, rev_trace, rev_score = build_backtrack_for_fitting_alignment(
        rc_donor, guide_with_full_PAM)

    # Pick the orientation to trace back; ties go to the reverse complement.
    if fwd_score > rev_score:
        end_i, end_j, trace, template = fwd_i, fwd_j, fwd_trace, donor
    else:
        end_i, end_j, trace, template = rev_i, rev_j, rev_trace, rc_donor

    query, subject, alignment, snps, indels = fitting_alignment(
        end_i, end_j, trace, template, guide_with_full_PAM)
    return (process_disruption_positions_for_snps_indels(snps, indels), query,
            subject, alignment)
def check_guide_disruption(log_outfile, individual_variants, variant_type,
                           disruption_position, m0, PAM_chrom, PAM_strand,
                           PAM_coord, guide_seq, donor_shift, donor_start,
                           donor_end, donor_strand, left_side_donor,
                           right_side_donor, ref_seq, variant_seq, donor,
                           genomic_target):
    '''
    Check that the guide target site is absent from the donor but present in
    the genomic target.

    A site counts as present when guide_seq followed by any one of A/C/T/G and
    then 'GG' occurs in the sequence or in its reverse complement.

    returns True when the donor lacks the site and genomic_target contains it.
    Otherwise a tab-joined diagnostic record (with embedded newlines) is
    written to log_outfile and False is returned; the record starts with
    'guide not disrupted' or 'guide not in target' respectively.

    Every field of the record is passed through str() before joining, so
    non-string values (e.g. a list of variants or an integer coordinate) no
    longer make the logging itself raise TypeError.

    m0 is accepted for interface compatibility but is not used.
    '''

    def _guide_with_PAM_in(sequence):
        # True if guide + N + 'GG' occurs on either strand of sequence.
        rc_sequence = bedgraph_computation.rev_comp(sequence)
        return any((guide_seq + nt + 'GG') in strand
                   for nt in 'ACTG'
                   for strand in (sequence, rc_sequence))

    def _log_failure(reason):
        # One record layout shared by both failure modes.
        fields = (
            reason, variant_type, individual_variants, '\n',
            disruption_position, '\n',
            guide_seq, '\n',
            bedgraph_computation.rev_comp(guide_seq), '\n',
            left_side_donor + ' ' + ref_seq + ' ' + right_side_donor, '\n',
            left_side_donor + ' ' + variant_seq + ' ' + right_side_donor, '\n',
            donor, '\n',
            genomic_target, '\n',
            PAM_chrom, PAM_strand, PAM_coord,
            'donor_shift_' + str(donor_shift),
            donor_start, donor_end, donor_strand,
        )
        log_outfile.write('\t'.join(str(field) for field in fields) + '\n')

    # The donor must no longer carry an intact guide + PAM site.
    if _guide_with_PAM_in(donor):
        _log_failure('guide not disrupted')
        return False

    # The unedited genomic target must still carry it.
    if not _guide_with_PAM_in(genomic_target):
        _log_failure('guide not in target')
        return False

    return True
Пример #3
0
def assemble_mutated_donor_sequence(ORF_strand, left_donor, right_donor,
                                    temp_ORF_seq, aa_num,
                                    synonymous_codons_to_mutate, upstream):
    '''
    Join the two homology arms around the (re)coded codon window.

    temp_ORF_seq is an indexable sequence of codon strings; aa_num is 1-based.
    The window covers the codon at aa_num plus synonymous_codons_to_mutate
    codons before it (upstream truthy) or after it (upstream falsy). The
    window is joined, reverse-complemented when ORF_strand is '-', lowercased,
    and placed between left_donor and right_donor.

    returns left_donor + lowercased window + right_donor

    If the joined window length is not a multiple of three, the codon slice
    that was actually assembled is printed as a diagnostic (previously the
    upstream slice was printed even for downstream windows).

    NOTE(review): for upstream windows with
    aa_num - 1 < synonymous_codons_to_mutate the slice start is negative and
    Python counts it from the end of temp_ORF_seq, which silently yields a
    wrong (often empty) window -- callers should avoid that range.
    '''
    aa_idx = aa_num - 1
    if upstream:
        region_codons = temp_ORF_seq[aa_idx -
                                     synonymous_codons_to_mutate:aa_idx + 1]
    else:
        region_codons = temp_ORF_seq[aa_idx:aa_idx +
                                     synonymous_codons_to_mutate + 1]
    mutated_region = ''.join(region_codons)
    if ORF_strand == '-':
        mutated_region = bedgraph_computation.rev_comp(mutated_region)
    if len(mutated_region) % 3 != 0:
        # Show the window that was really used, whichever direction it spans.
        print(region_codons)
    return left_donor + mutated_region.lower() + right_donor
def global_alignment(v, w, PAM):
    '''
    Fit all of w against any part of v, trying v and its reverse complement.

    Both sequences are uppercased first. A fitting-alignment backtrack is
    built for each orientation of v; the orientation with the strictly higher
    score is traced back, and a tie selects the reverse complement.

    returns (disruptions, query, subject, alignment), where disruptions comes
    from process_disruption_positions_for_snps_indels(PAM, snps, indels) for
    the chosen alignment.
    '''
    template = v.upper()
    probe = w.upper()

    fwd_i, fwd_j, fwd_trace, fwd_score = build_backtrack_for_fitting_alignment(
        template, probe)
    rc_template = bedgraph_computation.rev_comp(template)
    rev_i, rev_j, rev_trace, rev_score = build_backtrack_for_fitting_alignment(
        rc_template, probe)

    # Decide which orientation to trace back; ties go to the reverse complement.
    if fwd_score > rev_score:
        end_i, end_j, trace, chosen = fwd_i, fwd_j, fwd_trace, template
    else:
        end_i, end_j, trace, chosen = rev_i, rev_j, rev_trace, rc_template

    query, subject, alignment, snps, indels = fitting_alignment(
        end_i, end_j, trace, chosen, probe)
    return (process_disruption_positions_for_snps_indels(PAM, snps, indels),
            query, subject, alignment)
def assemble_mutated_donor_sequence(ORF_strand,
                                    left_donor,
                                    right_donor,
                                    ORF_seq,
                                    aa_range,
                                    mutated_region_lowercase=False):
    '''
    Build a complete donor: left arm + codon window + right arm.

    ORF_seq is an indexable sequence of codon strings and aa_range is a
    (first, last) pair of 1-based, inclusive amino-acid numbers selecting the
    window. The window is joined into one string, reverse-complemented when
    ORF_strand is '-', and lowercased only when mutated_region_lowercase is
    truthy. left_donor and right_donor are concatenated unchanged (per the
    original notes they are genomic sequence and may contain UTR or intron).

    A diagnostic line is printed when the joined window length is not a
    multiple of three; the donor is returned regardless.
    '''
    first_aa_num, last_aa_num = aa_range
    codon_window = ORF_seq[first_aa_num - 1:last_aa_num]
    middle = ''.join(codon_window)

    if ORF_strand == '-':
        middle = bedgraph_computation.rev_comp(middle)

    if len(middle) % 3:
        print(aa_range, codon_window, 'is not a multiple of three')

    if mutated_region_lowercase:
        return left_donor + middle.lower() + right_donor
    return left_donor + middle + right_donor
Пример #6
0
def get_proximal_sense_and_antisense_PAMs(genome_seq, ORF_chrom, ORF_strand,
                                          ORF_exon_coords, coord, upstream):
    '''
    Walk along the chromosome from near coord and collect PAM sites on both
    strands.

    The walk starts GUIDE_LENGTH positions to one side of coord and moves one
    position per step; side and direction depend on ORF_strand and upstream.
    For each strand the walk stops once 10 sites have been collected or after
    DONOR_LENGTH - 2 * MINIMUM_HOMOLOGY + GUIDE_LENGTH steps.

    A position is kept when the PAM-length window there (reverse-complemented
    for the minus strand) has hamming distance 0 to PAM and the adjacent guide
    does not contain NUM_CONSECUTIVE_T_DISALLOWED consecutive 'T'.

    returns a list of (strand, PAM_seq, PAM_coord, guide_seq,
    guide_seq_with_PAM) tuples, all plus-strand hits first, then minus-strand.

    ORF_exon_coords is accepted but not used here.
    '''
    starts_right_of_coord = (ORF_strand == '+' and upstream) or (
        ORF_strand == '-' and not upstream)
    if starts_right_of_coord:
        first_position = coord + GUIDE_LENGTH
    else:
        first_position = coord - GUIDE_LENGTH

    # '+' ORF searching upstream, or non-'+' ORF searching downstream, walks
    # towards lower coordinates; the other two cases walk upwards.
    step = -1 if (ORF_strand == '+') == bool(upstream) else 1

    def _plus_window(chrom_seq, pos):
        # PAM window at pos, guide directly before it.
        return (chrom_seq[pos:pos + PAM_length],
                chrom_seq[pos - GUIDE_LENGTH:pos],
                chrom_seq[pos - GUIDE_LENGTH:pos + PAM_length])

    def _minus_window(chrom_seq, pos):
        # Same window read from the opposite strand: guide directly after it.
        rc = bedgraph_computation.rev_comp
        return (rc(chrom_seq[pos:pos + PAM_length]),
                rc(chrom_seq[pos + PAM_length:pos + PAM_length +
                             GUIDE_LENGTH]),
                rc(chrom_seq[pos:pos + PAM_length + GUIDE_LENGTH]))

    def _scan(strand_label, window):
        hits = []
        pos = first_position
        shift = -GUIDE_LENGTH
        while len(hits) < 10 and shift < DONOR_LENGTH - MINIMUM_HOMOLOGY * 2:
            pam_seq, guide, guide_plus_pam = window(genome_seq[ORF_chrom],
                                                    pos)
            if (bedgraph_computation.hamming_distance(pam_seq, PAM) == 0
                    and NUM_CONSECUTIVE_T_DISALLOWED * 'T' not in guide):
                hits.append(
                    (strand_label, pam_seq, pos, guide, guide_plus_pam))
            pos += step
            shift += 1
        return hits

    return _scan('+', _plus_window) + _scan('-', _minus_window)
Пример #7
0
def mutate_synonymous_codons(ORF_chrom, ORF_strand,
                             synonymous_codons_to_mutate, aa_num, aa_to_codon,
                             suboptimal_removed_aa_to_codon, ORF_info, ORF_seq,
                             upstream):
    '''
    Recode the codons flanking amino acid aa_num with synonymous codons and
    build the resulting donors.

    For each of the synonymous_codons_to_mutate codons immediately upstream
    (upstream truthy) or downstream of aa_num, the candidate from
    suboptimal_removed_aa_to_codon[aa] with the largest hamming distance to
    the original codon is chosen, provided that
      * the assembled donor contains neither INTERNAL_RESTRICTION_SITE nor its
        reverse complement, and
      * no target codon in codon_to_aa, placed at aa_num, creates such a site
        together with the short flanking sequence.
    If no candidate qualifies the original codon is kept. The codon at aa_num
    itself is not changed by this function.

    ORF_info[aa_num] = ( codon, aa_coords, aa )

    returns (control_donor, donor_library, mutated_region_chromosomal_range):
      control_donor  - recoded window between homology arms that reach 200
                       positions further out on each side
      donor_library  - result of generate_donors for the chosen codons
      mutated_region_chromosomal_range
                     - (ref_allele_start_coord,
                        ref_allele_start_coord + mutated_region_length)

    Reads genome_seq, DONOR_LENGTH, codon_to_aa, ORF and the
    INTERNAL_RESTRICTION_SITE* names from the enclosing (module) scope.
    aa_to_codon is accepted but not used.
    '''
    aa_idx = aa_num - 1
    # Start coordinate of the window that will be replaced, taken from the
    # aa_coords entry of the first codon of the window in chromosome order.
    if ORF_strand == '+':
        if upstream:
            ref_allele_start_coord = ORF_info[
                aa_num - synonymous_codons_to_mutate][1][0]
        else:
            ref_allele_start_coord = ORF_info[aa_num][1][0]
    else:
        if upstream:
            ref_allele_start_coord = ORF_info[aa_num][1][2]
        else:
            ref_allele_start_coord = ORF_info[
                aa_num + synonymous_codons_to_mutate][1][2]
    mutated_region_length = 3 + synonymous_codons_to_mutate * 3  ## includes the amino acid to be mutated
    # Split whatever is left of DONOR_LENGTH between the two homology arms;
    # the right arm gets the extra position when the remainder is odd.
    homologous_arm_length = DONOR_LENGTH - mutated_region_length
    left_donor_length = homologous_arm_length // 2
    right_donor_length = homologous_arm_length - left_donor_length
    left_donor = genome_seq[ORF_chrom][
        ref_allele_start_coord - left_donor_length:ref_allele_start_coord]
    right_donor = genome_seq[ORF_chrom][
        ref_allele_start_coord + mutated_region_length:ref_allele_start_coord +
        mutated_region_length + right_donor_length]
    # Same arms, each reaching 200 positions further out (control donor only).
    extended_left_donor = genome_seq[ORF_chrom][ref_allele_start_coord -
                                                left_donor_length -
                                                200:ref_allele_start_coord]
    extended_right_donor = genome_seq[ORF_chrom][
        ref_allele_start_coord + mutated_region_length:ref_allele_start_coord +
        mutated_region_length + right_donor_length + 200]
    # Work on a slice copy so that the trial assignments below go to
    # temp_ORF_seq rather than to ORF_seq.
    temp_ORF_seq = ORF_seq[:]
    for idx in range(1, synonymous_codons_to_mutate + 1):
        # Walk outwards from aa_num, one neighbouring codon per iteration.
        if upstream:
            current_aa_num = aa_num - idx
            current_aa_idx = current_aa_num - 1
        else:
            current_aa_num = aa_num + idx
            current_aa_idx = current_aa_num - 1
        codon, aa_coords, aa = ORF_info[current_aa_num]
        # Defaults: keep the original codon unless a better candidate passes.
        synonymous_codon_with_largest_hamming_dist = codon
        largest_hamming_dist_for_this_aa = 0
        for other_codon in suboptimal_removed_aa_to_codon[aa]:
            # Trial substitution; the final choice is written back after the loop.
            temp_ORF_seq[current_aa_idx] = other_codon
            hamming_distance_for_this_codon = bedgraph_computation.hamming_distance(
                other_codon, codon)
            query_donor = assemble_mutated_donor_sequence(
                ORF_strand, left_donor, right_donor, temp_ORF_seq, aa_num,
                synonymous_codons_to_mutate, upstream)
            ## checking for internal restriction sites with synonymous changes
            if hamming_distance_for_this_codon > largest_hamming_dist_for_this_aa and INTERNAL_RESTRICTION_SITE not in query_donor and rc_INTERNAL_RESTRICTION_SITE not in query_donor:
                temp_ORF_seq_check_all_target_codons = temp_ORF_seq[:]
                all_target_codons_clear_of_BspQI_introduction = True
                # Try every possible target codon at aa_num with only short
                # flanks (site length - 4 on each side) attached.
                for target_codon in codon_to_aa:
                    temp_ORF_seq_check_all_target_codons[aa_idx] = target_codon
                    check_all_target_codons_query_donor = assemble_mutated_donor_sequence(
                        ORF_strand,
                        left_donor[-(INTERNAL_RESTRICTION_SITE_LENGTH - 4):],
                        right_donor[:INTERNAL_RESTRICTION_SITE_LENGTH - 4],
                        temp_ORF_seq_check_all_target_codons, aa_num,
                        synonymous_codons_to_mutate, upstream)
                    ## if possible, remove synonymous codon changes that would generate a restriction site with the target codon and proximal upstream/downstream sequence
                    if INTERNAL_RESTRICTION_SITE in check_all_target_codons_query_donor or rc_INTERNAL_RESTRICTION_SITE in check_all_target_codons_query_donor:
                        all_target_codons_clear_of_BspQI_introduction = False
                if not all_target_codons_clear_of_BspQI_introduction:
                    print('restriction site prevented')
                if all_target_codons_clear_of_BspQI_introduction:
                    largest_hamming_dist_for_this_aa = hamming_distance_for_this_codon
                    synonymous_codon_with_largest_hamming_dist = other_codon
        # Commit the best candidate found (or the original codon).
        temp_ORF_seq[
            current_aa_idx] = synonymous_codon_with_largest_hamming_dist
    control_donor = assemble_mutated_donor_sequence(
        ORF_strand, extended_left_donor, extended_right_donor, temp_ORF_seq,
        aa_num, synonymous_codons_to_mutate, upstream)
    # NOTE(review): mutated_region below is computed but never read afterwards.
    if upstream:
        mutated_region = ''.join(
            temp_ORF_seq[aa_idx - synonymous_codons_to_mutate:aa_idx + 1])
    else:
        mutated_region = ''.join(temp_ORF_seq[aa_idx:aa_idx +
                                              synonymous_codons_to_mutate + 1])
    if ORF_strand == '-':
        mutated_region = bedgraph_computation.rev_comp(mutated_region)
    mutated_region_chromosomal_range = ref_allele_start_coord, ref_allele_start_coord + mutated_region_length
    # ORF and codon_to_aa are not parameters of this function; they are
    # resolved from the enclosing scope.
    donor_library = generate_donors(left_donor, right_donor, ORF, ORF_strand,
                                    synonymous_codons_to_mutate, aa_num,
                                    codon_to_aa, ORF_seq, temp_ORF_seq,
                                    ORF_info, ORF_chrom,
                                    ref_allele_start_coord, upstream)
    return control_donor, donor_library, mutated_region_chromosomal_range
Пример #8
0
def generate_donors(left_donor, right_donor, ORF, ORF_strand,
                    synonymous_codons_to_mutate, aa_num, codon_to_aa, ORF_seq,
                    temp_ORF_seq, ORF_info, ORF_chrom, ref_allele_start_coord,
                    upstream):
    '''
    Build one donor per codon in codon_to_aa, substituted at position aa_num.

    temp_ORF_seq already carries the synonymous recoding of the
    synonymous_codons_to_mutate codons upstream (upstream truthy) or
    downstream of aa_num; ORF_seq is the unmodified codon sequence.
    ORF_info[aa_num] is unpacked as (codon, aa_coords, aa).

    returns a dict keyed by donor name with values
        (codon_pool, full_donor_seq, vcf_fields)
    where
        vcf_fields = (ORF_chrom, ref_allele_start_coord, WT_region,
                      mutated_region, donor_name)
    and the donor name is the '_'-joined list
        ORF, aa change (e.g. Y4F), 'control' or 'variant', codon change
        (e.g. ACG4GAC), '<direction>_synonymous_changes:', the synonymous
        aa labels (e.g. C5C,G6G) and the synonymous codon changes
        (e.g. CCG5CCA,...).

    codon_pool is 0 for variant codons found in the module-level
    highest_frequency_codons and 1 for all other variants. The donor whose
    target codon equals the wild-type codon is typed 'control' and stored
    with pool 1.
    '''
    aa_idx = aa_num - 1
    WT_target_codon, WT_target_aa_coords, WT_target_aa = ORF_info[aa_num]

    # Everything that depends only on the direction is decided once.
    if upstream:
        neighbour_aa_nums = range(aa_num - synonymous_codons_to_mutate, aa_num)
        region = slice(aa_idx - synonymous_codons_to_mutate, aa_idx + 1)
        spreading_direction = 'upstream_synonymous_changes:'
    else:
        neighbour_aa_nums = range(aa_num + 1,
                                  aa_num + synonymous_codons_to_mutate + 1)
        region = slice(aa_idx, aa_idx + synonymous_codons_to_mutate + 1)
        spreading_direction = 'downstream_synonymous_changes:'

    # Labels describing the synonymous recoding shared by every donor.
    synonymous_aa = []
    synonymous_codons = []
    for neighbour_aa_num in neighbour_aa_nums:
        codon, aa_coords, aa = ORF_info[neighbour_aa_num]
        synonymous_codon = temp_ORF_seq[neighbour_aa_num - 1]
        synonymous_codons.append(codon + str(neighbour_aa_num) +
                                 synonymous_codon)
        synonymous_aa.append(aa + str(neighbour_aa_num) + aa)
    synonymous_codons_name = ','.join(synonymous_codons)
    synonymous_aa_name = ','.join(synonymous_aa)

    ## generate donor and donor names
    donor_library = {}
    for mutant_target_codon in codon_to_aa:
        temp_ORF_seq_for_target_codon = temp_ORF_seq[:]
        temp_ORF_seq_for_target_codon[aa_idx] = mutant_target_codon
        mutated_region = ''.join(temp_ORF_seq_for_target_codon[region])
        WT_region = ''.join(ORF_seq[region])
        if ORF_strand == '-':
            mutated_region = bedgraph_computation.rev_comp(mutated_region)
            WT_region = bedgraph_computation.rev_comp(WT_region)

        is_control = mutant_target_codon == WT_target_codon
        donor_type = 'control' if is_control else 'variant'
        aa_change = WT_target_aa + str(
            aa_num) + codon_to_aa[mutant_target_codon]
        codon_change = WT_target_codon + str(aa_num) + mutant_target_codon
        donor_name = '_'.join([
            ORF, aa_change, donor_type, codon_change, spreading_direction,
            synonymous_aa_name, synonymous_codons_name
        ])
        full_donor_seq = assemble_mutated_donor_sequence(
            ORF_strand, left_donor, right_donor, temp_ORF_seq_for_target_codon,
            aa_num, synonymous_codons_to_mutate, upstream)
        vcf_fields = (ORF_chrom, ref_allele_start_coord, WT_region,
                      mutated_region, donor_name)

        if is_control:
            # NOTE(review): the previous code stored pool 0 and then
            # immediately overwrote the same key with pool 1, so only pool 1
            # ever survived. If the control is meant to be present in both
            # pools it needs two distinct keys -- confirm with the consumer
            # of donor_library before changing the stored value.
            codon_pool = 1
        elif mutant_target_codon in highest_frequency_codons:
            codon_pool = 0
        else:
            codon_pool = 1
        donor_library[donor_name] = codon_pool, full_donor_seq, vcf_fields
    return donor_library
Пример #9
0
## Module-level configuration for the donor/guide design code.
## NOTE(review): REF_DIR and DIR are absolute paths on one user's machine.
REF_DIR = '/Users/kevinroy/Dropbox/'
DIR = '/Users/kevinroy/Dropbox/steinmetz_lab/CRISPR_saturation_genome_editing/'
## Design switches and control over-representation factor; their consumers
## are not shown next to these definitions.
STOP_CODONS_INCLUDED = True
MUTATE_INITIATING_METHIONINE = False
MUTATE_STOP_CODON = True
FOLD_OVERREPRESENTATION_OF_PSEUDO_WT_CONTROLS = 10

## Lengths, counted in sequence positions.
OLIGO_LENGTH = 210
GUIDE_LENGTH = 20
SUBPOOL_WINDOW_SIZE = 110

## A guide containing this many consecutive 'T' characters is rejected.
NUM_CONSECUTIVE_T_DISALLOWED = 6
MIN_GUIDE_DONOR_DISRUPTION_SCORE = 6
## PAM pattern; candidate windows are compared against it with
## bedgraph_computation.hamming_distance (presumably 'N' matches any base --
## TODO confirm in hamming_distance).
PAM = 'NGG'
rc_PAM = bedgraph_computation.rev_comp(PAM)
PAM_length = len(PAM)

## Restriction sites at the oligo ends and the internal site that donors must
## not contain (on either strand).
END_RESTRICTION_SITES = 'GGCGCGCC', 'GCGGCCGC'
INTERNAL_RESTRICTION_SITE = 'GCTCTTC'  ## , 'GGTCTC' BsaI site in AmpR ORF would need to be removed
rc_INTERNAL_RESTRICTION_SITE = bedgraph_computation.rev_comp(
    INTERNAL_RESTRICTION_SITE)
INTERNAL_RESTRICTION_SITE_LENGTH = len(INTERNAL_RESTRICTION_SITE)
MINIMUM_HOMOLOGY = 20

## fwd priming sequence: GGACTTTggcgcgcc (stored uppercased)
FWD_PRIMING_SEQUENCE = 'GGACTTTggcgcgcc'.upper()
## internal cloning site will be BspQI_cloning_site = 'GTTTGAAGAGC'; a backup would be 'GGTCTC' (GTTTAgagacc) for regions that contain the BspQI site
INTERNAL_CLONING_SITE = 'gtttgaagagc'.upper(
)  ## 'GTTTGAAGAGC'  ##  'gtttGAAGAGCGCTCTTCacga'
## a future alternative would be the BsaI site: GGTCTC, which would give the following internal cloning site: gtttaGAGACC  (BsaI cuts 1 and 5 nt away from its recognition site)
Пример #10
0
                     sequence_type, region_type, gene_id, gene_name
                 ]
                 feature_annotation = '_'.join(
                     [str(e) for e in feature_annotation])
                 complete_info = feature_info + seq_info + [
                     pA_annotation
                 ] + [feature_annotation]
                 coord_to_info[pA_coord] = complete_info
             output = [reads] + complete_info
             output = '\t'.join([str(e) for e in output]) + '\n'
             annotated_pA_sites.write(output)
 strand = '-'
 for chrom in minus_strand:
     for coord in sorted(list(minus_strand[chrom])):
         if coord > 51 and coord < (chrom_lengths[chrom] - 51):
             US50 = bedgraph_computation.rev_comp(
                 R64_genome[chrom][coord:coord + 50])
             DS50 = bedgraph_computation.rev_comp(
                 R64_genome[chrom][coord - 50:coord])
             reads = minus_strand[chrom][coord]
             pA_coord = (chrom, coord, strand)
             if pA_coord in coord_to_info:
                 complete_info = coord_to_info[pA_coord]
             else:
                 downstream_19 = bedgraph_computation.rev_comp(
                     R64_genome[chrom][coord - 19:coord])
                 downstream_6 = bedgraph_computation.rev_comp(
                     R64_genome[chrom][coord - 6:coord])
                 A19 = downstream_19.count('A')
                 G19 = downstream_19.count('G')
                 A6 = downstream_6.count('A')
                 G6 = downstream_6.count('G')
def get_sense_and_antisense_PAMs_for_ORF(
        genome_seq,
        ORF_chrom,
        ORF_exon_coords,
        GUIDE_LENGTH,
        PAM_length,
        NUM_CONSECUTIVE_T_DISALLOWED,
        MOST_T_IN_LAST_X_BP,
        azimuth_target_seq_to_BLAST_mismatch_counts,
        max_m0=0,
        max_m1=0,
        max_m2=0,
        BP_FLANKING_ORF=20,
        RESTRICTION_SITES_DISALLOWED=('GGCGCGCC', 'GCGGCCGC', 'GCTCTTC'),
        FWD_RESTRICTION_SITE_FRAGMENT='CGCC'):
    '''
    Collect every usable PAM site on both strands across an ORF and its flanks.

    The scan covers each coordinate from
        min(first, last exon coord) - GUIDE_LENGTH - PAM_length - BP_FLANKING_ORF
    up to (not including)
        max(first, last exon coord) + GUIDE_LENGTH + PAM_length + BP_FLANKING_ORF
    on genome_seq[ORF_chrom].

    At each coordinate the PAM_length window is tested as a plus-strand PAM
    (guide = the GUIDE_LENGTH positions before it) and, reverse-complemented,
    as a minus-strand PAM (guide = reverse complement of the GUIDE_LENGTH
    positions after it). A candidate is kept when all of the following hold:
      * FWD_RESTRICTION_SITE_FRAGMENT + guide + 'GTTT' contains none of
        RESTRICTION_SITES_DISALLOWED nor their reverse complements;
      * the window has hamming distance 0 to the module-level PAM;
      * the guide has no run of NUM_CONSECUTIVE_T_DISALLOWED 'T';
      * the last MOST_T_IN_LAST_X_BP[1] guide positions hold at most
        MOST_T_IN_LAST_X_BP[0] 'T';
      * the mismatch counts looked up for the extended target sequence in
        azimuth_target_seq_to_BLAST_mismatch_counts satisfy
        m0 <= max_m0, m1 <= max_m1 and m2 <= max_m2.

    returns (plus_strand_PAMs, minus_strand_PAMs), two dicts keyed by
    coordinate with values
        (PAM_strand, PAM_seq, PAM_coord, guide_seq, guide_seq_with_PAM)

    raises KeyError if an extended target sequence that passed the sequence
    filters is missing from azimuth_target_seq_to_BLAST_mismatch_counts.
    '''
    rev_comp = bedgraph_computation.rev_comp

    start_coord = ORF_exon_coords[0]
    end_coord = ORF_exon_coords[-1]
    if end_coord < start_coord:
        start_coord, end_coord = end_coord, start_coord

    chrom_seq = genome_seq[ORF_chrom]
    max_T_in_tail = MOST_T_IN_LAST_X_BP[0]
    tail_length = MOST_T_IN_LAST_X_BP[1]
    poly_T = NUM_CONSECUTIVE_T_DISALLOWED * 'T'

    # Forward and reverse-complement forms of every disallowed site, computed
    # once instead of once per coordinate.
    forbidden_sites = []
    for site in RESTRICTION_SITES_DISALLOWED:
        forbidden_sites.append(site)
        forbidden_sites.append(rev_comp(site))

    def _guide_is_acceptable(pam_seq, guide_seq):
        # Sequence-level filters, applied in the original order.
        cloning_context = FWD_RESTRICTION_SITE_FRAGMENT + guide_seq + 'GTTT'
        if any(site in cloning_context for site in forbidden_sites):
            return False
        if bedgraph_computation.hamming_distance(pam_seq, PAM) != 0:
            return False
        if poly_T in guide_seq:
            return False
        return guide_seq[-tail_length:].count('T') <= max_T_in_tail

    def _passes_mismatch_filter(azimuth_seq):
        # Only the first three mismatch classes are thresholded.
        m0, m1, m2, _m3, _m4, _m5 = azimuth_target_seq_to_BLAST_mismatch_counts[
            azimuth_seq]
        return m0 <= max_m0 and m1 <= max_m1 and m2 <= max_m2

    plus_strand_PAMs = {}
    minus_strand_PAMs = {}
    ## PAM_strand, PAM_seq, PAM_coord, guide_seq, guide_seq_with_PAM
    for coord in range(
            start_coord - GUIDE_LENGTH - PAM_length - BP_FLANKING_ORF,
            end_coord + GUIDE_LENGTH + PAM_length + BP_FLANKING_ORF):
        query_seq = chrom_seq[coord:coord + PAM_length]

        # Plus strand: guide precedes the PAM window.
        plus_guide = chrom_seq[coord - GUIDE_LENGTH:coord]
        if _guide_is_acceptable(query_seq, plus_guide):
            # Extended context: 4 positions before the guide, 3 after the PAM.
            plus_azimuth_seq = chrom_seq[coord - GUIDE_LENGTH - 4:coord +
                                         PAM_length + 3]
            if _passes_mismatch_filter(plus_azimuth_seq):
                plus_strand_PAMs[coord] = (
                    '+', query_seq, coord, plus_guide,
                    chrom_seq[coord - GUIDE_LENGTH:coord + PAM_length])

        # Minus strand: same window read from the opposite strand.
        rc_query_seq = rev_comp(query_seq)
        minus_guide = rev_comp(chrom_seq[coord + PAM_length:coord +
                                         PAM_length + GUIDE_LENGTH])
        if _guide_is_acceptable(rc_query_seq, minus_guide):
            minus_azimuth_seq = rev_comp(
                chrom_seq[coord - 3:coord + PAM_length + GUIDE_LENGTH + 4])
            if _passes_mismatch_filter(minus_azimuth_seq):
                minus_strand_PAMs[coord] = (
                    '-', rc_query_seq, coord, minus_guide,
                    rev_comp(chrom_seq[coord:coord + PAM_length +
                                       GUIDE_LENGTH]))

    return plus_strand_PAMs, minus_strand_PAMs