def result(seq):
    """Return True if seq passes both the gap-fraction and the gap-run checks.

    The gap vector of seq (from seq.gapVector()) is compared against
    template_gaps. The names template_gaps, gap_fraction and gap_run are
    not defined in this function; presumably they are supplied by an
    enclosing scope -- confirm against the surrounding code.

    Returns False as soon as any of the following holds, checked in order:
      1. the fraction of positions whose gap state differs from the
         template exceeds gap_fraction;
      2. seq is gapped where the template is not for gap_run
         consecutive positions (a gap run);
      3. the template is gapped where seq is not for gap_run
         consecutive positions (an insertion run).
    """
    observed_gaps = array(seq.gapVector())

    # Gap amount: positions that disagree with the template, as a fraction
    # of the sequence length.
    n_different = sum(observed_gaps != template_gaps)
    if n_different / float(len(seq)) > gap_fraction:
        return False

    # Runs are detected by serialising the boolean vector to one byte per
    # position and searching for gap_run consecutive one-bytes.
    # NOTE(review): the str-in-tostring() test looks like it assumes
    # Python 2 byte strings -- confirm before porting.
    run_pattern = "\x01" * gap_run

    # Gap runs: gapped in seq but not in the template.
    extra_gaps = logical_and(observed_gaps, logical_not(template_gaps))
    if run_pattern in extra_gaps.astype(UInt8).tostring():
        return False

    # Insertion runs: gapped in the template but not in seq.
    insertions = logical_and(template_gaps, logical_not(observed_gaps))
    return run_pattern not in insertions.astype(UInt8).tostring()
def masked_to_unmasked(mask, remove_mask=False):
    """Map each index of the original sequence to an index in the unmasked one.

    For every position in the original (masked) coordinate system, the
    result holds the index, within the unmasked sequence, of the last
    non-masked character at or before that position. A masked position
    therefore reports the index of the nearest preceding non-masked
    position, since masked positions do not exist in the unmasked
    sequence. Masked positions that come before the first non-masked
    position have no predecessor and come out as -1.

    mask: sequence of truth values, true where a position is masked.
    remove_mask: when true (default False), every masked position is set
        to -1 so that it is easy to detect.
    """
    # Running count of unmasked positions, shifted to zero-based indices.
    unmasked_index = cumsum(logical_not(mask)) - 1
    if not remove_mask:
        return unmasked_index
    return where(mask, -1, unmasked_index)
def unmasked_to_masked(mask):
    """Map each index of the unmasked sequence to its index in the original.

    The result has one entry per non-masked position, in order, holding
    that position's index in the original sequence. Positions where mask
    is true are left out entirely.
    """
    keep = logical_not(mask)
    original_indices = arange(len(mask))
    return compress(keep, original_indices)