def generate_inference_data(args):
    """
    Build inference examples for a single region.

    Parameters
    ----------
    args : tuple
        ``(reads_path, ref, region)`` packed into one argument:
        reads_path - path to the aligned reads file,
        ref - reference sequence,
        region - object exposing ``name``, ``start`` and ``end``.

    Returns
    -------
    region_name : name of the processed region
    positions : position windows returned by the feature generator
    examples : feature windows, paired one-to-one with ``positions``
    """
    reads_path, ref, region = args

    # The start is shifted by one when building the query string
    # (presumably 0-based -> 1-based; confirm against gen.generate_features).
    query = f'{region.name}:{region.start + 1}-{region.end}'
    features = gen.generate_features(reads_path, ref, query)

    # Pair up the parallel sequences once, then split them back into two lists.
    windows = list(zip(*features))
    positions = [window_positions for window_positions, _ in windows]
    examples = [window_features for _, window_features in windows]

    print(f'>> finished generating examples for {region.name}:{region.start}-{region.end}')
    return region.name, positions, examples
def generate_train(args):
    """
    Generate labelled training examples for one region.

    Parameters
    ----------
    args : tuple
        ``(bam_X, bam_Y, ref, region)`` packed into one argument:
        bam_X - alignment handed to ``gen.generate_features``,
        bam_Y - alignment handed to ``get_aligns`` to derive the labels,
        ref - reference; passed as-is to ``get_pos_and_labels`` and as
              ``str(ref)`` to the feature generator,
        region - object exposing ``name``, ``start`` and ``end``.

    Returns
    -------
    ``None`` when ``filter_aligns`` leaves no alignments; otherwise the tuple
    ``(region.name, positions, examples, labels)`` where the three lists are
    parallel (one entry per emitted window).

    Raises
    ------
    KeyError
        If a window position whose second component is 0 has no label.
    """
    bam_X, bam_Y, ref, region = args

    alignments = get_aligns(bam_Y, ref_name=region.name, start=region.start, end=region.end)
    filtered = filter_aligns(alignments)
    print(f'Finished generating labels for {region.name}:{region.start}-{region.end}.')

    if not filtered:
        print('No alignments.')
        return None

    positions, examples, labels = [], [], []

    for a in filtered:
        pos_labels = {}
        n_pos = set()  # positions labelled ENCODED_UNKNOWN; windows touching them are dropped

        t_pos, t_labels = get_pos_and_labels(a, ref, region)
        for p, label in zip(t_pos, t_labels):
            if label == ENCODED_UNKNOWN:
                n_pos.add(p)
            else:
                pos_labels[p] = label

        # An alignment without a single usable label cannot define a span to
        # query; skip it instead of failing with IndexError on an empty list.
        if not pos_labels:
            continue

        # Only the extreme labelled positions are needed, so a full sort is
        # unnecessary.
        first, last = min(pos_labels), max(pos_labels)
        region_string = f'{region.name}:{first[0]+1}-{last[0]}'
        result = gen.generate_features(bam_X, str(ref), region_string)

        for P, X in zip(*result):
            Y = []
            for p in P:
                # Internal sanity check: every generated position must lie in
                # one of the filtered alignments.
                assert is_in_region(p[0], filtered)

                if p in n_pos:
                    # Window touches an unknown base: discard the whole window.
                    break

                try:
                    y_label = pos_labels[p]
                except KeyError:
                    # Unlabelled positions with a non-zero second component
                    # are labelled as gaps; anything else is an error.
                    if p[1] != 0:
                        y_label = encoding[GAP]
                    else:
                        raise KeyError(f'No label mapping for position {p}.')

                Y.append(y_label)
            else:
                # Reached only when the inner loop did not break.
                positions.append(P)
                examples.append(X)
                labels.append(Y)

    print(f'Finished generating examples for {region.name}:{region.start}-{region.end}.')
    return region.name, positions, examples, labels
def generate_infer(args):
    """
    Generate unlabelled examples for one region.

    Parameters
    ----------
    args : tuple
        ``(bam_X, ref, region)`` packed into one argument; ``region`` must
        expose ``name``, ``start`` and ``end``.

    Returns
    -------
    ``(region.name, positions, examples, None)`` - the trailing ``None``
    fills the slot where ``generate_train`` returns labels.
    """
    bam_X, ref, region = args

    # The start is shifted by one when building the query string
    # (presumably 0-based -> 1-based; confirm against gen.generate_features).
    query = f'{region.name}:{region.start + 1}-{region.end}'
    features = gen.generate_features(bam_X, ref, query)

    # Pair up the parallel sequences once, then split them back into two lists.
    windows = list(zip(*features))
    positions = [window_positions for window_positions, _ in windows]
    examples = [window_features for _, window_features in windows]

    print(f'Finished generating examples for {region.name}:{region.start}-{region.end}.')
    return region.name, positions, examples, None
def generate_train_data(args):
    """
    Generates train data for the region provided through arguments.

    Parameters
    ----------
    args : tuple
        ``(reads_path, truth_genome_path, ref, region)`` packed into one
        argument:
        reads_path - path to the aligned reads file,
        truth_genome_path - path to the truth genome,
        ref - reference sequence,
        region - region for which data is required (exposes ``name``,
                 ``start`` and ``end``).

    Returns
    -------
    ``None`` when ``filter_aligns`` leaves no alignments; otherwise:
    region_name : region name
    positions : positions corresponding to the provided region
    examples : examples corresponding to the provided region
    labels : labels corresponding to the provided region

    Raises
    ------
    KeyError
        If a window position whose second component is 0 has no label.
    """
    reads_path, truth_genome_path, ref, region = args

    aligns = get_aligns(truth_genome_path, region)
    filtered_aligns = filter_aligns(aligns)
    print(f'>> finished generating labels for {region.name}:{region.start}-{region.end}')

    if not filtered_aligns:
        print('>> no alignments')
        return None

    positions, examples, labels = [], [], []

    # Loop-invariant: encode the "unknown" symbol once instead of per label.
    unknown_label = Coder.encode(Coder.UNKNOWN)

    for align in filtered_aligns:
        position_label_dict = {}
        positions_with_unknown_base = set()

        pos, lbls = get_postions_and_labels(align, ref, region)
        for position, label in zip(pos, lbls):
            if label == unknown_label:
                positions_with_unknown_base.add(position)
            else:
                position_label_dict[position] = label

        # An alignment without a single usable label cannot define a span to
        # query; skip it instead of failing with IndexError on an empty list.
        if not position_label_dict:
            continue

        # Only the extreme labelled positions are needed, so a full sort is
        # unnecessary.
        first = min(position_label_dict)
        last = max(position_label_dict)
        region_string = f'{region.name}:{first[0] + 1}-{last[0]}'
        result = gen.generate_features(reads_path, str(ref), region_string)

        for P, X in zip(*result):
            Y = []
            for p in P:
                # Internal sanity check: every generated position must lie in
                # one of the filtered alignments.
                assert is_in_region(p[0], filtered_aligns)

                if p in positions_with_unknown_base:
                    # Window touches an unknown base: discard the whole window.
                    break

                try:
                    y_label = position_label_dict[p]
                except KeyError:
                    # Unlabelled positions with a non-zero second component
                    # are labelled as gaps; anything else is an error.
                    if p[1] != 0:
                        y_label = Coder.encode(Coder.GAP)
                    else:
                        raise KeyError(f'error: No label mapping for position {p}!')

                Y.append(y_label)
            else:
                # Reached only when the inner loop did not break.
                positions.append(P)
                examples.append(X)
                labels.append(Y)

    print(f'>> finished generating examples for {region.name}:{region.start}-{region.end}')
    return region.name, positions, examples, labels
import gmplot from gmplot import GoogleMapPlotter as gmp import gen from sklearn.externals import joblib from collections import Counter from sklearn.preprocessing import minmax_scale clf = joblib.load('classifier.pkl') cameras = ['116.avi', '117.avi', '118.avi'] traffic = [] for c in cameras: count, density = gen.generate_features(c) traffic_stat = clf.predict([count, density]) if traffic_stat == 2: print(c, ' : High traffic') elif traffic_stat == 1: print(c, ' : Medium traffic') else: print(c, ' : Low traffic') traffic.append(traffic_stat) count = [traffic.count(0), traffic.count(1), traffic.count(2)] status = count.index(max(count)) color = ['green', 'yellow', 'red'] gmap = gmp.from_geocode( "Veermata Jijabai Technological Institute, Mumbai, Maharashtra") gmap.scatter([19.018892, 19.024714], [72.855786, 72.856964], color[status],