def main():
    dataset = 'CB1'
    neuropil = 'fake'
    filenames = util.get_filenames(dataset, neuropil)
    calculate_distance.save_distances(filenames['h5'], filenames['distance_npy'])
    D = np.load(filenames['distance_npy'])
    k = 3
    clusters, curr_medoids = kMedoids.cluster(D, k=k)
    cluster_type = 'K%02d_dicedist' % (k,)
    filenames = util.get_filenames(dataset, neuropil, cluster_type=cluster_type)
    nrrd_fname = filenames['clustering_result_nrrd']
    save_to_nrrd(filenames['h5'], clusters, nrrd_fname)
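# NOTE (added sketch): the kMedoids module used above is not shown here. A
# minimal k-medoids over a precomputed distance matrix D might look like the
# following; the (labels, medoids) return shape matches how the caller
# unpacks `clusters, curr_medoids`, but the real implementation is assumed.
import numpy as np

def cluster(D, k, max_iter=100, seed=None):
    """Hypothetical k-medoids on an n x n distance matrix D."""
    rng = np.random.default_rng(seed)
    n = D.shape[0]
    medoids = rng.choice(n, size=k, replace=False)
    for _ in range(max_iter):
        # assign each point to its nearest medoid
        labels = np.argmin(D[:, medoids], axis=1)
        new_medoids = medoids.copy()
        for ci in range(k):
            # move the medoid to the member minimizing total intra-cluster distance
            members = np.flatnonzero(labels == ci)
            if members.size:
                within = D[np.ix_(members, members)].sum(axis=1)
                new_medoids[ci] = members[np.argmin(within)]
        if np.array_equal(new_medoids, medoids):
            break
        medoids = new_medoids
    return labels, medoids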
def main():
    # get command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--probability", type=float, default=TRAIN_PROP)
    args = parser.parse_args()

    # parse the file and sample
    files = get_filenames()
    data_parser = parse(file(files.dataset))
    training_set, testing_set = sample(data_parser, args.probability)

    # write to file as json
    with file(files.train, 'w') as train_file:
        json.dump(training_set, train_file)
    print 'Dumped training set to %s,' % files.train,
    print 'size', len(training_set)
    with file(files.test, 'w') as test_file:
        json.dump(testing_set, test_file)
    print 'Dumped test set to %s,' % files.test,
    print 'size', len(testing_set)

    # check the file is written correctly
    with file(files.train, 'r') as train_file:
        assert json.load(train_file) == training_set
        print 'Training file check OK.'
    with file(files.test, 'r') as test_file:
        assert json.load(test_file) == testing_set
        print 'Test file check OK.'
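# NOTE (added sketch): `sample` is not shown. A plausible implementation,
# matching the call `sample(data_parser, args.probability)` above, assigns
# each record to the training set with the given probability; this is an
# assumption, not the original code.
import random

def sample(records, train_prop):
    """Hypothetical split: each record lands in the training set with
    probability train_prop, otherwise in the test set."""
    training, testing = [], []
    for record in records:
        (training if random.random() < train_prop else testing).append(record)
    return training, testing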
def _fill_missing_args(args):
    (width, height) = determine_shape(args, args.projections, store=False)
    args.center_position_x = (args.center_position_x or [width / 2.])
    args.center_position_z = (args.center_position_z or [height / 2.])
    if not args.overall_angle:
        args.overall_angle = 360.
        LOG.info('Overall angle not specified, using 360 deg')
    if not args.number:
        if len(args.axis_angle_z) > 1:
            LOG.debug("--number not specified, using length of --axis-angle-z: %d",
                      len(args.axis_angle_z))
            args.number = len(args.axis_angle_z)
        else:
            num_files = len(get_filenames(args.projections))
            if not num_files:
                raise RuntimeError("No files found in `{}'".format(args.projections))
            LOG.debug("--number not specified, using number of files matching "
                      "--projections pattern: %d", num_files)
            args.number = num_files
    if args.dry_run:
        if not args.number:
            raise ValueError('--number must be specified with --dry-run')
        determine_shape(args, args.projections, store=True)
        LOG.info('Dummy data W x H x N: {} x {} x {}'.format(args.width, args.height,
                                                             args.number))
    return args
def main():
    dataset_functions = {
        'bp': process_bp,
        'gn1': process_gn1,
        'other': process_other,
    }
    ap = argparse.ArgumentParser()
    ap.add_argument('INPUT', help='Input directory (html files)')
    ap.add_argument('OUTPUT', help='Output directory')
    ap.add_argument('DATASET', choices=dataset_functions.keys(), help='Dataset type')
    ap.add_argument('--prefix', help='Add a prefix to the file names.')
    args = ap.parse_args()

    os.makedirs(args.OUTPUT, exist_ok=True)
    for f in tqdm(util.get_filenames(args.INPUT, '.html')):
        try:
            with open(f, 'rb') as hfile:
                doc = BeautifulSoup(hfile, features='html5lib')
            # for some reason, parsing malformed HTML twice works better
            doc2 = BeautifulSoup(doc.prettify(), features='html5lib')
            process(doc2, dataset_functions[args.DATASET])
            f_name = os.path.basename(f)
            if args.prefix:
                f_name = args.prefix + f_name
            with open(os.path.join(args.OUTPUT, f_name), 'w',
                      encoding='utf-8') as hfile:
                hfile.write(doc2.prettify())
        except Exception:
            tqdm.write('error processing {}'.format(f))
def main():
    # src_path = sys.argv[1]
    # print('src_path ', src_path)
    # filenames = get_filenames(src_path)
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", type=int, choices=range(100),
                        help="index for single input image")
    parser.add_argument("-t", "--tmin", type=float, default=0.2,
                        help="minimum transmission rate")
    parser.add_argument("-A", "--Amax", type=int, default=220,
                        help="maximum atmosphere light")
    parser.add_argument("-w", "--window", type=int, default=15,
                        help="window size of dark channel")
    parser.add_argument("-r", "--radius", type=int, default=40,
                        help="radius of guided filter")
    parser.add_argument("-p", "--path", type=str, default='1',
                        help="path of dataset")
    args = parser.parse_args()

    src_path = 'img/' + args.path
    print('src_path ', src_path)
    filenames = get_filenames(src_path)
    '''if args.input is not None:
        src, dest = filenames[args.input]
        dest = dest.replace("%s", "%s-%d-%d-%d-%d" % ("%s", args.tmin * 100,
                                                      args.Amax, args.window,
                                                      args.radius))
        generate_results(src, dest, partial(dehaze, tmin=args.tmin,
                                            Amax=args.Amax, w=args.window,
                                            r=args.radius))
    else:'''
    if len(src_path) > 0:  # dummy case to make default indentation work
        '''for idx in SP_IDX:
            src, dest = filenames[idx]
            for param in SP_PARAMS:
                newdest = dest.replace("%s", "%s-%d-%d-%d-%d" % ("%s",
                                       param['tmin'] * 100, param['Amax'],
                                       param['w'], param['r']))
                generate_results(src, newdest, partial(dehaze, **param))'''
        for src, dest in filenames:
            generate_results(src, dest, dehaze)
def create_data():
    names_set = find_names()
    site = {}
    for file in glob.glob("../data/documents/*.txt"):
        name = file.split('.txt')[0].split('/')[-1]
        if name in names_set:
            site[name] = create_link(name)
            print(name)

    files = {}
    for fn in util.get_filenames('../data/documents'):
        with open(fn) as file:
            text = util.cut_away_sources(file.read())
            files[fn.name.split('.txt')[0]] = text

    # search for occurrences of people in all documents
    occ = collections.defaultdict(list)
    for name in names_set:
        for page_name, text in files.items():
            if name in text:
                occ[name].append(create_link(page_name))
    print(occ)

    merged = {}
    print(site)
    print(site.keys())

    # # take filenames as special interest and search in all files
    # remaining_files = [f for f in files.keys() if f not in occ.keys()]
    # print(remaining_files)
    # for name in remaining_files:
    #     for page_name, text in files.items():
    #         if name in text:
    #             occ[name].append(create_link(page_name))

    # create final data object
    for name, pages in occ.items():
        if name in site.keys():
            # remove profile from other pages
            if site[name] in pages:
                pages.remove(site[name])
            merged[name] = {'pages': pages, 'profile': site[name]}
        else:
            merged[name] = {'pages': pages}

    with open('../extension/data.js', 'w') as outfile:
        json.dump(merged, outfile, ensure_ascii=False)

    with open('../extension/data.js', 'r+') as file:
        text_json = file.read()
        file.seek(0)
        file.write('var data = ' + text_json + ';')
        file.truncate()
def get_gaba_data_dict(filelist):
    """Returns a mapping from GABA conductance to TraubData objects"""
    ret = defaultdict(list)
    for fname in get_filenames(filelist):
        print fname, makepath(fname)
        data = TraubData(makepath(fname))
        gaba = dict(data.fdata['/runconfig/GABA'])
        ret[float(gaba['conductance_scale'])].append(data)
    return ret
def main():
    filenames = get_filenames()
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", type=int, choices=range(len(filenames)),
                        help="index for single input image")
    parser.add_argument("-t", "--tmin", type=float, default=0.2,
                        help="minimum transmission rate")
    parser.add_argument("-A", "--Amax", type=int, default=220,
                        help="maximum atmosphere light")
    parser.add_argument("-w", "--window", type=int, default=15,
                        help="window size of dark channel")
    parser.add_argument("-r", "--radius", type=int, default=40,
                        help="radius of guided filter")
    args = parser.parse_args()

    if args.input is not None:
        src, dest = filenames[args.input]
        dest = dest.replace("%s", "%s-%d-%d-%d-%d" % ("%s", args.tmin * 100,
                                                      args.Amax, args.window,
                                                      args.radius))
        generate_results(src, dest, partial(dehaze, tmin=args.tmin,
                                            Amax=args.Amax, w=args.window,
                                            r=args.radius))
    else:
        for idx in SP_IDX:
            src, dest = filenames[idx]
            for param in SP_PARAMS:
                newdest = dest.replace("%s", "%s-%d-%d-%d-%d" % ("%s",
                                       param['tmin'] * 100, param['Amax'],
                                       param['w'], param['r']))
                generate_results(src, newdest, partial(dehaze, **param))
        for src, dest in filenames:
            generate_results(src, dest, dehaze)
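# NOTE (added sketch): the mains above unpack get_filenames() into
# (src, dest) pairs where dest contains a '%s' placeholder that is later
# replaced with a parameter suffix. A hedged guess at such a helper
# (directory names and the '%s' convention are assumptions):
import os
from glob import glob

def get_filenames(src_dir='img', out_dir='result'):
    """Hypothetical pairing of input images with output path templates."""
    pairs = []
    for src in sorted(glob(os.path.join(src_dir, '*.jpg'))):
        stem, ext = os.path.splitext(os.path.basename(src))
        # '%s' is later replaced with a stage name / parameter suffix
        pairs.append((src, os.path.join(out_dir, stem + '-%s' + ext)))
    return pairs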
def dump_ss_fraction_peaks(flistfilename, trange=(2, 20), cutoff=0.2,
                           binsize=5e-3, lookahead=10):
    """Dump the peaks in the fraction of spiking spiny stellate cells over
    multiple simulations to a CSV file."""
    # data_dict = get_gaba_data_dict(flistfilename)
    peak_frac_med = defaultdict(list)
    peak_frac_mean = defaultdict(list)
    iqr_dict = defaultdict(list)
    csv_fname = 'gaba_scale_ss_frac_cutoff_{}_binwidth_{}ms_lookahead_{}.csv'.format(
        cutoff, binsize * 1000, lookahead)
    with open(csv_fname, 'wb') as fd:
        writer = csv.writer(fd, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerow(('filename', 'gabascale', 'frac_mean', 'frac_med',
                         'frac_iqr'))
        for fname in get_filenames(flistfilename):
            data = TraubData(makepath(fname))
            gaba = dict(data.fdata['/runconfig/GABA'])
            scale = gaba['conductance_scale']
            print fname, gaba
            hist, bins = data.get_spiking_cell_hist('SpinyStellate',
                                                    timerange=trange,
                                                    binsize=binsize,
                                                    frac=True)
            peaks, troughs = peakdetect(hist, bins[:-1], lookahead=lookahead)
            if len(peaks) == 0:
                print 'No peaks for', data.fdata.filename
                writer.writerow((fname, scale, '', '', ''))
                continue
            x, y = zip(*peaks)
            x = np.asarray(x)
            y = np.asarray(y)
            idx = np.flatnonzero(y > cutoff)
            frac_med = ''
            frac_mean = ''
            iqr = ''
            if len(idx) > 0:
                frac_med = np.median(y[idx])
                frac_mean = np.mean(y[idx])
                iqr = np.diff(np.percentile(y[idx], [25, 75]))
                if len(iqr) > 0:
                    iqr = iqr[0]
                else:
                    iqr = ''
            peak_frac_med[scale].append(frac_med)
            peak_frac_mean[scale].append(frac_mean)
            iqr_dict[scale].append(iqr)
            writer.writerow((fname, scale, frac_mean, frac_med, iqr))
    return peak_frac_mean, peak_frac_med, iqr_dict
def plot_vm_fft_multifile(flist, figfile, trange=(2, 20)):
    """Combine the Vm from cells of each celltype in all simulations and
    plot the FT for each celltype.

    Since the FT is linear, we can just sum the FTs of the individual
    simulations to get that of the sum of the Vms.
    """
    celltype_ft_dict = {}
    freq_comp = None
    for fname in get_filenames(flist):
        result = population_vm_spectrum(fname, trange)
        result.pop('TCR')
        for celltype, (freq, ft) in result.items():
            if freq_comp is not None:
                np.testing.assert_array_equal(freq_comp, freq)
            else:
                freq_comp = freq
            if celltype in celltype_ft_dict:
                celltype_ft_dict[celltype] += ft
            else:
                celltype_ft_dict[celltype] = ft
    fig = plt.figure()
    ax = None
    # major_formatter = plt.FormatStrFormatter('%1.1g')
    for ii, celltype in enumerate(CELLTYPES):
        ax = fig.add_subplot(len(celltype_ft_dict), 1, ii + 1, sharex=ax)
        ft = celltype_ft_dict[celltype]
        idx = (freq_comp > 0) & (freq_comp < 100)
        ax.plot(freq_comp[idx], ft[idx], color=cellcolor[celltype],
                label=celltype)
        # print ii, celltype
        # ax.legend()
        # ymax = ax.get_ylim()[1]
        # p10 = np.log10(ymax)
        # ymax = np.ceil(ymax / 10**int(p10)) * 10**int(p10)
        # ax.set_ylim((0, ymax))
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.yaxis.tick_left()
        ax.xaxis.tick_bottom()
        # ax.yaxis.set_major_formatter(major_formatter)
        # ax.set_yticks(ax.get_ylim())
        # ax.xaxis.set_visible(False)
        # plt.setp(ax, frame_on=False)
        # ax.set_visible(True)
        # ax.get_xaxis().set_visible(True)
    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('power')
    # ax.set_yticks([-15, 0, 15])
    ax.tick_params(axis='x', which='major', labelsize=11)
    plt.tight_layout()
    plt.savefig(figfile)
    plt.show()
def total_length(in_dir):
    """Calculate the total length of all files (in seconds)."""
    filenames = []
    for ext in SUPPORTED_FORMAT:
        filenames.extend(util.get_filenames(in_dir, extention=ext))
    print(filenames)
    length = 0
    for fname in filenames:
        fpath = os.path.join(in_dir, fname)
        mas = MyAudioSegment(fpath)
        seconds = mas.length_in_seconds
        length += seconds
        print('Length of {} is {}s'.format(fpath, seconds))
    return length
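# NOTE (added sketch): MyAudioSegment is not shown. A minimal wrapper with
# the attributes used above (name, length_in_seconds), assuming pydub; the
# real class also provides the segmentation methods used below.
import os
from pydub import AudioSegment

class MyAudioSegment:
    """Hypothetical minimal wrapper matching the usage in this file."""
    def __init__(self, fpath, verbose=False):
        self.name = os.path.splitext(os.path.basename(fpath))[0]
        self.audio = AudioSegment.from_file(fpath)

    @property
    def length_in_seconds(self):
        # len() of a pydub AudioSegment is its duration in milliseconds
        return len(self.audio) / 1000.0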
def semengt_by_step(in_dir, out_dir, time_step, offset=0):
    """Segment by time steps (in seconds)."""
    filenames = []
    for ext in SUPPORTED_FORMAT:
        filenames.extend(util.get_filenames(in_dir, extention=ext))
    print(filenames)
    N = 0
    for fname in filenames:
        fpath = os.path.join(in_dir, fname)
        mas = MyAudioSegment(fpath, verbose=True)
        n = mas.segment_second_step(out_dir, time_step, offset)
        N += n
    return N, len(filenames)
def main():
    files = get_filenames()
    x, y = [], []

    # generate the learning curve data
    data = list(parse(file(files.dataset)))
    for train_prop in np.arange(0.1, 0.99, 0.05):
        training_set, testing_set = sample(data, train_prop)
        tree = build_tree(training_set).prune(MIN_GAIN)
        check = [record[RESULT_IDX] == plurality(tree.classify(record))
                 for record in testing_set]
        counter = Counter(check)
        precision = counter[True] / float(counter[True] + counter[False])
        print 'Training set sampling probability = %.2f:' % (train_prop)
        print 'training data size = %d,' % (len(training_set)),
        print 'test data size = %d,' % (len(testing_set)),
        print 'precision = %.4f' % (precision)
        x.append(len(training_set))
        y.append(precision)

    # statistics
    ymean, ystd, ymin, ymax = np.mean(y), np.std(y), np.min(y), np.max(y)
    print 'Mean of precision = %.4f' % (ymean)
    print 'Standard deviation of precision = %.4f' % (ystd)
    print 'Min = %.4f, max = %.4f' % (ymin, ymax)
    xy = sorted(zip(x, y), key=lambda a: a[0])
    x, y = zip(*xy)

    # setup decorations
    plt.rc('font', family='serif')
    plt.yticks(np.arange(0.0, 1.0, 0.1))
    plt.ylim(0.0, 1.0)
    plt.grid(True)
    plt.title('Learning Curve')
    plt.xlabel('Training set size')
    plt.ylabel('Precision on test set')

    # plot smoothed learning curve
    xnew = np.linspace(np.min(x), np.max(x), 100)
    ynew = interp1d(x, y)(xnew)
    plt.plot(x, y, '.', xnew, ynew, '--')

    # annotation
    box = dict(boxstyle='square', fc="w", ec="k")
    txt = '$\mu = %.4f$, $\sigma = %.4f$' % (ymean, ystd)
    txt += ', $min = %.4f$, $max = %.4f$' % (ymin, ymax)
    plt.text(170, 0.05, txt, bbox=box)

    plt.savefig(files.curve)
    print 'Save learning curve to', files.curve
def multifile_firing_rate_distribution(flist='unconnected_network.csv',
                                       figfilename='Figure_3B.svg',
                                       trange=(2, 20)):
    """Plots histograms showing distribution of firing rates among cells
    of each type collected from multiple simulations.
    """
    start = trange[0]
    end = trange[1]
    rates = defaultdict(list)
    for fname in get_filenames(flist):
        data = TraubData(makepath(fname))
        if data.simtime < end:
            end = data.simtime
        for celltype in CELLTYPES:
            for cell, spiketrain in data.spikes.items():
                if cell.startswith(celltype):
                    rate = 1.0 * np.count_nonzero(
                        (spiketrain > start) & (spiketrain < end)) / (end - start)
                    rates[celltype].append(rate)
    bins = np.arange(0, 61.0, 5.0)
    prev = np.zeros(len(bins) - 1)
    ax = None
    for ii, celltype in enumerate(CELLTYPES):
        ctype_rates = rates[celltype]
        ax = plt.subplot(len(rates), 1, ii + 1, sharex=ax, sharey=ax)
        h, b = np.histogram(ctype_rates, bins=bins)
        h = np.asarray(h, dtype='float64') / len(ctype_rates)
        x = bins[:-1]
        plt.bar(x, h, color=cellcolor[celltype], width=(bins[1] - bins[0]))
        # bottom=prev, color=cellcolor[celltype], label=celltype)
        prev += h
        ax.tick_params(axis='y', right=False, left=False)
        # plt.setp(ax, frame_on=False)
        ax.tick_params(axis='x', top=False, bottom=True)
        ax.spines['bottom'].set_color((0, 0, 0, 0))
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        # ax.xaxis.set_visible(False)
        ax.xaxis.set_visible(True)
        ax.set_xticks(bins[::2])
        ax.set_yticks([0, 1.0])
    ax.tick_params(axis='y', left=True)
    plt.xlabel('Firing rate (Hz)')
    plt.tight_layout()
    plt.savefig(figfilename)
    plt.show()
def semengt_by_label(in_dir, out_dir, label_dir, offset=0):
    """Segment audio by time interval (in seconds)."""
    filenames = []
    for ext in SUPPORTED_FORMAT:
        filenames.extend(util.get_filenames(in_dir, extention=ext))
    print(filenames)
    N = 0
    for fname in filenames:
        fpath = os.path.join(in_dir, fname)
        mas = MyAudioSegment(fpath)
        label_filename = mas.name + '.txt'
        label_path = os.path.join(label_dir, label_filename)
        if not util.is_exists(label_path):
            continue
        n = mas.segment_with_label_file(label_path, out_dir, offset)
        N += n
    return N, len(filenames)
def overlay_unconnected_fft(flist, figfile, trange=(2, 20)):
    fig = plt.figure()
    # major_formatter = plt.FormatStrFormatter('%1.3g')
    celltype_ax_dict = {}
    for fname in get_filenames(flist):
        result = population_vm_spectrum(fname, trange)
        result.pop('TCR')
        nax = len(result)
        for ii, celltype in enumerate(CELLTYPES):
            ax = celltype_ax_dict.get(celltype, None)
            if ax is None:
                ax = fig.add_subplot(nax, 1, ii + 1, sharex=ax)
                celltype_ax_dict[celltype] = ax
            freq, ft = result[celltype]
            idx = (freq > 1) & (freq < 100)
            ax.plot(freq[idx], ft[idx],
                    # color=cellcolor[celltype],
                    label=celltype, alpha=0.5)
    plt.show()
def plot_frequency_distribution(flist, figfile, trange=(2, 20),
                                freqbins=[0, 4, 10, 20, 40, 80]):
    """Plot the distribution of various frequency peaks over multiple
    simulations of the unconnected network.

    Divide the range of frequencies into freqbins and then plot a
    histogram of the FT values in each bin.
    """
    celltype_freq_dict = defaultdict(dict)
    for fname in get_filenames(flist):
        result = population_vm_spectrum(fname, trange)
        result.pop('TCR')
        for celltype, (freq, ft) in result.items():
            for ii, start in enumerate(freqbins):
                ft_list = celltype_freq_dict[celltype].get(start, [])
                print '$$', ii, freqbins[ii]
                if ii < len(freqbins) - 1:
                    idx = (freq >= start) & (freq < freqbins[ii + 1])
                else:
                    idx = freq >= start
                nonzero = ft[idx][ft[idx] > 0]
                ft_list += list(nonzero)
                celltype_freq_dict[celltype][start] = ft_list
    fig = plt.figure()
    axno = 0
    for ii, celltype in enumerate(CELLTYPES):
        for jj, start in enumerate(freqbins):
            axno += 1
            print '##', celltype, start, axno
            ax = fig.add_subplot(len(celltype_freq_dict), len(freqbins), axno)
            ax.hist(celltype_freq_dict[celltype][start], log=True)
            ax.set_xlabel('{}-{}'.format(
                start, 'inf' if jj == len(freqbins) - 1 else freqbins[jj + 1]))
            if jj == 0:
                ax.set_ylabel(celltype)
    # plt.tight_layout()
    plt.savefig(figfile)
    plt.show()
def main():
    filenames = get_filenames()
    parser = argparse.ArgumentParser(
        description='Underwater Image Restoration by Blue-Green Channels '
                    'Dehazing and Red Channel Correction')
    parser.add_argument(
        "-i", "--input", type=int, choices=range(len(filenames)),
        help="index for single input image: {} corresponds to indexes {}".format(
            filenames[0][0], list(range(len(filenames)))))
    parser.add_argument("-w", "--window", type=int, default=15,
                        help="window size of dark channel")
    args = parser.parse_args()

    src, dest = filenames[args.input]
    generate_results(src, dest, partial(adaptiveExp_map, w=args.window))
def main():
    files = get_filenames()
    train_data = json.load(file(files.train))
    test_data = json.load(file(files.test))

    print 'Before pruning:'
    tree = build_tree(train_data)
    check = [record[RESULT_IDX] == plurality(tree.classify(record))
             for record in test_data]
    tree.to_image().save(files.tree)
    print Counter(check)
    print 'Saved tree plot to', files.tree
    print '----------------------'

    print 'After pruning:'
    tree.prune(0.1)
    check = [record[RESULT_IDX] == plurality(tree.classify(record))
             for record in test_data]
    tree.to_image().save(files.pruned_tree)
    print Counter(check)
    print 'Saved pruned tree plot to', files.pruned_tree
    print '----------------------'
config = get_config(sys.argv[1])

# experiment = Experiment("wXwnV8LZOtVfxqnRxr65Lv7C2")
comet_dir_path = os.path.join(config["result_directory"], config["model"])
makedirs(comet_dir_path)
experiment = OfflineExperiment(project_name="DeepGenomics",
                               offline_directory=comet_dir_path)
experiment.log_parameters(config)

if torch.cuda.is_available():
    # torch.cuda.set_device(str(os.environ["CUDA_VISIBLE_DEVICES"]))
    device = torch.device('cuda:{}'.format(os.environ["CUDA_VISIBLE_DEVICES"]))
else:
    device = torch.device('cpu')
print(device)

number_of_examples = len(get_filenames(os.path.join(config["data"], "x")))
list_ids = [str(i) for i in range(number_of_examples)]
random.shuffle(list_ids)
# split indices 70% / 20% / 10% into train / validation / test
t_ind, v_ind = round(number_of_examples * 0.7), round(number_of_examples * 0.9)
train_indices = list_ids[:t_ind]
validation_indices = list_ids[t_ind:v_ind]
test_indices = list_ids[v_ind:]

params = {'batch_size': config["training"]["batch_size"],
          'shuffle': config["training"]["shuffle"],
          'num_workers': config["training"]["num_workers"]}

# Build generators
training_set = Dataset(config["data"], train_indices)
training_generator = data.DataLoader(training_set, **params)
validation_set = Dataset(config["data"], validation_indices)
validation_generator = data.DataLoader(validation_set, **params)
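# NOTE (added sketch): the Dataset class constructed above is not shown. A
# plausible map-style PyTorch dataset reading per-example .npy files from
# x/ and y/ subdirectories (the file layout and dtype handling are
# assumptions):
import os
import numpy as np
import torch
from torch.utils import data

class Dataset(data.Dataset):
    """Hypothetical dataset matching Dataset(config["data"], indices)."""
    def __init__(self, data_dir, list_ids):
        self.data_dir = data_dir
        self.list_ids = list_ids

    def __len__(self):
        return len(self.list_ids)

    def __getitem__(self, index):
        i = self.list_ids[index]
        x = np.load(os.path.join(self.data_dir, 'x', '{}.npy'.format(i)))
        y = np.load(os.path.join(self.data_dir, 'y', '{}.npy'.format(i)))
        return torch.from_numpy(x).float(), torch.from_numpy(y).float()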
def find_names():
    fns = util.get_filenames('../data/res2')
    all_names = set()
    for fn in fns:
        print(fn.name)
        with open(f'../data/res2/{fn.name}') as f:
            text = f.read()
        lines = text.split('\n')
        lines = [l.split(' ') for l in lines if ' ' in l]
        res = []
        index = 0
        while index < len(lines):
            tok, tag = lines[index]
            next_possible = index + 1 < len(lines)
            # find special cases
            if tag == 'B-PERparg' and next_possible:
                tok_n, tag_n = lines[index + 1]
                if tag_n == 'S-PER':
                    res.append(tok + ' ' + tok_n)
                    index += 1  # skip
            # find all PER consisting of at least 2 tokens
            if tag == 'B-PER' and next_possible:
                full_word = [tok]
                end_index = index + 1
                while end_index < len(lines):
                    tok_n, tag_n = lines[end_index]
                    if tag_n == 'I-PER':
                        full_word.append(tok_n)
                        end_index += 1
                    elif tag_n == 'E-PER':
                        full_word.append(tok_n)
                        res.append(' '.join(full_word))
                        break
                    else:
                        # ignore all other tokens
                        break
            index += 1
        # filter out useless stuff
        res = [
            r.replace('a.D.', '').replace('Katzemich/', 'Katzemich')
            .replace('/CSU', '').replace('/FDP', '')
            .replace('/Hamburgisches', '').replace(').Darüber', '')
            .replace(')August', '').replace('AG.', '').replace('(CDU', '')
            .replace('Prof.', '').replace(':\"Überraschenderweise', '')
            .replace('/CDU', '').replace('informieren.\"', '').strip()
            for r in res
            if r not in blocklist and 'stiftung' not in r.lower()
            and 'von hayek' not in r.lower()
        ]
        res = [r for r in res if not r.startswith('Dr. ')]
        all_names.update(res)

    # remove entries that are consumed by other entries
    final = []
    for na in all_names:
        skip = False
        for na2 in all_names:
            if na == na2:
                continue
            if na.lower() in na2.lower() and na.lower() + 's' != na2.lower():
                skip = True
                break
            # remove e.g. 'Helmut Kohls' and keep 'Helmut Kohl'
            if na.lower() == na2.lower() + 's':
                skip = True
                break
        if not skip:
            final.append(na)
    return set(final)
# path_img = BASE_PATH + '/' + paths[0]
# image = skimage.io.imread(path_img)
# image = image.astype('float') / 255
# filter_responses = visual_words.extract_filter_responses(image)
# util.display_filter_responses(filter_responses)

# dictionary = np.load('../results/train_dictionary.npy')
# for path in paths:
#     image = skimage.io.imread(BASE_PATH + '/' + path)
#     image = image.astype('float') / 255
#     wordmap = visual_words.get_visual_words(image, dictionary)
#     # print(wordmap)
#     util.save_wordmap(wordmap, os.path.basename(path))

num_cores = multiprocessing.cpu_count()
x, y = util.get_filenames(BASE_PATH)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=10)
print(len(X_train))

alpha = 50  # number of pixels per image
K = 100     # number of cluster centers
visual_words.compute_dictionary(X_train, RESULTS_PATH, K, alpha, 8)

files = os.listdir(RESULTS_PATH)
filter_responses = []
for file in files:
    filter_responses.append(np.load(os.path.join(RESULTS_PATH, file)))
filter_responses = np.concatenate(filter_responses, axis=0)
kmeans = sklearn.cluster.KMeans(n_clusters=K, n_jobs=-1).fit(filter_responses)
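# NOTE (added sketch): visual_words.get_visual_words (referenced in the
# commented-out block above) is not shown. The usual bag-of-visual-words
# step assigns each pixel's filter response to its nearest dictionary
# entry; a sketch under that assumption, reusing the module's own
# extract_filter_responses:
import numpy as np
from scipy.spatial.distance import cdist

def get_visual_words(image, dictionary):
    """Hypothetical wordmap: nearest-cluster index per pixel."""
    responses = extract_filter_responses(image)          # H x W x F
    h, w, f = responses.shape
    dists = cdist(responses.reshape(-1, f), dictionary)  # (H*W) x K
    return np.argmin(dists, axis=1).reshape(h, w)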
                curInGameCount += -1
                saved_frame[:] = []
                last_check = save_count
                # clear folder
                clear_directory(path_out + "/temp")
                if until != -1 and save_count == until:
                    break
        frame_number += 1
    clear_directory(path_out + "/temp")
    result_file.close()
    vidcap.release()
    print(video_name + " Done!")


import util

if __name__ == "__main__":
    full_raw_videos = util.get_filenames(FULL_RAW_PATH)
    for index, full_raw_video in enumerate(full_raw_videos):
        video_no_ext = full_raw_video.replace('.mp4', '')
        print(f"[Start {index + 1}/{len(full_raw_videos)}] " + video_no_ext)
        FindTransitions(FULL_RAW_PATH, full_raw_video,
                        CLASSIFIER_PATH + "inference_result")
        util.postprocess_timestamp('./inference_result/' + video_no_ext)
        util.cutVideo(video_no_ext, CLASSIFIER_PATH + 'inference_result/',
                      FULL_RAW_PATH, CLASSIFIER_PATH + 'full_video/')
        print(f"[End {index + 1}/{len(full_raw_videos)}] " + video_no_ext)
def main():
    filenames = get_filenames()
    for src, dest in filenames:
        generate_results(src, dest, dehaze)
    parser.add_option('--mean_file', type='string')
    parser.add_option('--W', type='string', help='file to store W')
    parser.add_option('--t', type='int', help='the number of eigenvalues')
    parser.add_option('--mean_shape', type='string')
    parser.add_option('--debug', action="store_true", default=False)
    parser.add_option('--method', type='choice', default='mean_pca',
                      choices=['max', 'mean', 'max_pca', 'mean_pca', 'all'])
    (options, args) = parser.parse_args()
    return options


p = init()

if __name__ == '__main__':
    caffe.set_mode_gpu()
    m = Alignment(p.prototxt, p.model, p.layername, p.mean_file, p.W, p.t,
                  p.mean_shape)
    filenames = get_filenames(p.filelists)
    if p.method == 'all':
        shapes_max = []
        shapes_max_pca = []
        shapes_mean = []
        shapes_mean_pca = []
        for filename in filenames:
            print filename
            img = caffe.io.load_image(p.root + filename)
            shapes = m.process_all(img)
            shapes_max.append(shapes[0])
            shapes_max_pca.append(shapes[1])
            shapes_mean.append(shapes[2])
            shapes_mean_pca.append(shapes[3])
        dump(np.array(shapes_max), filenames, p.outpath + '_max')
        dump(np.array(shapes_max_pca), filenames, p.outpath + '_max_pca')
import sys
sys.path.append("../wrangle")

import util
from flair.data import Sentence
from flair.models import SequenceTagger

fs = util.get_filenames()
fs = [f for f in fs if f.name == 'Astroturfing.txt']

tagger = SequenceTagger.load('de-ner-germeval')

# fs = [f for f in fs if f.name.lower() == 'Bund Katholischer Unternehmer.txt'.lower()]

for f in fs:
    doc = util.preprocess(
        f,
        '/home/filter/spacy-data/de_core_news_sm-2.0.0/de_core_news_sm/de_core_news_sm-2.0.0'
    )
    sents = [Sentence(' '.join([str(t) for t in s])) for s in doc.sents]
    tagger.predict(sents, mini_batch_size=64)
    res = []
    for sentence in sents:
        for token in sentence:
            if token.tags['ner'] != '' and token.tags['ner'] != 'O':
                res.append(token.text + ' ' + token.tags['ner'])
    print(sents)
    print(res)
    with open('../data/res2/' + f.name, 'w') as outfile:
        outfile.write('\n'.join(res))