def kfold_validation(self, k=10): available_ram = psutil.virtual_memory()[1] available_ram = int(int(available_ram) * .9 * 1e-9) if available_ram > 5: jvm.start(max_heap_size='5g') else: print( 'Seem your machine has less than 5 GB amount of RAM available:\n' ) print('cannot start jvm.') sys.exit() ### print('\nCaricando ' + self.input_file + ' con opts -f' + str(self.features_number) + ' -c' + self.classifier_name + '\n') # load .arff file dataset = arff.load(open(input_file, 'r')) data = np.array(dataset['data']) self.features_names = [x[0] for x in dataset['attributes']] self.attributes_number = data.shape[1] self.dataset_features_number = self.attributes_number - self.levels_number # Factorization of Nominal features_index encoder = CategoricalEncoder(encoding='ordinal') nominal_features_index = [ i for i in range(len(dataset['attributes'][:-self.levels_number])) if dataset['attributes'][i][1] != u'NUMERIC' ] if len(nominal_features_index) > 0: data[:, nominal_features_index] = encoder.fit_transform( data[:, nominal_features_index]) prediction = [] probability = [] oracle = [] print('\n***\nStart testing with ' + str(k) + 'Fold cross-validation -f' + str(self.features_number) + ' -c' + self.classifier_name + '\n***\n') bar = progressbar.ProgressBar(maxval=k, widgets=[ progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage() ]) bar.start() temp_metrics = [] skf = StratifiedKFold(n_splits=k, shuffle=True) bar_cnt = 0 for train_index, test_index in skf.split( data, data[:, self.dataset_features_number + self.tag_under_test]): self.training_set = data[ train_index, :self.dataset_features_number] self.testing_set = data[test_index, :self.dataset_features_number] self.ground_through = data[train_index, self.dataset_features_number + self.tag_under_test] self.oracle = data[test_index, self.dataset_features_number + self.tag_under_test] self.prediction = np.ndarray(shape=[len(test_index), 1], dtype='<U24') self.probability = np.ndarray( shape=[len(test_index), len(set(self.ground_through))], dtype='<U24') classifier_to_call = getattr( self, supported_classifiers[self.classifier_name]) classifier_to_call() prediction.append(self.prediction) probability.append(self.probability) oracle.append(self.oracle) bar_cnt += 1 bar.update(bar_cnt) bar.finish() relations = [] relations = [] relations.append({ # Lv2:Lv1 u'Tor': u'Tor', u'TorPT': u'Tor', u'TorApp': u'Tor', u'I2PApp80BW': u'I2P', u'I2PApp0BW': u'I2P', u'I2PApp': u'I2P', u'JonDonym': u'JonDonym' }) relations.append({ # Lv3:Lv2 u'JonDonym': u'JonDonym', u'I2PSNARK_App80BW': u'I2PApp80BW', u'IRC_App80BW': u'I2PApp80BW', u'Eepsites_App80BW': u'I2PApp80BW', u'I2PSNARK_App0BW': u'I2PApp0BW', u'IRC_App0BW': u'I2PApp0BW', u'Eepsites_App0BW': u'I2PApp0BW', u'I2PSNARK_App': u'I2PApp', u'IRC_App': u'I2PApp', u'Eepsites_App': u'I2PApp', u'ExploratoryTunnels_App': u'I2PApp', u'ParticipatingTunnels_App': u'I2PApp', u'Tor': u'Tor', u'Streaming': u'TorApp', u'Torrent': u'TorApp', u'Browsing': u'TorApp', u'Flashproxy': u'TorPT', u'FTE': u'TorPT', u'Meek': u'TorPT', u'Obfs3': u'TorPT', u'scramblesuit': u'TorPT' }) oracle_inferred = [] prediction_inferred = [] for i in range(self.tag_under_test): oracle_inferred.append(list()) prediction_inferred.append(list()) # Infering superior levels for i in range(k): # Assign of prediction to a dummy to use this one in consecutive label swaps inferred_prediction = prediction[i].copy() inferred_oracle = oracle[i].copy() for j in reversed(range(self.tag_under_test)): inferred_oracle = np.vectorize(relations[j].get)( 
list(inferred_oracle)) inferred_prediction = np.vectorize(relations[j].get)( list(inferred_prediction)) oracle_inferred[j].append(inferred_oracle) prediction_inferred[j].append(inferred_prediction) print('\n***\nStart testing with incremental gamma threshold\n***\n') bar = progressbar.ProgressBar(maxval=9, widgets=[ progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage() ]) bar.start() oracle_gamma = [] prediction_gamma = [] classified_ratio = [] for i in range(9): gamma = float(i + 1) / 10.0 oracle_gamma.append(list()) prediction_gamma.append(list()) classified_ratio.append(list()) for j in range(k): indexes = [] p_cnt = 0 for p in probability[j]: if max(p) < gamma: indexes.append(p_cnt) p_cnt += 1 gamma_oracle = np.delete(oracle[j], [indexes]) gamma_prediction = np.delete(prediction[j], [indexes]) oracle_gamma[i].append(gamma_oracle) prediction_gamma[i].append(gamma_prediction) classified_ratio[i].append( float(len(gamma_prediction)) / float(len(prediction[j]))) bar.update(i) bar.finish() data_folder = './data_' + self.classifier_name + '/material/' if not os.path.exists('./data_' + self.classifier_name): os.makedirs('./data_' + self.classifier_name) os.makedirs(data_folder) elif not os.path.exists(data_folder): os.makedirs(data_folder) if self.packets_number != 0: file = open( data_folder + 'flat_early_level_' + str(self.level_target) + '_p_' + str(self.packets_number) + '.dat', 'w+') else: file = open( data_folder + 'flat_flow_level_' + str(self.level_target) + '_f_' + str(self.features_number) + '.dat', 'w+') for i in range(k): file.write('@fold\n') for o, p in zip(oracle[i], prediction[i]): file.write(str(o) + ' ' + str(p) + '\n') file.close() for i in range(self.tag_under_test): if self.packets_number != 0: file = open( data_folder + 'flat_early_level_' + str(self.level_target) + '_p_' + str(self.packets_number) + '_inferred_' + str(i + 1) + '.dat', 'w+') else: file = open( data_folder + 'flat_flow_level_' + str(self.level_target) + '_f_' + str(self.features_number) + '_inferred_' + str(i + 1) + '.dat', 'w+') for j in range(k): file.write('@fold\n') for o, p in zip(oracle_inferred[i][j], prediction_inferred[i][j]): file.write(str(o) + ' ' + str(p) + '\n') file.close() for i in range(9): if self.packets_number != 0: file = open( data_folder + 'flat_early_level_' + str(self.level_target) + '_p_' + str(self.packets_number) + '_gamma_' + str(float(i + 1) / 10.0) + '.dat', 'w+') else: file = open( data_folder + 'flat_flow_level_' + str(self.level_target) + '_f_' + str(self.features_number) + '_gamma_' + str(float(i + 1) / 10.0) + '.dat', 'w+') for j in range(k): file.write('@fold_cr\n') file.write(str(classified_ratio[i][j]) + '\n') for o, p in zip(oracle_gamma[i][j], prediction_gamma[i][j]): file.write(str(o) + ' ' + str(p) + '\n') file.close() ### jvm.stop()
def draw_progress_bar(self, value):
    # Demo helper: advance a bar of `value` steps, pausing briefly between updates.
    from time import sleep
    bar = progressbar.ProgressBar(maxval=value,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.Percentage()])
    bar.start()
    for i in range(value):
        bar.update(i + 1)
        sleep(0.2)
    bar.finish()
def retrack_reuse_data_association( h5_filename=None, output_h5_filename=None, kalman_filename=None, start=None, stop=None, less_ram=False, show_progress=False, show_progress_json=False, ): if os.path.exists(output_h5_filename): raise RuntimeError("will not overwrite old file '%s'" % output_h5_filename) ca = core_analysis.get_global_CachingAnalyzer() with ca.kalman_analysis_context(kalman_filename, data2d_fname=h5_filename) as h5_context: R = h5_context.get_reconstructor() if less_ram: ML_estimates_2d_idxs = h5_context.get_pytable_node( "ML_estimates_2d_idxs") else: ML_estimates_2d_idxs = h5_context.load_entire_table( "ML_estimates_2d_idxs") use_obj_ids = h5_context.get_unique_obj_ids() extra = h5_context.get_extra_info() dt = 1.0 / extra["frames_per_second"] dynamic_model_name = extra["dynamic_model_name"] kalman_model = dynamic_models.get_kalman_model(name=dynamic_model_name, dt=dt) kalman_model["max_frames_skipped"] = 2**62 # close to max i64 fps = extra["frames_per_second"] camn2cam_id, cam_id2camns = h5_context.get_caminfo_dicts() parsed = h5_context.read_textlog_header() if "trigger_CS3" not in parsed: parsed["trigger_CS3"] = "unknown" textlog_save_lines = [ "retrack_reuse_data_association running at %s fps, (top %s, trigger_CS3 %s, flydra_version %s)" % ( str(fps), str(parsed.get("top", "unknown")), str(parsed["trigger_CS3"]), flydra_analysis.version.__version__, ), "original file: %s" % (kalman_filename, ), "dynamic model: %s" % (dynamic_model_name, ), "reconstructor file: %s" % (kalman_filename, ), ] with open_file_safe( output_h5_filename, mode="w", title="tracked Flydra data file", delete_on_error=True, ) as output_h5: h5saver = KalmanSaver( output_h5, R, cam_id2camns=cam_id2camns, min_observations_to_save=0, textlog_save_lines=textlog_save_lines, dynamic_model_name=dynamic_model_name, dynamic_model=kalman_model, ) # associate framenumbers with timestamps using 2d .h5 file if less_ram: data2d = h5_context.get_pytable_node("data2d_distorted", from_2d_file=True) h5_framenumbers = data2d.cols.frame[:] else: data2d = h5_context.load_entire_table("data2d_distorted", from_2d_file=True) h5_framenumbers = data2d["frame"] h5_frame_qfi = result_utils.QuickFrameIndexer(h5_framenumbers) if show_progress: string_widget = StringWidget() objs_per_sec_widget = progressbar.FileTransferSpeed( unit="obj_ids ") widgets = [ string_widget, objs_per_sec_widget, progressbar.Percentage(), progressbar.Bar(), progressbar.ETA(), ] pbar = progressbar.ProgressBar( widgets=widgets, maxval=len(use_obj_ids)).start() for obj_id_enum, obj_id in enumerate(use_obj_ids): if show_progress: string_widget.set_string("[obj_id: % 5d]" % obj_id) pbar.update(obj_id_enum) if show_progress_json and obj_id_enum % 100 == 0: rough_percent_done = float(obj_id_enum) / len( use_obj_ids) * 100.0 result_utils.do_json_progress(rough_percent_done) tro = None first_frame_per_obj = True obj_3d_rows = h5_context.load_dynamics_free_MLE_position( obj_id) for this_3d_row in obj_3d_rows: # iterate over each sample in the current camera framenumber = this_3d_row["frame"] if start is not None: if not framenumber >= start: continue if stop is not None: if not framenumber <= stop: continue h5_2d_row_idxs = h5_frame_qfi.get_frame_idxs(framenumber) if len(h5_2d_row_idxs) == 0: # At the start, there may be 3d data without 2d data. continue # If there was a 3D ML estimate, there must be 2D data. frame2d = data2d[h5_2d_row_idxs] obs_2d_idx = this_3d_row["obs_2d_idx"] kobs_2d_data = ML_estimates_2d_idxs[int(obs_2d_idx)] # Parse VLArray. 
this_camns = kobs_2d_data[0::2] this_camn_idxs = kobs_2d_data[1::2] # Now, for each camera viewing this object at this # frame, extract images. observation_camns = [] observation_idxs = [] data_dict = {} used_camns_and_idxs = [] cam_ids_and_points2d = [] for camn, frame_pt_idx in zip(this_camns, this_camn_idxs): try: cam_id = camn2cam_id[camn] except KeyError: warnings.warn("camn %d not found" % (camn, )) continue # find 2D point corresponding to object cond = (frame2d["camn"] == camn) & ( frame2d["frame_pt_idx"] == frame_pt_idx) idxs = np.nonzero(cond)[0] if len(idxs) == 0: # no frame for that camera (start or stop of file) continue elif len(idxs) > 1: print( "MEGA WARNING MULTIPLE 2D POINTS\n", camn, frame_pt_idx, "\n\n", ) continue idx = idxs[0] frame2d_row = frame2d[idx] x2d_real = frame2d_row["x"], frame2d_row["y"] pt_undistorted = R.undistort(cam_id, x2d_real) x2d_area = frame2d_row["area"] observation_camns.append(camn) observation_idxs.append(idx) candidate_point_list = [] data_dict[camn] = candidate_point_list used_camns_and_idxs.append((camn, frame_pt_idx, None)) # with no orientation observed_2d = (pt_undistorted[0], pt_undistorted[1], x2d_area) cam_ids_and_points2d.append((cam_id, observed_2d)) if first_frame_per_obj: if len(cam_ids_and_points2d) < 2: warnings.warn( "some 2D data seems to be missing, cannot completely reconstruct" ) else: X3d = R.find3d( cam_ids_and_points2d, return_line_coords=False, simulate_via_tracking_dynamic_model= kalman_model, ) # first frame tro = TrackedObject( R, obj_id, framenumber, X3d, # obs0_position None, # obs0_Lcoords observation_camns, # first_observation_camns observation_idxs, # first_observation_idxs kalman_model=kalman_model, ) del X3d first_frame_per_obj = False else: tro.calculate_a_posteriori_estimate( framenumber, data_dict, camn2cam_id, skip_data_association=True, original_camns_and_idxs=used_camns_and_idxs, original_cam_ids_and_points2d=cam_ids_and_points2d, ) # done with all data for this obj_id if tro is not None: tro.kill() h5saver.save_tro(tro, force_obj_id=obj_id) if show_progress_json: result_utils.do_json_progress(100)
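A hedged invocation sketch for the re-tracking function above; every file name below is a placeholder, not a path from the original project.

# Hypothetical call; all file names are placeholders.
retrack_reuse_data_association(
    h5_filename="data2d_distorted.h5",
    output_h5_filename="retracked_kalmanized.h5",
    kalman_filename="kalmanized.h5",
    less_ram=True,
    show_progress=True,
)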
def run(df: pd.DataFrame,
        expected_imbalance_window: int = 100,
        num_prev_bars: int = 100,
        expected_num_ticks: int = 100,
        expected_num_ticks_min_max: list = [80, 200],
        run_type: str = 'tick') -> pd.DataFrame:
    """Build run bars.

    df: pandas.DataFrame of tick data
    expected_imbalance_window: maximum window size for the expected imbalance
    num_prev_bars: window/span size used for the exponentially weighted mean of E[T]
    expected_num_ticks: initial expected number of ticks E[T] per bar
    expected_num_ticks_min_max: set to [0, np.inf] to leave E[T] unconstrained
    run_type: tick -> tick run bars, volume -> volume run bars, dollar -> dollar (won) run bars
    """
    print(f'(*) Generating {run_type} run bars.')
    # Check the bar extraction type
    # assert run_type in ('tick', 'volume', 'dollar'), 'wrong run_type'
    _run_type = ('tick', 'volume', 'dollar').index(run_type)

    # Variables that are NOT reset after a bar is extracted
    signs_sell = []
    signs_buy = []
    list_bars = []
    num_ticks_bar = []
    tick_num = 0
    prv_sign = 0
    expected_imbalance_buy = expected_imbalance_sell = None
    buy_ticks_proportion = []

    # Variables that ARE reset after a bar is extracted
    price_open = price_close = prv_price = None
    exp_buy_ticks_proportion = None
    exp_sell_ticks_proportion = None
    price_high, price_low = -np.inf, np.inf
    cum_theta = cum_tick = cum_dollar = cum_volume = cum_theta_buy = cum_theta_sell = buy_tick_num = 0

    sample_size = len(df)
    data = df.values
    data_len = len(data)

    # Progress bar to track progress
    bar = progressbar.ProgressBar(maxval=data_len,
                                  widgets=[
                                      progressbar.Bar('=', '[', ']'), ' ',
                                      progressbar.Percentage()
                                  ])
    bar.start()

    same_time_idx = 0
    prv_date_time = None
    for d in data:
        tick_num += 1
        bar.update(tick_num)
        date_time = _date_time = d[0]
        # Because the timestamp resolution is one second, duplicated values can be
        # extracted later when computing daily volatility.
        # If the same unit of time repeats, record a unique value at microsecond
        # resolution, in tick order.
        if prv_date_time == _date_time:
            same_time_idx += 1
            date_time += '.{:06d}'.format(same_time_idx)
        else:
            same_time_idx = 0
            date_time += '.{:06d}'.format(same_time_idx)
        prv_date_time = _date_time

        price = d[1]
        volume = d[2]
        dollar = price * volume

        ## make ohlc ##
        if price_open is None:
            price_open = price  # open
        if price > price_high:
            price_high = price  # high
        if price < price_low:
            price_low = price  # low
        price_close = price  # close

        ## Cumulative tick / dollar / volume
        cum_tick += 1
        cum_dollar += dollar
        cum_volume += volume

        ############## Initial tick_delta value #################
        if prv_price is not None:
            tick_delta = price - prv_price  # delta when a previous price exists
        else:
            tick_delta = 0  # set to 0 when there is none

        ############## Imbalance computation ####################
        if tick_delta != 0:
            _sign = 0
            if tick_delta > 0:
                _sign = 1
            elif tick_delta < 0:
                _sign = -1
        else:
            _sign = prv_sign

        if _run_type == 0:  # tick type
            imbalance = _sign
        elif _run_type == 1:  # volume type
            imbalance = _sign * volume
        elif _run_type == 2:  # dollar type
            imbalance = _sign * dollar

        if imbalance > 0:
            cum_theta_buy += imbalance
            buy_tick_num += 1
            signs_buy.append(imbalance)  # list of buy imbalances
        elif imbalance < 0:
            _imbalance = abs(imbalance)
            cum_theta_sell += _imbalance
            signs_sell.append(_imbalance)  # list of sell imbalances

        prv_price = price
        prv_sign = _sign

        ############# Initial expected imbalance #################
        if expected_imbalance_buy is None or expected_imbalance_sell is None:
            expected_imbalance_buy = _get_expected_imbalance(
                signs_buy,
                expected_num_ticks,
                expected_imbalance_window,
                warm_up=True)
            expected_imbalance_sell = _get_expected_imbalance(
                signs_sell,
                expected_num_ticks,
                expected_imbalance_window,
                warm_up=True)
            if expected_imbalance_buy is not None and expected_imbalance_sell is not None:
                exp_buy_ticks_proportion = buy_tick_num / cum_tick
                exp_sell_ticks_proportion = (1 - exp_buy_ticks_proportion)

        if exp_buy_ticks_proportion is None:
            max_proportion = None
        else:
            max_proportion = max(
                expected_imbalance_buy * exp_buy_ticks_proportion,
                expected_imbalance_sell * exp_sell_ticks_proportion)

        max_theta = max(cum_theta_buy, cum_theta_sell)

        ############# Bar extraction #############
        if max_proportion is not None and max_theta > expected_num_ticks * max_proportion:
            #### Create the bar ####
            bar_info = dict(date_time=date_time,
                            tick_num=tick_num,
                            open=price_open,
                            high=price_high,
                            low=price_low,
                            close=price_close,
                            cum_vol=cum_volume,
                            cum_dallar=cum_dollar)
            # # If the observation is excessively large or small, treat it as an
            # # outlier and ignore it
            # if _run_type:
            #     if imbalance > 0 and is_outlier(signs_buy[:-1], signs_buy[-1]):
            #         _ = signs_buy.pop()  # remove the outlier that was already appended
            #         continue
            #     elif imbalance < 0 and is_outlier(signs_sell[:-1], signs_sell[-1]):
            #         _ = signs_sell.pop()  # remove the outlier that was already appended
            #         continue
            list_bars.append(bar_info)
            num_ticks_bar.append(cum_tick)
            buy_ticks_proportion.append(buy_tick_num / cum_tick)

            # Expected buy ticks proportion based on formed bars
            exp_buy_ticks_proportion = ewm_mean(
                buy_ticks_proportion[-num_prev_bars:], num_prev_bars)[-1]
            exp_sell_ticks_proportion = (1 - exp_buy_ticks_proportion)

            #### Update expectations ####
            expected_num_ticks = _get_exp_num_ticks(
                num_ticks_bar, num_prev_bars,
                expected_num_ticks_min_max)  # expected size of E[T]
            expected_imbalance_buy = _get_expected_imbalance(
                signs_buy, expected_num_ticks,
                expected_imbalance_window)  # expected imbalance
            expected_imbalance_sell = _get_expected_imbalance(
                signs_sell, expected_num_ticks,
                expected_imbalance_window)  # expected imbalance

            # Variables reset after a bar is extracted
            price_open = price_close = None
            price_high, price_low = -np.inf, np.inf
            cum_theta_buy = cum_theta_sell = cum_tick = cum_dollar = cum_volume = buy_tick_num = 0

    bar.finish()

    df = pd.DataFrame(list_bars)
    df['date_time'] = pd.to_datetime(df['date_time'])
    return df
def train(self): with tf.Graph().as_default(): logging.info("add model") var = self.add_model() saver = tf.train.Saver() # config = tf.ConfigProto(allow_soft_placement=True) # config.gpu_options.allow_growth = True # sess = tf.Session(config=config) sess = tf.Session() sess.run(tf.initialize_all_variables()) total_batch = int( np.ceil(len(self.train_data) / float(self.args.batch))) for epoch in xrange(self.args.epochs): total_loss = 0.0 total_acc_sum = 0.0 total_count = 0 pbar = pb.ProgressBar(widgets=[ "[TRAIN] ", pb.DynamicMessage('loss'), " ", pb.DynamicMessage('acc'), " ", pb.FileTransferSpeed(unit="batchs"), pb.Percentage(), pb.Bar(), pb.Timer(), " ", pb.ETA() ], maxval=total_batch).start() for i in xrange(total_batch): batchx, batchy = self.next_batch(self.args.batch) _, loss, acc_sum = sess.run( [var['opt'], var['cost'], var['acc_sum']], feed_dict={ var['x']: batchx, var['y']: batchy, var['keep_prob']: 0.7 }) total_loss += loss total_acc_sum += acc_sum total_count += len(batchx) pbar.update(i, loss=total_loss / total_count, acc=total_acc_sum / total_count) pbar.finish() v_loss, v_acc_sum = self.eval(sess, var) v_size = len(self.val_data) logging.info( "Epoch {}: tr_loss: {}, tr_acc: {}\n{}v_loss: {}, v_acc: {}" .format(epoch, total_loss / total_count, total_acc_sum / total_count, " ", v_loss / v_size, v_acc_sum / v_size)) logging.info("save model") save_path = saver.save(sess, self.args.model) logging.info("save model in path: {}".format(save_path))
labels = [dict_data[i] for i in ids] # encode the labels le = LabelEncoder() labels = le.fit_transform(labels) # initialize the HDF5 dataset writer, then store the class label names in the dataset dataset = HDF5DatasetWriter( (len(imagePaths), config.INPUT_SIZE, config.INPUT_SIZE, 3), config.TRAIN_HDF5) dataset.storeClassLabels(le.classes_) # initialize the progress bar widgets = [ "Saving Images: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA() ] pbar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets).start() # loop over the images in batches for i in np.arange(0, len(imagePaths)): # Grab values imagePath = imagePaths[i] label = labels[i] _id = ids[i] # load the input image using the Keras helper utility # while ensuring the image is resized image = load_img(imagePath,
def scan_regionset(regionset, options): """ This function scans all te region files in a regionset object and fills the ScannedRegionFile obj with the results """ total_regions = len(regionset.regions) total_chunks = 0 corrupted_total = 0 wrong_total = 0 entities_total = 0 too_small_total = 0 unreadable = 0 # init progress bar if not options.verbose: pbar = progressbar.ProgressBar(widgets=[ 'Scanning: ', FractionWidget(), ' ', progressbar.Percentage(), ' ', progressbar.Bar(left='[', right=']'), ' ', progressbar.ETA() ], maxval=total_regions) # queue used by processes to pass finished stuff q = queues.SimpleQueue() pool = multiprocessing.Pool(processes=options.processes, initializer=_mp_pool_init, initargs=(regionset, options, q)) if not options.verbose: pbar.start() # start the pool # Note to self: every child process has his own memory space, # that means every obj recived by them will be a copy of the # main obj result = pool.map_async(multithread_scan_regionfile, regionset.list_regions(None), max(1, total_regions // options.processes)) # printing status region_counter = 0 while not result.ready() or not q.empty(): time.sleep(0.01) if not q.empty(): r = q.get() if r == None: # something went wrong scanning this region file # probably a bug... don't know if it's a good # idea to skip it continue if not isinstance(r, world.ScannedRegionFile): raise ChildProcessException(r) else: corrupted, wrong, entities_prob, shared_offset, num_chunks = r.get_counters( ) filename = r.filename # the obj returned is a copy, overwrite it in regionset regionset[r.get_coords()] = r corrupted_total += corrupted wrong_total += wrong total_chunks += num_chunks entities_total += entities_prob if r.status == world.REGION_TOO_SMALL: too_small_total += 1 elif r.status == world.REGION_UNREADABLE: unreadable += 1 region_counter += 1 if options.verbose: if r.status == world.REGION_OK: stats = "(c: {0}, w: {1}, tme: {2}, so: {3}, t: {4})".format( corrupted, wrong, entities_prob, shared_offset, num_chunks) elif r.status == world.REGION_TOO_SMALL: stats = "(Error: not a region file)" elif r.status == world.REGION_UNREADABLE: stats = "(Error: unreadable region file)" print "Scanned {0: <12} {1:.<43} {2}/{3}".format( filename, stats, region_counter, total_regions) else: pbar.update(region_counter) if not options.verbose: pbar.finish() regionset.scanned = True
ap.add_argument("-d", "--dataset", required=True, help="path to input directory of images") ap.add_argument("-o", "--output", required=True, help="path to output directory of rotated iamges") args = vars(ap.parse_args()) # grab the paths to the input images (limiting ourselves to 10,000 # images) and shuffle them to make creating a training and testing # split easier imagePaths = list(paths.list_images(args["dataset"]))[:10000] random.shuffle(imagePaths) # initialize a dictionary to keep track of the number of each angle # chosen so far, then initialize the progress bar angles = {} widgets = ["Building Dataset: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()] pbar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets).start() # loop over the image paths for (i, imagePath) in enumerate(imagePaths): # determine the rotation angle, and load the image angle = np.random.choice([0, 90, 180, 270]) image = cv2.imread(imagePath) # if the image is None (meaning there was an issue loading the # image from disk, simply skip it) if image is None: continue
def plot_raydensity( map_object, station_events: List[Tuple[dict, dict]], domain: object, projection: cp.crs.Projection, ): """ Create a ray-density plot for all events and all stations. This function is potentially expensive and will use all CPUs available. Does require geographiclib to be installed. :param map_object: The cartopy domain plot object :type map_object: cp.mpl.geoaxes.GeoAxes :param station_events: A list of tuples with two dictionaries :type station_events: List[Tuple[dict, dict]] :param domain: An object with the domain plot :type domain: object :param projection: cartopy projection object :type projection: cp.crs.Projection """ import ctypes as C from lasif.tools.great_circle_binner import GreatCircleBinner from lasif.utils import Point import multiprocessing import progressbar from scipy.stats import scoreatpercentile # Merge everything so that a list with coordinate pairs is created. This # list is then distributed among all processors. station_event_list = [] for event, stations in station_events: e_point = Point(event["latitude"], event["longitude"]) for station in stations.values(): p = Point(station["latitude"], station["longitude"]) station_event_list.append((e_point, p)) circle_count = len(station_event_list) # The granularity of the latitude/longitude discretization for the # raypaths. Attempt to get a somewhat meaningful result in any case. if circle_count < 1000: lat_lng_count = 1000 elif circle_count < 10000: lat_lng_count = 2000 else: lat_lng_count = 3000 cpu_count = multiprocessing.cpu_count() def to_numpy(raw_array, dtype, shape): data = np.frombuffer(raw_array.get_obj()) data.dtype = dtype return data.reshape(shape) print("\nLaunching %i great circle calculations on %i CPUs..." % (circle_count, cpu_count)) widgets = [ "Progress: ", progressbar.Percentage(), progressbar.Bar(), "", progressbar.ETA(), ] pbar = progressbar.ProgressBar(widgets=widgets, maxval=circle_count).start() def great_circle_binning(sta_evs, bin_data_buffer, bin_data_shape, lock, counter): new_bins = GreatCircleBinner( domain.min_lat, domain.max_lat, lat_lng_count, domain.min_lon, domain.max_lon, lat_lng_count, ) for event, station in sta_evs: with lock: counter.value += 1 if not counter.value % 25: pbar.update(counter.value) new_bins.add_greatcircle(event, station) bin_data = to_numpy(bin_data_buffer, np.uint32, bin_data_shape) with bin_data_buffer.get_lock(): bin_data += new_bins.bins # Split the data in cpu_count parts. def chunk(seq, num): avg = len(seq) / float(num) out = [] last = 0.0 while last < len(seq): out.append(seq[int(last):int(last + avg)]) last += avg return out chunks = chunk(station_event_list, cpu_count) # One instance that collects everything. collected_bins = GreatCircleBinner( domain.min_lat, domain.max_lat, lat_lng_count, domain.min_lon, domain.max_lon, lat_lng_count, ) # Use a multiprocessing shared memory array and map it to a numpy view. collected_bins_data = multiprocessing.Array(C.c_uint32, collected_bins.bins.size) collected_bins.bins = to_numpy(collected_bins_data, np.uint32, collected_bins.bins.shape) # Create, launch and join one process per CPU. Use a shared value as a # counter and a lock to avoid race conditions. 
processes = [] lock = multiprocessing.Lock() counter = multiprocessing.Value("i", 0) for _i in range(cpu_count): processes.append( multiprocessing.Process( target=great_circle_binning, args=( chunks[_i], collected_bins_data, collected_bins.bins.shape, lock, counter, ), )) for process in processes: process.start() for process in processes: process.join() pbar.finish() stations = chain.from_iterable( (_i[1].values() for _i in station_events if _i[1])) # Remove duplicates stations = [(_i["latitude"], _i["longitude"]) for _i in stations] stations = set(stations) title = "%i Events, %i unique raypaths, " "%i unique stations" % ( len(station_events), circle_count, len(stations), ) plt.title(title, size="xx-large") data = collected_bins.bins.transpose() if data.max() >= 10: data = np.log10(np.clip(data, a_min=0.5, a_max=data.max())) data[data >= 0.0] += 0.1 data[data < 0.0] = 0.0 max_val = scoreatpercentile(data.ravel(), 99) else: max_val = data.max() cmap = cm.get_cmap("gist_heat") cmap._init() cmap._lut[:120, -1] = np.linspace(0, 1.0, 120)**2 lngs, lats = collected_bins.coordinates ln, la = project_points(projection, lngs, lats) map_object.pcolormesh(ln, la, data, cmap=cmap, vmin=0, vmax=max_val, zorder=10) # Draw the coastlines so they appear over the rays. Otherwise things are # sometimes hard to see. map_object.add_feature(cp.feature.COASTLINE, zorder=13) map_object.add_feature(cp.feature.BORDERS, linestyle=":", zorder=13)
def plot_raydensity(map_object, station_events, domain): """ Create a ray-density plot for all events and all stations. This function is potentially expensive and will use all CPUs available. Does require geographiclib to be installed. """ import ctypes as C from lasif import rotations from lasif.domain import RectangularSphericalSection from lasif.tools.great_circle_binner import GreatCircleBinner from lasif.utils import Point import multiprocessing import progressbar from scipy.stats import scoreatpercentile if not isinstance(domain, RectangularSphericalSection): raise NotImplementedError( "Raydensity currently only implemented for rectangular domains. " "Should be easy to implement for other domains. Let me know.") # Merge everything so that a list with coordinate pairs is created. This # list is then distributed among all processors. station_event_list = [] for event, stations in station_events: if domain.rotation_angle_in_degree: # Rotate point to the non-rotated domain. e_point = Point(*rotations.rotate_lat_lon( event["latitude"], event["longitude"], domain.rotation_axis, -1.0 * domain.rotation_angle_in_degree)) else: e_point = Point(event["latitude"], event["longitude"]) for station in stations.values(): # Rotate point to the non-rotated domain if necessary. if domain.rotation_angle_in_degree: p = Point(*rotations.rotate_lat_lon( station["latitude"], station["longitude"], domain.rotation_axis, -1.0 * domain.rotation_angle_in_degree)) else: p = Point(station["latitude"], station["longitude"]) station_event_list.append((e_point, p)) circle_count = len(station_event_list) # The granularity of the latitude/longitude discretization for the # raypaths. Attempt to get a somewhat meaningful result in any case. lat_lng_count = 1000 if circle_count < 1000: lat_lng_count = 1000 if circle_count < 10000: lat_lng_count = 2000 else: lat_lng_count = 3000 cpu_count = multiprocessing.cpu_count() def to_numpy(raw_array, dtype, shape): data = np.frombuffer(raw_array.get_obj()) data.dtype = dtype return data.reshape(shape) print("\nLaunching %i greatcircle calculations on %i CPUs..." % (circle_count, cpu_count)) widgets = [ "Progress: ", progressbar.Percentage(), progressbar.Bar(), "", progressbar.ETA() ] pbar = progressbar.ProgressBar(widgets=widgets, maxval=circle_count).start() def great_circle_binning(sta_evs, bin_data_buffer, bin_data_shape, lock, counter): new_bins = GreatCircleBinner(domain.min_latitude, domain.max_latitude, lat_lng_count, domain.min_longitude, domain.max_longitude, lat_lng_count) for event, station in sta_evs: with lock: counter.value += 1 if not counter.value % 25: pbar.update(counter.value) new_bins.add_greatcircle(event, station) bin_data = to_numpy(bin_data_buffer, np.uint32, bin_data_shape) with bin_data_buffer.get_lock(): bin_data += new_bins.bins # Split the data in cpu_count parts. def chunk(seq, num): avg = len(seq) / float(num) out = [] last = 0.0 while last < len(seq): out.append(seq[int(last):int(last + avg)]) last += avg return out chunks = chunk(station_event_list, cpu_count) # One instance that collects everything. collected_bins = GreatCircleBinner(domain.min_latitude, domain.max_latitude, lat_lng_count, domain.min_longitude, domain.max_longitude, lat_lng_count) # Use a multiprocessing shared memory array and map it to a numpy view. collected_bins_data = multiprocessing.Array(C.c_uint32, collected_bins.bins.size) collected_bins.bins = to_numpy(collected_bins_data, np.uint32, collected_bins.bins.shape) # Create, launch and join one process per CPU. 
Use a shared value as a # counter and a lock to avoid race conditions. processes = [] lock = multiprocessing.Lock() counter = multiprocessing.Value("i", 0) for _i in range(cpu_count): processes.append( multiprocessing.Process(target=great_circle_binning, args=(chunks[_i], collected_bins_data, collected_bins.bins.shape, lock, counter))) for process in processes: process.start() for process in processes: process.join() pbar.finish() stations = chain.from_iterable( (list(_i[1].values()) for _i in station_events if _i[1])) # Remove duplicates stations = [(_i["latitude"], _i["longitude"]) for _i in stations] stations = set(stations) title = "%i Events, %i unique raypaths, "\ "%i unique stations" % (len(station_events), circle_count, len(stations)) plt.title(title, size="xx-large") data = collected_bins.bins.transpose() if data.max() >= 10: data = np.log10(np.clip(data, a_min=0.5, a_max=data.max())) data[data >= 0.0] += 0.1 data[data < 0.0] = 0.0 max_val = scoreatpercentile(data.ravel(), 99) else: max_val = data.max() cmap = cm.get_cmap("gist_heat") cmap._init() cmap._lut[:120, -1] = np.linspace(0, 1.0, 120)**2 # Slightly change the appearance of the map so it suits the rays. map_object.fillcontinents(color='#dddddd', lake_color='#dddddd', zorder=2) lngs, lats = collected_bins.coordinates # Rotate back if necessary! if domain.rotation_angle_in_degree: for lat, lng in zip(lats, lngs): lat[:], lng[:] = rotations.rotate_lat_lon( lat, lng, domain.rotation_axis, domain.rotation_angle_in_degree) ln, la = map_object(lngs, lats) map_object.pcolormesh(ln, la, data, cmap=cmap, vmin=0, vmax=max_val, zorder=10) # Draw the coastlines so they appear over the rays. Otherwise things are # sometimes hard to see. map_object.drawcoastlines(zorder=3) map_object.drawcountries(linewidth=0.2, zorder=3)
def handle(self, *args, **options): if not os.path.exists(DATA_DIR): self.logger.info('Creating %s' % DATA_DIR) os.mkdir(DATA_DIR) translation_hack_path = os.path.join(DATA_DIR, 'translation_hack') self.noinsert = options.get('noinsert', False) self.widgets = [ 'RAM used: ', MemoryUsageWidget(), ' ', progressbar.ETA(), ' Done: ', progressbar.Percentage(), progressbar.Bar(), ] for url in SOURCES: destination_file_name = url.split('/')[-1] force = options.get('force_all', False) if not force: for f in options['force']: if f in destination_file_name or f in url: force = True geonames = Geonames(url, force=force) downloaded = geonames.downloaded force_import = options.get('force_import_all', False) if not force_import: for f in options['force_import']: if f in destination_file_name or f in url: force_import = True if downloaded or force_import: self.logger.info('Importing %s' % destination_file_name) if url in TRANSLATION_SOURCES: if options.get('hack_translations', False): if os.path.exists(translation_hack_path): self.logger.debug( 'Using translation parsed data: %s' % translation_hack_path) continue i = 0 progress = progressbar.ProgressBar(maxval=geonames.num_lines(), widgets=self.widgets) for items in geonames.parse(): if url in CITY_SOURCES: self.city_import(items) elif url in REGION_SOURCES: self.region_import(items) elif url in COUNTRY_SOURCES: self.country_import(items) elif url in TRANSLATION_SOURCES: # free some memory if getattr(self, '_country_codes', False): del self._country_codes if getattr(self, '_region_codes', False): del self._region_codes self.translation_parse(items) reset_queries() i += 1 progress.update(i) progress.finish() if url in TRANSLATION_SOURCES and options.get( 'hack_translations', False): with open(translation_hack_path, 'w+') as f: pickle.dump(self.translation_data, f) if options.get('hack_translations', False): with open(translation_hack_path, 'r') as f: self.translation_data = pickle.load(f) self.logger.info('Importing parsed translation in the database') self.translation_import()
def make_progress_bar(text=None):
    widgets = (['%s: ' % text] if text else []) + [
        progressbar.Percentage(), ' ',
        progressbar.Bar(), ' ',
        progressbar.ETA()
    ]
    return progressbar.ProgressBar(widgets=widgets)
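A minimal usage sketch for the factory above; the `records` list and `do_work` function are hypothetical, and it assumes the installed progressbar package supports driving the bar by calling it on an iterable.

# Hypothetical workload; only make_progress_bar comes from the snippet above.
records = list(range(250))
pbar = make_progress_bar('Processing')
for record in pbar(records):   # assumed: ProgressBar(...)(iterable) updates as it yields
    do_work(record)            # hypothetical per-item work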
def loadMSRA(self, seqName, mode='train', replace=False, tApp=False): '''seqName: P0 - P8 mode: if train, only save the cropped image replace: replace the previous cache file if exists tApp: append to previous loaded file if True ''' if not hasattr(self, 'frmList'): self.frmList = [] if not tApp: self.frmList = [] pickleCachePath = '{}/msra_{}.pkl'.format(self.cache_base_path, seqName) if os.path.isfile(pickleCachePath) and not replace: print 'direct load from the cache' t1 = time.time() f = open(pickleCachePath, 'rb') (self.frmList) += cPickle.load(f) t1 = time.time() - t1 print 'loaded with {}s'.format(t1) return self.frmList Camera.setCamera('INTEL') pbar = pb.ProgressBar( maxval=500 * len(self.msra_pose_list), widgets=['Loading MSRA | ', pb.Percentage(), pb.Bar()]) pbar.start() pbIdx = 0 seqPath = '/'.join([self.msra_base_path, seqName]) for pose_name in self.msra_pose_list: curPath = '/'.join([seqPath, pose_name, 'joint.txt']) f = open(curPath, 'r') frmNum = int(f.readline()[:-1]) for frmIdx in range(frmNum): frmPath = '/'.join( [seqPath, pose_name, '%06i_depth.bin' % (frmIdx)]) dm = DepthMap('MSRA', frmPath) skel = f.readline().split() skel = np.asarray([float(pt) for pt in skel]) def cvtMSRA_skel(init_skel): skel = init_skel.copy() for i in range(len(skel)): if i % 3 == 2: skel[i] *= -1.0 return skel skel = cvtMSRA_skel(skel) self.frmList.append(Frame(dm, skel)) if mode is 'train': self.frmList[-1].saveOnlyForTrain() pbar.update(pbIdx) pbIdx += 1 pbar.finish() if not os.path.exists(self.cache_base_path): os.makedirs(self.cache_base_path) f = open(pickleCachePath, 'wb') cPickle.dump((self.frmList), f, protocol=cPickle.HIGHEST_PROTOCOL) f.close() print 'loaded with {} frames'.format(len(self.frmList))
def loadNYU(self, frmStartNum, cameraIdx = 1, tFlag = 'train', tApp =\ False, isReplace=False): '''frmStartNum: starting frame index cameraIdx: [1,3] tFlag: save only the cropped image if is 'train' tApp: append to the previously loaded file if True ''' Camera.setCamera('KINECT') if cameraIdx not in [1]: raise ValueError( 'invalid cameraIdx, current only support view from 1') if tFlag not in ['train', 'test']: raise ValueError('invalid tFlag, can be only train or test') # load the annotation file matPath = '{}/{}/joint_data.mat'.format(self.nyu_base_path, tFlag) joint = sio.loadmat(matPath) joint_xyz = joint['joint_xyz'][cameraIdx - 1] joint_uvd = joint['joint_uvd'][cameraIdx - 1] matPath = './data/center_uvd_{}.mat'.format(tFlag) center = sio.loadmat(matPath) center = center['center_uvd'] # determine the start and end frame if frmStartNum >= len(joint_xyz): raise ValueError( 'invalid start frame, shoud be lower than {}'.format( len(joint_xyz))) fileIdx = int(frmStartNum / self.nyu_frm_perfile) frmStartNum = fileIdx * self.nyu_frm_perfile if tFlag == 'train': frmEndNum = min(frmStartNum + self.nyu_frm_perfile, len(joint_xyz)) elif tFlag == 'test': frmEndNum = len(joint_xyz) print 'frmStartNum={}, frmEndNum={}, fileIdx={}'.format( frmStartNum, frmEndNum, fileIdx) pickleCachePath = '{}/nyu_{}_{}_{}.pkl'.format(self.cache_base_path, tFlag, cameraIdx, fileIdx) if not hasattr(self, 'frmList'): self.frmList = [] if not tApp: self.frmList = [] if os.path.isfile(pickleCachePath) and isReplace == False: print 'direct load from the cache' print 'cache dir ={}'.format(pickleCachePath) t1 = time.time() f = open(pickleCachePath, 'rb') self.frmList += cPickle.load(f) t1 = time.time() - t1 print 'loaded with {}s'.format(t1) return pbar = pb.ProgressBar( maxval=frmEndNum - frmStartNum, widgets=['Loading NYU | ', pb.Percentage(), pb.Bar()]) pbar.start() pbIdx = 0 for frmIdx in range(frmStartNum, frmEndNum): frmPath = '{}/{}/depth_{}_{:07d}.png'.format( self.nyu_base_path, tFlag, cameraIdx, frmIdx + 1) dm = DepthMap('NYU', frmPath) skel = joint_xyz[frmIdx] skel = np.reshape(skel, (-1)) com_uvd = center[frmIdx] self.frmList.append(Frame(dm, skel, com_uvd)) self.frmList[-1].saveOnlyForTrain() pbar.update(pbIdx) pbIdx += 1 pbar.finish() if not os.path.exists(self.cache_base_path): os.makedirs(self.cache_base_path) f = open(pickleCachePath, 'wb') cPickle.dump((self.frmList), f, protocol=cPickle.HIGHEST_PROTOCOL) f.close() print 'loaded with {} frames'.format(len(self.frmList))
def computeByDay(self): self.loadOriginal() exp_res = pd.read_csv( "haikou-experiments/results/SIMULATION_RESULTS_ALL_DIDI_CHUXING_HAIKOU.csv" ) exp_res["real_start_time"] = pd.to_datetime(exp_res["real_start_time"]) self.all_ODs = {} bar = progressbar.ProgressBar(widgets=[ 'Days ', progressbar.Percentage(), ' (', progressbar.SimpleProgress(), ') ', ' (', progressbar.AbsoluteETA(), ') ', ]) all_days_str = exp_res["real_start_date"].unique() all_days = [] print("共计天数:", len(all_days_str)) for cur_date in bar(all_days_str): if self.date_week[cur_date] >= 5: continue sta_res = self.computeOneDay( exp_res[exp_res["real_start_date"] == cur_date], cur_date) all_days.append(sta_res) for sta_day in all_days: for period_index in range(len(PERIODS_MINUTES)): for key in sta_day[period_index].keys(): if sta_day[period_index][key]["num"] == 0: continue self.all_ODs[key][period_index]["num"].append( sta_day[period_index][key]["num"]) self.all_ODs[key][period_index]["matching_num"].append( sta_day[period_index][key]["matching_num"]) self.all_ODs[key][period_index][ "matching_probability"].append( sta_day[period_index][key]["matching_probability"]) self.all_ODs[key][period_index][ "aver_shared_distance"].append( sta_day[period_index][key]["aver_shared_distance"]) self.all_ODs[key][period_index][ "aver_final_distance"].append( sta_day[period_index][key]["aver_final_distance"]) with open("haikou-experiments/results/SIMULATION_STATISTIC.csv", "w") as csvfile: writer = csv.writer(csvfile) row = ["start_ver", "end_ver", "original_num", "original_days"] for i in range(len(PERIODS_MINUTES)): row += [ "num%s" % i, "matching_num%s" % i, "days%s" % i, "matching_probability%s" % i, "aver_shared_distance%s" % i, "aver_final_distance%s" % i ] writer.writerow(row) for i, key in enumerate(self.all_ODs.keys()): combined_id = getID(self.all_ODs[key][0]["start_ver"], self.all_ODs[key][0]["end_ver"]) if combined_id not in self.origianl_days: continue detail = [ self.all_ODs[key][0]["start_ver"], self.all_ODs[key][0]["end_ver"], self.origianl_orders[combined_id], self.origianl_days[combined_id] ] for j in range(len(PERIODS_MINUTES)): detail += [sum(self.all_ODs[key][j]["num"]),sum(self.all_ODs[key][j]["matching_num"]),len(self.all_ODs[key][j]["num"]),\ np.mean(self.all_ODs[key][j]["matching_probability"]), np.mean(self.all_ODs[key][j]["aver_shared_distance"]),\ np.mean(self.all_ODs[key][j]["aver_final_distance"])] writer.writerow(detail)
def main():
    common = common_cli.GetCommonArguments()
    device = common_cli.GetDeviceArguments()
    device.add_argument(
        '--chunk_kb', type=int, default=1024, metavar='1024',
        help='Size of packets to write in Kb. For older devices, it may be '
             'required to use 4.')
    parents = [common, device]

    parser = argparse.ArgumentParser(
        description=sys.modules[__name__].__doc__, parents=[common])
    subparsers = parser.add_subparsers(title='Commands', dest='command_name')

    subparser = subparsers.add_parser(
        name='help', help='Prints the commands available')
    subparser = subparsers.add_parser(
        name='devices', help='Lists the available devices', parents=[common])

    common_cli.MakeSubparser(
        subparsers, parents, fastboot.FastbootCommands.Continue)
    common_cli.MakeSubparser(
        subparsers, parents, fastboot.FastbootCommands.Download,
        {'source_file': 'Filename on the host to push'})
    common_cli.MakeSubparser(
        subparsers, parents, fastboot.FastbootCommands.Erase)
    common_cli.MakeSubparser(
        subparsers, parents, fastboot.FastbootCommands.Flash)
    common_cli.MakeSubparser(
        subparsers, parents, fastboot.FastbootCommands.Getvar)
    common_cli.MakeSubparser(
        subparsers, parents, fastboot.FastbootCommands.Oem)
    common_cli.MakeSubparser(
        subparsers, parents, fastboot.FastbootCommands.Reboot)

    if len(sys.argv) == 1:
        parser.print_help()
        return 2

    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)

    if args.command_name == 'devices':
        return Devices(args)
    if args.command_name == 'help':
        parser.print_help()
        return 0

    kwargs = {}
    argspec = inspect.getargspec(args.method)
    if 'info_cb' in argspec.args:
        kwargs['info_cb'] = _InfoCb
    if 'progress_callback' in argspec.args and progressbar:
        bar = progressbar.ProgressBar(
            widgets=[progressbar.Bar(), progressbar.Percentage()])
        bar.start()

        def SetProgress(current, total):
            bar.update(current / total * 100.0)
            if current == total:
                bar.finish()

        kwargs['progress_callback'] = SetProgress

    return common_cli.StartCli(
        args,
        fastboot.FastbootCommands,
        chunk_kb=args.chunk_kb,
        extra=kwargs)
def download_files(fileList, urlBase, outDir, verify=True): ''' Download a list of files from a URL to a directory ''' # Authors # ------- # Milena Veneziani # Xylar Asay-Davis session = requests.Session() if not verify: session.verify = False for fileName in fileList: outFileName = '{}/{}'.format(outDir, fileName) # outFileName contains full path, so we need to make the relevant # subdirectories if they do not exist already directory = os.path.dirname(outFileName) try: os.makedirs(directory) except OSError: pass url = '{}/{}'.format(urlBase, fileName) try: response = session.get(url, stream=True) totalSize = response.headers.get('content-length') except requests.exceptions.RequestException: print(' {} could not be reached!'.format(url)) continue try: response.raise_for_status() except requests.exceptions.HTTPError as e: print('ERROR while downloading {}:'.format(fileName)) print(e) continue if totalSize is None: # no content length header if not os.path.exists(outFileName): with open(outFileName, 'wb') as f: print('Downloading {}...'.format(fileName)) try: f.write(response.content) except requests.exceptions.RequestException: print(' {} failed!'.format(fileName)) else: print(' {} done.'.format(fileName)) else: # we can do the download in chunks and use a progress bar, yay! totalSize = int(totalSize) if os.path.exists(outFileName) and \ totalSize == os.path.getsize(outFileName): # we already have the file, so just continue continue print('Downloading {} ({})...'.format(fileName, sizeof_fmt(totalSize))) widgets = [ progressbar.Percentage(), ' ', progressbar.Bar(), ' ', progressbar.ETA() ] bar = progressbar.ProgressBar(widgets=widgets, maxval=totalSize).start() size = 0 with open(outFileName, 'wb') as f: try: for data in response.iter_content(chunk_size=4096): size += len(data) f.write(data) bar.update(size) bar.finish() except requests.exceptions.RequestException: print(' {} failed!'.format(fileName)) else: print(' {} done.'.format(fileName))
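A usage sketch for download_files; the URL base, file names, and output directory below are hypothetical placeholders rather than values from the original project.

# Hypothetical arguments; only the function signature comes from the code above.
download_files(
    fileList=['README.txt', 'ocean/timeSeries.nc'],
    urlBase='https://example.com/analysis/data',
    outDir='./downloaded_data',
    verify=True,
)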
def run_halted_queue(self, params, frame_chunks): """Runs a queue with params for each of the frame chunks. The program halts while awaiting the completion of tasks in the queue and shows a progress bar meanwhile. Any frame chunks that have been previously completed will be marked as complete unless running with force_recompute. Args: params (dict[str, _]): Message to be published to RabbitMQ. frame_chunks (list[dict[str, str]]): List of frame chunk with keys "first" and "last" corresponding to the appropriate frame names for the chunk. """ connection = pika.BlockingConnection( pika.ConnectionParameters(self.master_ip, heartbeat=0)) channel = connection.channel() channel.queue_declare(queue=config.QUEUE_NAME) channel.queue_declare(queue=config.RESPONSE_QUEUE_NAME) self.purge_queue(config.QUEUE_NAME) self.purge_queue(config.RESPONSE_QUEUE_NAME) # force_recompute can be specified over the entire pipeline or particular stages frame_chunks = self._get_missing_chunks(params, frame_chunks) if len(frame_chunks) == 0: return for frame_chunk in frame_chunks: params.update(frame_chunk) msg = json.dumps(params) channel.basic_publish( exchange="", routing_key=config.QUEUE_NAME, body=msg, properties=pika.BasicProperties( delivery_mode=2), # make message persistent ) # Waits until the queue is empty before returning for next step queue_state = channel.queue_declare(config.RESPONSE_QUEUE_NAME) queue_size = queue_state.method.message_count progress = "█" widgets = [ f"{progress} ", f"{params['app']}:", progressbar.Bar(progress, "|", "|"), progressbar.Percentage(), " (Workers: ", progressbar.FormatLabel("0"), ") (", progressbar.FormatLabel("%(elapsed)s"), ")", ] bar = progressbar.ProgressBar(maxval=len(frame_chunks), widgets=widgets) bar.start() no_worker_period = None while queue_size != len(frame_chunks): time.sleep(1.0) queue_size = channel.queue_declare( config.RESPONSE_QUEUE_NAME).method.message_count num_workers = channel.queue_declare( config.QUEUE_NAME).method.consumer_count widgets[5] = str(num_workers) if num_workers != 0: no_worker_period = None if num_workers == 0: if no_worker_period is None: no_worker_period = time.time() if time.time() - no_worker_period > config.NO_WORKER_TIMEOUT: raise Exception( "No workers for extended time! Check worker logs for errors..." ) bar.update(queue_size) bar.finish()
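A hedged sketch of how the queue runner above might be invoked; the `pipeline` instance, app name, and frame ranges are hypothetical, chosen only to match the documented shapes of `params` and `frame_chunks`.

# Hypothetical invocation; `pipeline` is an instance of the class defining
# run_halted_queue, and the app/frame values are placeholders.
params = {"app": "depth_estimation", "force_recompute": False}
frame_chunks = [
    {"first": "000000", "last": "000099"},
    {"first": "000100", "last": "000199"},
]
pipeline.run_halted_queue(params, frame_chunks)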
def download(self): """Download the specified file""" def total_seconds(td): # Keep backward compatibility with Python 2.6 which doesn't have # this method if hasattr(td, 'total_seconds'): return td.total_seconds() else: return (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6 attempt = 0 if not os.path.isdir(self.directory): os.makedirs(self.directory) # Don't re-download the file if os.path.isfile(os.path.abspath(self.target)): self.logger.info("File has already been downloaded: %s" % (self.target)) return self.logger.info('Downloading from: %s' % (urllib.unquote(self.final_url))) self.logger.info('Saving as: %s' % self.target) tmp_file = self.target + ".part" while True: attempt += 1 try: start_time = datetime.now() # Enable streaming mode so we can download content in chunks r = requests.get(self.final_url, stream=True, auth=self.authentication) r.raise_for_status() content_length = r.headers.get('Content-length') # ValueError: Value out of range if only total_size given if content_length: total_size = int(content_length.strip()) max_value = ((total_size / CHUNK_SIZE) + 1) * CHUNK_SIZE bytes_downloaded = 0 log_level = self.logger.getEffectiveLevel() if log_level <= mozlog.INFO and content_length: widgets = [pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA(), ' ', pb.FileTransferSpeed()] pbar = pb.ProgressBar(widgets=widgets, maxval=max_value).start() with open(tmp_file, 'wb') as f: for chunk in iter(lambda: r.raw.read(CHUNK_SIZE), ''): f.write(chunk) bytes_downloaded += CHUNK_SIZE if log_level <= mozlog.INFO and content_length: pbar.update(bytes_downloaded) t1 = total_seconds(datetime.now() - start_time) if self.timeout_download and \ t1 >= self.timeout_download: raise TimeoutError if log_level <= mozlog.INFO and content_length: pbar.finish() break except (requests.exceptions.RequestException, TimeoutError), e: if tmp_file and os.path.isfile(tmp_file): os.remove(tmp_file) if self.retry_attempts > 0: # Log only if multiple attempts are requested self.logger.warning('Download failed: "%s"' % str(e)) self.logger.info('Will retry in %s seconds...' % (self.retry_delay)) time.sleep(self.retry_delay) self.logger.info("Retrying... (attempt %s)" % attempt) if attempt >= self.retry_attempts: raise time.sleep(self.retry_delay)
def analyse_all_genomes(genomes, dbpath, tmp_path, nbn, soft, logger, quiet=False): """ Parameters ---------- genomes : dict {genome: spegenus.date} dbpath : str path to folder containing genomes tmp_path : str path to put out files nbn : int minimum number of 'N' required to cut into a new contig soft : str soft used (prokka, prodigal, or None if called by prepare module) logger : logging.Logger logger object to write log information. Because this function can be called from prepare module, where sub logger name is different quiet : bool True if nothing must be written to stdout/stderr, False otherwise Returns ------- dict {genome: [spegenus.date, orig_name, path_to_seq_to_annotate, size, nbcont, l90]} """ cut = nbn > 0 pat = None ## To put pattern with which sequence must be cut if cut: pat = 'N' * nbn + "+" nbgen = len(genomes) bar = None curnum = None if cut: logger.info(("Cutting genomes at each time there are at least {} 'N' in a row, " "and then, calculating genome size, number of contigs and L90.").format(nbn)) else: logger.info("Calculating genome size, number of contigs, L90") if not quiet: # Create progressbar widgets = ['Analysis: ', progressbar.Bar(marker='█', left='', right=''), ' ', progressbar.Counter(), "/{}".format(nbgen), ' (', progressbar.Percentage(), ') - ', progressbar.Timer(), ' - ', progressbar.ETA() ] bar = progressbar.ProgressBar(widgets=widgets, max_value=nbgen, term_width=79).start() curnum = 1 toremove = [] # Analyse genomes 1 by 1 for genome, name in genomes.items(): # If not quiet option, show progress bar if not quiet: bar.update(curnum) curnum += 1 # analyse genome, and check everything went well. # exception if binary file try: res = analyse_genome(genome, dbpath, tmp_path, cut, pat, genomes, soft, logger=logger) except UnicodeDecodeError: logger.warning(f"'{genome}' does not seem to be a fasta file. It will be ignored.") res = False # Problem while analysing genome -> genome ignored if not res: toremove.append(genome) # If there are some genomes to remove (analysis failed), remove them from genomes dict. if toremove: for gen in toremove: del genomes[gen] if not genomes: logger.error(f"No genome was found in the database folder {dbpath}. See logfile " "for more information.") sys.exit(1) if not quiet: bar.finish() return 0
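A minimal call sketch under assumed inputs; the genome dictionary, paths, and logger name are placeholders matching the parameter descriptions in the docstring above.

# Hypothetical inputs shaped like the documented parameters.
genomes = {"genome1.fna": "ESCO.1116.00001"}   # {genome: spegenus.date}
logger = logging.getLogger("prepare")
analyse_all_genomes(genomes, dbpath="db", tmp_path="tmp", nbn=5,
                    soft="prokka", logger=logger, quiet=False)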
def gitsearch(): # This part contains the main code. path_place = '/home/shaaran/Downloads/Obama_out_-_President_Barack_Obama_s_hilarious_final_White_House_correspondents_dinner_speech-youtube-NxFkEj7KPC0-43-0-301.mp4' #file destination video_capture = cv2.VideoCapture( path_place ) #starts the web cam if you attach it externally use 1 or 2 , use trail and error .For using the downloaded video replace with path_place detector = dlib.get_frontal_face_detector( ) #pretrained model for detecting frontal face predict_path = '/home/shaaran/PycharmProjects/shape_predictor_68_face_landmarks.dat' predictor = dlib.shape_predictor(predict_path) # initialzing the predictor count = 0 # counter for loop tfms = tfms_from_model( resnet34, sz, aug_tfms=transforms_side_on, max_zoom=1.1 ) #transforamtions for getting a large and varied dataset from small datset data = ImageClassifierData.from_paths(PATH, tfms=tfms) #apply transforms to data print(data.classes) #prints the available emotions learn = ConvLearner.pretrained( arch, data, precompute=True) #Uses pretrianed in first case print('loading requirements......') print( 'This has been made by shaaran alias devshaaran, if you are using this code anywhere for research or educational purposes, please give reference.ENJOY!' ) learn.precompute = False #precomputation is made false for deeper recognition #learn.fit(1e-1, 1) learn.fit(1e-1, 3, cycle_len=1) #model is fit learn.load('224_all') print('loading done !') #progress bar for all emotions *Incomplete* bar_happy = progressbar.ProgressBar(maxval=1, widgets=[ progressbar.Bar('=', '[', ']'), 'happy', progressbar.Percentage() ]) bar_neutral = progressbar.ProgressBar(maxval=1, widgets=[ progressbar.Bar('=', '[', ']'), 'neutral', progressbar.Percentage() ]) bar_sad = progressbar.ProgressBar(maxval=1, widgets=[ progressbar.Bar('=', '[', ']'), 'sad', progressbar.Percentage() ]) bar_surprise = progressbar.ProgressBar(maxval=1, widgets=[ progressbar.Bar('=', '[', ']'), 'surprise', progressbar.Percentage() ]) bar_happy.start() bar_neutral.start() bar_sad.start() bar_surprise.start() # Initialize some variablesface_locations = [] while True: # Grab a single frame of video ret, frame = video_capture.read() # Resize frame of video to 1/4 size for faster face detection processing small_frame = cv2.resize(frame, (0, 0), fx=0.50, fy=0.50) gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # detect faces in the grayscale image rects = detector(gray, 1) for (i, rect) in enumerate(rects): # determine the facial landmarks for the face region, then # convert the facial landmark (x, y)-coordinates to a NumPy # array shape = predictor(gray, rect) shape = face_utils.shape_to_np(shape) for (x, y) in shape: cv2.circle(frame, (x, y), 1, (0, 0, 255), -1) # Find all the faces and face encodings in the current frame of video face_locations = face_recognition.face_locations(small_frame, model="cnn") counts = 0 counts += 1 # Display the results for top, right, bottom, left in face_locations: # Scale back up face locanp.exp(preds[0][3])*100tions since the frame we detected in was scaled to 1/4 size top *= 2 right *= 2 bottom *= 2 left *= 2 lower_red = np.array([0, 0, 253]) upper_red = np.array([0, 0, 255]) # Extract the region of the image that contains the face face_image = frame[top:bottom, left:right] mask = cv2.inRange(face_image, lower_red, upper_red) res = cv2.bitwise_and(face_image, face_image, mask=mask) cv2.imshow('vid', face_image) cv2.imshow('res', res) count += 1 cv2.imwrite('0.jpg', res) #cv2.imwrite((output_loc + '\\' + 
str(count)+ str(counts) + '.jpg'), res) try: # learn = ConvLearner.pretrained(arch, data, precompute=True) trn_tfms, val_tfms = tfms_from_model(arch, sz) im = val_tfms(open_image('0.jpg')) learn.precompute = False preds = learn.predict_array(im[None]) #Use below only for debuggng ! #print(preds) #print(np.exp(preds)[0][0]) #qprint(data.classes[np.argmax(preds)]) #updating the percentages bar_happy.update(np.exp(preds[0][0])) bar_sad.update(np.exp(preds[0][2])) bar_neutral.update(np.exp(preds[0][1])) bar_surprise.update(np.exp(preds[0][3])) #put text on video cv2.putText( frame, 'happy : ' + str(int(np.exp(preds[0][0]) * 100)) + '%', (top - 40, left - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) cv2.putText( frame, 'neutral : ' + str(int(np.exp(preds[0][1]) * 100)) + '%', (top - 40, left), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) cv2.putText( frame, 'sad : ' + str(int(np.exp(preds[0][2]) * 100)) + '%', (top - 40, left + 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) cv2.putText( frame, 'surprise : ' + str(int(np.exp(preds[0][3]) * 100)) + '%', (top - 40, left + 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) except Exception as e: print(e) cv2.imshow('Video', frame) #shows image if cv2.waitKey(1) & 0xFF == ord('q'): bar_surprise.finish() bar_neutral.finish() bar_sad.finish() bar_happy.finish() break video_capture.release() cv2.destroyAllWindows()
def generate_fill(self, max_length=None, no_duration=False, verbose=1):
    """
    :param max_length: maximum number of steps to generate (defaults to 300 / self.step_length)
    :param no_duration: passed to compute_generated_array (whether to drop note durations)
    :param verbose: verbosity level
    :return:
    """
    # ----- Parameters -----
    max_length = 300 / self.step_length if max_length is None else max_length

    # ----- Variables -----
    if self.data_transformed_path is None and self.data_test_transformed_path is None:
        raise Exception('Some data need to be loaded before comparing the generation')
    path = self.data_transformed_path if self.data_test_transformed_path is None else self.data_test_transformed_path
    sequence = Sequences.KerasSequence(
        path=path,
        nb_steps=self.nb_steps,
        batch_size=1,
        work_on=self.work_on)  # Return array instead of list (for instruments)
    max_length = int(min(max_length, len(sequence)))
    nb_instruments = sequence.nb_instruments

    # ----- Seeds -----
    truth = sequence[0][0]
    filled_list = [np.copy(truth) for inst in range(nb_instruments)]
    mask = np.ones((nb_instruments, nb_instruments, self.nb_steps))
    for inst in range(nb_instruments):
        filled_list[inst][inst] = 0
        mask[inst, inst] = 0

    # ----- Generation -----
    cprint('Start generating (fill) ...', 'blue')
    bar = progressbar.ProgressBar(maxval=max_length,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.Percentage(), ' ',
                                           progressbar.ETA()])
    bar.start()  # To see it working
    for l in range(max_length):
        s_input, s_output = sequence[l]
        to_fill_list = [np.copy(s_input) for inst in range(nb_instruments)]
        for inst in range(nb_instruments):
            to_fill_list[inst][inst] = 0
        nn_input = np.concatenate(
            tuple(to_fill_list),
            axis=1)  # (nb_instruments, batch=nb_instruments, nb_steps, step_size, input_size, channels)
        preds = self.keras_nn.generate(input=list(nn_input) + [mask])
        preds = np.asarray(preds).astype('float64')  # (nb_instruments, batch=nb_instruments, nb_steps=1, step_size, input_size, channels)
        if len(preds.shape) == 5:  # Only one instrument: output of nn is not a list
            preds = np.expand_dims(preds, axis=0)
        if len(s_output.shape) == 5:  # Only one instrument: output of nn is not a list
            s_output = np.expand_dims(s_output, axis=0)
        preds = midi.create.normalize_activation(preds, mono=self.mono, use_binary=self.use_binary)
        truth = np.concatenate((truth, s_output), axis=2)
        for inst in range(nb_instruments):
            p = np.copy(s_output)
            p[inst] = np.take(preds, axis=1, indices=[inst])[inst]
            filled_list[inst] = np.concatenate(
                (filled_list[inst], p),
                axis=2)  # (nb_instruments, batch=1, nb_steps, step_size, input_size, channels)
        bar.update(l + 1)
    bar.finish()

    # -------------------- Compute notes list --------------------
    # ----- Reshape -----
    truth = self.reshape_generated_array(truth)
    for inst in range(nb_instruments):
        filled_list[inst] = self.reshape_generated_array(filled_list[inst])
    self.save_midis_path.mkdir(parents=True, exist_ok=True)

    accuracies, accuracies_inst = self.compute_generated_array(
        generated_array=truth,
        folder_path=self.save_midis_path,
        name='generated_fill_truth',
        no_duration=no_duration,
        verbose=verbose,
        save_images=True)
    accuracies, accuracies_inst = [accuracies], [accuracies_inst]
    for inst in range(nb_instruments):
        acc, acc_inst = self.compute_generated_array(
            generated_array=filled_list[inst],
            folder_path=self.save_midis_path,
            name=f'generated_fill_{inst}',
            no_duration=no_duration,
            array_truth=truth,
            verbose=verbose,
            save_truth=False,
            save_images=True)
        accuracies.append(acc)
        accuracies_inst.append(acc_inst)

    # Save the image of all in a subplot to allow easier comparisons
    self.save_generated_arrays_cross_images(
        generated_arrays=[truth] + filled_list,
        folder_path=self.save_midis_path,
        name='generated_fill_all',
        replicate=False,
        titles=['Truth'] + [f'Fill Inst {i}' for i in range(nb_instruments)],
        subtitles=[f'Acc: {accuracies_inst[i][int(max(0, i - 1))]}'
                   for i in range(nb_instruments + 1)]  # Truth is in it
    )
    # Save the summary of the generation
    summary.summarize(
        # Function parameters
        path=self.save_midis_path,
        title=self.full_name,
        file_name='generate_fill_summary.txt',
        # Summary parameters
        length=max_length,
        no_duration=no_duration,
        # Generic Summary
        **self.summary_dict)
    cprint('Done generating (fill)', 'green')
def runStrategy(self): if (debug): print 'Started computing trades' dataList = self.reader.dataList # Hardcoded due to the strategy used # Trading starts in January # startId = int(3 * 30 * jumpSize * 0.7) startId = 0 endId = dataList['open'].shape[0] # endId = startId + 50 currPos = 0 * dataList['open'].iloc[0] self.currQty = 0.0 * dataList['open'].iloc[0] self.currQty.fillna(0, inplace=True) logList = [] print dataList['open'].shape, startId, endId print 'Current trading progress:' bar = progressbar.ProgressBar(maxval = endId, \ widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]) bar.start() tradeId = startId while (tradeId < endId): # Since the first column (and index) is time currTime = dataList['open'].iloc[tradeId].name if (not (self.startTime <= currTime <= self.endTime)): tradeId += self.tradeFreq continue currTimeStamp = datetime.datetime.fromtimestamp(currTime) currDay = currTimeStamp.date() currHour = currTimeStamp.time().hour currMins = currTimeStamp.time().minute if (tradeId % 25000 == 0): print currTimeStamp # print 'Trade Cost:', self.tradeCost # print 'Current Qty:', np.array(self.currQty) print 'Current Budget:', self.currBudget print 'Current Val:', self.currVal print 'NumTrades:', self.cntTrades print 'Avg. Profit:', self.currVal, self.initBudget print 'Avg. Profit:', (( (self.currVal - self.initBudget) / 1000.0) / (self.cntTrades + eps)) * 1e4 if (self.dailyTrade): lowTime = deepcopy(currTimeStamp).replace(hour=9, minute=(15 + self.window)) highTime = deepcopy(lowTime) + datetime.timedelta( minutes=(self.holdPeriod * self.tradeFreq)) # print currTimeStamp, lowTime, highTime, (lowTime <= currTimeStamp <= highTime) if (not (lowTime <= currTimeStamp <= highTime)): tradeId += 1 continue if (traceFlag): print 'generateSignal: START' currOrder = self.strategy.generateSignal(currTime) if (traceFlag): print 'generateSignal: END' # Saving the current day price currPrice = pd.DataFrame(0, index=dataList['open'].columns, columns=['open', 'close']) currPrice['open'] = dataList['open'].iloc[tradeId] currPrice['close'] = dataList['close'].iloc[tradeId] self.cntTrades += (np.sum(np.abs(currOrder['qty']) > eps)) if (debug): print currOrder # If we want to manipulate positions and other quantities # currOrder = self.processOrders(currOrder, currPrice, currPos) currOrder = self.processOrdersOptimized(currOrder, currPrice, currPos) currPos = currOrder['position'] if (traceFlag): print 'loggingComputation: START' # Compute step statistics self.tradeCost = (currOrder['qty'] * currOrder['signal'] * currPrice['open']).sum() self.currQty += currOrder['qty'] * currOrder['signal'] self.currBudget -= self.tradeCost self.currVal = (self.currQty * currPrice['close']).sum() + self.currBudget if (debug): # print currTime # print currPrice # print currOrder # print 'Trade Cost:', self.tradeCost # print 'Current Qty:', self.currQty print 'Current Budget:', self.currBudget print 'Current Val:', self.currVal # raw_input('WAIT') logList.append([currTime, currOrder, currPrice]) bar.update(tradeId) tradeId += self.tradeFreq # Denotes whether we are currently holding any positions or not if daily trade # Helps in deciding how much to move at the end of the hold period if (self.dailyTrade): if (currTimeStamp == highTime): tradeId -= self.tradeFreq tradeId += 1 if (traceFlag): print 'loggingComputation: END' # Flush i.e. 
empty your positions if (self.flush): currTime = dataList['open'].iloc[endId - 1].name currPrice = pd.DataFrame(0, index=dataList['open'].columns, columns=['open', 'close']) currPrice['open'] = dataList['open'].iloc[endId - 1] currPrice['close'] = dataList['close'].iloc[endId - 1] currOrder = pd.DataFrame(0, index=dataList['open'].columns, columns=['signal', 'qty']) currOrder['signal'] = np.sign(-self.currQty) currOrder['qty'] = np.abs(-self.currQty) logList.append([currTime, currOrder, currPrice]) bar.finish() print 'Finished running the strategy on', len(logList), 'timestamps' self.logOrders(logList) print 'FINAL STATS:' # print 'Trade Cost:', self.tradeCost # print 'Current Qty:', np.array(self.currQty) print 'Current Budget:', self.currBudget print 'Current Val:', self.currVal print 'NumTrades:', self.cntTrades print 'Avg. Profit:', (((self.currVal - self.initBudget) / 100.0) / (self.cntTrades + eps)) * 1e4 if (debug): print 'Finished computing trades' raw_input('Finished Logging computed trades (Enter to continue):')
def compare_generation(self, max_length=None, no_duration=False, verbose=1):
    """
    :return:
    """
    # -------------------- Find information --------------------
    if self.data_transformed_path is None and self.data_test_transformed_path is None:
        raise Exception('Some data need to be loaded before comparing the generation')
    path = self.data_transformed_path if self.data_test_transformed_path is None else self.data_test_transformed_path
    self.sequence = Sequences.AllInstSequence(
        path=path,
        nb_steps=self.nb_steps,
        batch_size=1,
        work_on=self.work_on,
        noise=0)
    max_length = len(self.sequence) if max_length is None else min(max_length, len(self.sequence))
    max_length = min(max_length, 10)

    # -------------------- Construct seeds --------------------
    generated = np.array(self.sequence[0][0])  # (nb_instruments, 1, nb_steps, step_size, input_size, 2) (1 = batch)
    generated_helped = np.copy(generated)  # Each step will take the truth as an input
    generated_truth = np.copy(generated)  # The truth
    mask = self.get_mask(self.sequence.nb_instruments, batch_size=2)

    # -------------------- Generation --------------------
    cprint('Start comparing generation ...', 'blue')
    bar = progressbar.ProgressBar(maxval=max_length,
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ',
                                           progressbar.Percentage(), ' ',
                                           progressbar.ETA()])
    bar.start()  # To see it working
    for l in range(max_length):
        ms_input, ms_output = self.sequence[l]
        sample = np.concatenate(
            (generated[:, :, l:], np.array(ms_input)),
            axis=1)  # (nb_instruments, 2, nb_steps, step_size, input_size, 2)
        # Generation
        preds = self.keras_nn.generate(input=list(sample) + mask)
        # Reshape
        preds = np.asarray(preds).astype('float64')  # (nb_instruments, batch=2, nb_steps=1, length, 88, 2)
        preds_truth = np.array(ms_output)  # (nb_instruments, 1, 1, step_size, input_size, 2)
        # If there is only one instrument, the output of the nn is not a list
        if len(preds.shape) == 5:
            preds = np.expand_dims(preds, axis=0)
        if len(preds_truth.shape) == 5:
            preds_truth = np.expand_dims(preds_truth, axis=0)
        preds = midi.create.normalize_activation(preds, mono=self.mono, use_binary=self.use_binary)  # Normalize the activation part
        preds_helped = preds[:, [1]]  # (nb_instruments, 1, 1, length, 88, 2)
        preds = preds[:, [0]]
        # Concatenation
        generated = np.concatenate((generated, preds), axis=2)  # (nb_instruments, 1, nb_steps, length, 88, 2)
        generated_helped = np.concatenate((generated_helped, preds_helped), axis=2)  # (nb_instruments, 1, nb_steps, length, 88, 2)
        generated_truth = np.concatenate((generated_truth, preds_truth), axis=2)
        bar.update(l + 1)
    bar.finish()

    # -------------------- Compute notes list --------------------
    # Generated
    generated_midi_final = self.reshape_generated_array(generated)
    # Helped
    generated_midi_final_helped = self.reshape_generated_array(generated_helped)
    # Truth
    generated_midi_final_truth = self.reshape_generated_array(generated_truth)

    # ---------- find the name for the midi_file ----------
    self.save_midis_path.mkdir(parents=True, exist_ok=True)

    accuracies, accuracies_inst = [], []
    # Generated
    acc, acc_inst = self.compute_generated_array(
        generated_array=generated_midi_final,
        folder_path=self.save_midis_path,
        name='compare_generation_alone',
        no_duration=no_duration,
        array_truth=generated_midi_final_truth,
        verbose=verbose,
        save_truth=False,
        save_images=True)
    accuracies.append(acc)
    accuracies_inst.append(acc_inst)
    # Helped
    acc, acc_inst = self.compute_generated_array(
        generated_array=generated_midi_final_helped,
        folder_path=self.save_midis_path,
        name='compare_generation_helped',
        no_duration=no_duration,
        array_truth=generated_midi_final_truth,
        verbose=verbose,
        save_truth=False,
        save_images=True)
    accuracies.append(acc)
    accuracies_inst.append(acc_inst)
    # Truth
    acc, acc_inst = self.compute_generated_array(
        generated_array=generated_midi_final_truth,
        folder_path=self.save_midis_path,
        name='compare_generation_truth',
        no_duration=no_duration,
        array_truth=None,
        verbose=verbose,
        save_truth=False,
        save_images=True)
    accuracies.append(acc)
    accuracies_inst.append(acc_inst)

    # Save the image of all in a subplot to allow easier comparisons
    self.save_generated_arrays_cross_images(
        generated_arrays=[generated_midi_final_truth, generated_midi_final_helped, generated_midi_final],
        folder_path=self.save_midis_path,
        name='compare_generation_all',
        replicate=False,
        titles=['Truth', 'Helped', 'Alone'],
        subtitles=[
            'Acc : 1',
            f'Acc: {accuracies[1]:.3}, Acc_inst: [{", ".join([f"{a:.3}" for a in accuracies_inst[1]])}]',
            f'Acc: {accuracies[0]:.3}, Acc_inst: [{", ".join([f"{a:.3}" for a in accuracies_inst[0]])}]'
        ]  # Truth is in it
    )

    # ----- Summarize the generation -----
    # Creation of the summary .txt file
    summary.summarize(
        # Function parameters
        path=self.save_midis_path,
        title=self.full_name,
        file_name='compare_generation_summary.txt',
        # Summary parameters
        length=max_length,
        no_duration=no_duration,
        generated_accuracy=accuracies[0],
        generated_accuracies=accuracies_inst[0],
        helped_accuracy=accuracies[1],
        helped_accuracies=accuracies_inst[1],
        # Generic Summary
        **self.summary_dict)
    cprint('Done comparing generation', 'green')
args1 = parser.parse_args()
print(args1)

dt = [1, 3, 4, 6, 12, 24, 48]  # [24, 48]  # discretization unit, in hours
dw = [0, 1, 3, 5, 7]  # length of the sliding window of previous features, in units of dt
create = False
all_combs = list(itertools.product(dt, dw))

bar = progressbar.ProgressBar(
    maxval=len(all_combs),
    widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
bar.start()

n_comb = 0
args = Args(0, 0, "", 0, "", False, "")
for comb in all_combs:
    dt = comb[0]
    dw = comb[1]
    bar.update(n_comb + 1)
    n_comb += 1
    if dt == 1 and dw == 0:
        continue
    # if dw == 0:  # and dt != 1:
    #     create = True
    # if dt != 1:
def redo_song_generate(self, song_number=None, instrument_order=None, no_duration=False, save_images=True, noise=0): """ :param instrument_order: The order of the instruments to remplace :param song_number: The number of the song in the dataset :param no_duration: :param save_images: :param noise: :return: """ path = self.data_transformed_path if self.data_test_transformed_path is None else self.data_test_transformed_path self.sequence = Sequences.KerasSequence( path=path, nb_steps=self.nb_steps, batch_size=1, work_on=self.work_on, noise=noise, replicate=False, predict_offset=self.predict_offset) song_number = np.random.randint( self.sequence.nb_songs) if song_number is None else song_number instrument_order = np.random.permutation( self.nb_instruments ) if instrument_order is None else instrument_order all_arrays = [] # Construct the truth array x, y = self.sequence.get_all_song(song_number=song_number, in_batch_format=False) # x: (nb_instruments, batch=1, nb_steps, step_size, input_size, channels)] # y: (nb_instruments, batch=1, nb_steps, step_size, input_size, channels)] # x and y are the same except that in x, there is some noise length = self.sequence.get_song_len(song_number) if length == 0: # It means the len of the song is < nb_steps shape = (*(x.shape[:2]), self.nb_steps + 1, *(x.shape[3:])) zeros = np.zeros( shape ) # (nb_instruments, batch, nb_steps, step_size, input_size, channels) zeros[:, :, -x.shape[2]:] = x x = zeros length = 1 truth = x # truth: (nb_instruments, batch=1, len_song, step_size, input_size, channels) all_arrays.append(truth) cprint('Start redoing song (generate) ...', 'blue') bar = progressbar.ProgressBar(maxval=length * self.nb_instruments, widgets=[ progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage(), ' ', progressbar.ETA() ]) bar.start() # To see it working for instrument in range(len(instrument_order)): # We replace the instruments one by one instrument_to_remove = instrument_order[instrument] generated = np.copy(all_arrays[-1]) # generated : (nb_instruments, batch=1, nb_steps, step_size, input_size, channels) generated[instrument_to_remove] = 0 for step in range(length): inputs = np.take(generated, axis=2, indices=range(step, step + self.nb_steps)) # inputs = (nb_instruments, batch=1, nb_steps, step_size, input_size, channels)] mask = self.get_mask() # (batch=1, nb_instruments, nb_steps) # Remove the instrument from the input mask[0][:, instrument_to_remove] = 0 inputs[instrument_to_remove] = 0 preds = np.asarray( self.keras_nn.generate(input=list(inputs) + mask)).astype('float64') # preds: (nb_instruments, batch=1, nb_steps=1, step_size, input_size, channels) preds = midi.create.normalize_activation( preds, mono=self.mono, use_binary=self.use_binary) preds_index = step + self.nb_steps + (self.predict_offset - 1) generated[instrument_to_remove, :, preds_index:preds_index + 1] = preds[instrument_to_remove] bar.update(instrument * length + step) if self.mono: # If mono we say the first measures have no notes generated[instrument_to_remove, :, :self.nb_steps, :, -1] = 1 # generated : (nb_instruments, batch=1, nb_steps, step_size, input_size, channels) all_arrays.append(generated) # all_arrays: List(nb_instruments + 1)[(nb_instruments, batch=1, nb_steps, step_size, input_size, channels)] bar.finish() self.save_midis_path.mkdir(exist_ok=True, parents=True) generated_midi = [ self.reshape_generated_array(arr) for arr in all_arrays ] # Save the truth accuracies, accuracies_inst = self.compute_generated_array( generated_array=generated_midi[0], 
folder_path=self.save_midis_path, name='redo_song_generate_truth', no_duration=no_duration, save_images=save_images, ) accuracies, accuracies_inst = [accuracies], [accuracies_inst] for inst in range(self.nb_instruments): acc, acc_inst = self.compute_generated_array( generated_array=generated_midi[inst + 1], folder_path=self.save_midis_path, name= f'redo_song_generate_{inst}_(inst_{instrument_order[inst]})', no_duration=no_duration, array_truth=generated_midi[0], save_images=save_images, save_truth=False, ) accuracies.append(acc) accuracies_inst.append(acc_inst) if self.batch is not None: self.sequence.batch_size = self.batch self.save_generated_arrays_cross_images( generated_arrays=generated_midi, folder_path=self.save_midis_path, name='redo_song_all', titles=['Truth'] + [ f'Iteration {i}: change inst {instrument_order[i]}' for i in range(self.nb_instruments) ], subtitles=[ f'Acc: {accuracies[i]}, Acc inst: {accuracies_inst[i]}' for i in range(self.nb_instruments + 1) ]) summary.summarize( # Function params path=self.save_midis_path, title=self.full_name, file_name='redo_song_replicate_summary.txt', # Summary params song_number=song_number, instrument_order=instrument_order, no_duration=no_duration, noise=noise, # Generic summary **self.summary_dict) cprint('Done redo song generate', 'green')
def main(): browser = create_client() conn = sqlite3.connect('links.db') conn.row_factory = sqlite3.Row videos_info = conn.execute( f'select * from videos where downloaded = 0 and download_forbidden isnull' ).fetchall() widgets = [ progressbar.Percentage(), ' ', progressbar.Counter(), ' ', progressbar.Bar(), ' ', progressbar.FileTransferSpeed() ] pbar = progressbar.ProgressBar(widgets=widgets, max_value=len(videos_info)).start() for i, video_info in enumerate(videos_info): pbar.update(i) video_info = dict(video_info) video_id = video_info['video_id'] browser.visit(video_info['video_url']) while browser.is_element_present_by_css( '.recaptchaContent'): # sometimes wild captcha appears print("CAPTCHA NEEDED") sleep(60) if browser.is_element_present_by_css('.removed'): # video has been removed print('video has been removed\n') with conn: conn.execute( f'UPDATE videos SET download_forbidden = 1 where video_id = "{video_id}"' ) continue if not browser.is_element_present_by_css( '.premiumIconTitleOnVideo:visible' ) and not browser.is_element_present_by_css('#videoTitle'): # video has been removed print('video is somehow broken and not premiuzm\n') with conn: conn.execute( f'UPDATE videos SET download_forbidden = 1 where video_id = "{video_id}"' ) continue video_title = browser.find_by_css('#videoTitle').text # type: str # because of f*****g windows video_title = video_title.replace(':', '').replace('?', '').replace('*', '').replace('"', '').replace('/', '') \ .replace('\\', '') browser.find_by_id('player').click() # pausing video browser.find_by_tag('body')._element.send_keys('M') # muting video file_name = f'videos/{video_id}-{video_title}.mp4' if osp.exists(file_name): with conn: conn.execute( f'UPDATE videos SET downloaded = 1 where video_id = "{video_id}"' ) continue if browser.is_element_present_by_css( '.tab-menu-item.js-paidDownload[data-tab="download-tab"]'): # video has been removed print('video download is paid\n') with conn: conn.execute( f'UPDATE videos SET download_forbidden = 1 where video_id = "{video_id}"' ) continue download_tab_button_sel = '.tab-menu-item[data-tab="download-tab"]' vr_tab_button_sel = '.tab-menu-item[data-tab="vr-tab"]' if not browser.is_element_present_by_css(download_tab_button_sel) \ and browser.is_element_present_by_css(vr_tab_button_sel): # video has been removed print('video is vr, no download\n') with conn: conn.execute( f'UPDATE videos SET download_forbidden = 1 where video_id = "{video_id}"' ) continue click_download_tab(browser, download_tab_button_sel) if is_download_forbidden(browser, conn, video_id): continue download_link = get_download_link(browser) # must have here headers, otherwise it behaves as api and does not serve the video for _ in range(5): try: request.urlretrieve(download_link, file_name) break except URLError: print('connection failed, trying again\n') print(file_name, 'downloaded\n') with conn: conn.execute( f'UPDATE videos SET downloaded = 1 where video_id = "{video_id}"' ) pbar.finish() print('done')
# Print input file size in human-readable format
print("Input (.csv) file size: ", sizeof_fmt(statinfo_in.st_size))

# Determine the number of lines in the input file, to be used for the progress bar
fname = inFile
num_lines = 0
with open(fname, 'r') as f:
    for line in f:
        num_lines += 1
print("Number of lines in csv file:", num_lines)

# Set widgets for progress bar
widgets = [
    'Converting csv to json. Percentage completed:',
    pb.Percentage(), ' ',
    pb.Bar(marker='█'), ' ',
    pb.ETA()
]
# Create progress bar and initialize
bar = pb.ProgressBar(widgets=widgets, maxval=num_lines).start()


def csv_to_json(csv_path, json_path):
    # Open connection to csv file
    with open(csv_path, 'r') as csv_file:
        # Create reader object; yields each csv row as a dictionary
        reader = csv.DictReader(csv_file)
        # Open output json file
        with open(json_path, 'w') as json_file:
START_IDX = 0
VERBOSE = False
TEST = True
output = 'auto_spaced_review_' + str(START_IDX) + '-%s.json'
texts = texts[START_IDX:]

start_idx = 0
data = ""
next_text = ""

import progressbar

bar = progressbar.ProgressBar(maxval=len(texts),
                              widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
is_saved = False
try:
    for idx, text in enumerate(texts):
        bar.update(idx + 1)
        data += text + '\r\n'
        if len(data) < 1000000:
            continue
        new_text = auto_spacing(data).split('<br>')
        if len(new_text) == 1:
            new_text = new_text[0].split('<br/>')
        for i, review in enumerate(reviews[start_idx:idx + 1]):
def search_query(self): @retry(elasticsearch.exceptions.ConnectionError, tries=TIMES_TO_TRY) def next_scroll(scroll_id): return self.es_conn.scroll(scroll=self.scroll_time, scroll_id=scroll_id) search_args = dict(index=','.join(self.opts.index_prefixes), scroll=self.scroll_time, size=self.opts.scroll_size, terminate_after=self.opts.max_results) if self.opts.doc_types: search_args['doc_type'] = self.opts.doc_types if self.opts.query.startswith('@'): query_file = self.opts.query[1:] if os.path.exists(query_file): with open(query_file, 'r') as f: self.opts.query = f.read() else: print('No such file: %s' % query_file) exit(1) if self.opts.raw_query: try: query = json.loads(self.opts.query) except ValueError as e: print('Invalid JSON syntax in query. %s' % e) exit(1) search_args['body'] = query else: query = self.opts.query if not self.opts.tags else '%s AND tags:%s' % ( self.opts.query, '(%s)' % ' AND '.join(self.opts.tags)) search_args['q'] = query if '_all' not in self.opts.fields: search_args['_source_include'] = ','.join(self.opts.fields) self.csv_headers.extend( [field for field in self.opts.fields if '*' not in field]) if self.opts.debug_mode: print('Using these indices: %s' % ', '.join(self.opts.index_prefixes)) print('Query[%s]: %s' % (('Query DSL', json.dumps(query)) if self.opts.raw_query else ('Lucene', query))) print('Output field(s): %s' % ', '.join(self.opts.fields)) res = self.es_conn.search(**search_args) self.num_results = res['hits']['total'] print('Found %s results' % self.num_results) if self.opts.debug_mode: print(json.dumps(res)) if self.num_results > 0: open(self.opts.output_file, 'w').close() open(self.tmp_file, 'w').close() hit_list = [] total_lines = 0 widgets = [ 'Run query ', progressbar.Bar(left='[', marker='#', right=']'), progressbar.FormatLabel(' [%(value)i/%(max)i] ['), progressbar.Percentage(), progressbar.FormatLabel('] [%(elapsed)s] ['), progressbar.ETA(), '] [', progressbar.FileTransferSpeed(unit='docs'), ']' ] bar = progressbar.ProgressBar(widgets=widgets, maxval=self.num_results).start() while total_lines != self.num_results: if res['_scroll_id'] not in self.scroll_ids: self.scroll_ids.append(res['_scroll_id']) if not res['hits']['hits']: print( 'Scroll[%s] expired(multiple reads?). Saving loaded data.' % res['_scroll_id']) break for hit in res['hits']['hits']: total_lines += 1 bar.update(total_lines) hit_list.append(hit) if len(hit_list) == FLUSH_BUFFER: self.flush_to_file(hit_list) hit_list = [] if self.opts.max_results: if total_lines == self.opts.max_results: self.flush_to_file(hit_list) print('Hit max result limit: %s records' % self.opts.max_results) return res = next_scroll(res['_scroll_id']) self.flush_to_file(hit_list) bar.finish()