def init():
    """Make sure ``config.facebook_access_token`` is populated.

    Nothing happens when the token is already truthy.  Otherwise
    ``util.load_from_json()`` is called first (presumably restoring values
    saved by an earlier run -- confirm in ``util``).  If the token is still
    missing afterwards, a new one is requested through ``authorize`` with the
    app id and secret read from ``config``, and ``util.save_to_json()`` is
    called.
    """
    if config.facebook_access_token:
        return

    util.load_from_json()
    if config.facebook_access_token:
        return

    # The app credentials are read only now, i.e. after load_from_json() ran.
    token = authorize(config.facebook_app_id, config.facebook_app_secret)
    config.facebook_access_token = token
    util.save_to_json()
def init():
    """Make sure ``config.github_access_token`` is populated.

    Nothing happens when the token is already truthy.  Otherwise
    ``util.load_from_json()`` is called first (presumably restoring values
    saved by an earlier run -- confirm in ``util``).  If the token is still
    missing afterwards, a new one is requested through ``authorize`` with the
    client id and secret read from ``config``, and ``util.save_to_json()`` is
    called.
    """
    if config.github_access_token:
        return

    util.load_from_json()
    if config.github_access_token:
        return

    # The client credentials are read only now, i.e. after load_from_json() ran.
    token = authorize(config.github_client_id, config.github_client_secret)
    config.github_access_token = token
    util.save_to_json()
def init():
    """Make sure the Google access token and id token are both populated.

    Nothing happens when ``config.google_access_token`` and
    ``config.google_id_token`` are both truthy.  Otherwise
    ``util.load_from_json()`` is called first (presumably restoring values
    saved by an earlier run -- confirm in ``util``).  If either token is still
    missing afterwards, ``authorize`` is called with the client id and secret
    from ``config``; its result is unpacked into the access token, id token
    and refresh token on ``config``, and ``util.save_to_json()`` is called.
    """
    if config.google_access_token and config.google_id_token:
        return

    util.load_from_json()
    if config.google_access_token and config.google_id_token:
        return

    # authorize() must yield exactly three values: access, id, refresh.
    granted = authorize(config.google_client_id, config.google_client_secret)
    (config.google_access_token,
     config.google_id_token,
     config.google_refresh_token) = granted
    util.save_to_json()
def init(app_only=False, force=False):
    """Make sure the Twitter credentials selected by ``app_only`` are populated.

    Args:
        app_only: When truthy, work with ``config.twitter_bearer`` (obtained
            through ``application_only_authentication.get_assess_token``).
            Otherwise work with ``config.twitter_access_token`` and
            ``config.twitter_access_token_secret`` (obtained through
            ``oauth.authorize``).
        force: When truthy, request fresh credentials unconditionally and call
            ``util.save_to_json()`` before the usual lazy check runs.

    The lazy check is: if the selected credentials are missing, call
    ``util.load_from_json()``; if they are still missing, request them and
    call ``util.save_to_json()``.
    """

    def request_bearer():
        config.twitter_bearer = application_only_authentication.get_assess_token(
            config.twitter_consumer_key, config.twitter_consumer_secret)

    def request_user_tokens():
        granted = oauth.authorize(
            config.twitter_consumer_key, config.twitter_consumer_secret)
        config.twitter_access_token, config.twitter_access_token_secret = granted

    def bearer_present():
        return bool(config.twitter_bearer)

    def user_tokens_present():
        return bool(config.twitter_access_token
                    and config.twitter_access_token_secret)

    if app_only:
        request, present = request_bearer, bearer_present
    else:
        request, present = request_user_tokens, user_tokens_present

    if force:
        request()
        util.save_to_json()

    # The forced path intentionally falls through: if the forced request
    # produced falsy credentials, the lazy path below still gets its chance.
    if present():
        return

    util.load_from_json()
    if present():
        return

    request()
    util.save_to_json()
def build_training_records(local_root='.', data_version=None, model_version=None):
    """Build the record files for every part listed in the resolution file.

    The resolution directory is ``<local_root>/<data_version>/<model_version>``.
    It must contain ``res.js`` (loaded with prefix ``'res = '``) and
    ``category_map.js`` (loaded with prefix ``'cat = '``).  For each entry of
    ``res['training_records']`` the image directories
    ``<resolution>/<part>/<key>`` (one candidate per key of ``res['product']``)
    that exist are read with ``xml_to_dataframe``.  The resulting sheet is
    saved as ``<resolution>/<part>/data.csv`` and handed to
    ``tfr.processCsvFileShards``.

    Args:
        local_root: Directory that contains the ``data_version`` directory.
        data_version: Name of the data version directory (required).
        model_version: Name of the model version directory (required).

    Raises:
        ValueError: If ``data_version`` or ``model_version`` is ``None``, if the
            resolution directory, ``res.js`` or ``category_map.js`` is missing,
            or if ``res.js`` lacks ``training_records`` or ``product``.
    """
    if data_version is None:
        raise ValueError("data_version cannot be None")
    if model_version is None:
        raise ValueError("model_version cannot be None")

    resolution_path = os.path.join(local_root, data_version, model_version)
    if not os.path.isdir(resolution_path):
        raise ValueError("Invalid resolution path: {}".format(resolution_path))

    res_path = os.path.join(resolution_path, "res.js")
    if not os.path.isfile(res_path):
        raise ValueError("Invalid resolution file: {}".format(res_path))
    res = load_from_json(json_path=res_path, json_prefix='res = ')

    training_records = res.get('training_records')
    if training_records is None:
        raise ValueError(
            "missing 'training_records' from resolution file: {}".format(res_path))

    category_map_path = os.path.join(resolution_path, 'category_map.js')
    if not os.path.isfile(category_map_path):
        # The file is a JSON category map, not a protobuf; say so.
        raise ValueError(
            "Invalid category map file: {}".format(category_map_path))
    category_map = load_from_json(json_path=category_map_path,
                                  json_prefix='cat = ')

    product = res.get('product')
    if product is None:
        raise ValueError("No product in res file.")

    # Resolve every part's settings up front so a malformed entry fails
    # before any part has been processed.
    part_records = [
        (part,
         spec.get('filename', "{}.record".format(part)),
         spec.get('num_shards', 1))
        for part, spec in training_records.items()
    ]

    for part, record, shards in part_records:
        image_root = os.path.join(resolution_path, part)
        candidates = [os.path.join(image_root, key) for key in product]
        dirs = [d for d in candidates if os.path.isdir(d)]
        print("Reading directories: {}".format(dirs))

        xsheet = xml_to_dataframe(dirs)
        if len(xsheet) == 0:
            print("No data found in part: {}".format(part))
            continue

        # save the datasheet in the part directory
        xsheet.to_csv(os.path.join(image_root, 'data.csv'))

        # Honour the configured record filename; it defaults to
        # '<part>.record', which is what used to be hard-coded here.
        tfr.processCsvFileShards(xsheet=xsheet,
                                 image_root=image_root,
                                 output_path=os.path.join(resolution_path, record),
                                 category_numbers=category_map,
                                 num_shards=shards)
        print("Created {} records: shards={}".format(part, shards))
def _load_part_sheets(resolution_path, product_parts, eval_csv, train_csv):
    """Load the eval/train csv sheets named in product_parts, eval first."""
    parts_sheets = []
    for part, csv_name in (('eval', eval_csv), ('train', train_csv)):
        if part not in product_parts:
            continue
        csv_path = os.path.join(resolution_path, csv_name)
        if not os.path.isfile(csv_path):
            raise ValueError("Missing {} csv file: {}".format(part, csv_path))
        parts_sheets.append((part, pd.read_csv(csv_path, index_col='index')))
        print("Loaded {} csv file: {}".format(part, csv_name))
    return parts_sheets


def _clear_or_skip_existing(output_path, tile_prefix, part, overwrite, clearing_label):
    """Return False when existing products must be kept; clear them if overwrite."""
    if not os.path.isdir(output_path):
        return True
    prefixed = [f for f in os.listdir(output_path) if f.startswith(tile_prefix)]
    # Only the .xml files count as "entries"; everything prefixed is removed.
    num_existing = sum(1 for f in prefixed if f.endswith('.xml'))
    if num_existing == 0:
        return True
    if not overwrite:
        print(" [{}] skipping [{}] {} entries.".format(
            part, tile_prefix, num_existing))
        return False
    print(" [{}] {} [{}] {} entries.".format(
        part, clearing_label, tile_prefix, num_existing))
    for f in prefixed:
        os.remove(os.path.join(output_path, f))
    return True


def _iter_image_meta(xsheet, data_root):
    """Yield (image_filename, meta_path) for every row of a part sheet."""
    for i, row in xsheet.iterrows():
        # The index value is prepended so the module-level folder_index /
        # filename_index (defined outside this block) address the full record.
        values = [i] + row.values.tolist()
        folder = values[folder_index]
        image_filename = values[filename_index]
        meta_filename = image_filename.replace(".jpg", ".xml")
        yield image_filename, os.path.join(data_root, folder, meta_filename)


def _export_scaled_part(part, xsheet, data_root, resolution_path, prefix,
                        container_size, area_ratio, output_resolution,
                        background_classes, progress_every, overwrite):
    """Export the scaled products of one scaling entry for one part."""
    output_path = os.path.join(resolution_path, part, 'scaled')
    tile_prefix = 'scaled_{}_{}x{}@{}x{}'.format(
        prefix, container_size[0], container_size[1],
        output_resolution[0], output_resolution[1])
    if not _clear_or_skip_existing(output_path, tile_prefix, part, overwrite,
                                   'clearing'):
        return

    def within_area_limit(box):
        return production.area_within_limit(box, container_size, ratio=area_ratio)

    print(" building scaled product part: {}/{}*".format(part, tile_prefix))
    num_items = len(xsheet)
    num_checked = 0
    count = 0
    for image_filename, meta_path in _iter_image_meta(xsheet, data_root):
        image_tiles = tiling.get_image_tiles(
            meta_path=meta_path,
            crop_scale=container_size,
            padder=None,
            no_meta=background_classes,
            constraint=within_area_limit)
        num_checked += 1
        if len(image_tiles) > 0:
            num_tiles_created = tiling.export_tiles(
                tiles=image_tiles,
                seq=image_filename.split(".")[0],
                tile_prefix=tile_prefix,
                no_meta=background_classes,
                container_size=container_size,
                output_size=output_resolution,
                output_path=output_path)
            if num_tiles_created > 0:
                count += 1
        if num_checked % progress_every == 0:
            print(" [{}] scaled products examined: {}/{}".format(
                part, num_checked, num_items))
    print(" [{}] scaled products exported: {}/{}".format(part, count, num_items))


def _export_tiled_part(part, xsheet, data_root, resolution_path, crop_scale, pad,
                       apertures, output_resolution, background_classes,
                       background_image_generator, overwrite):
    """Collect the tiles of one part and export them once per aperture repeat."""
    output_path = os.path.join(resolution_path, part, 'tiled')
    print(" building tiles: {} crop={}".format(part, crop_scale))

    tiles = []
    for _, meta_path in _iter_image_meta(xsheet, data_root):
        # including unclassified
        tiles.extend(tiling.get_image_tiles(
            meta_path=meta_path,
            crop_scale=crop_scale,
            padder=lambda size: pad))
    print(" obtained tiles: {} crop={}, num_tiles={}".format(
        part, crop_scale, len(tiles)))

    for aperture in apertures:
        prefix = aperture[0]
        container_size = aperture[1]
        area_ratio = aperture[2]
        repeats = aperture[3]
        tile_prefix = 'tiled_{}_{}x{}@{}x{}'.format(
            prefix, container_size[0], container_size[1],
            output_resolution[0], output_resolution[1])
        if not _clear_or_skip_existing(output_path, tile_prefix, part, overwrite,
                                       'removing from'):
            continue
        for i in range(repeats):
            num_tiles_created = tiling.export_tiles(
                tiles=tiles,
                seq=i,
                tile_prefix=tile_prefix,
                no_meta=background_classes,
                container_size=container_size,
                tile_margin=(0, 0, 0, 0),
                image_acceptor_factors=(0, 0, 0, 0),
                output_size=output_resolution,
                output_path=output_path,
                background_image_generator=background_image_generator,
                constraint=lambda box: production.area_within_limit(
                    box, container_size, ratio=area_ratio))
            print(" [{}] tiled products exported: {}_{}={}".format(
                part, tile_prefix, i, num_tiles_created))


def build_data(local_root='.', data_root=None, data_version=None,
               model_version=None, train_csv='train.csv', eval_csv='eval.csv',
               progress_every=500, overwrite=False):
    """Generate the scaled and tiled products described by the resolution file.

    The resolution directory is ``<local_root>/<data_version>/<model_version>``
    and must contain ``res.js`` (loaded with prefix ``'res = '``).  From it the
    function reads ``output_resolution``, ``product`` (optional keys ``parts``,
    ``scaled`` and ``tiled``) and the optional ``background_classes``.  For
    each of the parts ``eval`` / ``train`` listed in ``product['parts']`` the
    matching csv sheet is loaded.  Scaled products are written to
    ``<resolution>/<part>/scaled`` and tiled products to
    ``<resolution>/<part>/tiled`` through ``tiling.export_tiles``.

    Args:
        local_root: Directory that contains the ``data_version`` directory.
        data_root: Root of the source images/metadata; its ``unclassified``
            sub-directory feeds the background image generator (required).
        data_version: Name of the data version directory (required).
        model_version: Name of the model version directory (required).
        train_csv: File name of the train sheet inside the resolution directory.
        eval_csv: File name of the eval sheet inside the resolution directory.
        progress_every: Print scaled-product progress every this many rows.
        overwrite: When existing products with the same tile prefix are found,
            remove and rebuild them if true, otherwise skip that product.

    Raises:
        ValueError: If a required argument is ``None``, if the resolution
            directory, ``res.js`` or a requested csv sheet is missing, or if
            ``res.js`` lacks ``output_resolution`` or ``product``.
    """
    if data_root is None:
        raise ValueError("data_root cannot be None")
    if data_version is None:
        raise ValueError("data_version cannot be None")
    if model_version is None:
        raise ValueError("model_version cannot be None")

    resolution_path = os.path.join(local_root, data_version, model_version)
    if not os.path.isdir(resolution_path):
        raise ValueError("Invalid resolution path: {}".format(resolution_path))

    res_path = os.path.join(resolution_path, "res.js")
    if not os.path.isfile(res_path):
        raise ValueError("Invalid resolution file: {}".format(res_path))
    res = load_from_json(json_path=res_path, json_prefix='res = ')

    output_resolution = res.get('output_resolution')
    if output_resolution is None:
        raise ValueError("No output_resolution in res file.")
    product = res.get('product')
    if product is None:
        raise ValueError("No product in res file.")

    product_parts = product.get('parts', [])
    product_scaled = product.get('scaled')
    product_tiled = product.get('tiled')

    parts_sheets = _load_part_sheets(
        resolution_path, product_parts, eval_csv=eval_csv, train_csv=train_csv)

    background_classes = res.get('background_classes')
    # in this case we rely on this directory only containing images with no
    # significant categories
    background_image_generator = production.get_background_image_generator(
        background_dir=os.path.join(data_root, 'unclassified'))

    if product_scaled is not None:
        print("Generating scaled products...")
        for scaling in product_scaled:
            prefix = scaling[0]
            container_size = scaling[1]
            area_ratio = scaling[2]
            for part, xsheet in parts_sheets:
                _export_scaled_part(
                    part, xsheet, data_root, resolution_path, prefix,
                    container_size, area_ratio, output_resolution,
                    background_classes, progress_every, overwrite)

    if product_tiled is not None:
        for tiling_details in product_tiled:
            crop_scale = tiling_details['crop']
            key = tiling_details['key']
            pad = tiling_details['pad']
            apertures = tiling_details['apertures']
            print("Generating tiled product: {}".format(key))
            for part, xsheet in parts_sheets:
                # Tiles are local to the helper, so each part's tile list is
                # released as soon as that part has been exported.
                _export_tiled_part(
                    part, xsheet, data_root, resolution_path, crop_scale, pad,
                    apertures, output_resolution, background_classes,
                    background_image_generator, overwrite)