def get_total_data_size():
    features = spec_manager.get_feature_specs()
    sizes = []
    for feature in features:
        if feature['gdb_id'] == "" or feature['shape_id'] == "":
            continue
        name = feature['sgid_name']
        print name
        # Convert drive sizes from bytes to megabytes.
        size = user_drive.get_size(feature['gdb_id']) * 0.000001
        size += user_drive.get_size(feature['shape_id']) * 0.000001
        time.sleep(0.1)
        print '\t', size
        sizes.append(size)
        if len(sizes) == 10:
            # Note: early return after the first 10 features skips the totals below.
            return

    print 'Total feature MBs:', sum(sizes)

    packages = spec_manager.get_package_specs()
    for package in packages:
        if package['gdb_id'] == "" or package['shape_id'] == "":
            continue
        name = package['name']
        print name
        size = user_drive.get_size(package['gdb_id']) * 0.000001
        size += user_drive.get_size(package['shape_id']) * 0.000001
        time.sleep(0.1)
        print '\t', size
        sizes.append(size)

    print 'total specs:', len(features) + len(packages)
    print 'total sizes:', len(sizes)
    print 'Total MBs:', sum(sizes)


def run_features(workspace,
                 output_directory,
                 feature_list_json=None,
                 load=True,
                 force=False,
                 category=None,
                 skip_packages=False,
                 update_cycles=None):
    """
    CLI option to update all features in spec_manager.FEATURE_SPEC_FOLDER
    or just those in feature_list_json.

    feature_list_json: json file with an array named "features"
    """
    run_all_lists = None
    features = []
    if not feature_list_json:
        for feature_spec in spec_manager.get_feature_specs(update_cycles):
            if feature_spec['sgid_name'] != '' and \
                    (category is None or category.upper() == feature_spec['category'].upper()):
                features.append(feature_spec['sgid_name'])
    else:
        with open(feature_list_json, 'r') as json_file:
            run_all_lists = json.load(json_file)
            features = run_all_lists['features']

    packages = []
    for feature in features:
        packages.extend(
            update_feature(workspace,
                           feature,
                           output_directory,
                           load_to_drive=load,
                           force_update=force))

    print '{} packages updated'.format(len(packages))


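# A minimal sketch of the feature_list_json layout run_features expects: a JSON
# object with a "features" array of sgid_name values (the layer names below are
# hypothetical examples, not taken from the spec folder):
# {
#     "features": [
#         "SGID10.BOUNDARIES.Counties",
#         "SGID10.TRANSPORTATION.Roads"
#     ]
# }

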
def get_feature_download_links():
    features = spec_manager.get_feature_specs()
    feature_links = {}
    for feature in features:
        feature_links[feature['sgid_name'].lower()] = {
            'gdb': driver.get_download_link(feature['gdb_id']),
            'shp': driver.get_download_link(feature['shape_id'])
        }

    spec_manager.save_spec_json(feature_links, 'data/feature_downloads.json')


def add_permissions(category, user_email):
    features = spec_manager.get_feature_specs()
    for feature in features:
        if category is None or category.upper() == feature['category'].upper():
            ids = [feature['gdb_id'], feature['hash_id'], feature['shape_id']]
            for file_id in ids:
                print user_drive.add_editor(file_id, user_email), feature['name']
                time.sleep(0.2)


def set_cycle_by_date_in_name():
    # Features whose sgid_name contains exactly one four-digit number (e.g. a
    # year) are treated as dated snapshots and switched from daily updates to NEVER.
    dated = re.compile(r'\d{4}')
    for feature in spec_manager.get_feature_specs():
        sgid_name = feature['sgid_name']
        matches = dated.findall(sgid_name)
        if len(matches) == 1 and feature['update_cycle'] == 'day':
            print sgid_name, matches
            feature['update_cycle'] = spec_manager.UPDATE_CYCLES.NEVER
            spec_manager.save_spec_json(feature)


def check_empty_gdb_ids():
    features = spec_manager.get_feature_specs()
    for feature in features:
        if feature['gdb_id'] == "":
            cat_id = user_drive.get_file_id_by_name_and_directory(
                feature['category'], '0ByStJjVZ7c7mNlZRd2ZYOUdyX2M')
            f_id = user_drive.get_file_id_by_name_and_directory(
                feature['name'], cat_id)
            time.sleep(0.01)
            if f_id is None:
                print "'{}',".format(feature['sgid_name'])


def find_id(drive_id):
    features = spec_manager.get_feature_specs()
    for feature in features:
        # Build the id list first so it is always defined for this feature,
        # even when parent_ids is empty.
        ids = [feature['gdb_id'], feature['hash_id'], feature['shape_id']]
        try:
            ids.append(feature['parent_ids'][0])
        except IndexError:
            print 'no parents', feature['name']

        if drive_id in ids:
            print feature['name']


def get_hash_size_csv():
    features = spec_manager.get_feature_specs()
    out_csv = 'data/hash_sizes'
    hash_size_records = [['name', 'hash_size', 'cycle']]
    for feature in features:
        if feature['hash_id'] == "":
            continue
        name = feature['sgid_name']
        print name
        size = user_drive.get_size(feature['hash_id'])
        time.sleep(0.5)
        cycle = feature['update_cycle']
        print '\t', size
        hash_size_records.append([name, size, cycle])

    with open(out_csv, 'wb') as out_table:
        table = csv.writer(out_table)
        table.writerows(hash_size_records)


def set_cycle_by_csv():
    update_csv = 'data/update_cycle.csv'
    update_cycles = {}
    with open(update_csv, 'rb') as cycles:
        reader = csv.DictReader(cycles)
        for row in reader:
            name = row['SGID name']
            update = row['Update frequency']
            if update == 'on-demand':
                update = 'demand'
            update_cycles[name] = update

    for feature in spec_manager.get_feature_specs():
        sgid_name = feature['sgid_name']
        if sgid_name in update_cycles:
            feature['update_cycle'] = update_cycles[sgid_name]
            spec_manager.save_spec_json(feature)
            # print sgid_name, feature['update_cycle'], update_cycles[sgid_name]
        else:
            print sgid_name, 'not found!!!'


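# Hedged sketch of the data/update_cycle.csv layout set_cycle_by_csv reads:
# only the 'SGID name' and 'Update frequency' columns are used (the rows below
# are hypothetical examples):
#
# SGID name,Update frequency
# SGID10.BOUNDARIES.Counties,month
# SGID10.TRANSPORTATION.Roads,on-demand

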
def get_spec_property_csv(properties):
    features = spec_manager.get_feature_specs()
    output_rows = []
    out_csv = 'data/properties.csv'
    count = 0
    for feature in features:
        if count % 50 == 0:
            print count
        count += 1
        out_row = [feature[p] for p in properties]
        if feature['gdb_id'] == "":
            print feature['sgid_name']
            continue
        # Append the gdb size converted from bytes to mebibytes.
        out_row.append(float(user_drive.get_size(feature['gdb_id'])) / 1048576)
        time.sleep(0.01)
        output_rows.append(out_row)

    print count
    with open(out_csv, 'wb') as out_table:
        table = csv.writer(out_table)
        table.writerow(properties + ['MB'])
        table.writerows(output_rows)
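

# Example call for get_spec_property_csv, assuming spec keys already used in
# this module; pass whichever spec properties you want as CSV columns:
# get_spec_property_csv(['sgid_name', 'category', 'update_cycle'])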