def save_flavors_info(request, flavors_ids, path):
    path = u.join_path(path, "flavors.json")
    flavors = []
    for f_id in flavors_ids:
        flavor_info = nova.flavor_get(request, f_id).to_dict()
        flavors.append(flavor_info)
    flavors_info = {"flavors": flavors}
    u.save_to_json(path, flavors_info)
def save_images_tag(request, image_ids, path):
    path = u.join_path(path, "images.json")
    images = []
    for image_id in image_ids:
        image_info = sahara.image_get(request, image_id).to_dict()
        images.append(image_info)
    images_info = {"images": images}
    u.save_to_json(path, images_info)
def update_corpus_ids(doc_id):
    corpus_ids_file = "./visualization/corpus/corpus_ids.json"
    corpus_ids = get_json_data(corpus_ids_file) if os.path.isfile(
        corpus_ids_file) else []
    if doc_id not in corpus_ids:
        corpus_ids = [doc_id] + corpus_ids
        save_to_json(corpus_ids_file, corpus_ids)
        print('corpus ids updated: %s' % doc_id)
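Several snippets in this collection call small `save_to_json` / `get_json_data` helpers whose definitions are not included here. A minimal sketch, assuming they are thin wrappers over the standard json module; note that the argument order of `save_to_json` varies between snippets (some pass the path first, others the data first), so treat these signatures as illustrative only:

import json


def get_json_data(path):
    # Assumed helper: parse and return the contents of a JSON file.
    with open(path, 'r') as f:
        return json.load(f)


def save_to_json(path, data):
    # Assumed helper: serialise `data` to `path` as JSON.
    with open(path, 'w') as f:
        json.dump(data, f)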
def main():
    for filename in sorted(glob.glob(directory)):
        language = filename.split("/")[-1].split(".")[0]
        if args.language:
            if language != args.language:
                continue
        print(f"\nLanguage:\t{language}")
        if args.check_os == "y":
            if os.path.isfile(f"data/covid19/processed/{language}.json"):
                print(f"{language} has already been analyzed, moving on...")
                continue
        if language not in set(countries_per_language.keys()):
            continue
        input_data = utils.read_from_json(filename)
        output_data = {}
        days = sorted(input_data.keys())
        nr_days = len(days)
        if nr_days < 10:
            continue
        previous_links = []
        previous_links_locations = {}
        previous_references = []
        previous_references_locations = {}
        references_origins = Counter()
        links_origins = Counter()
        for n, day in enumerate(days):
            print("Processing day %s of %s:\t%s" % (n, nr_days, day))
            timestamps_output = {"links": {}, "references": {}}
            links = sorted(input_data[day]["links"])
            references = sorted(input_data[day]["references"])
            links_countries = get_links_locations(
                links, previous_links, previous_links_locations,
                language)  # dict
            timestamps_output["links"] = links_countries
            previous_links = links
            previous_links_locations = links_countries
            references_countries = get_reference_locations(
                references, previous_references,
                previous_references_locations)
            timestamps_output["references"] = references_countries
            previous_references = references
            previous_references_locations = references_countries
            # print("Completed day %s of %s" % (n, nr_days))
            # print(timestamps_output, "\n\n")
            output_data[day] = timestamps_output
        utils.save_to_json(language, "processed", output_data)
def save_security_group_info(request, security_group_ids, path):
    path = u.join_path(path, "security_groups.json")
    sg = []
    sg_manager = nova.SecurityGroupManager(request)
    for sg_id in security_group_ids:
        sg_info = sg_manager.get(sg_id).to_dict()
        sg.append(sg_info)
    sgs_info = {"security_groups": sg}
    u.save_to_json(path, sgs_info)
def save_cluster_info(request, cluster_id, path):
    cluster_info = sahara.cluster_get(request, cluster_id).to_dict()
    path = u.join_path(path, "cluster.json")
    u.save_to_json(path, cluster_info)
    ct_id = cluster_info['cluster_template_id']
    ukp_id = cluster_info['user_keypair_id']
    default_image_id = cluster_info['default_image_id']
    instance_ids = get_instances_id(cluster_info['node_groups'])
    return ct_id, ukp_id, default_image_id, instance_ids
def save_cluster_template_info(request, ct_id, path):
    ct_info = sahara.cluster_template_get(request, ct_id).to_dict()
    path = u.join_path(path, "clusterTemplate.json")
    node_groups_template_ids = []
    _node_groups = ct_info['node_groups']
    for ng in _node_groups:
        node_groups_template_ids.append(ng['node_group_template_id'])
    u.save_to_json(path, ct_info)
    return node_groups_template_ids
def generate_gold_gui_data(corpus_dir, doc_id, data_file):
    data_reader = jsonlines.open(data_file)
    # Handle the case where the doc_id already exists.
    if check_duplicate_dir(corpus_dir):
        sys.exit()
    doc_ids = []
    for doc_dict in data_reader.iter():
        doc = Document(doc_dict)
        doc_ids.append(doc.doc_id)
        # doc data
        doc_data = doc.get_visualize_data()
        doc_data_file = "%s/span/%s.json" % (corpus_dir, doc.doc_id)
        save_to_json(doc_data_file, doc_data)
        # surface data
        surface_data = doc.get_surface_data()
        surface_data_file = "%s/detail/%s.json" % (corpus_dir, doc.doc_id)
        save_to_json(surface_data_file, surface_data)
        # cluster data
        cluster_data = doc.get_cluster_data()
        cluster_data_file = "%s/coref/%s.json" % (corpus_dir, doc.doc_id)
        save_to_json(cluster_data_file, cluster_data)
    # doc ids and corpus ids.
    doc_ids_file = "%s/doc_ids.json" % corpus_dir
    save_to_json(doc_ids_file, doc_ids)
    update_corpus_ids(doc_id)
def infer(self, image_np, crop_path, output_folder):
    # TODO: see if we can get some batch parallelism
    image_np = np.expand_dims(image_np, axis=0)
    preds = [
        self.model.do_test(self.polySess, image_np, top_k)
        for top_k in range(_FIRST_TOP_K)
    ]
    # Sort predictions based on the eval score and pick the best one.
    preds = sorted(preds, key=lambda x: x["scores"][0], reverse=True)[0]
    if FLAGS.Use_ggnn:
        polys = np.copy(preds["polys"][0])
        feature_indexs, poly, mask = utils.preprocess_ggnn_input(polys)
        preds_gnn = self.ggnnModel.do_test(self.ggnnSess, image_np,
                                           feature_indexs, poly, mask)
        output = {
            "polys": preds["polys"],
            "polys_ggnn": preds_gnn["polys_ggnn"]
        }
    else:
        output = {"polys": preds["polys"]}
    # Dump the predictions to JSON and visualise them.
    json_name = save_to_json(output_folder, crop_path, output)
    self.vis(json_name)
def save_node_groups_info(request, node_groups_template_ids, path):
    path = u.join_path(path, "node_groups.json")
    node_groups = []
    flavor_ids = []
    image_ids = []
    security_group_ids = []
    for ng_id in node_groups_template_ids:
        ng_info = sahara.nodegroup_template_get(request, ng_id).to_dict()
        node_groups.append(ng_info)
        flavor_ids = append_to_list(ng_info['flavor_id'], flavor_ids)
        image_ids = append_to_list(ng_info['image_id'], image_ids)
        for sgi in ng_info['security_groups']:
            security_group_ids = append_to_list(sgi, security_group_ids)
    ngs_info = {"node_groups": node_groups}
    u.save_to_json(path, ngs_info)
    return flavor_ids, image_ids, security_group_ids
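`save_node_groups_info` relies on an `append_to_list` helper that is not defined in this section; from the call sites it looks like an append that skips duplicates and returns the list. A sketch under that assumption:

def append_to_list(item, target_list):
    # Assumed behaviour: append `item` only if it is not already present,
    # returning the list so call sites can reassign it.
    if item not in target_list:
        target_list.append(item)
    return target_list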
def generate_test_gui_data(corpus_dir, exp_id, test_file, model_file,
                           word_vectors_file):
    from eval import get_clusters
    sys_clusters = get_clusters(model_file, word_vectors_file, test_file,
                                get_gold_clusters=False)
    for doc_id, doc_clusters in sys_clusters.items():
        # save cluster to file.
        output_file = "%s/coref/%s.json" % (corpus_dir, doc_id)
        save_to_json(output_file, get_cluster_data_for_gui(doc_clusters))
    # doc ids and corpus ids.
    doc_ids_file = "%s/doc_ids.json" % corpus_dir
    save_to_json(doc_ids_file, list(sys_clusters.keys()))
    update_corpus_ids(exp_id)
def main():
    for filename in sorted(glob.glob(input_directory)):
        language = filename.split("/")[-1].split(".")[0]
        if args.language:
            if language != args.language:
                continue
        print("\nLanguage:\t", language)
        if args.check_os == "y":
            if os.path.isfile(f"data/weekly/{language}.png"):
                print(f"{language} has already been processed, moving on...")
                continue
        input_data = utils.read_from_json(filename)
        day_data = get_day_data(input_data)
        week_data = get_week_data(day_data)
        utils.save_to_json(language, "weekly", week_data)
        print("done")
def manipulate(self, strategy_func, strategy_name, pick_strategy,
               manipulation_clas, network_name):
    self.global_homophilies = []
    class_partitions = []
    nodes_with_manipulation_clas = [
        node for node in self.G.nodes()
        if self.get_node_class(node) == manipulation_clas
    ]
    class_partitions.append(len(nodes_with_manipulation_clas) / self.size)
    homo_list_before = self.local_homophily()
    nodes_to_remove = [
        node for node in self.G.nodes()
        if self.get_node_class(node) != manipulation_clas
    ]
    utils.save_to_file(homo_list_before, network_name,
                       '{0}_homo_list_before'.format(strategy_name))
    # add, remove or change nodes
    strategy_func(nodes_to_remove, nodes_with_manipulation_clas,
                  class_partitions, pick_strategy, manipulation_clas)
    homo_list_after = self.local_homophily()
    utils.save_to_file(homo_list_after, network_name,
                       '{0}_homo_list_after'.format(strategy_name))
    utils.save_to_file(self.global_homophilies, network_name,
                       '{0}_global_homophilies'.format(strategy_name))
    utils.plot_local_homophily(homo_list_before, homo_list_after,
                               network_name, strategy_name)
    utils.plot_global_homophily(self.global_homophilies, network_name,
                                strategy_name)
    utils.save_to_file(class_partitions, network_name,
                       '{0}_class_partitions'.format(strategy_name))
    utils.save_to_json(self.homophily_per_clas, network_name,
                       '{0}_homophily_per_clas'.format(strategy_name))
    utils.plot_all(class_partitions, self.global_homophilies,
                   self.homophily_per_clas, manipulation_clas, network_name,
                   strategy_name)
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os

import scraper
import utils

PRODUCTS = [
    # http://www.vinbudin.is/heim/vorur/vorur.aspx/?text=Grevens
    '22006',
    '22837',
    '23282',
    '22004'
]

if __name__ == '__main__':
    print 'Working ...'
    directory = 'products/'
    if not os.path.exists(directory):
        os.makedirs(directory)
    for product_id in PRODUCTS:
        print 'Fetching data for product %s ...' % (product_id, )
        product_data = scraper.get_vinbudin_product_data(product_id)
        print 'Writing data to file ...'
        utils.save_to_json('%s%s.json' % (directory, product_id),
                           product_data, pretty=True)
        print 'Done.'
    print 'All done.'
def save_final_results_procrustes(args):
    results = args.__dict__
    utils.save_to_json(results, args.out_folder + "/final_results.json")
def save_final_results_ensemble(args):
    results = args.__dict__
    utils.save_to_json(results, args.out_folder + "/final_results.json")
def save_final_results_compress(args, range_limit):
    results = args.__dict__
    results["results"] = {"range_limit": range_limit}
    utils.save_to_json(results, args.out_folder + "/final_results.json")
def __init__(self, state_size, action_size, num_agents, agent_index, writer,
             random_seed, dirname, print_every=1000, model_path=None,
             saved_config=None, eval_mode=False):
    """Initialize an Agent object.

    Parameters:
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        num_agents (int): number of agents
        agent_index (int): index (id) of current agent
        writer (object): visdom visualiser for realtime visualisations
        random_seed (int): random seed
        dirname (string): output directory to store config, losses
        print_every (int): how often to print progress
        model_path (string): if defined, load saved model to resume training
        saved_config (string): if defined, load hyperparams from this config
        eval_mode (bool): whether to use eval mode
    """
    self.state_size = state_size
    self.action_size = action_size
    self.seed = random.seed(random_seed)
    self.agent_index = agent_index
    self.writer = writer
    self.dirname = dirname
    self.print_every = print_every
    # save config params
    if not saved_config:
        self.config = CONFIG
        save_to_json(self.config, '{}/hyperparams.json'.format(self.dirname))
    else:
        self.config = json.load(open(saved_config, 'r'))
        logger.info(
            'Loading config from saved location {}'.format(saved_config))

    # Create Critic network
    self.local_critic = Critic(self.state_size * num_agents,
                               self.action_size * num_agents,
                               random_seed,
                               fc1_units=self.config['FC1'],
                               fc2_units=self.config['FC2']).to(device)
    self.target_critic = Critic(self.state_size * num_agents,
                                self.action_size * num_agents,
                                random_seed,
                                fc1_units=self.config['FC1'],
                                fc2_units=self.config['FC2']).to(device)
    # Optimizer
    self.critic_optimizer = optim.Adam(
        self.local_critic.parameters(),
        lr=self.config['LR_CRITIC'],
        weight_decay=self.config['WEIGHT_DECAY'])

    # Create Actor network
    self.local_actor = Actor(self.state_size, self.action_size, random_seed,
                             fc1_units=self.config['FC1'],
                             fc2_units=self.config['FC2']).to(device)
    self.target_actor = Actor(self.state_size, self.action_size, random_seed,
                              fc1_units=self.config['FC1'],
                              fc2_units=self.config['FC2']).to(device)
    self.actor_optimizer = optim.Adam(self.local_actor.parameters(),
                                      lr=self.config['LR_ACTOR'])

    # Load saved model (if available)
    if model_path:
        logger.info('Loading model from {}'.format(model_path))
        self.local_actor.load_state_dict(
            torch.load('{}/checkpoint_actor_{}.pth'.format(
                model_path, self.agent_index)))
        self.target_actor.load_state_dict(
            torch.load('{}/checkpoint_actor_{}.pth'.format(
                model_path, self.agent_index)))
        self.local_critic.load_state_dict(
            torch.load('{}/checkpoint_critic_{}.pth'.format(
                model_path, self.agent_index)))
        self.target_critic.load_state_dict(
            torch.load('{}/checkpoint_critic_{}.pth'.format(
                model_path, self.agent_index)))
        if eval_mode:
            logger.info('agent {} set to eval mode'.format(self.agent_index))
            self.local_actor.eval()

    self.noise = OUNoise(self.action_size, random_seed,
                         sigma=self.config['SIGMA'])
    self.learn_step = 0
    'num_epochs': 10,
    'learning_rate': params['learning_rate'],
    'pos_weighting': 20.0
}
dataset_params = {
    'train_batch_size': params['batch_size'],
    'eval_batch_size': 1,
    'shuffle': True,
    'num_workers': 1,
    'num_of_slots': 35
}

# model
if torch.cuda.is_available():
    model = DST(**model_params).cuda()
else:
    model = DST(**model_params)

utils.set_logger(os.path.join(args.model_dir, 'eval.log'))
logging.info('Starting evaluation')
utils.load_checkpoint(
    os.path.join(args.model_dir, args.model_checkpoint_name), model)
eval_metrics, total_loss_eval, eval_avg_goal_acc, eval_joint_goal_acc, avg_slot_precision = evaluate(
    model, evaluation_data, args.model_dir, dataset_params, device)
save_path = os.path.join(args.model_dir, "metrics_test.json")
utils.save_to_json(eval_metrics, save_path)
def save_key_pair(request, user_key_pair_id, path):
    key_pair_info = nova.keypair_get(request, user_key_pair_id).to_dict()
    path = u.join_path(path, "keypair.json")
    u.save_to_json(path, key_pair_info)
    return True
def __init__(self, state_size, action_size, num_agents, writer, random_seed,
             dirname, print_every=100, model_path=None, eval_mode=False):
    """Initialize an Agent object.

    Params
    ======
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        num_agents (int): number of agents
        writer (object): visdom visualiser for realtime visualisations
        random_seed (int): random seed
        dirname (string): output directory to store config, losses
        print_every (int): how often to print progress
        model_path (string): if defined, load saved model to resume training
        eval_mode (bool): whether to use eval mode
    """
    self.state_size = state_size
    self.action_size = action_size
    self.num_agents = num_agents
    self.seed = random.seed(random_seed)
    self.dirname = dirname
    self.print_every = print_every
    # save config params
    save_to_json(config, '{}/hyperparams.json'.format(self.dirname))

    # Actor Networks (w/ Target Networks)
    self.actor_local = [
        Actor(state_size, action_size, random_seed,
              fc1_units=config['FC1'], fc2_units=config['FC2'],
              use_bn=config["USE_BATCHNORM"]).to(device)
        for _ in range(num_agents)
    ]
    self.actor_target = [
        Actor(state_size, action_size, random_seed,
              fc1_units=config['FC1'], fc2_units=config['FC2'],
              use_bn=config["USE_BATCHNORM"]).to(device)
        for _ in range(num_agents)
    ]
    self.actor_optimizer = [
        optim.Adam(self.actor_local[i].parameters(), lr=config["LR_ACTOR"])
        for i in range(num_agents)
    ]

    # Critic Networks (w/ Target Networks)
    self.critic_local = [
        Critic(state_size, action_size, random_seed,
               fc1_units=config['FC1'], fc2_units=config['FC2'],
               use_bn=config["USE_BATCHNORM"]).to(device)
        for _ in range(num_agents)
    ]
    self.critic_target = [
        Critic(state_size, action_size, random_seed,
               fc1_units=config['FC1'], fc2_units=config['FC2'],
               use_bn=config["USE_BATCHNORM"]).to(device)
        for _ in range(num_agents)
    ]
    self.critic_optimizer = [
        optim.Adam(self.critic_local[i].parameters(),
                   lr=config["LR_CRITIC"],
                   weight_decay=config["WEIGHT_DECAY"])
        for i in range(num_agents)
    ]

    # Load saved model (if available)
    if model_path:
        logger.info('Loading model from {}'.format(model_path))
        for i in range(self.num_agents):
            self.actor_local[i].load_state_dict(
                torch.load('{}/checkpoint_actor_{}.pth'.format(
                    model_path, i)))
            self.actor_target[i].load_state_dict(
                torch.load('{}/checkpoint_actor_{}.pth'.format(
                    model_path, i)))
            self.critic_local[i].load_state_dict(
                torch.load('{}/checkpoint_critic_{}.pth'.format(
                    model_path, i)))
            self.critic_target[i].load_state_dict(
                torch.load('{}/checkpoint_critic_{}.pth'.format(
                    model_path, i)))
            if eval_mode:
                logger.info('agent {} set to eval mode'.format(i))
                self.actor_local[i].eval()

    # Noise process
    self.noise = [
        OUNoise(action_size, random_seed, sigma=config['SIGMA'])
        for _ in range(num_agents)
    ]

    # Replay memory
    self.memory = ReplayBuffer(action_size, config["BUFFER_SIZE"],
                               config["BATCH_SIZE"], random_seed)

    # Record losses
    self.actor_losses = []
    self.critic_losses = []
    self.learn_count = []
    self.learn_step = 0

    # Initialise visdom writer
    self.writer = writer
    logger.info("Initialised with random seed: {}".format(random_seed))
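Both agent constructors above instantiate an `OUNoise` helper that is not shown. A minimal sketch of the Ornstein-Uhlenbeck process commonly used for exploration noise in DDPG-style agents, assuming the `(size, seed, sigma=...)` signature seen at the call sites; the `mu` and `theta` defaults are assumptions:

import copy
import random

import numpy as np


class OUNoise:
    """Ornstein-Uhlenbeck process for temporally correlated exploration noise."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        random.seed(seed)
        self.reset()

    def reset(self):
        # Reset the internal state to the mean.
        self.state = copy.copy(self.mu)

    def sample(self):
        # dx = theta * (mu - x) + sigma * N(0, 1): the state is pulled back
        # towards mu while the Gaussian term injects exploration noise.
        x = self.state
        dx = self.theta * (self.mu - x) + \
            self.sigma * np.random.standard_normal(len(x))
        self.state = x + dx
        return self.state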
def main():
    current_dir = os.path.dirname(os.path.realpath(__file__))
    companies = [
        {'name': glob.ATLANTSOLIA, 'stations': '../stations/atlantsolia.json'},
        {'name': glob.COSTCO, 'stations': '../stations/costco.json'},
        {'name': glob.N1, 'stations': '../stations/n1.json'},
        {'name': glob.DAELAN, 'stations': '../stations/daelan.json'},
        {'name': glob.OB, 'stations': '../stations/ob.json'},
        {'name': glob.OLIS, 'stations': '../stations/olis.json'},
        {'name': glob.ORKAN, 'stations': '../stations/orkan.json'},
        {'name': glob.ORKAN_X, 'stations': '../stations/orkanx.json'}
    ]
    all_stations = {}
    for company in companies:
        filepath = os.path.join(current_dir, company['stations'])
        stations = utils.load_json(filepath)
        for key in stations:
            station = stations[key]
            station['company'] = company['name']
            all_stations[key] = station
    # station prices
    atlantsolia_prices = scraper.get_individual_atlantsolia_prices()
    costco_prices = scraper.get_global_costco_prices()
    n1_prices = scraper.get_global_n1_prices()
    daelan_prices = scraper.get_global_daelan_prices()
    ob_prices = scraper.get_individual_ob_prices()
    olis_prices = scraper.get_global_olis_prices()
    orkan_prices = scraper.get_individual_orkan_prices()
    prices_map = {
        glob.ATLANTSOLIA: {'data': atlantsolia_prices,
                           'type': glob.PRICETYPE.INDIVIDUAL},
        glob.COSTCO: {'data': costco_prices, 'type': glob.PRICETYPE.GLOBAL},
        glob.N1: {'data': n1_prices, 'type': glob.PRICETYPE.GLOBAL},
        glob.DAELAN: {'data': daelan_prices, 'type': glob.PRICETYPE.GLOBAL},
        glob.OB: {'data': ob_prices, 'type': glob.PRICETYPE.INDIVIDUAL},
        glob.OLIS: {'data': olis_prices, 'type': glob.PRICETYPE.GLOBAL},
        glob.ORKAN: {'data': orkan_prices, 'type': glob.PRICETYPE.INDIVIDUAL},
        glob.ORKAN_X: {'data': orkan_prices,
                       'type': glob.PRICETYPE.INDIVIDUAL}
    }
    list_of_stations = []
    price_keys = ['bensin95', 'bensin95_discount', 'diesel',
                  'diesel_discount']
    for key, station in sorted(all_stations.items()):
        station['key'] = key
        if prices_map[station['company']]['type'] == glob.PRICETYPE.INDIVIDUAL:
            for price_key in price_keys:
                if key.startswith('dn') and key not in prices_map[
                        station['company']]['data']:
                    # <TEMPORARY DAELAN MEASURE>
                    #
                    # Daelan has received two new stations from N1, and new
                    # owners have now taken over its business. For now it seems
                    # they will continue to use the N1 backend to provide
                    # online fuel prices on the daelan.is webpage, yet these
                    # two new stations are not shown and probably won't show up
                    # until the new Daelan owners have renovated their website.
                    #
                    # Until then we tie the price of the two new stations to
                    # the price in Daelan Fellsmuli.
                    #
                    # </TEMPORARY DAELAN MEASURE>
                    station[price_key] = prices_map[
                        station['company']]['data']['dn_000'][price_key]
                else:
                    station[price_key] = prices_map[
                        station['company']]['data'][key][price_key]
        elif prices_map[station['company']]['type'] == glob.PRICETYPE.GLOBAL:
            for price_key in price_keys:
                station[price_key] = prices_map[
                    station['company']]['data'][price_key]
            if station['company'] == glob.N1 and key in glob.N1_PRICE_DIFF:
                # Some N1 stations have been observed in real life to have
                # fixed prices different from the most common price shown on
                # the N1 webpage.
                # Note: hardcoded price deviances, in no way guaranteed to be
                # permanently correct.
                for price_key in price_keys:
                    station[price_key] += glob.N1_PRICE_DIFF[key][price_key]
        list_of_stations.append(station)
    data = {'stations': list_of_stations}
    data_json_pretty_file = os.path.join(current_dir, '../vaktin/gas.json')
    data_json_mini_file = os.path.join(current_dir, '../vaktin/gas.min.json')
    utils.save_to_json(data_json_pretty_file, data, pretty=True)
    utils.save_to_json(data_json_mini_file, data, pretty=False)
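The gas-price scripts here (and the vinbudin scraper above) pass a `pretty` flag to `utils.save_to_json` to emit both a readable and a minified file. A plausible sketch of that variant, assuming the `(path, data, pretty)` signature used at these call sites:

import json


def save_to_json(path, data, pretty=False):
    # Assumed helper: pretty=True writes indented, human-readable JSON;
    # pretty=False writes a compact, minified file.
    with open(path, 'w') as f:
        if pretty:
            json.dump(data, f, ensure_ascii=False, indent=4, sort_keys=True)
        else:
            json.dump(data, f, ensure_ascii=False, separators=(',', ':'))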
    print('Finalizado...')


def main():
    # for page in string.ascii_uppercase:
    #     extract_shoppings(f"https://abrasce.com.br/guia-de-shoppings/?letter={page}")
    # extract_shoppings("https://abrasce.com.br/guia-de-shoppings/strip-mall/")
    # extract_shoppings("https://abrasce.com.br/guia-de-shoppings/outlet-center/")
    extract_details()
    JSONtoExcel()


if __name__ == "__main__":
    start = timeit.default_timer()
    try:
        main()
        tempo_estimado(start)
    except KeyboardInterrupt:
        save_to_json(extracted_info)
        tempo_estimado(start)
    except Exception as error:
        save_to_json(extracted_info)
        tempo_estimado(start)
        raise
def extract_details():
    print(f"{len(read_link())} links achados")
    contador = 1
    for shopping_page in read_link():
        print(f'Extraindo {contador} link')
        details = {}
        # initialise the drivers
        dynamic_result = dynamic_html(shopping_page)
        if dynamic_result == False:
            extracted_info.append(details)
            save_to_json(details)
            continue
        crawler = init_parser(dynamic_result)
        details['Nome'] = crawler.find(
            'span', class_="post post-shopping current-item").text
        details['Tipo'] = crawler.find('a', class_="taxonomy operacao").text
        details['link'] = shopping_page
        details_container = crawler.find('div', class_="specs")

        # PERFIL DE CONSUMIDORES (consumer profile)
        perfil_title = details_container.find(text="PERFIL DE CONSUMIDORES")
        class_content = perfil_title.findNext('div')
        class_perfil = []
        for p in class_content.find_all('p'):
            class_perfil.append(p.text)
        details['Classe A'] = class_perfil[0]
        details['Classe B'] = class_perfil[1]
        details['Classe C'] = class_perfil[2]
        details['Classe D'] = class_perfil[3]
        # details[perfil_title] = format_text(class_content.text)

        # ENTRETENIMENTO (entertainment)
        enterteiment_title = details_container.find(text="ENTRETENIMENTO")
        enterteiment_content = enterteiment_title.findNext('div')
        # print(enterteiment_title)
        details[enterteiment_title] = format_text(enterteiment_content.text)

        # ÁREA TOTAL DO TERRENO (total lot area)
        area_title = details_container.find(text="ÁREA TOTAL DO TERRENO")
        area_content = area_title.findNext('div')
        # print(area_title)
        details[area_title] = format_text(area_content.text)

        # CONTATO (contact)
        contact_title = details_container.find(text="CONTATO")
        contact_content = contact_title.findNext('ul')
        # print(contact_title)
        details[contact_title] = format_text(contact_content.text)

        # icons with additional info
        aditional_info = crawler.find('div',
                                      class_="icons shoppings mt-4 mb-4")
        box = aditional_info.find_all('div', class_="box")
        for box_info in box:
            title = box_info.find('p', class_='mb-0')
            detail_content = box_info.find('p', class_="number")
            details[title.text] = detail_content.text
        extracted_info.append(details)
        contador += 1
    print('Finalizado!')
    print('Salvando em json...')
    save_to_json(extracted_info)
    print('Finalizado...')
        dataset[name] = selected_sounds
        print 'selected %i sounds out of %i!' % (len(selected_sounds),
                                                 len(filtered_results))
    else:
        print 'not enough sounds were found for current class (%i sounds found).' \
            % len(filtered_results)

# TIP ON KEYWORD EXTRACTION: we could extract some keywords from the textual
# descriptions using functions provided in ELVIS (see
# https://github.com/sergiooramas/elvis and the run_entity_linking.py file in
# the utils folder). For each selected sound in our dataset we could do
# something like:
#
#   from utils.run_entity_linking import spotlight
#
#   sound_textual_description = "One of the English summer storms of 2014 recorded on a condenser mic. The neighbor's dog barks at it at some point. \r\n\r\nNaturalistic, no processing done to it whatsoever."
#   results = spotlight(sound_textual_description.split('\n'))
#   keywords = list()
#   for element in results:
#       for entity in element['entities']:
#           keywords.append(entity['label'])

# Save dataset to file so we can work with it later on
utils.save_to_json('%s.json' % DATASET_NAME, dataset)

# 2) Know your dataset
# ********************

# Generate html files with sound examples and show most common tags per class
for class_name, sounds in dataset.items():
    print class_name
    utils.generate_html_file_with_sound_examples(
        [sound['id'] for sound in sounds][:15],
        'html/%s_%s.html' % (DATASET_NAME, class_name))
    class_tags = utils.get_all_tags_from_class(class_name, dataset)
    utils.print_most_common_tags(class_tags)
else:
    repo_path = my_args.repository
    if not os.path.exists(repo_path):
        fail_nicely(parser, 'Path "%s" seems to not exist.' % (repo_path, ))
try:
    repo = git.Repo(repo_path)
except Exception:
    error_msg = 'Could not read git repo from "%s".' % (repo_path, )
    fail_nicely(parser, error_msg)
if my_args.from_date is not None:
    try:
        datetime.datetime.strptime(my_args.from_date, '%Y-%m-%d')
    except ValueError:
        fail_nicely(parser, '--from-date not in format YYYY-MM-DD')
if my_args.to_date is not None:
    try:
        datetime.datetime.strptime(my_args.to_date, '%Y-%m-%d')
    except ValueError:
        fail_nicely(parser, '--to-date not in format YYYY-MM-DD')
price_changes = read_price_changes(repo, fromdate=my_args.from_date,
                                   todate=my_args.to_date)
if my_args.output_directory is None:
    output_directory = os.path.join(current_dir, '../vaktin/')
else:
    output_directory = my_args.output_directory
data_json_pretty_file = os.path.join(output_directory, 'trends.json')
data_json_mini_file = os.path.join(output_directory, 'trends.min.json')
utils.save_to_json(data_json_pretty_file, price_changes, pretty=True)
utils.save_to_json(data_json_mini_file, price_changes, pretty=False)