Example #1
0
def save_flavors_info(request, flavors_ids, path):
    path = u.join_path(path, "flavors.json")
    flavors = []
    for f_id in flavors_ids:
        flavor_info = nova.flavor_get(request, f_id).to_dict()
        flavors.append(flavor_info)
    flavors_info = {"flavors": flavors}
    u.save_to_json(path, flavors_info)
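
Examples #1, #2, #5, #6, #7, #10, and #20 all go through a small `u` helper module for path joining and JSON dumps. Those helpers are not part of the retrieved snippets; the following is only a minimal sketch of what they are assumed to look like (hypothetical thin wrappers around os.path.join and json.dump):

import json
import os


def join_path(path, filename):
    # Assumed behaviour: a thin wrapper around os.path.join.
    return os.path.join(path, filename)


def save_to_json(path, data):
    # Assumed behaviour: serialize `data` as JSON at `path`.
    with open(path, "w") as f:
        json.dump(data, f, indent=2)
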
Example #2
0
def save_images_tag(request, image_ids, path):
    path = u.join_path(path, "images.json")
    images = []
    for image_id in image_ids:
        image_info = sahara.image_get(request, image_id).to_dict()
        images.append(image_info)
    images_info = {"images": images}
    u.save_to_json(path, images_info)
Example #3
0
def update_corpus_ids(doc_id):
    corpus_ids_file = "./visualization/corpus/corpus_ids.json"
    corpus_ids = get_json_data(corpus_ids_file) if os.path.isfile(
        corpus_ids_file) else []

    if doc_id not in corpus_ids:
        corpus_ids = [doc_id] + corpus_ids
        save_to_json(corpus_ids_file, corpus_ids)
        print('corpus ids updated: %s' % doc_id)
Example #4
0
def main():

	for filename in sorted(glob.glob(directory)):

		language = filename.split("/")[-1].split(".")[0]
		if args.language:
			if language != args.language: continue

		print(f"\nLanguage:\t{language}")

		if args.check_os == "y":
			if os.path.isfile(f"data/covid19/processed/{language}.json"):
				print(f"{language} has already been analyzed, moving on...")
				continue
	
		if language not in countries_per_language:
			continue

		input_data = utils.read_from_json(filename)
		output_data = {}
		days = sorted(input_data.keys())
		nr_days = len(days)
		if nr_days < 10: continue

		previous_links = []
		previous_links_locations = {}
		previous_references = []
		previous_references_locations = {}

		references_origins = Counter()
		links_origins = Counter()

		for n, day in enumerate(days):
			print("Processing day %s of %s:\t%s" % (n, nr_days, day))

			timestamps_output = {
				"links": {},
				"references": {}
			}

			links = sorted(input_data[day]["links"])
			references = sorted(input_data[day]["references"])

			links_countries = get_links_locations(links, previous_links, previous_links_locations, language) # dict
			timestamps_output["links"] = links_countries
			previous_links = links
			previous_links_locations = links_countries

			references_countries = get_reference_locations(references, previous_references, previous_references_locations)
			timestamps_output["references"] = references_countries
			previous_references = references
			previous_references_locations = references_countries
	
			#print("Completed day %s of %s" % (n, nr_days))
			#print(timestamps_output, "\n\n")
			output_data[day] = timestamps_output
		utils.save_to_json(language, "processed", output_data)
Example #5
0
def save_security_group_info(request, security_group_ids, path):
    path = u.join_path(path, "security_groups.json")
    sg = []
    sg_manager = nova.SecurityGroupManager(request)
    for sg_id in security_group_ids:
        sg_info = sg_manager.get(sg_id).to_dict()
        sg.append(sg_info)
    sgs_info = {"security_groups": sg}
    u.save_to_json(path, sgs_info)
Example #6
0
def save_cluster_info(request, cluster_id, path):
    cluster_info = sahara.cluster_get(request, cluster_id).to_dict()
    path = u.join_path(path, "cluster.json")
    u.save_to_json(path, cluster_info)
    ct_id = cluster_info['cluster_template_id']
    ukp_id = cluster_info['user_keypair_id']
    default_image_id = cluster_info['default_image_id']
    instance_ids = get_instances_id(cluster_info['node_groups'])
    return ct_id, ukp_id, default_image_id, instance_ids
Example #7
0
def save_cluster_template_info(request, ct_id, path):
    ct_info = sahara.cluster_template_get(request, ct_id).to_dict()
    path = u.join_path(path, "clusterTemplate.json")
    node_groups_template_ids = []
    _node_groups = ct_info['node_groups']
    for ng in _node_groups:
        node_groups_template_ids.append(ng['node_group_template_id'])
    u.save_to_json(path, ct_info)
    return node_groups_template_ids
Example #8
0
def genrate_gold_gui_data(corpus_dir, doc_id, data_file):
    data_reader = jsonlines.open(data_file)

    # handle the case where the doc_id already exists.
    if check_duplicate_dir(corpus_dir):
        sys.exit()

    doc_ids = []
    for doc_dict in data_reader.iter():
        doc = Document(doc_dict)
        doc_ids.append(doc.doc_id)

        # doc data
        doc_data = doc.get_visualize_data()
        doc_data_file = "%s/span/%s.json" % (corpus_dir, doc.doc_id)
        save_to_json(doc_data_file, doc_data)

        # surface data
        surface_data = doc.get_surface_data()
        surface_data_file = "%s/detail/%s.json" % (corpus_dir, doc.doc_id)
        save_to_json(surface_data_file, surface_data)

        # cluster data
        cluster_data = doc.get_cluster_data()
        cluster_data_file = "%s/coref/%s.json" % (corpus_dir, doc.doc_id)
        save_to_json(cluster_data_file, cluster_data)

    # doc ids and corpus ids.
    doc_ids_file = "%s/doc_ids.json" % corpus_dir
    save_to_json(doc_ids_file, doc_ids)
    update_corpus_ids(doc_id)
Example #9
0
    def infer(self, image_np, crop_path, output_folder):
        # TODO see if we can get some batch parallelism
        image_np = np.expand_dims(image_np, axis=0)
        preds = [
            self.model.do_test(self.polySess, image_np, top_k)
            for top_k in range(_FIRST_TOP_K)
        ]

        # sort predictions based on the eval score and pick the best
        preds = sorted(preds, key=lambda x: x["scores"][0], reverse=True)[0]

        if FLAGS.Use_ggnn:
            polys = np.copy(preds["polys"][0])
            feature_indexs, poly, mask = utils.preprocess_ggnn_input(polys)
            preds_gnn = self.ggnnModel.do_test(self.ggnnSess, image_np,
                                               feature_indexs, poly, mask)
            output = {
                "polys": preds["polys"],
                "polys_ggnn": preds_gnn["polys_ggnn"]
            }
        else:
            output = {"polys": preds["polys"]}

        # dumping to json files
        json_name = save_to_json(output_folder, crop_path, output)
        self.vis(json_name)
Example #10
0
def save_node_groups_info(request, node_groups_template_ids, path):
    path = u.join_path(path, "node_groups.json")
    node_groups = []
    flavor_ids = []
    image_ids = []
    security_group_ids = []
    for ng_id in node_groups_template_ids:
        ng_info = sahara.nodegroup_template_get(request, ng_id).to_dict()
        node_groups.append(ng_info)
        flavor_ids = append_to_list(ng_info['flavor_id'], flavor_ids)
        image_ids = append_to_list(ng_info['image_id'], image_ids)
        for sgi in ng_info['security_groups']:
            security_group_ids = append_to_list(sgi, security_group_ids)
    ngs_info = {"node_groups": node_groups}
    u.save_to_json(path, ngs_info)
    return flavor_ids, image_ids, security_group_ids
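
Examples #1, #2, #5, #6, #7, #10, and #20 read as pieces of one cluster-export flow: #6 returns the cluster template, keypair, image, and instance ids, #7 returns the node-group template ids, and #10 returns the flavor, image, and security-group ids that #1, #2, and #5 then dump. A hypothetical driver chaining them in that order (the orchestrating function below is an assumption; only the called helpers appear in the snippets):

def save_cluster_backup(request, cluster_id, path):
    # Hypothetical orchestration of the save_* helpers shown in this section.
    ct_id, ukp_id, default_image_id, instance_ids = save_cluster_info(
        request, cluster_id, path)
    ng_template_ids = save_cluster_template_info(request, ct_id, path)
    flavor_ids, image_ids, security_group_ids = save_node_groups_info(
        request, ng_template_ids, path)
    save_flavors_info(request, flavor_ids, path)
    save_images_tag(request, image_ids, path)
    save_security_group_info(request, security_group_ids, path)
    save_key_pair(request, ukp_id, path)
    # default_image_id and instance_ids are left unused in this sketch.
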
Example #11
0
def generate_test_gui_data(corpus_dir, exp_id, test_file, model_file,
                           word_vectors_file):
    from eval import get_clusters
    sys_clusters = get_clusters(model_file,
                                word_vectors_file,
                                test_file,
                                get_gold_clusters=False)

    for doc_id, doc_clusters in sys_clusters.items():
        # save cluster to file.
        output_file = "%s/coref/%s.json" % (corpus_dir, doc_id)
        save_to_json(output_file, get_cluster_data_for_gui(doc_clusters))

    # doc ids and corpus ids.
    doc_ids_file = "%s/doc_ids.json" % corpus_dir
    save_to_json(doc_ids_file, list(sys_clusters.keys()))

    update_corpus_ids(exp_id)
Example #12
0
def main():
    for filename in sorted(glob.glob(input_directory)):

        language = filename.split("/")[-1].split(".")[0]
        if args.language:
            if language != args.language: continue

        print("\nLanguage:\t", language)

        if args.check_os == "y":
            if os.path.isfile(f"data/weekly/{language}.png"):
                print(f"{language} has already been processed, moving on...")
                continue

        input_data = utils.read_from_json(filename)

        day_data = get_day_data(input_data)
        week_data = get_week_data(day_data)
        utils.save_to_json(language, "weekly", week_data)
        print("done")
Example #13
0
    def manipulate(self, strategy_func, strategy_name, pick_strategy,
                   manipulation_clas, network_name):
        self.global_homophilies = []
        class_partitions = []
        nodes_with_manipulation_clas = [
            node for node in self.G.nodes()
            if self.get_node_class(node) == manipulation_clas
        ]
        class_partitions.append(len(nodes_with_manipulation_clas) / self.size)
        homo_list_before = self.local_homophily()
        nodes_to_remove = [
            node for node in self.G.nodes()
            if self.get_node_class(node) != manipulation_clas
        ]
        utils.save_to_file(homo_list_before, network_name,
                           '{0}_homo_list_before'.format(strategy_name))
        ''' add, remove or change node '''
        strategy_func(nodes_to_remove, nodes_with_manipulation_clas,
                      class_partitions, pick_strategy, manipulation_clas)

        homo_list_after = self.local_homophily()
        utils.save_to_file(homo_list_after, network_name,
                           '{0}_homo_list_after'.format(strategy_name))
        utils.save_to_file(self.global_homophilies, network_name,
                           '{0}_global_homophilies'.format(strategy_name))
        utils.plot_local_homophily(homo_list_before, homo_list_after,
                                   network_name, strategy_name)
        utils.plot_global_homophily(self.global_homophilies, network_name,
                                    strategy_name)
        utils.save_to_file(class_partitions, network_name,
                           '{0}_class_partitions'.format(strategy_name))
        utils.save_to_json(self.homophily_per_clas, network_name,
                           '{0}_homophily_per_clas'.format(strategy_name))
        utils.plot_all(class_partitions, self.global_homophilies,
                       self.homophily_per_clas, manipulation_clas,
                       network_name, strategy_name)
Example #14
0
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os

import scraper
import utils

PRODUCTS = [  # http://www.vinbudin.is/heim/vorur/vorur.aspx/?text=Grevens
    '22006', '22837', '23282', '22004'
]

if __name__ == '__main__':
    print 'Working ...'
    directory = 'products/'
    if not os.path.exists(directory):
        os.makedirs(directory)
    for product_id in PRODUCTS:
        print 'Fetching data for product %s ...' % (product_id, )
        product_data = scraper.get_vinbudin_product_data(product_id)
        print 'Writing data to file ...'
        utils.save_to_json('%s%s.json' % (directory, product_id),
                           product_data,
                           pretty=True)
        print 'Done.'
    print 'All done.'
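
The vinbudin and gas-station scripts (#14, #22, #24, #27) pass a `pretty` keyword to their `utils.save_to_json`, writing both an indented and a minified file from the same data. A minimal sketch of such a helper, assuming `pretty` only toggles indentation (hypothetical, not the project's actual implementation):

import json


def save_to_json(path, data, pretty=False):
    # Assumed behaviour: pretty=True writes indented, key-sorted JSON;
    # pretty=False writes a compact, minified file.
    with open(path, 'w') as f:
        if pretty:
            json.dump(data, f, indent=4, sort_keys=True)
        else:
            json.dump(data, f, separators=(',', ':'))
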
Example #15
0
def save_final_results_procrutes(args):
    results = args.__dict__
    utils.save_to_json(results, args.out_folder + "/final_results.json")
Example #16
0
def save_final_results_ensemble(args):
    results = args.__dict__
    utils.save_to_json(results, args.out_folder + "/final_results.json")
Example #17
0
def save_final_results_compress(args, range_limit):
    results = args.__dict__
    results["results"] = {"range_limit": range_limit}
    utils.save_to_json(results, args.out_folder + "/final_results.json")
Example #18
0
    def __init__(self,
                 state_size,
                 action_size,
                 num_agents,
                 agent_index,
                 writer,
                 random_seed,
                 dirname,
                 print_every=1000,
                 model_path=None,
                 saved_config=None,
                 eval_mode=False):
        """Initialize an Agent object.
        
        Parameters:    
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            num_agents (int): number of agents
            agent_index (int): index (id) of current agent
            writer (object): visdom visualiser for realtime visualisations            
            random_seed (int): random seed
            dirname (string): output directory to store config, losses
            print_every (int): how often to print progress
            model_path (string): if defined, load saved model to resume training
            eval_mode (bool): whether to use eval mode
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(random_seed)
        self.agent_index = agent_index
        self.writer = writer
        self.dirname = dirname
        self.print_every = print_every
        # save config params
        if not saved_config:
            self.config = CONFIG
            save_to_json(self.config,
                         '{}/hyperparams.json'.format(self.dirname))
        else:
            self.config = json.load(open(saved_config, 'r'))
            logger.info(
                'Loading config from saved location {}'.format(saved_config))

        # Create Critic network
        self.local_critic = Critic(self.state_size * num_agents,
                                   self.action_size * num_agents,
                                   random_seed,
                                   fc1_units=self.config['FC1'],
                                   fc2_units=self.config['FC2']).to(device)
        self.target_critic = Critic(self.state_size * num_agents,
                                    self.action_size * num_agents,
                                    random_seed,
                                    fc1_units=self.config['FC1'],
                                    fc2_units=self.config['FC2']).to(device)
        # Optimizer
        self.critic_optimizer = optim.Adam(
            self.local_critic.parameters(),
            lr=self.config['LR_CRITIC'],
            weight_decay=self.config['WEIGHT_DECAY'])

        # Create Actor network
        self.local_actor = Actor(self.state_size,
                                 self.action_size,
                                 random_seed,
                                 fc1_units=self.config['FC1'],
                                 fc2_units=self.config['FC2']).to(device)
        self.target_actor = Actor(self.state_size,
                                  self.action_size,
                                  random_seed,
                                  fc1_units=self.config['FC1'],
                                  fc2_units=self.config['FC2']).to(device)
        self.actor_optimizer = optim.Adam(self.local_actor.parameters(),
                                          lr=self.config['LR_ACTOR'])

        # Load saved model (if available)
        if model_path:
            logger.info('Loading model from {}'.format(model_path))
            self.local_actor.load_state_dict(
                torch.load('{}/checkpoint_actor_{}.pth'.format(
                    model_path, self.agent_index)))
            self.target_actor.load_state_dict(
                torch.load('{}/checkpoint_actor_{}.pth'.format(
                    model_path, self.agent_index)))
            self.local_critic.load_state_dict(
                torch.load('{}/checkpoint_critic_{}.pth'.format(
                    model_path, self.agent_index)))
            self.target_critic.load_state_dict(
                torch.load('{}/checkpoint_critic_{}.pth'.format(
                    model_path, self.agent_index)))
            if eval_mode:
                logger.info('agent {} set to eval mode'.format(self.agent_index))
                self.local_actor.eval()

        self.noise = OUNoise(self.action_size,
                             random_seed,
                             sigma=self.config['SIGMA'])
        self.learn_step = 0
Example #19
0
        'num_epochs': 10,
        'learning_rate': params['learning_rate'],
        'pos_weighting': 20.0
    }

    dataset_params = {
        'train_batch_size': params['batch_size'],
        'eval_batch_size': 1,
        'shuffle': True,
        'num_workers': 1,
        'num_of_slots': 35
    }
    # model
    if torch.cuda.is_available():
        model = DST(**model_params).cuda()
    else:
        model = DST(**model_params)

    utils.set_logger(os.path.join(args.model_dir, 'eval.log'))

    logging.info('Starting evaluation')

    utils.load_checkpoint(
        os.path.join(args.model_dir, args.model_checkpoint_name), model)

    eval_metrics, total_loss_eval, eval_avg_goal_acc, eval_joint_goal_acc, avg_slot_precision = evaluate(
        model, evaluation_data, args.model_dir, dataset_params, device)

    save_path = os.path.join(args.model_dir, "metrics_test.json")
    utils.save_to_json(eval_metrics, save_path)
Example #20
0
def save_key_pair(request, user_key_pair_id, path):
    key_pair_info = nova.keypair_get(request, user_key_pair_id).to_dict()
    path = u.join_path(path, "keypair.json")
    u.save_to_json(path, key_pair_info)
    return True
Example #21
0
    def __init__(self,
                 state_size,
                 action_size,
                 num_agents,
                 writer,
                 random_seed,
                 dirname,
                 print_every=100,
                 model_path=None,
                 eval_mode=False):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            num_agents (int): number of agents
            writer (object): visdom visualiser for realtime visualisations            
            random_seed (int): random seed
            dirname (string): output directory to store config, losses
            print_every (int): how often to print progress
            model_path (string): if defined, load saved model to resume training
            eval_mode (bool): whether to set loaded models to eval mode
        """
        self.state_size = state_size
        self.action_size = action_size
        self.num_agents = num_agents
        self.seed = random.seed(random_seed)
        self.dirname = dirname
        self.print_every = print_every

        # save config params
        save_to_json(config, '{}/hyperparams.json'.format(self.dirname))

        # Actor Networks (w/ Target Networks)
        self.actor_local = [
            Actor(state_size,
                  action_size,
                  random_seed,
                  fc1_units=config['FC1'],
                  fc2_units=config['FC2'],
                  use_bn=config["USE_BATCHNORM"]).to(device)
            for _ in range(num_agents)
        ]

        self.actor_target = [
            Actor(state_size,
                  action_size,
                  random_seed,
                  fc1_units=config['FC1'],
                  fc2_units=config['FC2'],
                  use_bn=config["USE_BATCHNORM"]).to(device)
            for _ in range(num_agents)
        ]

        self.actor_optimizer = [
            optim.Adam(self.actor_local[i].parameters(), lr=config["LR_ACTOR"])
            for i in range(num_agents)
        ]

        # Critic Networks (w/ Target Networks)
        self.critic_local = [
            Critic(state_size,
                   action_size,
                   random_seed,
                   fc1_units=config['FC1'],
                   fc2_units=config['FC2'],
                   use_bn=config["USE_BATCHNORM"]).to(device)
            for _ in range(num_agents)
        ]
        self.critic_target = [
            Critic(state_size,
                   action_size,
                   random_seed,
                   fc1_units=config['FC1'],
                   fc2_units=config['FC2'],
                   use_bn=config["USE_BATCHNORM"]).to(device)
            for _ in range(num_agents)
        ]
        self.critic_optimizer = [
            optim.Adam(self.critic_local[i].parameters(),
                       lr=config["LR_CRITIC"],
                       weight_decay=config["WEIGHT_DECAY"])
            for i in range(num_agents)
        ]

        # Load saved model (if available)
        if model_path:
            logger.info('Loading model from {}'.format(model_path))
            for i in range(self.num_agents):
                self.actor_local[i].load_state_dict(
                    torch.load('{}/checkpoint_actor_{}.pth'.format(
                        model_path, i)))
                self.actor_target[i].load_state_dict(
                    torch.load('{}/checkpoint_actor_{}.pth'.format(
                        model_path, i)))
                self.critic_local[i].load_state_dict(
                    torch.load('{}/checkpoint_critic_{}.pth'.format(
                        model_path, i)))
                self.critic_target[i].load_state_dict(
                    torch.load('{}/checkpoint_critic_{}.pth'.format(
                        model_path, i)))
                if eval_mode:
                    logger.info('agent {} set to eval mode'.format(i))
                    self.actor_local[i].eval()

        # Noise process
        self.noise = [
            OUNoise(action_size, random_seed, sigma=config['SIGMA'])
            for _ in range(num_agents)
        ]

        # Replay memory
        self.memory = ReplayBuffer(action_size, config["BUFFER_SIZE"],
                                   config["BATCH_SIZE"], random_seed)

        # Record losses
        self.actor_losses = []
        self.critic_losses = []
        self.learn_count = []
        self.learn_step = 0
        # Initialise visdom writer
        self.writer = writer
        logger.info("Initialised with random seed: {}".format(random_seed))
Example #22
0
def main():
    current_dir = os.path.dirname(os.path.realpath(__file__))
    companies = [
        {
            'name': glob.ATLANTSOLIA,
            'stations': '../stations/atlantsolia.json'
        },
        {
            'name': glob.COSTCO,
            'stations': '../stations/costco.json'
        },
        {
            'name': glob.N1,
            'stations': '../stations/n1.json'
        },
        {
            'name': glob.DAELAN,
            'stations': '../stations/daelan.json'
        },
        {
            'name': glob.OB,
            'stations': '../stations/ob.json'
        },
        {
            'name': glob.OLIS,
            'stations': '../stations/olis.json'
        },
        {
            'name': glob.ORKAN,
            'stations': '../stations/orkan.json'
        },
        {
            'name': glob.ORKAN_X,
            'stations': '../stations/orkanx.json'
        }
    ]

    all_stations = {}
    for company in companies:
        filepath = os.path.join(current_dir, company['stations'])
        stations = utils.load_json(filepath)
        for key in stations:
            station = stations[key]
            station['company'] = company['name']
            all_stations[key] = station

    # station prices
    atlantsolia_prices = scraper.get_individual_atlantsolia_prices()
    costco_prices = scraper.get_global_costco_prices()
    n1_prices = scraper.get_global_n1_prices()
    daelan_prices = scraper.get_global_daelan_prices()
    ob_prices = scraper.get_individual_ob_prices()
    olis_prices = scraper.get_global_olis_prices()
    orkan_prices = scraper.get_individual_orkan_prices()
    prices_map = {
        glob.ATLANTSOLIA: {
            'data': atlantsolia_prices,
            'type': glob.PRICETYPE.INDIVIDUAL
        },
        glob.COSTCO: {
            'data': costco_prices,
            'type': glob.PRICETYPE.GLOBAL
        },
        glob.N1: {
            'data': n1_prices,
            'type': glob.PRICETYPE.GLOBAL
        },
        glob.DAELAN: {
            'data': daelan_prices,
            'type': glob.PRICETYPE.GLOBAL
        },
        glob.OB: {
            'data': ob_prices,
            'type': glob.PRICETYPE.INDIVIDUAL
        },
        glob.OLIS: {
            'data': olis_prices,
            'type': glob.PRICETYPE.GLOBAL
        },
        glob.ORKAN: {
            'data': orkan_prices,
            'type': glob.PRICETYPE.INDIVIDUAL
        },
        glob.ORKAN_X: {
            'data': orkan_prices,
            'type': glob.PRICETYPE.INDIVIDUAL
        }
    }

    list_of_stations = []
    price_keys = ['bensin95', 'bensin95_discount', 'diesel', 'diesel_discount']

    for key, station in sorted(all_stations.items()):
        station['key'] = key
        if prices_map[station['company']]['type'] == glob.PRICETYPE.INDIVIDUAL:
            for price_key in price_keys:
                if key.startswith('dn') and key not in prices_map[station['company']]['data']:
                    # <TEMPORARY DAELAN MEASURE>
                    #
                    # Daelan has received two new stations from N1 and new owners have now
                    # taken over its business. For now it seems they will continue to use
                    # the N1 backend to provide online fuel prices on the daelan.is webpage,
                    # yet these two new stations are not shown and probably won't show up
                    # until the new Daelan owners have renovated their website.
                    #
                    # Until then we tie the price on the two new stations to the price in
                    # Daelan Fellsmuli
                    #
                    # </TEMPORARY DAELAN MEASURE>
                    station[price_key] = prices_map[station['company']]['data']['dn_000'][price_key]
                else:
                    station[price_key] = prices_map[station['company']]['data'][key][price_key]
        elif prices_map[station['company']]['type'] == glob.PRICETYPE.GLOBAL:
            for price_key in price_keys:
                station[price_key] = prices_map[station['company']]['data'][price_key]
            if station['company'] == glob.N1 and key in glob.N1_PRICE_DIFF:
                # Some N1 stations have been observed in real life to have fixed
                # prices that differ from the most common price shown on the
                # N1 webpage.
                for price_key in price_keys:
                    station[price_key] += glob.N1_PRICE_DIFF[key][price_key]
                # Note: hardcoded price deviations, in no way guaranteed to
                # be permanently correct.
        list_of_stations.append(station)

    data = {'stations': list_of_stations}

    data_json_pretty_file = os.path.join(current_dir, '../vaktin/gas.json')
    data_json_mini_file = os.path.join(current_dir, '../vaktin/gas.min.json')

    utils.save_to_json(data_json_pretty_file, data, pretty=True)
    utils.save_to_json(data_json_mini_file, data, pretty=False)
Example #23
0
    print('Finalizado...')


def main():
    # for page in string.ascii_uppercase:
    #     extract_shoppings(f"https://abrasce.com.br/guia-de-shoppings/?letter={page}")
    
    # extract_shoppings("https://abrasce.com.br/guia-de-shoppings/strip-mall/",)
    # extract_shoppings("https://abrasce.com.br/guia-de-shoppings/outlet-center/")

    extract_details()
    JSONtoExcel()


if __name__ == "__main__":
    start = timeit.default_timer()

    try:
        main()

        tempo_estimado(start)

    except KeyboardInterrupt:
        save_to_json(extracted_info)
        tempo_estimado(start)

    except Exception as error:
        save_to_json(extracted_info)
        tempo_estimado(start)
        raise
Example #24
0
def main():
    current_dir = os.path.dirname(os.path.realpath(__file__))
    companies = [{
        'name': glob.ATLANTSOLIA,
        'stations': '../stations/atlantsolia.json'
    }, {
        'name': glob.COSTCO,
        'stations': '../stations/costco.json'
    }, {
        'name': glob.N1,
        'stations': '../stations/n1.json'
    }, {
        'name': glob.DAELAN,
        'stations': '../stations/daelan.json'
    }, {
        'name': glob.OB,
        'stations': '../stations/ob.json'
    }, {
        'name': glob.OLIS,
        'stations': '../stations/olis.json'
    }, {
        'name': glob.ORKAN,
        'stations': '../stations/orkan.json'
    }, {
        'name': glob.ORKAN_X,
        'stations': '../stations/orkanx.json'
    }]

    all_stations = {}
    for company in companies:
        filepath = os.path.join(current_dir, company['stations'])
        stations = utils.load_json(filepath)
        for key in stations:
            station = stations[key]
            station['company'] = company['name']
            all_stations[key] = station

    # station prices
    atlantsolia_prices = scraper.get_individual_atlantsolia_prices()
    costco_prices = scraper.get_global_costco_prices()
    n1_prices = scraper.get_global_n1_prices()
    daelan_prices = scraper.get_global_daelan_prices()
    ob_prices = scraper.get_individual_ob_prices()
    olis_prices = scraper.get_global_olis_prices()
    orkan_prices = scraper.get_individual_orkan_prices()
    prices_map = {
        glob.ATLANTSOLIA: {
            'data': atlantsolia_prices,
            'type': glob.PRICETYPE.INDIVIDUAL
        },
        glob.COSTCO: {
            'data': costco_prices,
            'type': glob.PRICETYPE.GLOBAL
        },
        glob.N1: {
            'data': n1_prices,
            'type': glob.PRICETYPE.GLOBAL
        },
        glob.DAELAN: {
            'data': daelan_prices,
            'type': glob.PRICETYPE.GLOBAL
        },
        glob.OB: {
            'data': ob_prices,
            'type': glob.PRICETYPE.INDIVIDUAL
        },
        glob.OLIS: {
            'data': olis_prices,
            'type': glob.PRICETYPE.GLOBAL
        },
        glob.ORKAN: {
            'data': orkan_prices,
            'type': glob.PRICETYPE.INDIVIDUAL
        },
        glob.ORKAN_X: {
            'data': orkan_prices,
            'type': glob.PRICETYPE.INDIVIDUAL
        }
    }

    list_of_stations = []
    price_keys = ['bensin95', 'bensin95_discount', 'diesel', 'diesel_discount']

    for key, station in sorted(all_stations.items()):
        station['key'] = key
        if prices_map[station['company']]['type'] == glob.PRICETYPE.INDIVIDUAL:
            for price_key in price_keys:
                if key.startswith('dn') and key not in prices_map[
                        station['company']]['data']:
                    # <TEMPORARY DAELAN MEASURE>
                    #
                    # Daelan has received two new stations from N1 and new owners have now
                    # taken over its business. For now it seems they will continue to use
                    # the N1 backend to provide online fuel prices on the daelan.is webpage,
                    # yet these two new stations are not shown and probably won't show up
                    # until the new Daelan owners have renovated their website.
                    #
                    # Until then we tie the price on the two new stations to the price in
                    # Daelan Fellsmuli
                    #
                    # </TEMPORARY DAELAN MEASURE>
                    station[price_key] = prices_map[
                        station['company']]['data']['dn_000'][price_key]
                else:
                    station[price_key] = prices_map[
                        station['company']]['data'][key][price_key]
        elif prices_map[station['company']]['type'] == glob.PRICETYPE.GLOBAL:
            for price_key in price_keys:
                station[price_key] = prices_map[
                    station['company']]['data'][price_key]
            if station['company'] == glob.N1 and key in glob.N1_PRICE_DIFF:
                # Some N1 stations have been observed in real life to have fixed
                # prices that differ from the most common price shown on the
                # N1 webpage.
                for price_key in price_keys:
                    station[price_key] += glob.N1_PRICE_DIFF[key][price_key]
                # Note: hardcoded price deviations, in no way guaranteed to
                # be permanently correct.
        list_of_stations.append(station)

    data = {'stations': list_of_stations}

    data_json_pretty_file = os.path.join(current_dir, '../vaktin/gas.json')
    data_json_mini_file = os.path.join(current_dir, '../vaktin/gas.min.json')

    utils.save_to_json(data_json_pretty_file, data, pretty=True)
    utils.save_to_json(data_json_mini_file, data, pretty=False)
Example #25
0
def extract_details():
    print(f"{len(read_link())} links achados")

    contador = 1

    for shopping_page in read_link():
        print(f'Extraindo {contador} link')

        details = {}

        # initialize the driver and fetch the rendered page
        dynamic_result = dynamic_html(shopping_page)

        if dynamic_result is False:
            extracted_info.append(details)
            save_to_json(details)
            continue

        crawler = init_parser(dynamic_result)

        details['Nome'] = crawler.find('span', class_="post post-shopping current-item").text

        details['Tipo'] = crawler.find('a', class_="taxonomy operacao").text

        details['link'] = shopping_page

        details_container = crawler.find('div',class_="specs")

        # PERFIL DE CONSUMIDORES (consumer profile)
        perfil_title = details_container.find(text="PERFIL DE CONSUMIDORES")
        class_content = perfil_title.findNext('div')

        class_perfil = []
        for p in class_content.find_all('p'):
            class_perfil.append(p.text)


        details['Classe A'] = class_perfil[0]
        details['Classe B'] = class_perfil[1]
        details['Classe C'] = class_perfil[2]
        details['Classe D'] = class_perfil[3]
        # details[perfil_title] = format_text(class_content.text)

        # ENTRETENIMENTO (entertainment)
        enterteiment_title = details_container.find(text="ENTRETENIMENTO")
        enterteiment_content = enterteiment_title.findNext('div')

        # print(enterteiment_title)
        details[enterteiment_title] = format_text(enterteiment_content.text)

        # ÁREA TOTAL DO TERRENO (total lot area)
        area_title = details_container.find(text="ÁREA TOTAL DO TERRENO")
        area_content = area_title.findNext('div')

        # print(area_title)
        details[area_title] = format_text(area_content.text)

        # CONTATO (contact)
        contact_title = details_container.find(text="CONTATO")
        contact_content = contact_title.findNext('ul')

        # print(contact_title)
        details[contact_title] = format_text(contact_content.text)

        # Icons

        aditional_info = crawler.find('div', class_="icons shoppings mt-4 mb-4")

        box = aditional_info.find_all('div', class_="box") 

        for box_info in box:
            title = box_info.find('p', class_='mb-0')
            detail_content = box_info.find('p', class_="number")
            
            details[title.text] = detail_content.text


        extracted_info.append(details)
        contador += 1

    print('Finalizado!')

    print('Salvando em json...')

    save_to_json(extracted_info)
    print('Finalizado...')
Example #26
0
		dataset[name] = selected_sounds
		print 'selected %i sounds out of %i!' % (len(selected_sounds), len(filtered_results))
	else:
		print 'not enough sounds were found for current class (%i sounds found).' % len(filtered_results)

	# TIP ON KEYWORD EXTRACTION: we could extract some keywords from the textual descriptions using functions
	# provided in ELVIS (see https://github.com/sergiooramas/elvis and run_entity_linking.py file in utils folder)
	# For each selected sound in our dataset we could do something like:
	#
	# from utils.run_entity_linking import spotlight
	#
	# sound_textual_description = "One of the English summer storms of 2014 recorded on a condenser mic. The neighbor's dog barks at it at some point. \r\n\r\nNaturalistic, no processing done to it whatsoever."
	# results = spotlight(sound_textual_description.split('\n'))
	# keywords = list()
	# for element in results:
	# 	 for entity in element['entities']:
	#		 keywords.append(entity['label'])

# Save dataset to file so we can work with it later on
utils.save_to_json('%s.json' % DATASET_NAME, dataset)

# 2) Know your dataset
# ********************

# Generate html files with sound examples and show most common tags per class
for class_name, sounds in dataset.items():
	print class_name
	utils.generate_html_file_with_sound_examples([sound['id'] for sound in sounds][:15], 'html/%s_%s.html' % (DATASET_NAME, class_name))
	class_tags = utils.get_all_tags_from_class(class_name, dataset)
	utils.print_most_common_tags(class_tags)
Example #27
0
    else:
        repo_path = my_args.repository
    if not os.path.exists(repo_path):
        fail_nicely(parser, 'Path "%s" seems to not exist.' % (repo_path, ))
    try:
        repo = git.Repo(repo_path)
    except Exception:
        error_msg = 'Could not read git repo from "%s".' % (repo_path, )
        fail_nicely(parser, error_msg)
    if my_args.from_date is not None:
        try:
            datetime.datetime.strptime(my_args.from_date, '%Y-%m-%d')
        except ValueError:
            fail_nicely(parser, '--from-date not in format YYYY-MM-DD')
    if my_args.to_date is not None:
        try:
            datetime.datetime.strptime(my_args.to_date, '%Y-%m-%d')
        except ValueError:
            fail_nicely(parser, '--to-date not in format YYYY-MM-DD')
    price_changes = read_price_changes(repo,
                                       fromdate=my_args.from_date,
                                       todate=my_args.to_date)
    if my_args.output_directory is None:
        output_directory = os.path.join(current_dir, '../vaktin/')
    else:
        output_directory = my_args.output_directory
    data_json_pretty_file = os.path.join(output_directory, 'trends.json')
    data_json_mini_file = os.path.join(output_directory, 'trends.min.json')
    utils.save_to_json(data_json_pretty_file, price_changes, pretty=True)
    utils.save_to_json(data_json_mini_file, price_changes, pretty=False)