def main():
    dr = data_manager.DataManager()
    dp = displayer.Displayer()
    cities = dr.read_data_from_file(setup.path_to_file)
    dp.plot_cities(cities)
    alg = Algorithm(cities, setup.instances, setup.parents_amount,
                    setup.mutation_chance)
    alg.proceed()
    dp.plot_route(alg.children[31].city_route)
    print(alg.children[31].cost)
    plt.show()
def main(unused_argv):
    hps = seq2seq_attention_model.HParams(
        mode=FLAGS.mode,
        min_lr=0.01,  # min learning rate.
        lr=0.15,  # learning rate
        batch_size=4,
        enc_layers=4,
        enc_timesteps=80,
        dec_timesteps=20,
        min_input_len=2,  # discard articles/summaries shorter than this
        num_hidden=256,  # for rnn cell
        emb_dim=128,  # If 0, don't use embedding
        max_grad_norm=2,
        num_softmax_samples=4096,  # If 0, no sampled softmax.
        num_kw=3)
    vocab = data_manager.Vocab('pubmed')
    batcher = data_manager.DataManager(vocab, hps)
    tf.set_random_seed(FLAGS.random_seed)

    if hps.mode == 'train':
        eval_hps = hps._replace(mode='eval')
        eval_batcher = data_manager.DataManager(vocab, eval_hps)
        dropout = .9
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Train(model, batcher, eval_batcher, dropout)
    elif hps.mode == 'eval':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Eval(model, batcher, vocab=vocab)
    elif hps.mode == 'decode':
        # Only need to restore the 1st step and reuse it since
        # we keep and feed in state for each step's output.
        decode_mdl_hps = hps._replace(dec_timesteps=1)
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
        decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
        decoder.DecodeLoop()
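# HParams above behaves like a collections.namedtuple, so hps._replace(...)
# returns a copy with the given fields swapped and leaves hps untouched. A
# minimal, self-contained sketch of that pattern (names here are illustrative,
# not from the project above):
import collections

HParams = collections.namedtuple('HParams', ['mode', 'dec_timesteps'])

hps = HParams(mode='train', dec_timesteps=20)
decode_hps = hps._replace(dec_timesteps=1)  # new tuple; hps is unchanged
assert hps.dec_timesteps == 20 and decode_hps.dec_timesteps == 1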
def __init__(self):
    # Create data manager object
    self.dm = data_manager.DataManager()
    self.start_urls = [
        'https://www.zapimoveis.com.br/lancamento/venda-apartamento-2-quartos-liberdade-centro-sao-paulo-sp-37m2-id-2476413410/'
    ]
    # [self.dm.add_https_scheme(domain) for domain in self.allowed_domains]
    self.dm.set_save_file_settings(self.save_file, self.save_dir)
    self.dm.set_domains(self.allowed_domains, self.domain_country)
    # get urls to crawl
    self.next_urls = set()
    self.next_urls.update(self.dm.get_urls_to_crawl(list(self.next_urls)))
def main(_):
    """main function"""
    dataset = data_manager.DataManager()
    model_dir = '../../runs/bag/{}'.format(FLAGS.model)
    time_str = datetime.datetime.now().isoformat()
    print('{}: start test'.format(time_str))
    print('reading wordembedding')
    wordembedding = np.load('../../data/bag_data/vec.npy')
    settings = NETwork.Settings()
    settings.vocab_size = len(wordembedding)
    settings.filter_sizes = list(map(int, FLAGS.filter_sizes.split(',')))
    settings.pattern_num = FLAGS.pattern_num
    settings.l2_reg_omega = FLAGS.l2_reg_omega
    # checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir + FLAGS.model + '/checkpoints/')
    checkpoint_file = FLAGS.checkpoint_dir + FLAGS.model + '/model-best'
    graph = tf.Graph()
    with graph.as_default():
        gpu_options = tf.GPUOptions(allow_growth=True)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            time_str = datetime.datetime.now().isoformat()
            print('{}: construct network...'.format(time_str))
            # saver = tf.train.import_meta_graph('{}.meta'.format(checkpoint_file))
            # saver.restore(sess, checkpoint_file)
            network = NETwork.CNN(word_embeddings=dataset.wordembedding,
                                  settings=settings,
                                  is_training=False,
                                  is_evaluating=True,
                                  use_types=FLAGS.use_types)
            saver = tf.train.Saver()
            time_str = datetime.datetime.now().isoformat()
            print('{}: restore checkpoint file: {}'.format(time_str, checkpoint_file))
            saver.restore(sess, checkpoint_file)
            # test one entity relation's mentions
            time_str = datetime.datetime.now().isoformat()
            print('{}: testing...'.format(time_str))
            # Get Evaluator for evaluation
            evaluator = evaluation.Evaluator(dataset, sess, network, model_dir, settings)
            evaluator.test()
def __init__(self, conv):
    self.conv_id = conv
    ConvParameter.dmanager = data_manager.DataManager(conv_config_path)
    raw_data = ConvParameter.dmanager.getRessource(self.conv_id, 'config')
    if raw_data == "":
        self.data = {}
        self.data["AUTOREPLY_ENABLE"] = "True"
        self.data["TEXT_ENABLE"] = "True"
        self.data["STICKER_ENABLE"] = "True"
        self.data["VIDEO_ENABLE"] = "True"
        self.data["PROBAS"] = "100,100,100,100,100"
        self.__save()
    else:
        self.data = {}
        for i in raw_data.split('\n'):
            l = i.split('£')
            if len(l) == 2:
                self.data[l[0]] = l[1]
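# The parser above expects one "key£value" pair per line, so __save (not shown
# here) presumably writes the dict back in that shape. A hypothetical sketch of
# that inverse step; the helper name is illustrative, not from the project:
def _serialize(data):
    # one line per entry, key and value joined by the '£' delimiter
    return '\n'.join('{}£{}'.format(key, value) for key, value in data.items())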
def auth(self, code):
    """Exchanges the temporary auth code for an OAuth token and sets current team"""
    # After the user has authorized this app for use in their Slack team,
    # Slack returns a temporary authorization code that we'll exchange for
    # an OAuth token using the oauth.access endpoint
    auth_response = self.client.api_call(
        "oauth.access",
        client_id=self.oauth["client_id"],
        client_secret=self.oauth["client_secret"],
        code=code
    )
    self.current_team = auth_response["team_id"]
    self.authed_teams[self.current_team] = {
        "bot_token": auth_response["bot"]["bot_access_token"]
    }
    # Then we'll reconnect to the Slack Client with the correct team's bot token
    self.client = SlackClient(self.authed_teams[self.current_team]["bot_token"])
    if not self.dbs.get(self.current_team):
        self.dbs[self.current_team] = data_manager.DataManager(self.current_team)
    # TODO: save authed teams
    data_manager.save_dbs(teams=self.authed_teams)
future_date = get_date(today, days=SIX_MONTHS_IN_DAYS)
flight_params = {
    'apikey': fs.API_KEY,
    'fly_from': DEPARTURE_IATA,
    'date_from': tomorrow,
    'date_to': future_date,
    'nights_in_dst_from': 7,
    'nights_in_dst_to': 28,
    'flight_type': 'round',
    'one_for_city': 1,
    'adults': 1,
    'curr': 'GBP'
}

data_manager = dm.DataManager(dm.ENDPOINT, dm.SHEET, dm.TOKEN)
for row in data_manager.data['prices']:
    if len(row['iataCode']) != 0:
        continue
    city = row['city']
    params = {'apikey': fs.API_KEY, 'location_types': 'city', 'term': city}
    flight_search = fs.FlightSearch(fs.API_LOCATIONS, fs.ENDPOINT, params)
    row['iataCode'] = flight_search.search_locations('name', city, 'code')
    data_manager.put_data(row)

iata_codes = [row['iataCode'] for row in data_manager.data['prices']]
notification_manager = nm.NotificationManager(nm.ACCOUNT_SID, nm.AUTH_TOKEN)
for idx, iata_code in enumerate(iata_codes):
    flight_params.update(fly_to=iata_code, max_stopovers=0)
    flight_search = fs.FlightSearch(fs.API_SEARCH, fs.ENDPOINT, flight_params)
def main(_):
    """main function"""
    dataset = data_manager.DataManager(init_data=FLAGS.allow_init_data)
    model_dir = '../../runs/bag/{}'.format(FLAGS.model)
    settings = NETwork.Settings()
    settings.vocab_size = len(dataset.wordembedding)
    settings.num_classes = len(dataset.train_y[0])
    settings.filter_sizes = list(map(int, FLAGS.filter_sizes.split(',')))
    settings.pattern_num = FLAGS.pattern_num
    settings.l2_reg_omega = FLAGS.l2_reg_omega
    with tf.Graph().as_default():
        # gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.4)
        gpu_options = tf.GPUOptions(allow_growth=True)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Output directory for models and summaries
            # timestamp = str(int(time.time()))
            timestamp = FLAGS.model
            out_dir = os.path.abspath(
                os.path.join(os.path.pardir, os.path.pardir + '/runs/bag', timestamp))
            print('Construct network for train......')
            network = NETwork.CNN(word_embeddings=dataset.wordembedding,
                                  settings=settings,
                                  is_training=True,
                                  is_evaluating=False,
                                  use_types=FLAGS.use_types)
            # Get Evaluator for evaluation
            if FLAGS.allow_evaluation:
                print('Construct network for evaluation......')
                e_network = NETwork.CNN(word_embeddings=dataset.wordembedding,
                                        settings=settings,
                                        is_training=True,
                                        is_evaluating=True,
                                        use_types=FLAGS.use_types)
                lastest_score = utils.read_pr(out_dir)
                evaluator = evaluation.Evaluator(dataset, sess, e_network,
                                                 model_dir, settings, lastest_score)
            # Define training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(0.001)
            grads_and_vars = optimizer.compute_gradients(network.final_loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar('loss', network.final_loss)
            acc_summary = tf.summary.scalar('accuracy', network.accuracy)
            pr_summary = tf.summary.scalar('pr_curve', evaluator.highest_score)
            # Train summaries
            train_summary_op = tf.summary.merge_all()
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
            # Checkpoint directory; TensorFlow assumes this directory already
            # exists, so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)
            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(word_batch, pos1_batch, pos2_batch, type_batch,
                           y_batch, mask_batch):
                """A single training step"""
                total_word = []
                total_pos1 = []
                total_pos2 = []
                total_type = []
                total_shape = []
                total_mask = []
                total_num = 0
                for i in range(len(word_batch)):
                    total_shape.append(total_num)
                    total_num += len(word_batch[i])
                    for j in range(len(word_batch[i])):
                        total_word.append(word_batch[i][j])
                        total_pos1.append(pos1_batch[i][j])
                        total_pos2.append(pos2_batch[i][j])
                        total_type.append(type_batch[i][j])
                        total_mask.append(mask_batch[i][j])
                # Here total_word and y_batch are not equal in length:
                # total_word[total_shape[i]:total_shape[i+1]] is related to y_batch[i]
                total_shape.append(total_num)
                total_shape = np.array(total_shape)
                total_word = np.array(total_word)
                total_pos1 = np.array(total_pos1)
                total_pos2 = np.array(total_pos2)
                total_type = np.array(total_type)
                total_mask = np.array(total_mask)
                feed_dict = {
                    network.input_word: total_word,
                    network.input_pos1: total_pos1,
                    network.input_pos2: total_pos2,
                    network.input_type: total_type,
                    network.input_y: y_batch,
                    network.total_shape: total_shape,
                    network.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    # network.input_mask: total_mask
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op,
                    network.final_loss, network.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)
                # if step % 100 == 0:
                #     time_str = datetime.datetime.now().isoformat()
                #     print('{}: step {}, loss {:g}, acc {:g}'.format(time_str, step, loss, accuracy))

            # Train epochs
            print('Start training......')
            for epoch in range(FLAGS.num_epochs):
                # Randomly shuffle data
                shuffle_indices = np.random.permutation(np.arange(len(dataset.train_y)))
                num_batches_per_epoch = int(
                    (len(dataset.train_y) - 1) / settings.batch_size) + 1
                # num_batches_per_epoch = int(len(shuffle_indices) / float(settings.batch_size))
                epoch_last_step = 0
                for batch_num in range(num_batches_per_epoch):
                    start_index = batch_num * settings.batch_size
                    end_index = min((batch_num + 1) * settings.batch_size,
                                    len(dataset.train_y))
                    if (end_index - start_index) != settings.batch_size:
                        start_index = end_index - settings.batch_size
                    batch_index = shuffle_indices[start_index:end_index]
                    word_batch = dataset.train_word[batch_index]
                    pos1_batch = dataset.train_pos1[batch_index]
                    pos2_batch = dataset.train_pos2[batch_index]
                    type_batch = dataset.train_type[batch_index]
                    mask_batch = dataset.train_mask[batch_index]
                    y_batch = dataset.train_y[batch_index]
                    train_step(word_batch, pos1_batch, pos2_batch, type_batch,
                               y_batch, mask_batch)
                if epoch % FLAGS.checkpoint_every == 0:
                    epoch_last_step = tf.train.global_step(sess, global_step)
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=epoch_last_step)
                    print('Epoch {} batch {} Saved model checkpoint to {}, pattern_num {}'
                          .format(epoch, batch_num, path, FLAGS.pattern_num))
                if FLAGS.allow_evaluation and epoch % FLAGS.evaluate_every == 0:
                    new_highest = evaluator.test()
                    print('Best precision recall area now is {}, progress: {}\n'.format(
                        evaluator.highest_score,
                        utils.calculate_progress(epoch, FLAGS.pattern_num)))
                    if new_highest:
                        utils.copy_model(out_dir, epoch_last_step)
                        utils.store_pr(out_dir, evaluator.highest_score)
            print('final best precision recall: {} pattern_num: {}\n'.format(
                evaluator.highest_score, FLAGS.pattern_num))
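# train_step above flattens a batch of bags (one bag of sentences per label)
# into a single sentence array, with total_shape recording the bag boundaries.
# A tiny self-contained illustration of that bookkeeping (toy data, not from
# the dataset above):
bags = [['s0', 's1'], ['s2'], ['s3', 's4', 's5']]  # 3 bags, 6 sentences
total_shape, flat, total_num = [], [], 0
for bag in bags:
    total_shape.append(total_num)  # offset where this bag starts
    total_num += len(bag)
    flat.extend(bag)
total_shape.append(total_num)  # final offset closes the last bag
# flat[total_shape[i]:total_shape[i + 1]] recovers bag i
assert flat[total_shape[1]:total_shape[2]] == ['s2']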
import tensorflow as tf
from tensorflow import keras
import numpy as np
from model import MessageModelStep, MessageModel
import data_manager

DATA_SET = "messages.csv"
LARGE_VOCAB_SIZE = 32
TRAIN_TEST_SPLIT = 0.75

physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

data_m = data_manager.DataManager(file=DATA_SET,
                                  skip_first_line=False,
                                  large_vocab=LARGE_VOCAB_SIZE,
                                  train_split=TRAIN_TEST_SPLIT)
model = MessageModel(len(data_m.get_vocab()))
optimizer = keras.optimizers.Adam(learning_rate=0.0001)
ckpt = tf.train.Checkpoint(optimizer=optimizer, model=model)
manager = tf.train.CheckpointManager(ckpt, './logs/checkpoints', max_to_keep=3)
ckpt.restore(manager.latest_checkpoint)

# set up the model
x_train, _ = list(data_m.sample_test(1).as_numpy_iterator())[0]
model(np.expand_dims(x_train, axis=0))
stepModel = MessageModelStep(model, data_m.decode, data_m.encode, temperature=0.01)
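# ckpt.restore() above silently no-ops when manager.latest_checkpoint is None
# (e.g. on a first run). A hedged sketch, building on the ckpt and manager
# objects from the snippet above, that makes restore-vs-fresh-start explicit
# using the load-status object the TF2 checkpoint API returns:
status = ckpt.restore(manager.latest_checkpoint)
if manager.latest_checkpoint:
    status.assert_existing_objects_matched()  # fail fast on a mismatched checkpoint
    print('Restored from', manager.latest_checkpoint)
else:
    print('Initializing from scratch.')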
            len(params.classes) + 2 + 2 + 2
    ):  # The extra two are for the mean class accuracy and voxelwise accuracy
        writer.add_summary(results[category], epoch)


# Concatenates the protein weight masks to produce an extra dimension suitable
# for the one-hot-encoding output
def concatinate_masks(weight_mask):
    weight_mask_extended = tf.expand_dims(weight_mask, 4)
    list_of_masks = []
    for index in range(len(params.classes)):
        list_of_masks.append(weight_mask_extended)
    return tf.concat(list_of_masks, 4)


############################################ main #################################################
trainManager = dm.DataManager()
trainManager.openHDF5(params.data_path + params.model + '_train_data.hdf5')
trainManager.set_batch_size(batch_size=params.batch_size)

testManager = dm.DataManager()
testManager.openHDF5(params.data_path + params.model + '_test_data.hdf5')
testManager.set_batch_size(batch_size=params.batch_size)

# This launches Tensorboard for monitoring the training.
subprocess.Popen('tensorboard --logdir=' + params.tensorboard_path)

###### Train Model ######
train_network()
#########################

# Cleanup
del trainManager
def setUp(self):
    self.data_manager = data_manager.DataManager(':memory:')
    self.data_manager.connexion.execute(
        "INSERT INTO users (uuid) VALUES ('test_user')")
    self.data_manager.connexion.commit()
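# With the in-memory SQLite database seeded in setUp, each test gets a fresh,
# isolated store. A hypothetical test method building on that fixture
# (the query and column names are illustrative, assuming connexion behaves
# like a standard sqlite3 connection):
def test_user_exists(self):
    cursor = self.data_manager.connexion.execute(
        "SELECT uuid FROM users WHERE uuid = 'test_user'")
    self.assertEqual(cursor.fetchone()[0], 'test_user')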
def __init__(self, csv_path, batch_size):
    self.data_manager = data_manager.DataManager(csv_path, batch_size)
    self.queue = queue.Queue(self.data_manager)
    self.batch_size = batch_size
def run(self):
    print('Thread: {:<25} - '.format(self._iden) +
          'Initializing and starting child threads: ')

    # ------------------------- Data Grid Initialization ------------------------- #
    self.data_grid = None

    # -------------------------- Threads Initialization -------------------------- #
    # Data Manager Thread
    self._thread_data_manager = data_manager.DataManager()
    self._thread_data_manager.start()
    self.data_queue = self._thread_data_manager.data_queue
    self._thread_dict[1] = self._thread_data_manager

    # Gui Thread
    self._thread_front_end = front_end_manager.FrontEndManager(self.command_queue)
    self._thread_front_end.start()
    self._thread_dict[2] = self._thread_front_end

    # Connectivity Threads #
    # QCX
    self._thread_connectivity_qcx = connectivity_qcx.ConnectivityQCX(self.data_queue)
    self._thread_connectivity_qcx.start()
    self._thread_dict[3] = self._thread_connectivity_qcx
    # BFX
    self._thread_connectivity_bfx = connectivity_bfx.ConnectivityBFX(self.data_queue)
    self._thread_connectivity_bfx.start()
    self._thread_dict[4] = self._thread_connectivity_bfx

    print('Thread: {:<25} - '.format(self._iden) +
          'Child Thread Initialization complete!')

    # ------------------------------- MAIN LOOP ------------------------------- #
    while not self.stopped.is_set():
        if not self.command_queue.empty():
            command, payload = self.command_queue.get()
            if payload is None:
                self.command_handlers[command]()
            else:
                self.command_handlers[command](payload)
            self.__idle_state = False
        elif self.__idle_state:
            time.sleep(self.__sleep)
        else:
            if self.__idle_count == self.__idle_limit:
                self.__idle_count = 0
                self.__idle_state = True
            else:
                self.__idle_count += 1
    # ------------------------------------------------------------------------- #

    # Stop main has been triggered. Shut down all child threads.
    if self.stopped.is_set():
        for x in range(len(self._thread_dict), 0, -1):
            self._thread_dict[x].stop_thread()
# Flight Deal Finder
import datetime as dt
import data_manager as dm
import flight_search as fs
import notification_manager as nm

ORIGIN_CODE = "LON"

# initialization
data_manager = dm.DataManager()
flight_search = fs.FlightSearch()
notification_manager = nm.NotificationManager()

# load the spreadsheet
sheet = data_manager.get_sheet()

# add missing codes
for city in sheet:
    if city["iataCode"] == "":
        query_result = flight_search.find_city_code(city["city"])
        city["iataCode"] = query_result[0]["code"]
        # update the row in the spreadsheet
        data_manager.update_entry(city)

for city in sheet:
    # using the default time zone
    today = dt.datetime.now()
    tomorrow = today + dt.timedelta(days=1)
    in_six_months = today + dt.timedelta(days=(6 * 30))
def main(argv):
    del argv  # Unused.
    tf_lib.config_gpu()
    asset_measures.TAX_ON_DIVIDENDS_PERCENTAGE = FLAGS.tax_on_dividends
    dense_measures.MAX_DAYS = FLAGS.max_days

    # Select and sort symbols.
    symbols = _get_symbols(FLAGS.symbols)

    # Download data or reload it from disk cache.
    dmgr = data_manager.DataManager(FLAGS.data)
    if not FLAGS.batch_size:
        symbols = dmgr.DownloadRawDataForList(symbols, max_age_days=FLAGS.max_age_days)
    else:
        n = FLAGS.batch_size
        batches = [symbols[ii * n:(ii + 1) * n]
                   for ii in range((len(symbols) + n - 1) // n)]
        symbols = []
        for ii, batch in enumerate(batches):
            print(f'\n(Down)Loading data for {batch} (batch {ii} out of {len(batches)})')
            symbols = symbols + dmgr.DownloadRawDataForList(
                batch, max_age_days=FLAGS.max_age_days)

    # Calculate independent (from each other) derived information if not loaded from cache.
    num_symbols = len(symbols)
    for ii, symbol in enumerate(symbols):
        if not asset_measures.HasDerivedValues(dmgr.data[symbol]) or FLAGS.force_recalc:
            description = config_ib.SYMBOL_TO_INFO[symbol]['description']
            logging.info(f'Calculating derived values for {symbol}: {description} - ({num_symbols - ii} missing)')
            dmgr.data[symbol] = asset_measures.AddDerivedValues(
                dmgr.data[symbol], dmgr.dividends[symbol], symbol)
            dmgr.SaveData(symbol)

    # Calculate dense ordered arrays.
    fields, mask, all_serials = dense_measures.DenseMeasureMatrices(dmgr.data, symbols)

    # Extra metrics calculated in Tensorflow.
    fields['AdjustedLogDailyGain'] = optimizations.adjusted_log_gains(
        fields['LogDailyGain'], FLAGS.loss_cost, FLAGS.gain_power)

    # Get total assets and mask for only symbols with total_assets.
    total_assets = _get_total_assets(dmgr, symbols, mask)

    # Print out gains for each symbol.
    # Header of all outputs.
    print('Symbol,Gain,Adjusted Gain,Initial,Final,Total Assets,Description')
    if 'average' in FLAGS.stats:
        average(symbols, mask, fields, all_serials)
    if 'commons' in FLAGS.stats:
        average(symbols, mask, fields, all_serials, total_assets=total_assets)
    # if 'greedy' in FLAGS.stats:
    #     greedy(symbols, mask, fields)
    if 'mix' in FLAGS.stats:
        mix_previous_period(symbols, mask, fields, all_serials)
    if 'per_asset' in FLAGS.stats:
        per_asset_gains(symbols, mask, fields, total_assets)
    if 'selection' in FLAGS.stats:
        assets_selection(symbols, mask, fields, all_serials)
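# The batching above is the standard ceil-division chunking idiom:
# (len(xs) + n - 1) // n batches, the last one possibly short. A minimal
# standalone check of that slicing:
xs = list(range(7))
n = 3
batches = [xs[i * n:(i + 1) * n] for i in range((len(xs) + n - 1) // n)]
assert batches == [[0, 1, 2], [3, 4, 5], [6]]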
# Create a Pub/sub channel to blast out new generation signals.
logging.info("Listening for ZeroMQ pub/sub clients on port %d.",
             constants.ZMQ_PUBSUB_PORT)
zmq_publisher = zmq_publisher.ZmqPublisher(zmq_factory,
                                           constants.ZMQ_PUBSUB_PORT,
                                           data_manager)

# Create a Data Manager instance that changes the sql backend's
# pointers for which db is queried and which db is updated.
data_manager = data_manager.DataManager(
    options.db_file,
    db_dir=options.db_dir,
    signal_channel=zmq_publisher,
    archive_db_dir=options.archive_db_dir,
    generation_duration_s=options.generation_duration_s)


def reread_views(views_file):
    try:
        mtime = os.path.getmtime(views_file)
    except OSError:
        return
    if reread_views.last_mtime < mtime:
        logging.info("Reading views file '%s'.", views_file)
        reread_views.last_mtime = mtime
        data_manager.read_views(views_file)


reread_views.last_mtime = 0
def main(_):
    """main function"""
    dataset = data_manager.DataManager()
    model_dir = '../../runs/bag/{}'.format(FLAGS.model)
    time_str = datetime.datetime.now().isoformat()
    print('{}: start test'.format(time_str))
    print('reading wordembedding')
    wordembedding = np.load('../../data/bag_data/vec.npy')
    settings = NETwork.Settings()
    settings.vocab_size = len(wordembedding)
    settings.filter_sizes = list(map(int, FLAGS.filter_sizes.split(',')))
    settings.pattern_num = FLAGS.pattern_num
    settings.l2_reg_omega = FLAGS.l2_reg_omega
    # checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir + FLAGS.model + '/checkpoints/')
    checkpoint_file = FLAGS.checkpoint_dir + FLAGS.model + '/model-best'
    graph = tf.Graph()
    with graph.as_default():
        gpu_options = tf.GPUOptions(allow_growth=True)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            time_str = datetime.datetime.now().isoformat()
            print('{}: construct network...'.format(time_str))
            # saver = tf.train.import_meta_graph('{}.meta'.format(checkpoint_file))
            # saver.restore(sess, checkpoint_file)
            network = NETwork.CNN(word_embeddings=dataset.wordembedding,
                                  settings=settings,
                                  is_training=False,
                                  is_evaluating=True,
                                  use_types=FLAGS.use_types)
            saver = tf.train.Saver()
            time_str = datetime.datetime.now().isoformat()
            print('{}: restore checkpoint file: {}'.format(time_str, checkpoint_file))
            saver.restore(sess, checkpoint_file)
            # test one entity relation's mentions
            time_str = datetime.datetime.now().isoformat()
            print('{}: testing...'.format(time_str))

            def eval_step(test_word, test_pos1, test_pos2, test_type, test_y):
                num_batches_per_epoch = int(
                    (len(test_y) - 1) / settings.batch_size) + 1
                for batch_num in range(num_batches_per_epoch):
                    start_index = batch_num * settings.batch_size
                    end_index = min((batch_num + 1) * settings.batch_size,
                                    len(test_y))
                    if (end_index - start_index) != settings.batch_size:
                        start_index = end_index - settings.batch_size
                    word_batch = test_word[start_index:end_index]
                    pos1_batch = test_pos1[start_index:end_index]
                    pos2_batch = test_pos2[start_index:end_index]
                    type_batch = test_type[start_index:end_index]
                    y_batch = test_y[start_index:end_index]
                    attentions = eval_op(word_batch, pos1_batch, pos2_batch,
                                         type_batch, y_batch)
                    print(attentions)

            def eval_op(word_batch, pos1_batch, pos2_batch, type_batch, y_batch):
                """Evaluate a batch"""
                total_word = []
                total_pos1 = []
                total_pos2 = []
                total_type = []
                total_shape_batch = []
                total_num = 0
                for i in range(len(word_batch)):
                    total_shape_batch.append(total_num)
                    total_num += len(word_batch[i])
                    for j in range(len(word_batch[i])):
                        total_word.append(word_batch[i][j])
                        total_pos1.append(pos1_batch[i][j])
                        total_pos2.append(pos2_batch[i][j])
                        total_type.append(type_batch[i][j])
                # Here total_word and y_batch are not equal in length:
                # total_word[total_shape[i]:total_shape[i+1]] is related to y_batch[i]
                total_shape_batch.append(total_num)
                total_shape_batch = np.array(total_shape_batch)
                total_word = np.array(total_word)
                total_pos1 = np.array(total_pos1)
                total_pos2 = np.array(total_pos2)
                total_type = np.array(total_type)
                feed_dict = {
                    network.input_word: total_word,
                    network.input_pos1: total_pos1,
                    network.input_pos2: total_pos2,
                    network.input_type: total_type,
                    # network.input_y: y_batch,
                    network.total_shape: total_shape_batch,
                    network.dropout_keep_prob: 1.0
                }
                attentions = sess.run([network.attention], feed_dict)
                return attentions

            eval_step(dataset.test_word, dataset.test_pos1, dataset.test_pos2,
                      dataset.test_type, dataset.test_y)
# This file will need to use the DataManager, FlightSearch, FlightData,
# NotificationManager classes to achieve the program requirements.
import requests
import os
import data_manager
import notification_manager
import flight_search

# Initialise class references
flight_finder = flight_search.FlightSearch()
flight_data = data_manager.DataManager()
twilio = notification_manager.NotificationManager()

# Load up current flight data
flight_data.get_data()

# Update all city codes
for row in flight_data.data:
    # 1. Update IATA code
    iata_code = row['iataCode']
    if len(iata_code) == 0:
        print(row['city'])
        this_code = flight_finder.get_iata_code(row['city'])
        print(row)
        row['iataCode'] = this_code
        print(row)
        flight_data.set_data(row)
    # 2. Get lowest price (use the row's code, which may have just been filled in)
    search_results = flight_finder.get_price(row['iataCode'])
"""This module marks the starting point of the program (which means people should run the program here).""" import data_manager as dm import user_interface as ui import input_processor as ip if __name__ == "__main__": ip.InputProcessor(ui.UserInterface(), dm.DataManager()).execute_program()
def __init__(self, telegramBot, remote_service):
    self.telegramBot = telegramBot
    self.remote_service = remote_service
    self.data_manager = data_manager.DataManager()
    self.conv_parameter_list = ConvParameterList()
class Reportclient:
    __report_data__ = None
    __data_manager = dm.DataManager()

    # externally referenced constants
    # report config props
    CONFIG_REPORT_TYPE = 'report_type'
    CONFIG_REPORT_FORMAT = 'report_format'
    CONFIG_START_DATE = 'start_date'
    CONFIG_END_DATE = 'end_date'
    CONFIG_REPORT_NAME = 'report_name'

    # data keys
    REPORT_TERM_SUIT = 'suit'
    REPORT_TERM_RANK = 'rank'
    REPORT_TERM_REVERSE = 'reversed'
    REPORT_TERM_SUIT_RANK_DATE = 'suit_rank_date'
    REPORT_TERM_DATE = 'date'

    # report types
    REPORT_TITLE_SUITS = 'Count by Suit'
    REPORT_TITLE_REVERSE = 'Count of Reverses'
    REPORT_TITLE_SUIT_RANK_DATE = 'Suits by Rank Over Time'
    REPORT_TYPES = {
        REPORT_TERM_SUIT: REPORT_TITLE_SUITS,
        REPORT_TERM_REVERSE: REPORT_TITLE_REVERSE,
        REPORT_TERM_SUIT_RANK_DATE: REPORT_TITLE_SUIT_RANK_DATE
    }

    def __init__(self):
        logging.basicConfig(format='%(asctime)s %(message)s')
        logging.basicConfig(filename='tarottracker.log', level=logging.DEBUG)
        self.__report_data__ = self.__data_manager.get_cache()

    def get_daily_tt_list(self):
        data = self.get_report_data()
        daily_tt_list = []
        for daily_tt_id in data:
            daily_tt_list.append(data[daily_tt_id])
        # sort objects by date
        daily_tt_list.sort(key=lambda x: x['date'])
        return daily_tt_list

    # returns all members of data_set whose dates
    # are within the range defined by start_date and end_date
    def filter_by_date(self, start_date, end_date, data_set={}):
        from datetime import date
        filtered_data = {}
        sd = None  # start_date
        ed = None  # end_date

        # assumes input format: mm/dd/yyyy
        # returns (yyyy, mm, dd)
        def get_date_vals(date_str):
            dtlst = date_str.split('/')
            return (int(dtlst[2]), int(dtlst[0]), int(dtlst[1]))

        # loop data_set and then filter by dates
        for daily_key in data_set:
            test_date = None
            daily = None
            if self.REPORT_TERM_DATE in data_set[daily_key]:
                daily = data_set[daily_key]
                test_date = daily[self.REPORT_TERM_DATE]
                try:
                    # convert start_date to datetime
                    if sd is None:
                        (y, m, d) = get_date_vals(start_date)
                        sd = date(y, m, d)
                    # convert end_date to datetime
                    if ed is None:
                        (y, m, d) = get_date_vals(end_date)
                        ed = date(y, m, d)
                    # convert test_date to datetime
                    (y, m, d) = get_date_vals(test_date)
                    test_date = date(y, m, d)
                except ValueError as verr:
                    print('date input error: y - {} m - {} d - {}'.format(y, m, d))
                    print('db id: {}'.format(daily_key))
                    print('data model: {}'.format(str(daily)))
                    raise verr
                # true if start_date <= test <= end_date
                if sd <= test_date <= ed:
                    filtered_data[daily_key] = daily
        return filtered_data

    def get_report_data(self):
        return self.__report_data__

    def set_report_data(self, new_data):
        self.__report_data__ = new_data

    # TODO: make these generic: get bar chart, get pie chart
    # get chart of a specific number by date
    # specific card by date
    # returns a reference to a generated bar chart in svg format
    def get_suit_chart(self, config):
        data = self.get_report_data()
        chart_title = "Number of each suit for entire date range of the data set"
        if self.CONFIG_START_DATE in config:
            data = self.filter_by_date(config[self.CONFIG_START_DATE],
                                       config[self.CONFIG_END_DATE], data)
            chart_title = "Number of each suit for the date range of {} -- {}".format(
                config[self.CONFIG_START_DATE], config[self.CONFIG_END_DATE])
        daily_tt_list = []
        for daily_tt_id in data:
            daily_tt_list.append(data[daily_tt_id])
        # sort objects by date
        daily_tt_list.sort(key=lambda x: x['date'])
        suit_count = {}
        # loop the objects and build a count of suits
        for daily_tt in daily_tt_list:
            # get daily_tt date
            daily_date = daily_tt[self.REPORT_TERM_DATE]
            if self.REPORT_TERM_SUIT in daily_tt:
                if daily_tt[self.REPORT_TERM_SUIT] in suit_count:
                    suit_count[daily_tt[self.REPORT_TERM_SUIT]] += 1
                else:
                    suit_count[daily_tt[self.REPORT_TERM_SUIT]] = 1
        # prep data set for the pygal chart
        chart = pygal.Bar()
        chart.title = chart_title
        for suit in suit_count:
            chart.add(suit, suit_count[suit])
        path_to_chart = 'suit_pie_chart.svg'
        chart.render_to_file(path_to_chart)
        return path_to_chart

    # returns a reference to a pie chart of reverses v upright in svg format
    def get_reverse_chart(self, config):
        path_to_chart = ''
        daily_tt_data = self.get_report_data()
        key_rev = 'reversed'
        key_up = 'upright'
        reversed_count = {key_rev: 0, key_up: 0}
        chart_title = "Number of each suit for entire date range of the data set"
        if self.CONFIG_START_DATE in config:
            daily_tt_data = self.filter_by_date(config[self.CONFIG_START_DATE],
                                                config[self.CONFIG_END_DATE],
                                                daily_tt_data)
            chart_title = "Number of each suit for the date range of {} -- {}".format(
                config[self.CONFIG_START_DATE], config[self.CONFIG_END_DATE])
        # loop the objects and build a count of reverses
        for daily_tt_key in daily_tt_data:
            testme = daily_tt_data[daily_tt_key]
            if self.REPORT_TERM_REVERSE in testme:
                if testme[self.REPORT_TERM_REVERSE] == 't':
                    reversed_count[key_rev] += 1
                else:
                    reversed_count[key_up] += 1
        # prep data set for the pygal chart
        chart = pygal.Pie()
        chart.title = chart_title
        for card_align in reversed_count:
            chart.add(card_align, reversed_count[card_align])
        path_to_chart = 'reversed_chart.svg'
        chart.render_to_file(path_to_chart)
        return path_to_chart

    # x is date
    # y is rank
    # group by suit
    def get_suit_rank_date_chart(self, config):
        path_to_chart = 'suit_rank_date_chart'
        daily_tt_data = self.get_report_data()
        chart_title = "Suit/Rank Clustering by Date"
        if self.CONFIG_START_DATE in config:
            daily_tt_data = self.filter_by_date(config[self.CONFIG_START_DATE],
                                                config[self.CONFIG_END_DATE],
                                                daily_tt_data)
            chart_title = "Suit and Rank Plot for the Date Range of {} -- {}".format(
                config[self.CONFIG_START_DATE], config[self.CONFIG_END_DATE])
        # each suit is a key in a dict, each val is a list containing items like:
        # (datetime(2013, 1, 2), 300)
        # i.e. ((yyyy, m, d), rank)
        chart_data_set = {}
        for dtt in daily_tt_data:
            data = daily_tt_data[dtt]
            (month, day, year) = data['date'].split("/")
            # datetime and card obj
            import datetime
            dailytt_date = datetime.date(int(year), int(month), int(day))
            card = Card()
            try:
                card.suit = data[self.REPORT_TERM_SUIT]
                card.rank = int(data[self.REPORT_TERM_RANK])
            except KeyError as kex:
                print('caught bad key {} for data {} with id {}'.format(kex, data, dtt))
            dt = Daily_TT(dailytt_date, card)
            chart_keys = chart_data_set.keys()
            if card.suit in chart_keys:
                chart_data_set[card.suit].append((dt.date, dt.card.rank))
            else:
                chart_data_set[card.suit] = [(dt.date, dt.card.rank)]
        logging.info('suits by date data looks like: {}'.format(str(chart_data_set)))
        return self.get_date_xy_chart(data=chart_data_set)

    def get_date_xy_chart(self, **kwargs):
        default_data = {}
        default_path = 'date_xy_chart.svg'
        data = kwargs.pop('data', default_data)
        path = kwargs.pop('path', default_path)
        datetimechart = pygal.DateTimeLine(stroke=False, dots_size=10)
        for category in data:
            datetimechart.add(category, data[category])
        datetimechart.render_to_file(path)
        return path

    def get_pie_chart(self, config):
        pass

    def get_bar_chart(self):
        pass

    def get_line_chart(self):
        pass

    # takes a report class object and generates a pdf file
    def get_pdf(self, report):
        pass

    def get_html(self, report):
        doc, tag, text = Doc().tagtext()
        doc.asis('<!DOCTYPE html>')
        with tag('html'):
            with tag('body'):
                with tag('h1'):
                    text(report.title)
                # loop report sections and output
                for section in report.sections:
                    if section.title:
                        with tag('h4'):
                            text(section.title)
                    if section.media:
                        with tag('figure'):
                            doc.stag('embed', type='image/svg+xml', src=section.media)
                    if section.comments:
                        with tag('div'):
                            for comment in section.comments:
                                text(comment)
                                doc.stag('br')
        return doc.getvalue()

    # returns a report object that can be rendered in various formats
    def get_report(self, config={}):
        report = Report()
        report.title = 'no config or report type requested'
        if self.CONFIG_REPORT_TYPE in config:
            report_section = {}
            if config[self.CONFIG_REPORT_TYPE] == self.REPORT_TERM_SUIT:
                report.title = self.REPORT_TITLE_SUITS
                chart = self.get_suit_chart(config)
                report_section['comments'] = ['Distribution of suits.']
                report_section['media'] = chart
                report.make_section(report_section)
                # return {'message': message}
            elif config[self.CONFIG_REPORT_TYPE] == self.REPORT_TERM_REVERSE:
                report.title = self.REPORT_TITLE_REVERSE
                rev_chart = self.get_reverse_chart(config)
                report_section['comments'] = [
                    'Distribution of reverse v upright cards.'
                ]
                report_section['media'] = rev_chart
                report.make_section(report_section)
            elif config[self.CONFIG_REPORT_TYPE] == self.REPORT_TERM_SUIT_RANK_DATE:
                report.title = self.REPORT_TITLE_SUIT_RANK_DATE
                srd_chart = self.get_suit_rank_date_chart(config)
                report_section['comments'] = [
                    'Distribution of suits over time.'
                ]
                report_section['media'] = srd_chart
                report.make_section(report_section)
            else:
                report.title = ('report type ' + config[self.CONFIG_REPORT_TYPE] +
                                ' not found')
        return report
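# filter_by_date above hand-parses mm/dd/yyyy strings. For contrast, a hedged
# sketch of the same parsing with the standard library's datetime.strptime,
# which raises a clearer ValueError on malformed input (helper name is
# illustrative, not from the class above):
from datetime import datetime

def parse_mdy(date_str):
    # "01/02/2013" -> datetime.date(2013, 1, 2)
    return datetime.strptime(date_str, '%m/%d/%Y').date()

assert parse_mdy('01/02/2013').year == 2013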
def addFeatures(self, Data1, Data2):
    '''Function to concatenate two Data arrays.
    Data1: first Data
    Data2: second Data
    '''
    newData = np.column_stack((Data1, Data2))
    return newData


if __name__ == "__main__":
    if len(argv) == 1:
        # Use the default input and output directories if no arguments are provided
        input_dir = "../public_data"
        output_dir = "../res"
    else:
        input_dir = argv[1]
        output_dir = argv[2]

    basename = 'movierec'
    D = data_manager.DataManager(basename, input_dir)  # Load data
    print("*** Original data ***")
    print(D)
    d1 = D.data['X_train']
    # Here show something that proves that the preprocessing worked fine
    print("*** Transformed data ***")
    # print(d2)
def __init__(self):
    self.dm = data_manager.DataManager()
"""Asks the user to input an email address and returns it as a STR.""" while True: email = input("> ") # make sure something was entered if email == "": print("Please enter an email address.") # check that it looks like an email address # just a simple validation, without using regular expressions elif email.find("@") == -1 or email.find(".") == -1: print("Please enter a valid email address.") else: return email # initialization prices = dm.DataManager("prices") # using a different object to handle users users = dm.DataManager("users") flight_search = fs.FlightSearch() notification_manager = nm.NotificationManager() # load the worksheets prices_sheet = prices.get_sheet() users_sheet = users.get_sheet() # just for some basic interaction print("Enter \"y\" if you want to add a new user.") choice = input("> ").lower() if choice == "y": add_new_user()
def main():
    parser = argparse.ArgumentParser(description='Websites Dynamic Rating')
    parser.add_argument('-b', '--basepath',
                        help='The base path to store the data set',
                        type=str, required=False,
                        default=str(os.path.join(os.path.expanduser('~'), 'dataset')),
                        dest='basepath')
    parser.add_argument('-i', '--init', action='store_true', default=False,
                        dest='init', help='initial environment')
    parser.add_argument('-u', '--pages', action='store_true', default=False,
                        dest='pages', help='prepare pages by url')
    parser.add_argument('-d', '--dataset', action='store_true', default=False,
                        dest='dataset', help='prepare dataset')
    parser.add_argument('-P', '--prepareall', action='store_true', default=False,
                        dest='prepareall',
                        help='initial, prepare pages, prepare dataset')
    parser.add_argument('-t', '--train',
                        help='The training method to use',
                        type=str, required=False, default='SGD', dest='train')
    parser.add_argument('-T', '--trainall', action='store_true', default=False,
                        dest='trainall', help='train all data with all methods')
    parser.add_argument('-A', '--all', action='store_true', default=False,
                        dest='all', help='do all')
    args = parser.parse_args()

    dm = data_manager.DataManager(base_path=args.basepath)
    if args.init or args.prepareall or args.all:
        logger.info("Initial Environment")
        dm.init_environment()
    if args.pages or args.prepareall or args.all:
        logger.info("preparing websites pages by url")
        dm.prepare_websites_pages_by_url_files()
    if args.dataset or args.prepareall or args.all:
        logger.info("preparing data set")
        dm.prepare_train_data()
        dm.prepare_valid_data()
        dm.load_categories_names()

    wrm = website_rating_model.WebsiteRatingModel(dm.train_path, dm.valid_path)
    wrm.load_data()
    if args.train.upper() == 'SGD' or args.trainall or args.all:
        logger.info('training with SGD')
        wrm.SGDClassifier_train_model()
    if args.train.upper() == 'MNB' or args.trainall or args.all:
        logger.info('training with MNB')
        wrm.MultinomialNB_train_model()
    if args.train.upper() == 'BNB' or args.trainall or args.all:
        logger.info('training with BNB')
        wrm.BernoulliNB_train_model()
    if args.train.upper() == 'SVC' or args.trainall or args.all:
        logger.info('training with SVC')
        wrm.SVC_train_model()
    if args.train.upper() == 'DTC' or args.trainall or args.all:
        logger.info('training with DTC')
        wrm.DecisionTreeClassifier_train_model()
def __init__(self, timeout=None, learning_rate=None, momentum=None,
             vc_min=None, nb_epoch=None, train_batch_size=None,
             vc_batch_size=None, test_batch_size=None):
    # Set logging config
    logging.basicConfig(stream=sys.stderr,
                        level=logging.INFO)  # DEBUG to debug, INFO to turn off
    self.logger = logging.getLogger(__name__)
    self.data_manager = data_manager.DataManager()

    # Loading trainer_mlp config file
    with open(os.path.join(self.data_manager.paths["abs_config_path"],
                           "trainer_mlp.yaml"), "r") as stream:
        self.config = list(yaml.load_all(stream))

    # Hyperparameters from config file
    for index, param in enumerate(self.config):
        if "timeout" in param:
            self.timeout = self.config[index]["timeout"]
            self.logger.info("Timeout set to: %f", self.timeout)
        else:
            self.logger.error("'timeout' parameter not found")
        if "learning_rate" in param:
            self.learning_rate = self.config[index]["learning_rate"]
            self.logger.info("learning_rate set to: %f", self.learning_rate)
        else:
            self.logger.error("'learning_rate' parameter not found")
        if "momentum" in param:
            self.momentum = self.config[index]["momentum"]
            self.logger.info("momentum set to: %f", self.momentum)
        else:
            self.logger.error("'momentum' parameter not found")
        if "train_batch_size" in param:
            self.train_batch_size = self.config[index]["train_batch_size"]
            self.logger.info("train_batch_size set to: %f", self.train_batch_size)
        else:
            self.logger.error("'train_batch_size' parameter not found")
        if "vc_batch_size" in param:
            self.vc_batch_size = self.config[index]["vc_batch_size"]
            self.logger.info("vc_batch_size set to: %f", self.vc_batch_size)
        else:
            self.logger.error("'vc_batch_size' parameter not found")
        if "test_batch_size" in param:
            self.test_batch_size = self.config[index]["test_batch_size"]
            self.logger.info("test_batch_size set to: %f", self.test_batch_size)
        else:
            self.logger.error("'test_batch_size' parameter not found")
        if "nb_epoch" in param:
            self.nb_epoch = self.config[index]["nb_epoch"]
            self.logger.info("nb_epoch set to: %f", self.nb_epoch)
        else:
            self.logger.error("'nb_epoch' parameter not found")
        if "vc_min" in param:
            self.vc_min = self.config[index]["vc_min"]
            self.logger.info("vc_min set to: %f", self.vc_min)
        else:
            self.logger.error("'vc_min' parameter not found")

    # Loading MLP config file
    with open(os.path.join(self.data_manager.paths["abs_config_path"],
                           "mlp.yaml"), "r") as stream:
        self.config = yaml.load(stream)

    # MLP Hyperparameters from config file
    for index, param in enumerate(self.config):
        if "nb_layer" in param:
            self.nb_layer = self.config[index]["nb_layer"]
            self.logger.info("nb_layer set to: %f", self.nb_layer)
        else:
            self.logger.error("'nb_layer' parameter not found")
        if "nb_input" in param:
            self.nb_input = self.config[index]["nb_input"]
            self.logger.info("nb_input set to: %f", self.nb_input)
        else:
            self.logger.error("'nb_input' parameter not found")
        if "nb_hidden" in param:
            self.nb_hidden = self.config[index]["nb_hidden"]
            self.logger.info("nb_hidden set to: %f", self.nb_hidden)
        else:
            self.logger.error("'nb_hidden' parameter not found")
        if "nb_output" in param:
            self.nb_output = self.config[index]["nb_output"]
            self.logger.info("nb_output set to: %f", self.nb_output)
        else:
            self.logger.error("'nb_output' parameter not found")
        if "activation" in param:
            self.activation = self.config[index]["activation"]
            self.logger.info("activation set to: %s", str(self.activation))
        else:
            self.logger.error("'activation' parameter not found")
        if "filter" in param:
            self.filter = self.config[index]["filter"]
            self.logger.info("filter set to: %s", str(self.filter))
        else:
            self.logger.error("'filter' parameter not found")

    # Variables
    self.data_tree = None
    self.batchs = []
    self.Ys = []
    self.E = []
    self.dW = []
    self.vc_pourcents = []
    self.test_pourcents = []
future_date = get_date(today, days=SIX_MONTHS_IN_DAYS)
flight_params = {
    'apikey': fs.API_KEY,
    'fly_from': DEPARTURE_IATA,
    'date_from': tomorrow,
    'date_to': future_date,
    'nights_in_dst_from': 7,
    'nights_in_dst_to': 28,
    'flight_type': 'round',
    'one_for_city': 1,
    'adults': 1,
    'curr': 'GBP'
}

data_manager = dm.DataManager(dm.ENDPOINT, dm.SHEET, dm.TOKEN)
for row in data_manager.data['prices']:
    if len(row['iataCode']) != 0:
        continue
    city = row['city']
    params = {'apikey': fs.API_KEY, 'location_types': 'city', 'term': city}
    flight_search = fs.FlightSearch(fs.API_LOCATIONS, fs.ENDPOINT, params)
    row['iataCode'] = flight_search.search_locations('name', city, 'code')
    data_manager.put_data(row)

iata_codes = [row['iataCode'] for row in data_manager.data['prices']]
notification_manager = nm.NotificationManager(nm.EMAIL, nm.PASSWORD)
for idx, iata_code in enumerate(iata_codes):
    flight_params.update(fly_to=iata_code, max_stopovers=0)
    flight_search = fs.FlightSearch(fs.API_SEARCH, fs.ENDPOINT, flight_params)
# This file will need to use the DataManager, FlightSearch, FlightData,
# NotificationManager classes to achieve the program requirements.
import data_manager
import flight_data
import flight_search
import json

dm = data_manager.DataManager()
data = dm.get_data()
print(data)

fs = flight_search.FlightSearch(max_stay=28, min_stay=7, currency="AUD")
for d in data:
    sample = fs.get_flight(from_city="MEL",
                           to_city=d['iataCode'],
                           lowest_price=d['lowestPrice'])
def new_data_manager(self):
    import data_manager
    data_manager = data_manager.DataManager()
    log.debug('loaded data manager {!r}'.format(data_manager))
    return data_manager
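# Rebinding the name data_manager from module to instance works above because
# the module is only needed once, but it reads oddly. A sketch of the same
# factory without the shadowing (alias and local name are illustrative):
def new_data_manager(self):
    import data_manager as dm_module  # local import keeps startup cheap
    manager = dm_module.DataManager()
    log.debug('loaded data manager {!r}'.format(manager))
    return manager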