def update_job_status(self, job_id):
    """Updates a spark job's status based on information from getJobInfo()."""
    v = self.jobs[job_id]
    jobinfo = self.status.getJobInfo(v["spark_id"])
    done_string = 'false' if jobinfo.status == 'RUNNING' else 'true'
    total_total = v["chunk_size"] * v["chunks"]
    if v["finished"] == v["started"] and done_string == "true":
        v["chunks_done"] += 1
        if v["chunks_done"] == v["chunks"]:
            v["finished"] = datetime.now()
        total_done = v["chunk_size"] * v["chunks_done"]
    else:
        (nTasks, nActive, nComplete) = (0, 0, 0)
        for sid in jobinfo.stageIds:
            stageinfo = self.status.getStageInfo(sid)
            if stageinfo:
                nTasks += stageinfo.numTasks
                nActive += stageinfo.numActiveTasks
                nComplete += stageinfo.numCompletedTasks
        if v["chunks"] > 0 and v["chunk_size"] == 0:
            v["chunk_size"] = nTasks
        total_done = v["chunk_size"] * v["chunks_done"] + nComplete
    total_done = min(total_done, total_total)
    my_print("Setting job totals: %d %d %d %d %d" %
             (v["chunk_size"], v["chunks"], v["chunks_done"],
              total_total, total_done))
    self.db.query('''
        UPDATE jobs
        SET tasks_done=%d, tasks_total=%d, status='%s', done=%s, finish='%s'
        WHERE id=%d
    ''' % (total_done, total_total, jobinfo.status, done_string,
           str(self.jobs[job_id]["finished"]), job_id))
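
# The UPDATE above builds SQL via Python string formatting, which breaks on
# quotes in values and is injection-prone. A minimal sketch of the same
# statement with driver-side parameter binding, assuming a DB-API cursor in
# the psycopg2/MySQLdb style (hypothetical; the self.db.query wrapper above
# may not accept placeholders):
def update_job_row(cursor, job_id, tasks_done, tasks_total, status, done, finished):
    # %s placeholders are filled by the driver, not by Python's % operator
    cursor.execute(
        "UPDATE jobs SET tasks_done=%s, tasks_total=%s, status=%s, "
        "done=%s, finish=%s WHERE id=%s",
        (tasks_done, tasks_total, status, done, str(finished), job_id))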
def get_webpage_content(self, url):
    # return str html
    try:
        req = urllib.request.Request(url)
        response = urllib.request.urlopen(req)
        webpage_str = response.read()
        # note: str() on the bytes yields their repr ("b'...'"); the
        # extraction regexes downstream appear to be written against that form
        return str(webpage_str)
    except Exception:
        my_print("error reading:" + url)
        return None
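
# If plain text is wanted instead of the bytes repr, decoding is the usual
# route; a sketch (the extraction regexes elsewhere in this module would
# then need their literal-backslash character classes adjusted):
def get_webpage_text(url):
    import urllib.request
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8', errors='replace')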
def update_all_jobs_callback(self):
    """For each job, checks whether its status has changed."""
    try:
        my_print("updating spark jobs status...")
        for job_id, v in self.jobs.items():  # iteritems() is Python 2 only
            if v["finished"] == v["started"]:
                self.update_job_status(job_id)
    finally:
        # re-arm the timer so the check runs again in five seconds
        tornado.ioloop.IOLoop.instance().add_timeout(
            timedelta(seconds=5), self.update_all_jobs_callback)
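
# Tornado also ships a PeriodicCallback that re-arms itself, which could
# replace the manual add_timeout re-scheduling in the finally block above.
# A sketch (interval is in milliseconds; if used, the finally block should
# be dropped so the callback is not scheduled twice):
def schedule_job_updates(torn_app, interval_ms=5000):
    cb = tornado.ioloop.PeriodicCallback(
        torn_app.update_all_jobs_callback, interval_ms)
    cb.start()
    return cb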
def get_acc(param):
    if len(data.training_input) < config.training_segment:
        print_error("training segment > data number")
        return False
    d = 0.0
    # Run all seq between [training_segment, training_input_len)
    for i in range(config.training_segment, data.training_input_len + 1,
                   RNN.config.batch_num):
        c = data.training_input_sequential_selection(RNN.config.batch_num, i)
        d += RNN.train.cost(c[0], c[1])
        # print(RNN.train.dimshuffle(c[0]))
    my_print("Cost", d)
def train(self, X, Y, ordered_topics, balance='p', max_per_class=100):
    topic = ''
    for k in range(self.K):  # xrange() is Python 2 only
        topic = ordered_topics[k]
        Y_k = Y[:, k:k+1]  # labels for the k-th topic only
        self.svms[k].train(X, Y_k, balance=balance,
                           max_per_class=max_per_class, topic=topic)
        util.my_print('%d . . .' % (k + 1), same_line=True)
def get_lastest_page_info(self, latest_webpage_str):
    my_print("get page info")
    date = re.findall(r"Date:\s+(.*),\s+2020<br/>", latest_webpage_str)
    positive_cases = re.findall(r"([,\d]+)[\\\d\w\s]+Positive cases",
                                latest_webpage_str)
    death = re.findall(r"([,\d]+)[\\\d\w\s]+Deaths", latest_webpage_str)
    pending = re.findall(
        r" least ([\+,\d]+) results have been received and another "
        r"([\+,\d]+) are pending", latest_webpage_str)
    return {
        'date': date[0] if len(date) > 0 else None,
        'positive_cases': positive_cases[0] if len(positive_cases) > 0 else None,
        'death': death[0] if len(death) > 0 else None,
        'results received': pending[0][0] if len(pending) else None,
        'results pending': pending[0][1] if len(pending) else None,
    }
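
# The per-field findall calls above can be table-driven; a sketch that keeps
# each key next to its pattern (regexes copied verbatim from above, same
# assumptions about the scraped page text):
FIELD_PATTERNS = {
    'date': r"Date:\s+(.*),\s+2020<br/>",
    'positive_cases': r"([,\d]+)[\\\d\w\s]+Positive cases",
    'death': r"([,\d]+)[\\\d\w\s]+Deaths",
}

def extract_fields(page_str):
    out = {}
    for key, pattern in FIELD_PATTERNS.items():
        m = re.search(pattern, page_str)
        # first capture group of the first match, or None if absent
        out[key] = m.group(1) if m else None
    return out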
def main(): """Creates tornado application, handles keyboard interrupts (to release the http socket).""" global args, config parser = argparse.ArgumentParser(description='IMS webserver.') parser.add_argument('--config', dest='config', type=str, help='config file name') parser.add_argument('--port', dest='port', type=int, help='port on which to access the web server') parser.add_argument('--profile', dest='time_profiling_enabled', action='store_true') parser.add_argument('--use-deprecated', dest='use_deprecated', action='store_true') parser.add_argument('--debug', dest='debug', action='store_true') parser.set_defaults(spark=False, config='config.json', port=8080, time_profiling_enabled=False, use_deprecated=False) args = parser.parse_args() # handlers.args = args with open(args.config) as f: config = json.load(f) port = args.port torn_app = Application(args.debug) http_server = tornado.httpserver.HTTPServer(torn_app) http_server.listen(port) my_print("Starting server, listening to port %d..." % port) try: # set periodic updates if args.spark: tornado.ioloop.IOLoop.instance().add_timeout( timedelta(seconds=5), torn_app.update_all_jobs_callback) # start loop tornado.ioloop.IOLoop.instance().start() except KeyboardInterrupt: my_print('^C received, shutting down server') if args.spark: torn_app.sc.stop() http_server.stop()
def __init__(self, args, qry_file, printDet=False):
    (self.queries, self.thres_10, self.thres_100, self.thres_1000,
     self.qids, self.qterms, _) = read_queries_and_thres(qry_file, 0, printDet)
    self.tensor_queries = create_tensors_from_np(self.queries)
    self.qlens = [len(qt) for qt in self.qterms]
    if args.k == 10:
        self.tensor_thres = create_thresholds(self.thres_10)
    if args.k == 100:
        self.tensor_thres = create_thresholds(self.thres_100)
    if args.k == 1000:
        self.tensor_thres = create_thresholds(self.thres_1000)
    my_print("dataset statistics:", qry_file)
    my_print("\tqueries =", len(self.queries))
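
# create_thresholds() and create_tensors_from_np() are defined elsewhere. A
# plausible minimal version of the former, assuming the thresholds arrive as
# a list or numpy array of per-query floats (an assumption about the output
# of read_queries_and_thres):
import torch

def create_thresholds_sketch(thres):
    # one float threshold per query, shaped (num_queries, 1)
    return torch.tensor(thres, dtype=torch.float).view(-1, 1)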
def main():
    scrapper = Scrapper()
    # find all occurrences of /Programs/OPA/Pages/NR.*.aspx
    all_urls = re.findall(r"(/Programs/OPA/Pages/NR20-\d+.aspx)",
                          scrapper.get_webpage_content(main_page_url))
    all_urls = sorted(all_urls, key=cmp_to_key(scrapper.get_latest_date))
    # get latest page content
    latest = all_urls[-3]
    latest_webpage_str = scrapper.get_webpage_content(website_url + latest)
    dedup_url = {}
    for url in all_urls[-10:]:
        if url not in dedup_url:
            dedup_url[url] = True
            my_print(url)
            content = scrapper.get_webpage_content(website_url + url)
            if content:
                # print(content)
                print(scrapper.get_lastest_page_info(content))
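
# get_latest_date() is an old-style comparator consumed via cmp_to_key();
# its body is not shown here. A plausible sketch, assuming the NR20-<n>
# release number grows with publication date (an assumption):
def compare_release_urls(url_a, url_b):
    def release_number(url):
        return int(re.search(r"NR20-(\d+)", url).group(1))
    return release_number(url_a) - release_number(url_b)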
def train(model, epoch, data, quant):
    dataloader = DataLoader(data, batch_size=args.batch_size, shuffle=True)
    quantiles = torch.tensor([[quant]]).view(1, -1).float().to(args.device)
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=lr)  # lr comes from module scope
    # len(dataloader) counts batches, not individual instances
    my_print("epoch {} start training {} batches with lr {}".format(
        epoch, len(dataloader), lr))
    with tqdm(total=len(dataloader), unit='batches', desc='train') as pbar:
        losses = []
        for batch_num, batch in enumerate(dataloader):
            optim.zero_grad()
            queries, thres = batch
            scores = model(queries.to(args.device))
            loss = quantile_loss(scores, thres.to(args.device), quantiles)
            losses.append(loss.item())
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optim.step()
            # show a running mean over the most recent batches
            pbar.set_postfix(loss=np.mean(losses[-128:]))
            pbar.update(1)
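
# quantile_loss() is defined elsewhere; the standard pinball (quantile) loss
# it presumably implements looks like this, as a sketch (assumes predictions,
# targets and the (1, num_quantiles) tensor broadcast together):
def pinball_loss(pred, target, quantiles):
    # under-predictions are weighted by q, over-predictions by (1 - q)
    errors = target - pred
    return torch.max(quantiles * errors, (quantiles - 1) * errors).mean()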
                    help='quantiles', type=float, required=True)
parser.add_argument('--device', default="cpu", type=str, required=False,
                    help='compute device')
parser.add_argument('--debug', default=False, dest='debug',
                    action='store_true')
args = parser.parse_args()

init_log(args)
torch.set_num_threads(hyperparams.default_threads)

my_print("Parameters:")
for k, v in sorted(vars(args).items()):
    my_print("\t{0}: {1}".format(k, v))

# Set the random seed manually for reproducibility.
torch.manual_seed(hyperparams.random_seed)

###############################################################################
# Load data
###############################################################################
if torch.cuda.is_available():
    if args.device != "cpu":
        # expects the "cuda:N" form; a bare "cuda" would make split(":")[1] fail
        torch.cuda.set_device(int(args.device.split(":")[1]))
        args.device = torch.device(args.device)
    else:
        args.device = torch.device('cuda')
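
# A slightly more defensive variant of the device selection above, as a
# sketch (accepts a bare "cuda" as well as "cuda:N", and honours an explicit
# "cpu" choice even when a GPU is available):
def resolve_device(name):
    if name.startswith("cuda") and torch.cuda.is_available():
        return torch.device(name)
    return torch.device("cpu")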
def print_status(param):
    if len(param) == 0 or param[0] == "testbench":
        print("========== TestBench Status ==========")
        my_print("Train input file", tc.train_input_file)
        my_print("Train answer file", tc.train_answer_file)
        my_print("Test input file", tc.test_input_file)
        my_print("Test output file", tc.test_output_file)
    if len(param) == 0 or param[0] == "rnn":
        print("========== RNN Status ================")
        my_print("Input dimension", rc.input_dim)
        my_print("Output dimension", rc.output_dim)
        my_print("Hidden layer list", rc.hidden_layer_dim_list)
        my_print("Batch size", rc.batch_num)
        my_print("Learning rate", rc.learning_rate)
    print("======================================")