def start(self):
    # Decide how many parallel processes to use
    available_cpus = multiprocessing.cpu_count()
    if self.Nprocesses == 'max':
        self.Nprocesses = available_cpus
    elif self.Nprocesses > available_cpus:
        self.Nprocesses = available_cpus

    # Initialize lists keeping track of read/write pipes
    pipes_end_host = [None] * self.Nprocesses
    pipes_end_worker = [None] * self.Nprocesses
    processes = [None] * self.Nprocesses

    # Start multiple processes
    for i in range(self.Nprocesses):
        pipes_end_host[i], pipes_end_worker[i] = multiprocessing.Pipe()
        processes[i] = multiprocessing.Process(target=_worker_call,
                                               args=(self.loopfunc, pipes_end_worker[i]))
        processes[i].start()

    # Variables to keep track of jobs started/done
    Njobs_done = 0
    Njobs_started = 0

    # Send initial jobs
    for r in pipes_end_host:
        r.send(self.next())
        Njobs_started += 1

    # Some parameters for diagnostics
    t_start = time.time()
    message = 'Datarate %.2f Hz; job %i/%i; process'

    # This is the main loop, it waits for new jobs and sends the input
    while Njobs_done < self.Njobs:
        for r in pipes_end_host:
            if r.poll():
                result = r.recv()
                if Njobs_started < self.Njobs:
                    r.send(self.next())
                    Njobs_started += 1
                self.save(result)
                Njobs_done += 1
                # Give some feedback to the command line once in a while
                if ((Njobs_done + 1) % self.framerate_update) == 0:
                    progress = float(Njobs_done) / self.Njobs
                    datarate = (Njobs_done + 1) / (time.time() - t_start)
                    utils.progressbar(progress,
                                      message % (datarate, Njobs_done + 1, self.Njobs),
                                      t_start)

    # Close all processes
    for i in range(self.Nprocesses):
        pipes_end_host[i].send('fika')
        processes[i].join()
        pipes_end_host[i].close()
def new(data_path: str) -> Index:
    """
    Construct new search Index.
    Read all files that match pattern *.xml from provided data_path
    and pass them to Index instance.
    @params:
        data_path - Required: path in the filesystem with data files
    """
    print("Reading data files ...")
    docs = {}
    listdir = os.listdir(data_path)
    total = len(listdir)
    for i, path in enumerate(listdir, start=1):
        if path.endswith('.xml'):
            doc_id = path.split(".")[0]
            full_path = os.path.join(data_path, path)
            with codecs.open(full_path, "r", encoding='utf-8', errors='ignore') as f:
                data = f.read()
            docs[doc_id] = (path, data)
        if not i % 100 or i == total:
            progressbar(i, total)
    print("Reading data files done.")
    return Index(docs=docs)
def __call__(self, url_req, segments=2):
    url_size = self.flitter.get_url_size(url_req)
    ranges = self.split_segment(url_size, segments)
    output = self.flitter.get_url_file_name(url_req)
    filenames = ["%s_tmp_%d.pfb" % (output, i) for i in range(segments)]

    # Start one download thread per segment
    tasks = []
    for i in range(segments):
        task = SegmentingThread(self._opener, url_req, filenames[i], ranges[i])
        task.start()
        tasks.append(task)
        time.sleep(0.5)

    # Report progress until all segment threads have finished
    while self._islive(tasks):
        fetched = sum(task.fetched for task in tasks)
        utils.progressbar(url_size, fetched)

    # Concatenate the segment files into the final output file
    with open(output, 'wb+') as fileobj:
        for name in filenames:
            with open(name, 'rb') as f:
                shutil.copyfileobj(f, fileobj)
            os.remove(name)

    finished_size = os.path.getsize(output)
    if abs(url_size - finished_size) <= 10:
        utils.progressbar(url_size, finished_size, 100)
def do_epoch(sess, model, iterator, mode, epoch):
    total_steps = len(iterator)
    total_loss = np.float64(0.0)
    gt_next_state, au_next_state = None, None
    prefix_str = "{epoch:03d}/{epochs} {mode: <10} |{avrg_loss:.4f}| "
    records = {}
    for step, batch in enumerate(iterator):
        imfiles, commands = batch
        feed_list = [imfiles, commands, args.sequence_length,
                     args.batch_size, args.keep_prob]
        feed_dict = dict(zip(model.placeholders, feed_list))
        if gt_next_state is not None:
            feed_dict.update({model.ground_truth_prev_state: gt_next_state})
        if au_next_state is not None:
            feed_dict.update({model.autoregressive_prev_state: au_next_state})

        if mode == "Training":
            au_next_state, gt_next_state, loss, _ = sess.run(
                [model.autoregressive_next_state, model.ground_truth_next_state,
                 model.loss, model.optimization],
                feed_dict=feed_dict)
        elif mode == "Validation":
            # Keep probability is 1.0 for validation
            feed_dict.update({model.placeholders[-1]: 1.0})
            au_next_state, loss, predictions = sess.run(
                [model.autoregressive_next_state, model.loss, model.predictions],
                feed_dict=feed_dict)
            imfiles = imfiles[:, args.lookback_length:].flatten()
            shape = commands.shape
            commands = np.reshape(commands, (shape[0] * shape[1], -1))
            predictions = np.reshape(predictions, commands.shape)
            record = np.stack([commands, predictions,
                               np.square(commands - predictions)])
            for index, imfile in enumerate(imfiles):
                records[imfile] = record[:, index]

        total_loss += loss
        prefix = prefix_str.format(epoch=epoch, epochs=args.num_epochs,
                                   mode=mode, avrg_loss=total_loss / (step + 1))
        utils.progressbar(total_steps, step + 1, prefix)

    avrg_loss = total_loss / total_steps
    return (avrg_loss, records) if mode == "Validation" else avrg_loss
def Newey_West_by_time(self, q=2, tao=252):
    '''
    Compute the factor covariance matrix at each point in time and apply
    the Newey-West adjustment.
    q: assume factor returns follow an MA(q) process
    tao: half-life used when computing the covariance
    '''
    if self.factor_ret is None:
        raise Exception('please run reg_by_time to get factor returns first')
    Newey_West_cov = []
    print('\n\n===================== Newey-West adjustment at each point in time =====================')
    for t in range(1, self.T + 1):
        try:
            Newey_West_cov.append(Newey_West(self.factor_ret[:t], q, tao))
        except Exception:
            Newey_West_cov.append(pd.DataFrame())
        progressbar(t, self.T, ' date: ' + str(self.sorted_dates[t - 1])[:10])
    self.Newey_West_cov = Newey_West_cov
    return Newey_West_cov
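# A minimal sketch of the plain Newey-West covariance adjustment assumed by the
# method above; the project's own `Newey_West` helper (including the `tao`
# half-life weighting) is not shown here, so this only illustrates the MA(q)
# correction with Bartlett weights.
import numpy as np
import pandas as pd

def newey_west_cov_sketch(factor_ret: pd.DataFrame, q: int = 2) -> pd.DataFrame:
    X = factor_ret.values - factor_ret.values.mean(axis=0)  # demeaned factor returns, shape (T, K)
    T = len(X)
    cov = X.T @ X / T                                        # lag-0 covariance
    for lag in range(1, q + 1):
        gamma = X[lag:].T @ X[:-lag] / T                     # lag-l autocovariance
        cov += (1 - lag / (q + 1)) * (gamma + gamma.T)       # Bartlett kernel weight
    return pd.DataFrame(cov, index=factor_ret.columns, columns=factor_ret.columns)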
def update(self, observes, actions, advantages, use_lr_adjust, ada_kl_penalty):
    """
    Update policy based on observations, actions and advantages

    Args:
        observes: observations, shape = (N, obs_dim)
        actions: actions, shape = (N, act_dim)
        advantages: advantages, shape = (N,)
        use_lr_adjust: whether to adapt the learning rate multiplier
        ada_kl_penalty: whether to adapt the KL penalty coefficient beta
    """
    feed_dict = {self.obs_ph: observes,
                 self.act_ph: actions,
                 self.advantages_ph: advantages,
                 self.beta_ph: self.beta,
                 self.eta_ph: self.eta,
                 self.lr_ph: self.lr * self.lr_multiplier,
                 self.lr_phi_ph: self.lr_phi}
    old_means_np, old_log_vars_np = self.sess.run([self.means, self.log_vars],
                                                  feed_dict)
    feed_dict[self.old_log_vars_ph] = old_log_vars_np
    feed_dict[self.old_means_ph] = old_means_np
    loss, kl, entropy = 0, 0, 0

    if self.c_ph == 1.:
        # Update phi function & policy network
        logger.log("Training Phi for %d epochs" % self.phi_epochs)
        for _ in progressbar(range(self.phi_epochs), "Train Phi:", 25):
            self.sess.run(self.phi_train_op, feed_dict)
        phi_loss = self.sess.run(self.phi_loss, feed_dict)
        logger.record_tabular("Phi_loss", phi_loss)

    # Training policy
    logger.log("Training Policy for %d epochs" % self.epochs)
    for _ in progressbar(range(self.epochs), "Train Policy", 25):
        self.sess.run(self.train_op, feed_dict)
        loss, kl, entropy = self.sess.run([self.loss, self.kl, self.entropy],
                                          feed_dict)
        if kl > self.kl_targ * 4:  # early stopping if D_KL diverges badly
            break

    if ada_kl_penalty:
        if kl > self.kl_targ * 2:  # servo beta to reach D_KL target
            self.beta = np.minimum(35, 1.5 * self.beta)  # max clip beta
            if use_lr_adjust:
                if self.beta > 30 and self.lr_multiplier > 0.1:
                    self.lr_multiplier /= 1.5
        elif kl < self.kl_targ / 2:
            self.beta = np.maximum(1 / 35, self.beta / 1.5)  # min clip beta
            if use_lr_adjust:
                if self.beta < (1 / 30) and self.lr_multiplier < 10:
                    self.lr_multiplier *= 1.5

    logger.record_dicts({
        'PolicyLoss': loss,
        'PolicyEntropy': entropy,
        'KL': kl,
        'Beta': self.beta,
        '_lr_multiplier': self.lr_multiplier})
def coco_tags(extractor):
    """ estimates tags from COCO captions

    Args:
        extractor (obj): Tag estimator
    Return:
        dict with all the info (see impl.)
    """
    res = {}
    for set_ in ('train2014', 'val2014'):
        res[set_] = {}
        coco_instances = COCO(join(settings.COCO_PATH,
                                   'annotations/instances_{}.json'.format(set_)))
        coco_captions = COCO(join(settings.COCO_PATH,
                                  'annotations/captions_{}.json'.format(set_)))

        print('generating tags for \'{}\' ...'.format(set_))
        for im in progressbar(coco_instances.getImgIds()):
            # load captions
            cpt_anns = coco_captions.loadAnns(coco_captions.getAnnIds(imgIds=im))
            cpt = [a['caption'] for a in cpt_anns]

            # load categories
            inst_anns = coco_instances.loadAnns(coco_instances.getAnnIds(imgIds=im))
            cat_ids = list(set([a['category_id'] for a in inst_anns]))
            cat = sorted([coco_instances.cats[i]['name'] for i in cat_ids])

            # tags and frequency counts
            tags, scores = extractor.process(cpt)

            res[set_][im] = {
                'file_name': coco_instances.imgs[im]['file_name'],
                'category_ids': cat_ids,
                'category_names': cat,
                'captions': cpt,
                'tags': tags,
                'scores': scores,
            }
        print('done')

    all_tags = []
    for imdata in res['train2014'].values():
        all_tags += [w for w in imdata['tags']]
    for imdata in res['val2014'].values():
        all_tags += [w for w in imdata['tags']]
    all_tags = list(set(all_tags))
    res['tags'] = all_tags
    return res
def wait(self, timeout, show_progress, **progressbar_args):
    iterator = self._wait(timeout)
    if show_progress:
        if progressbar_args.get('total') is not None:
            progressbar_args['total'] += 1
        iterator = progressbar(iterator, **progressbar_args)
    yield from iterator
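# A minimal consumption sketch for the generator above; `task` and `handle`
# are hypothetical stand-ins for the owning object and a per-event callback.
for event in task.wait(timeout=60, show_progress=True, total=10):
    handle(event)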
def create_thumbnail(size=(128, 128)):
    """
    Create a resized version of each downloaded image, saved next to it under
    the same name extended with _thumbnail.
    """
    images_list, json_files = _get_image_files_list()
    Image.MAX_IMAGE_PIXELS = None

    # print information
    print('Found {0} images in {1} files. Starting processing...'.format(
        len(images_list), len(json_files)))
    print('This may take a while.')

    # processing - this is where processing of an image happens
    for image in utils.progressbar(it=images_list, prefix='Processing '):
        image_id = image['id']
        image_path = pathlib.Path(f'data/images/{image_id}.jpg')
        if image_path.exists():
            # create thumbnail
            thumbnail = Image.open(image_path.absolute())
            thumbnail.thumbnail(size)
            # save thumbnail
            new_filename = image_path.parent.joinpath(
                '{0}_thumbnail{1}'.format(image_path.stem, image_path.suffix))
            thumbnail.convert('RGB').save(new_filename)

    # final
    print('Done!')
def filecmp(self, fileindices, condition):
    # self.reset()
    for indexno, fileindex in enumerate(fileindices[1:]):
        print(indexno, fileindex)
        filedata = data[fileindex]
        print(len(filedata))
        if len(self.kv) == 0:
            bytelist = list(enumerate(filedata))
        else:
            bytelist = [(i, b) for i, b in enumerate(filedata) if i in self.kv]
        for byteindex, byte in progressbar(bytelist, filepaths[fileindex] + ": ", 40):
            previousbyte = self.getByte(fileindices[indexno], byteindex)
            if previousbyte is None:  # delete
                continue
            if condition(byte, previousbyte):
                if fileindex == 0:
                    self.kv[byteindex].append(previousbyte)
                self.kv[byteindex].append(byte)
            else:
                if fileindex > 0:
                    if byteindex in self.kv:
                        del self.kv[byteindex]
        print(len(self.kv))
def export_groupby(self, source, destination):
    schema = source.schema.copy()
    pbar = progressbar(len(source))
    print("Collecting info for %s feature(s)..." % pbar.total)
    if self.dest_format == 'GeoJSON':
        self.ext = '.geojson'
    dico = []
    dico_field = self.get_dico_field(schema=schema)
    for ft in source:
        if ft['properties'][dico_field] not in dico:
            d = ft['properties'][dico_field][0:2]
            f = ft['properties'][dico_field]
            mkgroupdir(destination, d)
            self.export_file(source=None,
                             target=path.join(destination, d, f + self.ext),
                             schema=schema,
                             pbar=pbar,
                             group=True,
                             feature=ft,
                             dicolist=dico,
                             diconame=dico_field)
            dico.append(f)
    source.close()
    pbar.close()
    stdout.flush()
    print("Successfully exported %s feature(s)!" % pbar.total)
def wait_all_jobs(status: str) -> None:
    """Waits for Job status with progress bar"""
    curr = utils.query("SELECT * from jobs ORDER BY id")
    for row in utils.progressbar(curr):
        wait_single_job_status(job_id=row['id'], status=status)
def download_images(quality='regular'):
    """
    Downloads the images described in the saved metadata files.

    Parameters:
        quality : Options are raw | full | regular | small | thumb
                  For more information about quality, check the unsplash
                  documentation at
                  https://unsplash.com/documentation#example-image-use
    """
    images_list, json_files = _get_image_files_list()

    # print information
    print('Found {0} images in {1} files. Starting to download...'.format(
        len(images_list), len(json_files)))
    print('This may take a while.')

    # download images - this is where downloading happens
    for image in utils.progressbar(it=images_list, prefix='Downloading '):
        image_id = image['id']
        url_quality = image['urls'][quality]
        image_path = pathlib.Path(f'data/images/{image_id}.jpg')
        if not image_path.exists():
            response = requests.get(url_quality, stream=True)
            if response.status_code == 200:
                with open(image_path, 'wb') as f:
                    f.write(response.content)
def train_KD(train_loader, model, optimizer, scheduler, teacher_outs, device="cuda"):
    """
    Run one train epoch with knowledge distillation
    """
    # switch to train mode
    model.train()

    start_time = time.time()
    train_loss = 0
    total = 0
    correct = 0

    for i, (inputs, targets) in enumerate(progressbar(train_loader, prefix="Training")):
        inputs = inputs.to(device)
        targets = targets.to(device)
        if device == 'cuda':
            inputs = inputs.half()

        # compute output
        outputs = model(inputs)
        teacher_outputs = teacher_outs[i]
        if device == 'cuda':
            teacher_outputs = teacher_outputs.to(device).half()
        loss = loss_fn_kd(outputs, targets, teacher_outputs, Config.temp, Config.alpha)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        outputs = outputs.float()
        loss = loss.float()

        # measure accuracy and record loss
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    # update the learning rate
    scheduler.step()

    # print statistics
    train_loss = train_loss / len(train_loader)
    acc = 100 * correct / total
    used_time = time.time() - start_time
    print('Train Time used: %d \t Loss: %.3f | Train Acc: %.3f%% (%d/%d)'
          % (used_time, train_loss, acc, correct, total))
def bloch(s, tend=1, nsteps=1000, backend='vode', pulse_params={}, B0=3,
          dw_rot=0, dw_rf=0, rtol=1e-6):
    '''
    Solve the Bloch equations for spin `s` in the ROTATING FRAME OF REFERENCE
    rotating with the Larmor frequency plus a shift `dw_rot` (default: 0).
    Setting dw_rot = None (-> -w0) corresponds to the laboratory frame.
    dw_rf: frequency shift for off-resonance excitation
    '''
    w0 = -s['gm'] * B0
    # RF freq in rotating frame of reference is `w - w_fr`,
    # so just the "off resonance" freq (=w_0-w_rf) plus the
    # difference in frequency between wf_fr and w_0
    if dw_rot is None:
        dw_rot = -w0
    pulse_params['w0'] = dw_rot + dw_rf

    def rhs(t, y, s, pulse_params, B0, w0, dw_rot, it):
        B = np.array([0, 0, B0])                        # static
        B = B + pulseseq(t, s, pulse_params, it)        # RF
        # rotating frame with w+dw
        B = B + np.array([0, 0, (w0 + dw_rot) / s['gm']])
        # relax
        R = np.array([y[0] / s['T2'],
                      y[1] / s['T2'],
                      (y[2] - s['M0']) / s['T1']])
        return s['gm'] * np.cross(y, B) - R

    # VAR 1: automatic step size control
    it = 1
    sol = []
    t = []
    dt = tend / nsteps
    solver = ode(rhs).set_integrator(backend, rtol=rtol)
    solver.set_initial_value(s['Minit'], 0)
    solver.set_f_params(s, pulse_params, B0, w0, dw_rot, it)
    while solver.successful() and solver.t < tend:
        # works only with vode!! not recommended:
        # solver.integrate(tend, step=True)
        solver.integrate(solver.t + dt)
        t.append(solver.t)
        sol.append(solver.y)
        it = it + 1
        progressbar(solver.t, tend, 'solve')
    return np.array(t), np.array(sol)
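# A minimal usage sketch with a hypothetical proton-like spin dictionary; the
# key names follow the accesses inside `bloch`, the numbers are examples only,
# and it assumes the accompanying `pulseseq` helper accepts the default
# (empty) pulse_params.
s = {
    'gm': 2.675e8,             # gyromagnetic ratio [rad/s/T]
    'T1': 1.0,                 # longitudinal relaxation time [s]
    'T2': 0.1,                 # transverse relaxation time [s]
    'M0': 1.0,                 # equilibrium magnetization
    'Minit': [0.0, 0.0, 1.0],  # initial magnetization vector
}
t, M = bloch(s, tend=0.05, nsteps=500)
plot_3Dtime(t, M, skip=20)     # animate the trajectory (helper defined below)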
def read_glove(self, glovef):
    for line in utils.progressbar(open(glovef),
                                  maxval=utils.get_line_num(glovef),
                                  message='read glove'):
        word = line[:line.find(' ')]
        if self.__vocab.has_word(word):
            index = self.__vocab.word2id(word)
            strvec = ' '.join(line.split()[1:])
            vec = np.fromstring(strvec, dtype=np.float32, sep=' ')
            self.embed.W.data[index] = vec
def get_comprehensive_train_data(train_split, process):
    dances = utils.get_unique_dance_names(csv_data_dir)
    comprehensive_train = np.array([])
    for dance in utils.progressbar(dances, "{}-{}".format(train_split, process)):
        data = get_data(dance, process, train_split)
        train_data = data.copy()[0:int(len(data) * train_split)]
        if len(comprehensive_train) == 0:
            comprehensive_train = train_data
        else:
            comprehensive_train = np.vstack((comprehensive_train, train_data))
    return comprehensive_train
def export_all(self, source):
    schema = source.schema.copy()
    pbar = progressbar(len(source))
    print("Starting to export %s feature(s)..." % pbar.total)
    self.export_file(source=source, target=self.dest_file, schema=schema, pbar=pbar)
    pbar.close()
    stdout.flush()
    print("Successfully exported %s feature(s)!" % pbar.total)
def plot_3Dtime(t, M, skip=10):
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3D projection)
    import time

    plt.ion()
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.axis([-1, 1, -1, 1])
    ax.plot([0, 0], [0, 0], [-1, 1], '-.k')
    ax.plot([-1, 1], [0, 0], [0, 0], '-.k')
    ax.plot([0, 0], [-1, 1], [0, 0], '-.k')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    for i in range(0, len(t), skip):
        ax.plot([0, M[i, 0]], [0, M[i, 1]], [0, M[i, 2]], '-<r')
        progressbar(t[i], t.max(), s='plot')
        plt.draw()
        time.sleep(0.05)
def build_keyword_document_index(docs: Dict[DocumentID,
                                            Tuple[DocumentPath, DocumentBody]]):
    """
    Build a mapping from keyword to all document ids where this keyword is present.
    @params:
        docs - Required: dictionary with documents data
    """
    # Prints are useful, but in real projects logging.Logger is a better option
    print("Building document index ...")
    total = len(docs)
    keywords: Dict[str, Set[DocumentID]] = defaultdict(set)
    for i, (doc_id, (path, text)) in enumerate(docs.items(), start=1):
        for word in text.split():
            word = word.strip().lower()
            if word:
                keywords[word].add(doc_id)
        if not i % 100 or i == total:
            # Progressbar should be used only in the interactive shell
            # In production it can produce a lot of mess in the log file
            progressbar(i, total)
    return keywords
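# A minimal usage sketch with an inline docs dict in the doc_id -> (path, body)
# format produced by `new` above; the ids and texts are hypothetical.
docs = {
    "1": ("1.xml", "the quick brown fox"),
    "2": ("2.xml", "the lazy dog"),
}
keywords = build_keyword_document_index(docs)
# A simple AND query is then a set intersection over the posting sets:
hits = keywords["the"] & keywords["fox"]  # -> {"1"}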
def parse_all_novels(self):
    """
    Parses and scrapes information from all novel pages.

    :returns: A list of dictionaries with all scraped and cleaned information
        of the novels.
    """
    novel_ids = self.get_all_novel_ids()
    all_novel_information = []
    for novel_id in progressbar(novel_ids,
                                prefix="Parsing novels: ",
                                suffix="current novel id: "):
        info = self.parse_single_novel(novel_id)
        all_novel_information.append(info)
        sleep(self.delay)
    return all_novel_information
def get_model_outputs(model, dataloader, device='cuda'):
    """
    Collect the model's outputs (logits) for every batch in the dataloader.
    """
    model.eval()
    ret = []
    with torch.no_grad():
        for inputs, _ in progressbar(dataloader, prefix="Evaluating Logits"):
            inputs = inputs.to(device)
            if device == 'cuda':
                inputs = inputs.half()
            outputs = model(inputs)
            ret.append(outputs)
    return ret
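# A minimal sketch chaining the two distillation helpers above: cache the
# teacher's logits once, then distill into the student. The loaders, models,
# optimizer and scheduler are assumed to exist elsewhere, and `Config.epochs`
# is a hypothetical setting (only `Config.temp` and `Config.alpha` appear above).
teacher_outs = get_model_outputs(teacher_model, train_loader)
for epoch in range(Config.epochs):
    train_KD(train_loader, student_model, optimizer, scheduler, teacher_outs)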
def download_dataset():
    """ download all the years of flight data into the data folder """
    if check_dataset():
        if not os.path.exists(DATA_DIR):
            os.mkdir(DATA_DIR)
        year_range = range(1987, 2009)
        for ind, year in enumerate(year_range):
            # vars
            data_url = '{}/{}.csv.bz2'.format(DATA_SOURCE, year)
            file_path = '{}/raw_{}.csv.bz2'.format(DATA_DIR, year)
            # download
            download_handler(file_path, data_url)
            # progress
            utils.progressbar(len(year_range), ind + 1, 'download status: ')
    else:
        print('data downloaded. you can skip this step or delete data folder to download again.')
def download_data():
    """
    Downloads images meta information from the unsplash website as JSON.
    """
    images_list = []
    config = configparser.ConfigParser()
    if len(config.read('config.ini')) == 0:
        raise Exception('No config file found, you must create config first.')
    client_id = config.get('UNSPLASH', 'access_key', fallback='no_key')
    if client_id in (None, '', 'no_key'):
        raise Exception('No key is provided, please get your key.')

    try:
        for cnt in utils.progressbar(it=range(0, 1500, 30), prefix='Downloading '):
            response = requests.get(
                'https://api.unsplash.com/photos/random/?count=30',
                headers={
                    'Accept-Version': 'v1',
                    'Authorization': f'Client-ID {client_id}'
                },
                stream=True
            )
            if response.status_code == 200:
                raw_json = json.loads(response.content)
                images_list.extend(raw_json)
            elif response.status_code == 403:
                print('Api limit reached!')
                break
            else:
                print('Something went wrong!')
                break
    except KeyboardInterrupt:
        print('Operation interrupted by user.')
    except Exception as ex:
        print('Something went wrong', ex)
    finally:
        append_timestamp = round(datetime.datetime.now().timestamp())
        with open(f'data/json/data_{append_timestamp}.json', 'w+') as writer:
            json.dump(images_list, writer, indent=4)
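# A minimal end-to-end sketch tying the Unsplash helpers above together:
# fetch the photo metadata, download the image files, then build thumbnails.
download_data()
download_images(quality='small')
create_thumbnail(size=(128, 128))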
def action_make_index_for_vulnerabilities_table():
    start_time = time.time()
    result = dict(time_delta=0, items=0, message="Start")

    database.connect()
    count = 0

    # Clear cache
    for key in cache.keys(cache_indexer_collection_mask):
        cache.delete(key)
    print("Cache clear")

    # Get all rows from the VULNERABILITIES table
    all_vulnerabilities = VULNERABILITIES.select()
    print("All VULNERABILITIES selected")

    for one_vulner in progressbar(all_vulnerabilities):
        one_vulner_data = one_vulner.data
        component = one_vulner_data["component"]
        version = one_vulner_data["version"]
        new_collection_name = ''.join(
            [cache_indexer_collection_mask, component, "::", version])
        dictionary = dict(component=component,
                          version=version,
                          vuln_id=one_vulner_data["id"])
        cache.hmset(new_collection_name, dictionary)
        count += 1

    result["time_delta"] = time.time() - start_time
    result["items"] = count
    result["message"] = "Complete"

    database.close()
    return result
def get_all_novel_ids(self):
    """
    There is no easy way to get all novel ids (they are not strictly consecutive).
    Gets all novel ids from the novels listing page. The page contains multiple
    tabs with novels; first the maximum number of pages is obtained and then
    these are iterated through.

    :returns: A list with the novel ids of all currently listed novels.
    """
    if self.debug:
        novels_num_pages = 1
        print('Debug run, using 1 page with novels.')
    else:
        page = self.scraper.get(self.NOVEL_LIST_URL + '1')
        novels_num_pages = self.get_novel_list_num_pages(page)
        print('Full run, pages with novels:', novels_num_pages)

    all_novel_ids = []
    page_nums = progressbar(range(1, novels_num_pages + 1),
                            prefix="Obtaining novel ids: ",
                            suffix="current page: ")
    for page_num in page_nums:
        page = self.scraper.get(self.NOVEL_LIST_URL + str(page_num))
        novel_ids = self.get_novel_ids(page)
        all_novel_ids.extend(novel_ids)
        sleep(self.delay)
    return all_novel_ids
def aggregate_data(out_file=sys.stdout):
    dances = get_unique_dance_names(csv_data_dir)
    comprehensive_train_X = np.array([])
    comprehensive_train_Y = np.array([])
    comprehensive_validate_X = np.array([])
    comprehensive_validate_Y = np.array([])
    comprehensive_evaluation_X = np.array([])
    comprehensive_evaluation_Y = np.array([])
    comprehensive_train_Class_Y = np.array([])
    comprehensive_validate_Class_Y = np.array([])
    comprehensive_evaluation_Class_Y = np.array([])

    start_time = time.time()
    for dance in progressbar(dances, "Progress: "):
        csv_filename, np_filename = get_save_path(dance)
        train_X, train_Y, validate_X, validate_Y, evaluation_X, evaluation_Y = get_sample_data(
            csv_filename, np_filename, look_back, offset, forecast,
            sample_increment, training_split, validation_split,
            pos_pre_processes, rot_pre_processes)
        sentiment = dance.split('_')[-1]
        train_Class_Y = np.full((train_X.shape[0], 1), int(sentiment))
        validate_Class_Y = np.full((validate_X.shape[0], 1), int(sentiment))
        evaluation_Class_Y = np.full((evaluation_X.shape[0], 1), int(sentiment))
        if len(comprehensive_train_X) == 0:
            comprehensive_train_X = train_X
            comprehensive_train_Y = train_Y
            comprehensive_validate_X = validate_X
            comprehensive_validate_Y = validate_Y
            comprehensive_evaluation_X = evaluation_X
            comprehensive_evaluation_Y = evaluation_Y
            comprehensive_train_Class_Y = train_Class_Y
            comprehensive_validate_Class_Y = validate_Class_Y
            comprehensive_evaluation_Class_Y = evaluation_Class_Y
        else:
            comprehensive_train_X = np.vstack((comprehensive_train_X, train_X))
            comprehensive_train_Y = np.vstack((comprehensive_train_Y, train_Y))
            comprehensive_validate_X = np.vstack((comprehensive_validate_X, validate_X))
            comprehensive_validate_Y = np.vstack((comprehensive_validate_Y, validate_Y))
            comprehensive_evaluation_X = np.vstack((comprehensive_evaluation_X, evaluation_X))
            comprehensive_evaluation_Y = np.vstack((comprehensive_evaluation_Y, evaluation_Y))
            comprehensive_train_Class_Y = np.vstack((comprehensive_train_Class_Y, train_Class_Y))
            comprehensive_validate_Class_Y = np.vstack((comprehensive_validate_Class_Y, validate_Class_Y))
            comprehensive_evaluation_Class_Y = np.vstack((comprehensive_evaluation_Class_Y, evaluation_Class_Y))

    write("Fetching and Aggregating Training Data --- {} seconds ---".format(
        time.time() - start_time), out_file)

    np.save(training_filepath + "_X", comprehensive_train_X)
    np.save(training_filepath + "_Y", comprehensive_train_Y)
    np.save(validation_filepath + "_X", comprehensive_validate_X)
    np.save(validation_filepath + "_Y", comprehensive_validate_Y)
    np.save(evaluation_filepath + "_X", comprehensive_evaluation_X)
    np.save(evaluation_filepath + "_Y", comprehensive_evaluation_Y)
    np.save(training_filepath + "_Class_Y", comprehensive_train_Class_Y)
    np.save(validation_filepath + "_Class_Y", comprehensive_validate_Class_Y)
    np.save(evaluation_filepath + "_Class_Y", comprehensive_evaluation_Class_Y)
    print("Saved to", training_filepath + "_Class_Y")
def train_model(model, out_file=sys.stdout):
    """
    Trains the model with the dance data.
    The History object's History.history attribute is a record of training loss
    values and metrics values at successive epochs, as well as corresponding
    validation values (if applicable).

    :param model: the model to train
    :type keras.Model
    :param out_file: what to display/write the status information to
    :type output stream
    :return: the class containing the training metric information, the trained
        model, and the comprehensive evaluation data
    :type tuple
    """
    dances = get_unique_dance_names(csv_data_dir)
    checkpoint = keras.callbacks.ModelCheckpoint(filepath=weights_file,
                                                 monitor='val_loss',
                                                 mode='auto',
                                                 save_weights_only=True,
                                                 save_best_only=True)
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=stopping_patience,
                                                      verbose=2,
                                                      mode='auto',
                                                      restore_best_weights=True)
    callbacks_list = [keras.callbacks.TerminateOnNaN(),
                      checkpoint,
                      early_stopping,
                      CustomCallback(out_file)]

    comprehensive_train_X = np.array([])
    comprehensive_train_Y = np.array([])
    comprehensive_validate_X = np.array([])
    comprehensive_validate_Y = np.array([])
    comprehensive_evaluation_X = np.array([])
    comprehensive_evaluation_Y = np.array([])

    write("Fetching and Aggregating Training Data ...")  # sys.stdout
    start_time = time.time()
    for dance in progressbar(dances, "Progress: "):
        csv_filename, np_filename = get_save_path(dance)
        train_X, train_Y, validate_X, validate_Y, evaluation_X, evaluation_Y = get_sample_data(
            csv_filename, np_filename, look_back, offset, forecast,
            sample_increment, training_split, validation_split,
            convensional_method)
        if len(comprehensive_train_X) == 0:
            comprehensive_train_X = train_X
            comprehensive_train_Y = train_Y
            comprehensive_validate_X = validate_X
            comprehensive_validate_Y = validate_Y
            comprehensive_evaluation_X = evaluation_X
            comprehensive_evaluation_Y = evaluation_Y
        else:
            comprehensive_train_X = np.vstack((comprehensive_train_X, train_X))
            comprehensive_train_Y = np.vstack((comprehensive_train_Y, train_Y))
            comprehensive_validate_X = np.vstack((comprehensive_validate_X, validate_X))
            comprehensive_validate_Y = np.vstack((comprehensive_validate_Y, validate_Y))
            comprehensive_evaluation_X = np.vstack((comprehensive_evaluation_X, evaluation_X))
            comprehensive_evaluation_Y = np.vstack((comprehensive_evaluation_Y, evaluation_Y))
    write("Fetching and Aggregating Training Data --- {} seconds ---".format(
        time.time() - start_time), out_file)

    start_time = time.time()
    history = model.fit(comprehensive_train_X,
                        comprehensive_train_Y,
                        batch_size=batch_size,
                        callbacks=callbacks_list,
                        validation_data=(comprehensive_validate_X, comprehensive_validate_Y),
                        epochs=epochs,
                        shuffle=shuffle_data,
                        verbose=1)
    save_model_checkpoint(model, model_file)
    np.save(evaluation_filepath + "_X", comprehensive_evaluation_X)
    np.save(evaluation_filepath + "_Y", comprehensive_evaluation_Y)
    with open(history_train_file, "w") as history_file:
        json.dump(pd.DataFrame.from_dict(history.history).to_dict(), history_file)
    write("Saved training metric history to json file:\n\t" + history_train_file)  # sys.stdout
    write("Saved training metric history to json file:\n\t" + history_train_file, out_file)
    return history, model, comprehensive_evaluation_X, comprehensive_evaluation_Y
imlist = sorted(imlist)

if args.features == 'vgg16':
    model = VGG16FeatureExtractor()
elif args.features == 'vgg19':
    model = VGG19FeatureExtractor()
elif args.features == 'resnet50':
    model = ResNet50FeatureExtractor()
elif args.features == 'resnet152':
    model = ResNet152FeatureExtractor()
elif args.features == 'rmac':
    model = RMACFeatureExtractor()
else:
    raise RuntimeError('not a valid feature type')

for imfile in progressbar(imlist):
    featfile = join(args.output_path, splitext(imfile)[0]) + '.dat'
    if exists(featfile):
        continue

    img = Image.open(join(impath, imfile)).convert('RGB')
    feat = model.process(img)

    if not exists(dirname(featfile)):
        os.makedirs(dirname(featfile))
    save(featfile, feat)
def do_test():
    test_set = ds.parse_test_dataset(args.dataset_dir)
    print("Test Set Length: {}".format(len(test_set)))

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    print("Writing test results to ", args.save_dir)
    csv_file = open("{}/{}.csv".format(args.save_dir, "test"), "w")
    csv_writer = csv.writer(csv_file, delimiter=';')
    csv_writer.writerow(("timestep", "image_file",
                         "steering_predicted", "steering_expected",
                         "speed_predicted", "speed_expected"))
    csv_file.flush()

    image_queue = deque()
    next_state = None
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(args.metagraph_file)
        ckpt = tf.train.latest_checkpoint(args.checkpoint_dir)
        saver.restore(sess, ckpt)

        input_images = tf.get_collection("input_images")[0]
        prev_state = tf.get_collection("prev_state")
        next_state_op = tf.get_collection("next_state")
        prediction_op = tf.get_collection("predictions")[0]
        lookback_length_op = tf.get_collection("lookback_length")[0]
        stats_op = tf.get_collection("stats")

        total_steps = len(test_set)
        for step, sample in enumerate(test_set):
            imfile, expected = sample
            image = cv2.imread(imfile, cv2.IMREAD_COLOR)
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if len(image_queue) == 0:
                lookback_length = sess.run(lookback_length_op)
                image_queue.extend([image] * (lookback_length + 1))
                mean, stddev = sess.run(stats_op)
                print("Training mean: {}\nTraining stddev: {}".format(mean, stddev))
            else:
                image_queue.popleft()
                image_queue.append(image)

            image_sequence = np.stack(image_queue)
            feed_dict = {input_images: image_sequence}
            if next_state is not None:
                feed_dict.update(dict(zip(prev_state, next_state)))
            next_state, prediction = sess.run([next_state_op, prediction_op],
                                              feed_dict=feed_dict)
            predicted = np.round(prediction).flatten().astype(np.int32)
            expected = np.array(expected, dtype=np.int32)
            csv_writer.writerow((step, imfile,
                                 predicted[0], expected[0],
                                 predicted[1], expected[1]))
            csv_file.flush()
            progressbar(total_steps, step + 1, "Testing ")
    csv_file.close()
marked_samples = {}
# Profile at random over solution space
for r in range(n_iterations):
    random_sample = int(np.random.rand() * n_assignments)
    while random_sample in marked_samples:
        random_sample = int(np.random.rand() * n_assignments)
    marked_samples[random_sample] = True

    if args.mode == "name":
        exec_time, max_rss = runner.profile_by_opname(random_sample)
    elif args.mode == "index":
        exec_time, max_rss = runner.profile_by_opindex(random_sample)
    else:
        print("Invalid mode ", args.mode)
        sys.exit(-1)

    pareto_obj.update_pareto_solutions(random_sample, exec_time, max_rss)
    progressbar(r, n_assignments, prefix="% samples computed. : ")
progressbar(r + 1, n_assignments, prefix="% samples computed. : ")

# Dump profiler results
dumpdata = {}
dumpdata['mode'] = args.mode
dumpdata = pareto_obj.dump_pareto_solutions(dumpdata)
dumpdata = runner.dump_config(dumpdata)
with open(dumpfile, "w") as ofile:
    json.dump(dumpdata, ofile)

t_end = time.time()
print("\n")
print("done.., profiling time = ", (t_end - t_start), " seconds")
def write(self, progress=1.):
    utils.progressbar(progress, 'Saving results to file')
    with h5py.File(self.outputfile, 'a') as datafile:
        for item in self.output:
            datafile[item][...] = self.output[item]
def evaluate(self, algorithm, rrd_file):
    """
    Process the information contained in the files selected in "self.input_dir"
    and append the results to "rrd_file".
    """
    vetor_ewma = []  # EWMA
    janela = 1       # EWMA window
    files = glob.glob(self.dir + '/' + self.wildcard)
    files.sort()
    first_timestamp = files[0].split('/')[-1].split('-')[0]  # EWMA
    first_timestamp_epoch = time.mktime(time.strptime(first_timestamp, "%Y%m%d%H%M"))  # EWMA
    idx_arq = 0
    pb = progressbar(len(files), "*")

    if not os.path.isfile(rrd_file):
        createrrdhw(rrd_file, '200701010000')  # create the RRD database file
        tunerrdgamma(rrd_file, '0.1')          # adjust the Holt-Winters gamma parameter in the RRD

    for f in files:
        idx_arq = idx_arq + 1
        histogram = FlowsSample(f).histogram()
        result = algorithm(histogram, to_be_normalized=True)  # compute Shannon entropy
        timestamp = f.split('/')[-1].split('-')[0]
        timestamp_epoch = time.mktime(time.strptime(timestamp, "%Y%m%d%H%M"))
        vetor_ewma.append(result)  # EWMA - store the computed entropies
        rrd_line = "%s:%f" % (timestamp_epoch, result)          # value to be written to the RRD
        os.system('rrdtool update %s %s' % (rrd_file, rrd_line))  # write the entropy value into the RRD database
        pb.progress(idx_arq)  # show the progress of the Shannon processing

    resultado_ewma = []                        # EWMA - initialize the EWMA result vector
    ewma_entropias = ewma(vetor_ewma, janela)  # EWMA - compute the EWMA of the normalized values
    ewma_timestamp_epoch = first_timestamp_epoch
    for item_ewma_entropias in range(len(ewma_entropias)):  # EWMA - build the (timestamp, value) tuples
        resultado_ewma.append((ewma_timestamp_epoch, ewma_entropias[item_ewma_entropias]))
        ewma_timestamp_epoch = ewma_timestamp_epoch + float(300)  # EWMA - advance the timestamp by 300 s

    rrd_file_ewma = rrd_file + '-ewma.rrd'
    createrrdhw(rrd_file_ewma, '200801010000')  # EWMA - create the RRD file
    for item_ewma in resultado_ewma:            # EWMA - for each tuple in the EWMA vector
        ewma_timestamp = str(item_ewma[0])      # EWMA - timestamp of the tuple
        ewma_value = str(item_ewma[1])          # EWMA - EWMA value of the tuple
        rrd_line_ewma = "%s:%s" % (ewma_timestamp, ewma_value)  # EWMA - tuple to be written to the RRD
        os.system('rrdtool update %s %s' % (rrd_file_ewma, rrd_line_ewma))  # EWMA - write the tuple into the RRD

    return rrd_line