def put(conn: socket, args=None):
    # recv file from client and write to file
    print('receiving file...')
    client_data = json.loads(_bytes_to_string(recv_msg(conn)))
    args['filename'] = os.path.join('server_files', args['filename'])
    data = client_data['data']

    if data is None:
        print("Problem: data received is None")

    print("got the file data!: {} Bytes".format(len(data)))

    if not os.path.isdir('./server_files'):
        os.mkdir('./server_files')

    filename = os.path.join('server_files', path_leaf(args['filename']))
    print('iv=', client_data['iv'])

    with open(filename, 'wb+') as f:
        plaintext = args['cipherfunc'](data=data, key=args['key'], decrypt=True, iv=client_data['iv'])
        f.write(plaintext)

    print('received file:', args['filename'])

    if os.path.isfile(filename):
        subprocess.Popen(r'explorer /select,"{}"'.format(args['filename']))
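# NOTE: the snippets in this collection call a `path_leaf` helper that is not defined
# here. A minimal sketch of the common single-value recipe is below; this is an
# assumption, not code taken from any of the projects. The ChronQC snippets further
# down unpack a (directory, filename) pair from their own utils.path_leaf, so that
# variant returns both parts of the split instead of just the leaf.
import ntpath


def path_leaf(path):
    """Return the final component of a path, even if it ends with a separator."""
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)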
def get_file_paths(filename_extension):
    if 'demo' in sys.argv:
        from settings import DEFAULT_PATHS
        return DEFAULT_PATHS
    while True:
        print('Please, select a .{} file(s)'.format(filename_extension))
        root = tkinter.Tk()
        root.withdraw()
        paths = filedialog.askopenfilenames(parent=root, title='Choose a file(s)')
        for path in paths:
            if not check_filename_extension(path=path, filename_extension=filename_extension):
                print('{} is not .{} file. Choose file(s) again.'.format(
                    path_leaf(path), filename_extension))
                paths = []
                root.update()
                root.destroy()
                break
        if not paths:
            continue
        print('Your file(s):\n{}\nAll right?'.format('\n'.join(paths)))
        while True:
            answer = input('Print yes or no\n').strip().lower()
            if answer in ('yes', 'y'):
                return paths
            elif answer in ('no', 'n'):
                break  # go back and choose the file(s) again
def callback(conn: socket):
    # receive data
    resp = json.loads(_bytes_to_string(recv_msg(conn)))

    if 'file_index' in args and args['file_index'] == True:
        args['filename'] = resp['filename']
        del args['file_index']

    if not os.path.isdir('./client_files'):
        os.mkdir('./client_files')

    filename = os.path.join('client_files', path_leaf(args['filename']))

    if os.path.isdir(filename):
        args['filename'] = os.path.join(args['filename'], resp['filename'])

    # === done preparing filesystem ===

    with open(filename, 'wb+') as f:
        plaintext = args['cipherfunc'](data=resp['data'], key=args['key'], decrypt=True, iv=resp['iv'])
        f.write(plaintext)

    if os.path.isfile(filename):
        subprocess.Popen(r'explorer /select,"{}"'.format(filename))
def put(args: dict):
    args['iv'] = secrets.token_bytes(16)

    if 'file_index' in args and args['file_index'] == True:
        # if access-by-fileindex, then remove attr (to prevent issues) and get filename
        del args['file_index']
        file_index = int(args['filename'])
        args['filename'] = ls_local(args)[file_index]

    filename = os.path.join('client_files', path_leaf(args['filename']))

    if not os.path.isfile(filename):  # check if file exists
        print('ERROR: File "{}" doesn\'t exist'.format(filename))
        return

    def callback(conn: socket):
        ciphertext = b''
        with open(filename, 'rb') as f:
            data = f.read()
            ciphertext = args['cipherfunc'](data=data, key=args['key'], iv=args['iv'])

        return send_msg(
            conn,
            _string_to_bytes(
                json.dumps({
                    'filename': filename,
                    'data': _bytes_to_string(ciphertext),
                    'iv': _bytes_to_string(args['iv']),
                })))

    return send_command(args, callback)
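# NOTE: the socket snippets above and below also assume `send_msg`/`recv_msg` framing
# helpers and `_bytes_to_string`/`_string_to_bytes` codecs that are not shown. The
# sketch below is one plausible implementation (4-byte length-prefixed framing and a
# latin-1 codec so arbitrary ciphertext bytes survive json.dumps); the real project
# may do this differently.
import struct


def send_msg(sock, data: bytes):
    # prefix every message with its length as a 4-byte big-endian integer
    sock.sendall(struct.pack('>I', len(data)) + data)


def recv_msg(sock) -> bytes:
    raw_len = _recv_all(sock, 4)
    msg_len = struct.unpack('>I', raw_len)[0]
    return _recv_all(sock, msg_len)


def _recv_all(sock, n: int) -> bytes:
    # keep reading until exactly n bytes have arrived
    buf = b''
    while len(buf) < n:
        chunk = sock.recv(n - len(buf))
        if not chunk:
            raise ConnectionError('socket closed while receiving')
        buf += chunk
    return buf


def _bytes_to_string(b: bytes) -> str:
    # latin-1 round-trips every byte value, so raw ciphertext can be embedded in JSON
    return b.decode('latin-1')


def _string_to_bytes(s: str) -> bytes:
    return s.encode('latin-1')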
def moveTrackToGenreFolder(trackPath, folderName):
    pathFolderName = "{0}/{1}".format(MusicManager.__getRootFolder(), folderName)
    if not os.path.exists(pathFolderName):
        os.makedirs(pathFolderName)
    pathFolderNameWithTrackName = "{0}/{1}".format(pathFolderName, utils.path_leaf(trackPath))
    print("Copy {0}\t\tTO\t\t{1}".format(trackPath, pathFolderNameWithTrackName))
    shutil.copy(trackPath, pathFolderNameWithTrackName)
def download_pdf(pdf_urls, dest_path, gdrive):
    if gdrive:
        for url in pdf_urls:
            gdrive_id = utils.get_gdrive_id(url)
            file_name = gdrive_id + ".pdf"
            utils.download_gdrive(gdrive_id, dest_path + file_name)
    else:
        for url in pdf_urls:
            download_script = "wget " + url
            move_script = "mv " + utils.path_leaf(url) + " " + dest_path
            os.system(download_script)
            os.system(move_script)
def main(args):
    global config_data
    now = time.strftime("%c")
    ## parse input argument
    # check_argument()
    config_path = op.abspath(args.config_file)
    output_directory, output_prefix = utils.path_leaf(config_path)
    # read the config file
    file = utils.custparser()
    file.read(args.config_file)
    config_data = file.as_dict()
    logfile = op.join(output_directory, 'chronqc_crongen.log')
    # print(logfile)
    logging.basicConfig(filename=op.join(output_directory, 'chronqc_crongen.log'),
                        level=logging.DEBUG)
    # print(output_directory)
    logging.info('STARTED crongen on %s' % now)
    try:
        ## set output directory and directory to be displayed in email
        to_directory = config_data["iomanip"]["destination"]
        display_directory = ""
        if ("display_destination" in config_data["iomanip"].keys()) and (
                config_data["iomanip"]["display_destination"] != ""):
            display_directory = config_data["iomanip"]["display_destination"]
        else:
            display_directory = to_directory
        ## make directory for this month
        curr_date = time.strftime("%d_%b_%Y")
        to_directory = os.path.join(to_directory, curr_date)
        display_directory = os.path.join(display_directory, curr_date)
        logging.info('ABS_PATH: %s DISPLAY_PATH: %s' % (to_directory, display_directory))
        link_dict = call_plots(to_directory)
        ## email users
        compose_mail(link_dict, display_directory)
    except Exception:
        logging.error(traceback.format_exc())
        alert_admin(traceback.format_exc())
        logging.info('Error encountered while creating ChronQC plots: please see {0} for details.'.format(logfile))
        # print('Error encountered while creating ChronQC plots: please see {0} for details.'.format(logfile))
        sys.exit(1)
    logging.info('Completed creating ChronQC plots: please see {0} for details.'.format(logfile))
def pdf2json(file_paths, json_out, squash=True, titles=None):
    data = {'data': [], 'version': '1.1'}
    for i, file_path in enumerate(file_paths):
        _, paragraphs = pdf_reader.read_pdf(file_path, squash)
        title = titles[i] if titles else utils.path_leaf(file_path)
        doc = {
            'paragraphs': [],
            'title': title,
            'department': '',
            'chapter': ''
        }
        for p in paragraphs:
            doc['paragraphs'].append({'context': p, 'qas': []})
        data['data'].append(doc)
    with open(json_out, 'w') as f:
        json.dump(data, f)
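# For reference, pdf2json writes a SQuAD-style skeleton with empty question lists.
# With a single input file the output looks roughly like this (file name and page
# text purely illustrative):
# {
#   "version": "1.1",
#   "data": [
#     {
#       "title": "handbook.pdf",
#       "department": "",
#       "chapter": "",
#       "paragraphs": [
#         {"context": "First page text ...", "qas": []},
#         {"context": "Second page text ...", "qas": []}
#       ]
#     }
#   ]
# }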
def thread_target(eula_file, predict_method):
    print("============ eula_file : ", eula_file, " ============")
    """
    clause_list = get_content(eula_file)
    clause_dic = convert_to_clauses(clause_list)
    """
    _, _, clause_dic, _ = get_content(eula_file)
    clause_list = list(clause_dic.values())
    clauses_key = list(clause_dic.keys())

    Y = predict_method(clause_list)
    labels, probabilities = [], []
    for y in Y:
        a = max(y)
        probabilities.append(a)
        labels.append(y.index(a))

    file_name = path_leaf(path=eula_file)
    file_name, extension = os.path.splitext(file_name)
    file_name = file_name + "_" + extension.replace(".", "")
    csv_file = file_name + ".csv"
    if os.path.isfile(csv_file):
        i = 1
        while os.path.isfile(file_name + '.' + str(i) + ".csv"):
            i += 1
        csv_file = file_name + '.' + str(i) + '.csv'

    print("============ csv_file : ", csv_file, " ============")
    print()
    # pd.DataFrame(list(zip(clauses_key, clause_list, labels, probabilities))).to_csv(csv_file, header=["clauses_id", "clauses", "labels", "probabilities"])
    pd.DataFrame(list(zip(clause_list, labels, probabilities))).to_csv(
        csv_file, header=["clauses", "labels", "probabilities"])
def create_horizontal_cut(path, data):
    print(path)
    data['nc_file'] = nc4.Dataset(path, mode='r')
    u_theta, v_theta, w_theta = get_theta_wind_matrix(path=path)
    if u_theta is None or v_theta is None or w_theta is None:
        return None
    data['file_name'] = path_leaf(path)
    data['hour'] = get_hour_from_nc_file(data=data)
    max_level = u_theta.shape[0] - 1
    if 'start_height' not in data.keys() or 'end_height' not in data.keys():
        data['start_height'], data['end_height'] = get_heights(max_level=max_level)
    for height in range(data.get('start_height'), data.get('end_height')):
        data['height'] = height
        plot_horizontal_cut(u=u_theta[height], v=v_theta[height], w=w_theta[height], data=data)
def read_pdf(file_path, squash=True, verbose=True):
    if verbose:
        print("Parsing", file_path)
    file_name = utils.path_leaf(file_path)
    if file_name[-4:] != ".pdf":
        raise TypeError("Expecting input of pdf file")
    paragraphs = []
    title = file_name[:-4]
    pdf_file_in = open(file_path, 'rb')
    rsrcmgr = PDFResourceManager()
    retstr = io.StringIO()
    laparams = LAParams()
    device = TextConverter(rsrcmgr, retstr, laparams=laparams)
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    page_no = 0
    for pageNumber, page in enumerate(PDFPage.get_pages(pdf_file_in)):
        if pageNumber == page_no:
            if verbose and pageNumber % 10 == 0:
                print("Read page", str(pageNumber))
            interpreter.process_page(page)
            data = retstr.getvalue()
            data = ''.join(x for x in data if x in string.printable)
            if squash:
                data = data.replace("\t", " ").replace("\n", " ")
            else:
                data = data.replace("\t", " </tab> ").replace("\n", " </newline> ")
            data = re.sub(r"\s\s+", " ", data)
            paragraphs.append(data)
            retstr.truncate(0)
            retstr.seek(0)
            page_no += 1
    pdf_file_in.close()
    return title, paragraphs
def get(conn: socket, args=None):
    # send the file to client
    if args['file_index']:
        args['filename'] = os.listdir('server_files')[int(args['filename'])]

    iv = secrets.token_bytes(16)
    print('iv=', iv)
    filename = os.path.join('server_files', path_leaf(args['filename']))

    with open(filename, 'rb') as f:
        plaintext = f.read()
        ciphertext = args['cipherfunc'](data=plaintext, key=args['key'], iv=iv)

    print("finished reading file \"{}\", {}B".format(filename, len(ciphertext)))

    return send_msg(
        conn,
        _string_to_bytes(
            json.dumps({
                'filename': filename,
                'data': _bytes_to_string(ciphertext),
                'iv': _bytes_to_string(iv),
            })))
if AUTO_CREATE_DIRS:
    pathlib.Path(path_head(SAVE_PREDS_PATH)).mkdir(parents=True, exist_ok=True)
    pathlib.Path(path_head(SAVE_RESULTS_PATH)).mkdir(parents=True, exist_ok=True)

##------------------------------------------------------------------##
## Load (and save) data
##------------------------------------------------------------------##

# Load labels
labels = np.load(LABELS_PATH)

# Detect all the precomputed time point data frames
precomputed_df_paths = glob.glob(join(PRECOMPUTED_DIR_PATH, "time_point_*.csv"))
get_tp = lambda p: int(path_leaf(p).split("_")[-1].split(".")[0])

# Add time point info and sort by it
precomputed_df_paths = [(get_tp(p), p) for p in precomputed_df_paths]
precomputed_df_paths.sort(key=lambda x: int(x[0]))

if DEV_MODE:
    precomputed_df_paths = precomputed_df_paths[:5]

# Load the precomputed data frames
time_point_dfs = [(tp, pd.read_csv(path)) for tp, path in precomputed_df_paths]

##------------------------------------------------------------------##
## Running CV on all time points for a single participant
##------------------------------------------------------------------##

# Number of trials
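# NOTE: this script also uses a `path_head` counterpart to `path_leaf` to get the
# directory part of a file path. A minimal sketch (an assumption, not shown in the
# source) is:
import os


def path_head(path):
    """Return everything except the final component of a path."""
    return os.path.split(path)[0]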
def main(args): """ (args) -> html takes number of arguments and produces interactive chronqc html report """ db = op.abspath(args.db) panel = args.panel templates_dir = op.abspath(op.join(op.dirname(__file__), 'templates')) # output dir and file # Get output directory 1. user defined 2. db dir 3. multiqc_stats dir # Output file name day = date.today().strftime("%d_%b_%Y") if args.prefix is not None: prefix = '{0}.{1}.{2}.{3}'.format(args.prefix, panel, 'chronqc', day) else: prefix = '{0}.{1}.{2}'.format(panel, 'chronqc', day) # Get output file if args.output is not None: output_directory = op.abspath(args.output) else: output_directory, output_prefix = utils.path_leaf(db) output_directory = op.join(output_directory, "chronqc_output") if not op.exists(output_directory): os.makedirs(output_directory) elif op.exists(output_directory) and not args.force: # logger.fatal("Output directory %s already exists", output_directory) print( "FATAL: Output directory {0} already exists, use -f to overwrite". format(output_directory)) sys.exit(1) elif op.exists(output_directory) and args.force: pass # html report out_file = op.join(output_directory, "%s.html" % prefix) # create logger log_file = op.join(output_directory, 'chronqc.log') logging.basicConfig(filename=log_file, format='%(asctime)s - %(name)s - %(levelname)s - \ %(message)s') logger = logging.getLogger('chronqc') logger.setLevel(logging.DEBUG) # create console handler and set level to debug ch = logging.StreamHandler() ch.setLevel(logging.CRITICAL) # create formatter formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s -' '%(message)s') # add formatter to ch ch.setFormatter(formatter) # add ch to logger logger.addHandler(ch) logger.info("Started chronqc {0}".format(day)) # read plot config f = op.abspath(args.json) try: config = json.load(io.open(f, 'r', encoding='utf-8-sig'), strict=False) logger.info("Got required parameters for chronqc") except ValueError: e = sys.exc_info()[1] logger.critical("FATAL: Error in JSON file {0}:{1}".format( e, op.abspath(args.json))) sys.exit(1) # enddate = date.today() + relativedelta(months=+1) # enddate = enddate.strftime('%Y-%m-%d') # Create dictionary of data tobe filled in html file datetime = date.today() vals = { 'htmltemplates': '', 'calendartemplates': '', 'javascripttemplate': '', 'sidebartemplates': '', 'j': '$j', 'panel': panel, 'startdate': '$startdate', 'enddate': '$enddate', 'datetime': datetime, 'pdfname': '$pdfname', 'table': '$table', 'headers': '$headers', 'rows': '$rows', 'row': '$row', 'cols': '$cols', 'col': '$col', 'text': '$text' } i = 1 chart_ids = [] group_ids = {} for chart in config: chart_id = 'g' + str(i) chart_ids.append(chart_id) table = chart.get('table_name', 'chronqc_stats_data') i = i + 1 vals['htmltemplates'] = vals[ 'htmltemplates'] + '$' + chart_id + '_html' + '\n' vals['calendartemplates'] = vals[ 'calendartemplates'] + '$' + chart_id + '_calendar' + '\n' vals['javascripttemplate'] = vals[ 'javascripttemplate'] + '$' + chart_id + '_js' + '\n' group_side = '<p class="nav-item2"> {0}</p>\n'.format( table.replace('_', ' ').title()) #vals['sidebartemplates'] = vals['sidebartemplates'] + '$' + chart_id + '_sidebar' + '\n' if table not in group_ids: group_ids[table] = ['$' + chart_id + '_sidebar'] vals['sidebartemplates'] = vals[ 'sidebartemplates'] + group_side + '$' + chart_id + '_sidebar' + '\n' else: vals['sidebartemplates'] = vals[ 'sidebartemplates'] + '$' + chart_id + '_sidebar' + '\n' # SUBSTITUTION 1: create a template based on number of plots to be 
plotted tmpl = string.Template( open(op.join(templates_dir, "base_template.html")).read()) tmpl = tmpl.substitute(**vals) logger.info("Finished creating base template based on number of plots") print('Started ChronQC') # SUBSTITUTION 2: for all plots to be plotted do data processing # and substitute values in html, calander and js templates i = 1 for chart in config: chart_id = 'g' + str(i) i = i + 1 table = chart.get('table_name', 'chronqc_stats_data') column_name = chart["chart_properties"]["y_value"] include_samples = chart.get('include_samples', 'all') exclude_samples = chart.get('exclude_samples', '') per_sample = chart["chart_properties"].get('per_sample', 'False') categories = chart["chart_properties"].get('categories', '') category_str = '' ylabel2 = '' df_chart_cumsum = '' logger.info("Plotting {0}".format(chart_id)) # Fetch data from the sqlite database df = fetch_stats_data(db, table, panel, categories=categories, ColumnName=column_name) logger.info("For {0}: got total {1} records".format(chart_id, len(df))) if len(df) == 0: logger.critical( "FATAL: For {0} {1}: no records found in {2}".format( chart_id, column_name, table)) sys.exit(1) # keep only desired samples try: df = get_samples_data(df, include_samples, exclude_samples, per_sample=per_sample) except KeyError: e = sys.exc_info()[1] logger.critical("FATAL: no {0} column found in {1}".format( e, table)) sys.exit(1) except Exception: e = sys.exc_info()[1] logger.critical("FATAL: please check {0} column in {1}".format( e, table)) sys.exit(1) if len(df) == 0: logger.critical( "FATAL: For {0} {1}: no records found for {2}".format( chart_id, column_name, include_samples)) sys.exit(1) logger.info("For {0}: kept {1} records after filtering".format( chart_id, len(df))) # dates for display startdate_year = df.loc[len(df) - 1, 'Date'].date() + relativedelta(months=-12) start_df = df.loc[0, 'Date'].date() if startdate_year > start_df: startdate = startdate_year.strftime('%Y-%m-%d') else: start_df = start_df + relativedelta(months=-1) startdate = start_df.strftime('%Y-%m-%d') enddate = df.loc[len(df) - 1, 'Date'] + relativedelta(months=+1) vals['startdate'] = startdate vals['enddate'] = enddate # process y formatting y = process_y(column_name) # generate data in format for html if chart['chart_type'] == 'time_series_with_percentage_category': cat = chart["chart_properties"].get('category', 'PASS') t = '% Samples per run with {0} = {1}'.format(y, cat) y = '% {0} = {1}'.format(y, cat) chart_title = chart["chart_properties"].get('chart_title', t) y_label = chart["chart_properties"].get('y_label', y) js_tmpl = string.Template( open(op.join(templates_dir, "percent_plot_threshold.txt")).read()) if not column_name in df.columns: logger.critical("FATAL: no {0} column found in {1}".format( column_name, table)) sys.exit(1) df_chart = percentage_category(df, column_name, cat) logger.info( "For {0}: {1} data points will be written to html".format( chart_id, len(df_chart))) elif chart[ 'chart_type'] == 'time_series_with_percentage_of_samples_above_threshold': threshold = chart["chart_properties"]["threshold"] t = '% Samples per run with {0} ≥ {1}'.format(y, threshold) y = '% {0} ≥ {1}'.format(y, threshold) chart_title = chart["chart_properties"].get('chart_title', t) y_label = chart["chart_properties"].get('y_label', y) js_tmpl = string.Template( open(op.join(templates_dir, "percent_plot_threshold.txt")).read()) if not column_name in df.columns: logger.critical("FATAL: no {0} column found in {1}".format( column_name, table)) sys.exit(1) df_chart = 
percentage_of_samples_above_threshold( df, column_name, threshold) logger.info( "For {0}: {1} data points will be written to html".format( chart_id, len(df_chart))) elif chart['chart_type'] == 'time_series_with_mean_and_stdev': win = chart["chart_properties"].get('window', '365D') info = 'sample' if per_sample == 'True' else 'run' try: win = int(win) winf = "past {0} {1}s".format(win, info) except: winf = "past {0} {1}s".format(win, info) if win == '365D': winf = "past 1 year {}s".format(info) if per_sample == 'False': t = '{0} (Mean per run with {1} rolling mean and ±2 standard deviation)'.format( y, winf) y = '{0} (Mean per run)'.format(y) else: t = '{0} (with {1} rolling mean and ±2 standard deviation)'.format( y, winf) y = '{0}'.format(y) chart_title = chart["chart_properties"].get('chart_title', t) y_label = chart["chart_properties"].get('y_label', y) js_tmpl = string.Template( open(op.join(templates_dir, "mean_and_stdev.txt")).read()) if not column_name in df.columns: logger.critical("FATAL: no {0} column found in {1}".format( column_name, table)) sys.exit(1) df_chart = mean_and_stdev(df, column_name, win=win, per_sample=per_sample) logger.info( "For {0}: {1} data points will be written to html".format( chart_id, len(df_chart))) elif chart['chart_type'] == 'time_series_with_absolute_threshold': if per_sample == 'False': t = '{0} (Mean per run)'.format(y) y = '{0} (Mean per run)'.format(y) else: t = '{0}'.format(y) y = '{0}'.format(y) chart_title = chart["chart_properties"].get('chart_title', t) y_label = chart["chart_properties"].get('y_label', y) js_tmpl = string.Template( open(op.join(templates_dir, "absolute_threshold.txt")).read()) lower_threshold = chart["chart_properties"].get( "lower_threshold", np.nan) upper_threshold = chart["chart_properties"].get( "upper_threshold", np.nan) Type = chart["chart_properties"].get("Type", '') if not column_name in df.columns: logger.critical("FATAL: no {0} column found in {1}".format( column_name, table)) sys.exit(1) df_chart = absolute_threshold(df, column_name, lower_threshold=lower_threshold, upper_threshold=upper_threshold, Type=Type, per_sample=per_sample) #df_chart.to_clipboard(sep=',') logger.info( "For {0}: {1} data points will be written to html".format( chart_id, len(df_chart))) elif chart['chart_type'] == 'time_series_with_box_whisker_plot': t = '{0} Monthly Box-and-Whisker Plot'.format(y) y = '{0}'.format(y) chart_title = chart["chart_properties"].get('chart_title', t) y_label = chart["chart_properties"].get('y_label', y) Type = chart["chart_properties"].get("Type", '') lower_threshold = chart["chart_properties"].get( "lower_threshold", np.nan) upper_threshold = chart["chart_properties"].get( "upper_threshold", np.nan) js_tmpl = string.Template( open(op.join(templates_dir, "box_whisker_plot.txt")).read()) if not column_name in df.columns: logger.critical("FATAL: no {0} column found in {1}".format( column_name, table)) sys.exit(1) if Type != '': df_chart = box_whisker_plot(df, column_name, Type=Type, lower_threshold=lower_threshold, upper_threshold=upper_threshold) else: df_chart = box_whisker_plot(df, column_name, lower_threshold=lower_threshold, upper_threshold=upper_threshold) logger.info( "For {0}: {1} data points will be written to html".format( chart_id, len(df_chart))) elif chart['chart_type'] == 'time_series_with_bar_line_plot': if categories == '': logger.critical( "FATAL: no categories defined in JSON for time_series_with_bar_line_plot" ) sys.exit(1) t = 'Monthly bar and line plot for {0} ({1})'.format(y, categories) y = 
'Monthly count' chart_title = chart["chart_properties"].get('chart_title', t) y_label = chart["chart_properties"].get('y_label', y) y2 = 'Monthly total' ylabel2 = chart["chart_properties"].get('y_label2', y2) js_tmpl = string.Template( open(op.join(templates_dir, "bar_line_plot.txt")).read()) if not column_name in df.columns: logger.critical("FATAL: no {0} column found in {1}".format( column_name, table)) sys.exit(1) df_chart = bar_line_plot(df, column_name) categories = df_chart.columns category_str = '' x = 0 while x < len(categories) - 1: category_str = category_str + '"{0}", '.format(categories[x]) x = x + 1 if x == len(categories) - 1: category_str = category_str + ' "{0}"'.format(categories[x]) df_chart['Data'] = df_chart.values.tolist() df_chart = pd.DataFrame(df_chart['Data']) logger.info( "For {0}: {1} data points will be written to html".format( chart_id, len(df_chart))) elif chart['chart_type'] == 'time_series_with_stacked_bar_plot': if categories == '': logger.critical( "FATAL: no categories defined in JSON for time_series_with_stacked_bar_plot" ) sys.exit(1) t = 'Monthly stacked bar plot for {0} ({1})'.format(y, categories) y = 'Monthly count' chart_title = chart["chart_properties"].get('chart_title', t) y_label = chart["chart_properties"].get('y_label', y) js_tmpl = string.Template( open(op.join(templates_dir, "stacked_bar_plot.txt")).read()) if not column_name in df.columns: logger.critical("FATAL: no {0} column found in {1}".format( column_name, table)) sys.exit(1) df_chart, df_chart_cumsum = stacked_bar_plot(df, column_name) categories = df_chart_cumsum.columns category_str = '' x = 0 while x < len(categories) - 1: category_str = category_str + '"{0}", '.format(categories[x]) x = x + 1 if x == len(categories) - 1: category_str = category_str + ' "{0}"'.format(categories[x]) df_chart['Data'] = df_chart.values.tolist() df_chart = pd.DataFrame(df_chart['Data']) df_chart_cumsum['Data'] = df_chart_cumsum.values.tolist() df_chart_cumsum = pd.DataFrame(df_chart_cumsum['Data']) logger.info( "For {0}: {1} data points will be written to html".format( chart_id, len(df_chart))) else: logger.critical( "For {0}: No suitable chart_type is defined check JSON".format( chart_id)) sys.exit(1) # keep data in dir download_title = process_title(chart_title) vals = create_dir(vals, df_chart, chart_id, chart_title, y_label, startdate, enddate, categories=category_str, ylabel2=ylabel2, df_chart_cumsum=df_chart_cumsum, per_sample=per_sample, column_name=download_title) # html template html_tmpl = string.Template( open(op.join(templates_dir, "html.txt")).read()) vals[chart_id + '_html'] = html_tmpl.substitute( **vals[chart_id + 'htmltemplates']) logger.info( "For {0}: Finished creating html template".format(chart_id)) # calendar template #calendar_tmpl = string.Template(open(op.join(templates_dir, "calendar.txt")).read()) #vals[chart_id + '_calendar'] = calendar_tmpl.substitute(**vals[chart_id + 'htmltemplates']) #logger.info("For {0}: Finished creating calendar template".format(chart_id)) # js template vals[chart_id + '_js'] = js_tmpl.substitute(**vals[chart_id + 'htmltemplates']) logger.info("For {0}: Finished creating js template".format(chart_id)) # side bar with header sidebar_tmpl = string.Template( open(op.join(templates_dir, "sidebar.txt")).read()) vals[chart_id + '_sidebar'] = sidebar_tmpl.substitute( **vals[chart_id + 'htmltemplates']) utils.print_progress(i + 1, len(config) + 2, prefix='Running ChronQC', decimals=1, bar_length=50) vals['pdfname'] = "%s.pdf" % prefix # substitute vals in 
main template tmpl = string.Template(tmpl).substitute(**vals) with io.open(out_file, "w", encoding='utf8') as fh: fh.write(tmpl) logger.info("Finished creating {0} chronqc plots: {1}".format( i - 1, out_file)) print("Finished creating {0} chronqc plots: {1}".format(i - 1, out_file))
data_dir = sys.argv[1]
if (not os.path.exists(data_dir) or not os.path.isdir(data_dir)):
    print("path '" + data_dir + "' is not a valid directory!")
    sys.exit(1)

# get data file list
data_fnames = glob.glob(os.path.join(data_dir, "*B.csv"))
if (len(data_fnames) == 0):
    print("no csv files found in", data_dir + ". Exiting now...")
    sys.exit(0)

# combine
header_items = []
data_combined = None
for i, data_fname in enumerate(data_fnames):
    print("processing", data_fname)
    header_items.append(path_leaf(data_fname))
    data = np.loadtxt(data_fname, delimiter=',', dtype=np.uint64)
    if (data_combined is None):
        # lazy-init when we know a reasonable size (assumes all files have the same row count)
        data_combined = np.empty([data.shape[0], len(data_fnames)], dtype=np.uint64)
    data_combined[:data.shape[0], i] = data

# gen output file
print("saving output to combined.csv")
np.savetxt(os.path.join(data_dir, "combined.csv"), data_combined,
           delimiter=',', header=','.join(header_items))
def load_nwb_from_data(dir_path):
    # Get all files and directories present in the path
    files = utils.get_subfiles(dir_path)
    dirs = utils.get_subdirs(dir_path)
    files = files + dirs
    # Open the YAML file with keywords, extensions and keywords to exclude; if it exists, dump all its data in a dict
    if os.path.isfile(dir_path + "\\" + [subfile for subfile in files if "default" in subfile][0]):
        with open(dir_path + "\\" + [subfile for subfile in files if "default" in subfile][0], 'r') as stream:
            data = yaml.safe_load(stream)
        # Remove the file from the list of files and directories so it isn't found twice
        files.remove([subfile for subfile in files if "default" in subfile][0])
    else:
        data = None
    home_data = dict()
    # Look for another YAML file containing the keywords, extensions and keywords to exclude
    for file in files:
        if "yaml" not in file:
            continue
        # "p" is a placeholder until we know every yaml file name
        if "subject" not in file and "ophys" not in file and "data" not in file and "p" not in file:
            with open(dir_path + "\\" + file, 'r') as stream:
                home_data = yaml.safe_load(stream)
    # If 2 files are provided, the one given by the user will take the priority
    if data is not None:
        difference = set(list(data.keys())) - set(list(home_data.keys()))
        for i in list(difference):
            home_data[i] = data[i]
    # First we create the nwb file because it will be needed for everything
    converttonwb = home_data.pop("ConvertToNWB")
    filtered_list = []
    for i in converttonwb:
        # If no extension is provided it means we are looking for a directory, so we filter the list of files and
        # directories to only contain directories
        if not converttonwb[i].get("extension"):
            filtered_list = [file for file in files if "." not in file]
        # Filter the file list depending on the extension provided in the YAML file
        else:
            for extension in converttonwb[i].get("extension"):
                filtered_list.extend([file for file in files if extension in file])
        # print("Filter result : " + str(filtered_list) + " by extension : " + str(converttonwb[i].get("extension")))
        # Conditional loop to remove all files or directories not containing the keywords
        # or containing excluded keywords
        counter = 0
        while counter < len(filtered_list):
            delete = False
            for keyword in converttonwb[i].get("keyword"):
                if keyword not in filtered_list[counter]:
                    # print("Keyword not found in : " + str(filtered_list))
                    del filtered_list[counter]
                    # print("New list : " + str(filtered_list))
                    delete = True
            if not delete:
                for keyword_to_exclude in converttonwb[i].get("keyword_to_exclude"):
                    if keyword_to_exclude in filtered_list[counter]:
                        # print("Excluded keyword found in : " + str(filtered_list))
                        del filtered_list[counter]
                        # print("New list : " + str(filtered_list))
                        delete = True
            if not delete:
                counter += 1
        print("Files to pass for " + i + ": " + str(filtered_list))
    # If files were found respecting every element, add the whole path to pass them as arguments
    yaml_path = os.path.join(dir_path, filtered_list[0])
    nwb_file = test_cicada_test_paul.create_nwb_file(yaml_path)
    order_list = []
    if home_data.get("order"):
        order_list = home_data.pop("order")
    while order_list:
        next_class = order_list.pop(0)
        # Get classname then instantiate it
        classname = getattr(test_cicada_test_paul, next_class)
        converter = classname(nwb_file)
        # Initialize a dict to contain the arguments to call convert
        arg_dict = {}
        print("Class name : " + str(next_class))
        # Loop through all arguments of the convert of the corresponding class
        for j in home_data[next_class]:
            filtered_list = []
            # If a value is found it means the argument is not a file but a string/int/etc
            if home_data[next_class][j].get("value") and not home_data[next_class][j].get("extension") and \
                    (not home_data[next_class][j].get("keyword") or not home_data[next_class][j].get("keyword_to_exclude")):
                print(home_data[next_class][j].get("value")[0])
                arg_dict[j] = home_data[next_class][j].get("value")[0]
            else:
                # If no extension is provided it means we are looking for a directory, so we filter the list of
                # files and directories to only contain directories
                if not home_data[next_class][j].get("extension"):
                    filtered_list = [file for file in files if "." not in file]
                # Filter the file list depending on the extension provided in the YAML file
                else:
                    for extension in home_data[next_class][j].get("extension"):
                        filtered_list.extend([file for file in files if extension in file])
                # print("Filter result : " + str(filtered_list) + " by extension : " +
                #       str(home_data[i][j].get("extension")))
                # Conditional loop to remove all files or directories not containing the keywords
                # or containing excluded keywords
                counter = 0
                while counter < len(filtered_list):
                    delete = False
                    for keyword in home_data[next_class][j].get("keyword"):
                        if keyword not in filtered_list[counter]:
                            # print("Keyword not found in : " + str(filtered_list))
                            del filtered_list[counter]
                            # print("New list : " + str(filtered_list))
                            delete = True
                    if not delete:
                        for keyword_to_exclude in home_data[next_class][j].get("keyword_to_exclude"):
                            if keyword_to_exclude in filtered_list[counter]:
                                # print("Excluded keyword found in : " + str(filtered_list))
                                del filtered_list[counter]
                                # print("New list : " + str(filtered_list))
                                delete = True
                    if not delete:
                        counter += 1
                print("Files to pass for " + j + ": " + str(filtered_list))
                # If files were found respecting every element, add the whole path to pass them as arguments
                if filtered_list:
                    arg_dict[j] = os.path.join(dir_path, filtered_list[0])
                    if "mat" in home_data[next_class][j].get("extension") and home_data[next_class][j].get("value"):
                        arg_dict[j] = [arg_dict[j]] + list(home_data[next_class][j].get("value"))
                # If no file found, put the argument at None
                else:
                    arg_dict[j] = None
        # print("Arguments to pass : "******"Class name : " + str(i))
    # Loop over the remaining classes listed in the YAML file and build their arguments the same way
    for i in home_data:
        # Loop through all arguments of the convert of the corresponding class
        for j in home_data[i]:
            filtered_list = []
            # If a value is found it means the argument is not a file but a string/int/etc
            if home_data[i][j].get("value") and not home_data[i][j].get("extension") and \
                    (not home_data[i][j].get("keyword") or not home_data[i][j].get("keyword_to_exclude")):
                print(home_data[i][j].get("value")[0])
                arg_dict[j] = home_data[i][j].get("value")[0]
            else:
                # If no extension is provided it means we are looking for a directory, so we filter the list of
                # files and directories to only contain directories
                if not home_data[i][j].get("extension"):
                    filtered_list = [file for file in files if "." not in file]
                # Filter the file list depending on the extension provided in the YAML file
                else:
                    for extension in home_data[i][j].get("extension"):
                        filtered_list.extend([file for file in files if extension in file])
                # print("Filter result : " + str(filtered_list) + " by extension : " +
                #       str(home_data[i][j].get("extension")))
                # Conditional loop to remove all files or directories not containing the keywords
                # or containing excluded keywords
                counter = 0
                while counter < len(filtered_list):
                    delete = False
                    for keyword in home_data[i][j].get("keyword"):
                        if keyword not in filtered_list[counter]:
                            # print("Keyword not found in : " + str(filtered_list))
                            del filtered_list[counter]
                            # print("New list : " + str(filtered_list))
                            delete = True
                    if not delete:
                        for keyword_to_exclude in home_data[i][j].get("keyword_to_exclude"):
                            if keyword_to_exclude in filtered_list[counter]:
                                # print("Excluded keyword found in : " + str(filtered_list))
                                del filtered_list[counter]
                                # print("New list : " + str(filtered_list))
                                delete = True
                    if not delete:
                        counter += 1
                print("Files to pass for " + j + ": " + str(filtered_list))
                # If files were found respecting every element, add the whole path to pass them as arguments
                if filtered_list:
                    arg_dict[j] = os.path.join(dir_path, filtered_list[0])
                    if "mat" in home_data[i][j].get("extension") and home_data[i][j].get("value"):
                        arg_dict[j] = [arg_dict[j]] + list(home_data[i][j].get("value"))
                # If no file found, put the argument at None
                else:
                    arg_dict[j] = None
        # print("Arguments to pass : "******".nwb"
    with test_cicada_test_paul.NWBHDF5IO(os.path.join(dir_path, nwb_name), 'w') as io:
        io.write(nwb_file)
    print("NWB file created at : " + str(os.path.join(dir_path, nwb_name)))
def get_file_name(url, gdrive):
    if gdrive:
        gdrive_id = utils.get_gdrive_id(url)
        file_name = gdrive_id + ".pdf"
        return file_name
    return utils.path_leaf(url)
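# Example (illustrative URL, not taken from the project): for a plain link the name
# comes from the last path component, e.g.
#   get_file_name("https://example.com/papers/report.pdf", gdrive=False)  ->  "report.pdf"
# With gdrive=True the name is instead built from whatever id utils.get_gdrive_id
# extracts from the Drive link.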
def main(args): """ (args) -> SQLitedb takes number of arguments and produces ChronQC SQLite database """ if args.mode == 'update' and not args.db: print("can't update database {} without a -db argument".format(args.mode)) elif args.mode == 'update' and args.prefix: print("can't use prefix in update mode so ignoring it") elif args.mode == 'create' and not args.output: print("provide output directory --output argument for creating db".format(args.mode)) # output dir and file # Get output directory 1. user defined 2. db dir 3. multiqc_stats dir # Get output file name prefix and out file name multiqc_stats = op.abspath(args.mstats) if args.db is not None: output_directory, output_prefix = utils.path_leaf(args.db) out_file = op.abspath(args.db) else: output_directory = op.abspath(args.output) output_directory = op.join(output_directory, "chronqc_db") if op.exists(output_directory) and not args.force: print("FATAL: Output directory {0} already exists, use -f to overwrite".format(output_directory)) sys.exit(1) elif op.exists(output_directory) and args.force: pass if not op.exists(output_directory): os.makedirs(output_directory) output_prefix = '{0}.{1}'.format(args.prefix, 'chronqc.stats.sqlite') if args.prefix is not None else '{0}'.format('chronqc.stats.sqlite') out_file = op.join(output_directory, output_prefix) output_cols = '{0}.{1}'.format(args.prefix, 'chronqc.stats.cols.txt') if args.prefix is not None else '{0}'.format('chronqc.stats.cols.txt') out_cols = op.join(output_directory, output_cols) output_json = '{0}.{1}'.format(args.prefix, 'chronqc.default.json') if args.prefix is not None else '{0}'.format('chronqc.default.json') out_json = op.join(output_directory, output_json) # create logger log_file = op.join(output_directory, 'chronqc_stats.log') logging.basicConfig(filename=log_file, format='%(asctime)s - %(name)s - %(levelname)s \ - %(message)s') logger = logging.getLogger('chronqc') logger.setLevel(logging.DEBUG) # create console handler and set level to debug ch = logging.StreamHandler() ch.setLevel(logging.CRITICAL) # create formatter formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s -' '%(message)s') # add formatter to ch ch.setFormatter(formatter) # add ch to logger logger.addHandler(ch) logger.info("Started ChronQC {} SQLite db generation".format(out_file)) # Get parameters table_name = args.db_table table_name = table_name if table_name is not None else 'chronqc_stats_data' panel = args.panel if args.run_dir_level is not None: run_dir_level = int(args.run_dir_level) logger.info("Got required parameters for generating ChronQC SQLite db") utils.print_progress(1, 4, prefix='Running ChronQC', decimals=1, bar_length=50) # Get run id and date info if args.run_date_info is not None: run_date_info = op.abspath(args.run_date_info) df_run = pd.read_csv(run_date_info, comment='#', chunksize=1000, low_memory=False, iterator=True) df_run = pd.concat(list(df_run), ignore_index=True) logger.info("Generated run and date information from {0}".format(run_date_info)) else: multiqc_sources = op.abspath(args.multiqc_sources) df_run = pd.read_csv(multiqc_sources, sep='\t', comment='#', chunksize=1000, low_memory=False, iterator=True) df_run = pd.concat(list(df_run), ignore_index=True) df_run.rename(columns={'Sample Name': 'Sample'}, inplace=True) df_run_m = df_run.loc[df_run.Module.str.contains(args.module)] # stick to defined module (FASTQC) df_run = df_run_m.copy() if len(df_run_m) > 0 else df_run.copy() # Get date and run df_run['Source_path'] = 
df_run['Source'].apply(op.abspath) df_run['Date'] = df_run['Source_path'].apply(creation_date) multiqc_sources_time = time.localtime(op.getmtime(multiqc_sources)) multiqc_sources_time = time.strftime("%m/%d/%Y", multiqc_sources_time) df_run['Date'].fillna(multiqc_sources_time, inplace=True) df_run['Run'] = df_run.apply(lambda row: updir(row['Source_path'], run_dir_level), axis=1) logger.info("Generated run and date information from {0}".format(multiqc_sources)) df_run['Date'] = pd.to_datetime(df_run.Date, dayfirst=True) df_run.sort_values(by=['Date'], inplace=True) df_run.drop_duplicates(subset=['Sample'], inplace=True) if len(df_run) == 0: logger.critical("FATAL: For run and date information no records found") sys.exit(1) utils.print_progress(2, 4, prefix='Running ChronQC', decimals=1, bar_length=50) # Read multiqc_stats df = pd.read_csv(multiqc_stats, sep='\t', comment='#', chunksize=1000, low_memory=False, iterator=True) df = pd.concat(list(df), ignore_index=True) logger.info("Got {0} records from {1} for ChronQC SQLite db generation".format(len(df), multiqc_stats)) if len(df) == 0: logger.critical("FATAL: No records found in {0}".format(multiqc_stats)) sys.exit(1) utils.print_progress(3, 4, prefix='Running ChronQC', decimals=1, bar_length=50) # Read config and get default parameters #sdir = op.dirname(op.abspath('__file__')) sdir = op.abspath(op.join(op.dirname(__file__), 'config')) config_file = op.join(sdir, 'chronqc.conf') Config.read(config_file) # [ignore_columns] ignore_columns = Config.get('ignore_columns', 'columns').split(',') ignore_columns = [s.strip() for s in ignore_columns] # [time_series_with_box_whisker_plot] box_whisker_plot = Config.get('time_series_with_box_whisker_plot', 'columns').split(',') box_whisker_plot = [s.strip() for s in box_whisker_plot] # [time_series_with_mean_and_stdev] mean_and_stdev = Config.get('time_series_with_mean_and_stdev', 'columns').split(',') mean_and_stdev = [s.strip() for s in mean_and_stdev] # [time_series_with_absolute_threshold] absolute_threshold_c = Config.get('time_series_with_absolute_threshold', 'columns').split(',') absolute_threshold_c = [s.strip() for s in absolute_threshold_c] # absolute_threshold = Config.get('time_series_with_absolute_threshold', 'threshods').split(',') # absolute_threshold = [int(s.strip()) for s in absolute_threshold] # [time_series_with_percentage_of_samples_above_threshold] percentage_of_samples_above_threshold_c = Config.get('time_series_with_percentage_of_samples_above_threshold', 'columns').split(',') percentage_of_samples_above_threshold_c = [s.strip() for s in percentage_of_samples_above_threshold_c] # percentage_of_samples_above_threshold = Config.get('time_series_with_percentage_of_samples_above_threshold', 'threshods').split(',') # percentage_of_samples_above_threshold = [int(s.strip()) for s in percentage_of_samples_above_threshold] # [time_series_with_percentage_category] percentage_category_c = Config.get('time_series_with_percentage_category', 'columns').split(',') percentage_category_c = [s.strip() for s in percentage_category_c] # percentage_category = Config.get('time_series_with_percentage_category', 'categories').split(',') # percentage_category = [s.strip() for s in percentage_category] logger.info("Finished reading parameters from config file for generating \ chronqc db and json") # remove unwanted columns cols = [col for col in list(df.columns)] cols = ['Sample'] + sorted(cols[1:]) df = pd.DataFrame(df, columns=cols) # process df for adding in to chronqc db # Add panel df['Panel'] = 
panel # Add run and date information df = pd.merge(df_run, df, left_on='Sample', right_on='Sample', how='inner') if len(df) == 0: logger.critical("FATAL: Run ID's do not match the sample information in {0}".format(multiqc_stats)) sys.exit(1) df['Date'] = pd.to_datetime(df.Date, dayfirst=True) # remove blank spaces in column names df.columns = [x.strip().replace(' ', '_') for x in df.columns] logger.info("Kept {0} records after merging run, date and stats for ChronQC SQLite db".format(len(df))) # convert boolean types This method will not work for object type column # booleandf = df.select_dtypes(include=[bool]) # booleanDictionary = {True: 'TRUE', False: 'FALSE'} # for column in booleandf: # df[column] = df[column].map(booleanDictionary) df.replace(to_replace=True, value='TRUE', inplace=True) df.replace(to_replace=False, value='FALSE', inplace=True) # write db cnx = sqlite3.connect(out_file) if args.mode == 'create': df.to_sql(table_name, cnx, index=False, if_exists='replace', chunksize = 1000) out_cols = open(out_cols, 'w') for item in list(df.columns): out_cols.write("%s\n" % item) out_cols.close() # create default JSON file # only numeric columns df_num = df._get_numeric_data() num_cols = list(df_num) ############################################################# default_json = [] # absolute_threshold absolute_threshold_c = [c for c in absolute_threshold_c if c not in ignore_columns] absolute_threshold_c = [c for c in absolute_threshold_c if c in num_cols] abst = '{{"table_name": "{0}", "chart_type": "time_series_with_absolute_threshold", "chart_properties": {{"y_value": "{1}", "lower_threshold": 30}}}}' for col in absolute_threshold_c: default_json.append(json.loads(abst.format(table_name, col))) # percentage_of_samples_above_threshold percentage_of_samples_above_threshold_c = [c for c in percentage_of_samples_above_threshold_c if c not in ignore_columns] percentage_of_samples_above_threshold_c = [c for c in percentage_of_samples_above_threshold_c if c in num_cols] pctth = '{{"table_name": "{0}", "chart_type": "time_series_with_percentage_of_samples_above_threshold", "chart_properties": {{"y_value": "{1}", "threshold": 30}}}}' for col in percentage_of_samples_above_threshold_c: default_json.append(json.loads(pctth.format(table_name, col))) # percentage_category percentage_category_c = [c for c in percentage_category_c if c not in ignore_columns] percentage_category_c = [c for c in percentage_category_c if c in num_cols] pctcat = '{{"table_name": "{0}", "chart_type": "time_series_with_percentage_category", "chart_properties": {{"y_value": "{1}", "category": "TRUE"}}}}' for col in percentage_category_c: default_json.append(json.loads(pctcat.format(table_name, col))) # mean_and_stdev mean_and_stdev = [c for c in mean_and_stdev if c not in ignore_columns] mean_and_stdev = [c for c in mean_and_stdev if c in num_cols] if 'QualiMap_percentage_aligned' and 'Bamtools_mapped_reads_pct' in mean_and_stdev: mean_and_stdev.remove('Bamtools_mapped_reads_pct') if 'FastQC_percent_gc' and 'QualiMap_avg_gc' in mean_and_stdev: mean_and_stdev.remove('QualiMap_avg_gc') if 'QualiMap_mapped_reads' and 'Samtools_Flagstat_mapped_passed' in mean_and_stdev: mean_and_stdev.remove('Samtools_Flagstat_mapped_passed') if 'FastQC_total_sequences' and 'QualiMap_total_reads' in mean_and_stdev: mean_and_stdev.remove('QualiMap_total_reads') mstd = '{{"table_name": "{0}", "chart_type": "time_series_with_mean_and_stdev", "chart_properties": {{"y_value": "{1}"}}}}' for col in mean_and_stdev: 
default_json.append(json.loads(mstd.format(table_name, col))) # box_whisker_plot box_whisker_plot = [c for c in box_whisker_plot if c not in ignore_columns] box_whisker_plot = [c for c in box_whisker_plot if c in num_cols] bwp = '{{"table_name": "{0}", "chart_type": "time_series_with_box_whisker_plot", "chart_properties": {{"y_value": "{1}"}}}}' for col in box_whisker_plot: default_json.append(json.loads(bwp.format(table_name, col))) # remaining cols num_cols = [c for c in num_cols if c not in ignore_columns] num_cols = [c for c in num_cols if c not in box_whisker_plot] num_cols = [c for c in num_cols if c not in mean_and_stdev] num_cols = [c for c in num_cols if c not in absolute_threshold_c] num_cols = [c for c in num_cols if c not in percentage_of_samples_above_threshold_c] num_cols = [c for c in num_cols if c not in percentage_category_c] if len(num_cols) > 0: for col in num_cols: default_json.append(json.loads(mstd.format(table_name, col))) with open(out_json, 'w') as out_json_file: json.dump(default_json, out_json_file, sort_keys = False, indent = 4, ensure_ascii = False) logger.info("Created ChronQC db: {0} with {1} records".format(out_file, len(df))) logger.info("Created ChronQC default JSON file: {0}. Customize the JSON as needed before generating ChronQC plots.".format(out_json)) elif args.mode == 'update': df.to_sql(table_name, cnx, index=False, if_exists='append', chunksize = 1000) logger.info("Updated ChronQC db: {0} with {1} records".format(out_file, len(df))) cnx.close() utils.print_progress(4, 4, prefix='Running ChronQC', decimals=1, bar_length=50) if args.mode == 'create': print("Created ChronQC db: {0} with {1} records".format(out_file, len(df))) print("Created ChronQC default JSON file: {0}. Customize the JSON as needed before generating ChronQC plots.".format(out_json)) elif args.mode == 'update': print("Updated ChronQC db: {0} with {1} records".format(out_file, len(df)))
def plot_and_save(data, title, output_dir, percentile):
    # histogram of tick counts, clipped at the given percentile
    # (signature inferred from the calls in __main__ below)
    plt.hist(data, range=(0, np.percentile(data, percentile)))
    plt.title(title)
    plt.ylabel('Count')
    plt.xlabel('Ticks')
    plt.xticks(rotation=45)
    # save out
    plt.savefig(os.path.join(output_dir, title), dpi=300, bbox_inches='tight')
    plt.close()


if __name__ == "__main__":
    # get data dir path from argv
    if (len(sys.argv) != 2):
        sys.exit(1)
    data_dir = sys.argv[1]
    if (not os.path.exists(data_dir) or not os.path.isdir(data_dir)):
        print("path '" + data_dir + "' is not a valid directory!")
        sys.exit(1)
    path = os.path.join(data_dir, "*B.csv")
    # Spread out the values across the histogram by cutting at the 99.5th percentile (removing high outliers)
    for fname in glob.glob(path):
        data = np.loadtxt(fname, delimiter=",", dtype=np.uint64)
        buffer_name = path_leaf(fname)
        plot_and_save(data, buffer_name, IMAGES_DIR, 100)
        plot_and_save(data, buffer_name, IMAGES_ADJUSTED_DIR, 99.5)
def main(args):
    print('Feature extractor training.')
    print('CONFIGURATION:\t{}'.format(args.config))
    with open(args.config) as json_config_file:
        config = utils.AttrDict(json.load(json_config_file))

    # Set up output directory
    experiment_name = generate_experiment_name(config)
    model_dir = os.path.join(os.path.expanduser(config.output.output_dir), experiment_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    print('Model saved at {}'.format(model_dir))
    config_filename = utils.path_leaf(args.config)
    copyfile(args.config, os.path.join(model_dir, config_filename))

    # CUDA for PyTorch
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    # device = torch.device("cpu")

    # Get dataloaders
    train_loader = dataloaders.get_traindataloaders(config.train_dataset, config)
    evaluators_list = dataloaders.get_evaluators(config.evaluation_datasets, config)

    # Set up training model
    print('Building training model')
    if config.model.checkpoint:
        checkpoint_path = config.model.checkpoint_path
    else:
        checkpoint_path = None
    model = models.load_model(config.model.model_arch,
                              device,
                              checkpoint_path=checkpoint_path,
                              embedding_size=config.model.embedding_size,
                              imgnet_pretrained=config.model.pretrained_imagenet)

    optimizer = optim.SGD(model.parameters(),
                          lr=config.hyperparameters.learning_rate,
                          momentum=0.9,
                          nesterov=True,
                          weight_decay=2e-4)
    # scheduler = lr_scheduler.StepLR(optimizer, 5, gamma=0.1)
    # scheduler = lr_scheduler.ExponentialLR(optimizer, config.hyperparameters.learning_rate_decay_factor)
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer,
                                               T_max=config.hyperparameters.n_epochs,
                                               eta_min=1e-6)

    plotter = utils.VisdomPlotter(config.visdom.server, env_name=experiment_name, port=config.visdom.port)

    miner = miners.FunctionSemihardTripletSelector(config.hyperparameters.margin, plotter)

    loss = nn.TripletMarginLoss(config.hyperparameters.margin, swap=config.hyperparameters.triplet_swap)

    my_trainer = trainer.Triplet_Trainer(model,
                                         miner,
                                         loss,
                                         optimizer,
                                         scheduler,
                                         device,
                                         plotter,
                                         config.hyperparameters.margin,
                                         config.model.embedding_size,
                                         evaluation.pair_evaluate,
                                         batch_size=config.hyperparameters.batch_size)

    # Loop over epochs
    epoch = 0
    print('Training Launched.')
    while epoch < config.hyperparameters.n_epochs:

        # Validation
        for evaluator in evaluators_list:
            print('\nEvaluation on {}'.format(evaluator.test_name))
            evaluator.evaluate(model, device, plotter=plotter, epoch=epoch)

        # Training
        print('\nTrain Epoch {}'.format(epoch))
        my_trainer.Train_Epoch(train_loader, epoch)

        # Save model
        if not (epoch + 1) % config.output.save_interval:
            model_file_path = os.path.join(model_dir, 'model_{}.pth'.format(epoch))
            print('\nSave model at {}'.format(model_file_path))
            torch.save({'epoch': epoch,
                        'model_state_dict': utils.state_dict_to_cpu(model.state_dict()),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'scheduler_state_dict': scheduler.state_dict(),
                        'embedding_size': config.model.embedding_size,
                        }, model_file_path)

        epoch += 1

    # Final save.
    model_file_path = os.path.join(model_dir, 'model_{}.pth'.format(epoch))
    print('\nSave model at {}'.format(model_file_path))
    torch.save({'epoch': epoch,
                'model_state_dict': utils.state_dict_to_cpu(model.state_dict()),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'embedding_size': config.model.embedding_size,
                }, model_file_path)
    print('Finish.')
    return model
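# NOTE: the training script above reads its JSON config through `utils.AttrDict` so
# that nested keys can be read as attributes (config.model.embedding_size). A minimal
# sketch of such a wrapper is below; this is an assumption, the project's own class
# may differ.
class AttrDict(dict):
    """Dict whose keys are also readable as attributes, applied recursively."""

    def __getattr__(self, name):
        try:
            value = self[name]
        except KeyError:
            raise AttributeError(name)
        # wrap nested dicts so chained attribute access keeps working
        return AttrDict(value) if isinstance(value, dict) else value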