def main():
    print('These scripts will create configuration files to set up an '
          'Elasticsearch cluster in Kubernetes.')
    context = do_prompts()
    cluster_dir = os.path.join(clusters_dir, context['namespace'])
    do_logstash = '' if context['skip_logstash'] else '6'
    do_oauth_proxy = '' if context['skip_oauth'] else '7'
    for template in jinja_env.list_templates(
            filter_func=lambda x: re.match(
                rf'^[1-5{do_logstash}{do_oauth_proxy}]_.+\.yml$', x)):
        # String comparisons use ==, not the identity operator `is`.
        if context['namespace'] == 'default' and template.endswith('/namespace.yml'):
            continue
        if (context['data_node']['storage_class'] == 'standard'
                and template.endswith('-storage.yml')):
            continue
        output = jinja_env.get_template(template).render(context)
        out_path = os.path.join(cluster_dir, template)
        ensure_dir(out_path)
        with open(out_path, 'w') as output_file:
            print(output, file=output_file)
    # The files are still in the clusters/namespace/logstash-ssl-keys directory;
    # this only removes the temporary copy in the template directory.
    try:
        shutil.rmtree(template_secrets_dir)
    except OSError:
        pass
    print('\nSuccessfully generated cluster files.')
    print(f'Configuration files have been saved to {cluster_dir}')
def batch_learn(image_dir, bottleneck_dir, model_out_dir, options):
    distortion_map = {
        '--random_scale 20': 'random_scale_20',
        '--random_scale 10': 'random_scale_10',
        '--random_crop 10': 'random_crop_10',
        '--random_brightness 10': 'random_brightness_10',
        '--random_crop 20': 'random_crop_20',
        '--random_brightness 20': 'random_brightness_20',
        '--random_scale 50': 'random_scale_50',
        '--random_crop 50': 'random_crop_50',
        '--random_brightness 50': 'random_brightness_50'
    }
    option_model = ' --image_dir {0} --bottleneck_dir {1}'.format(image_dir, bottleneck_dir)
    # dict.iteritems() is Python 2 only; items() works in both.
    for option_distort, distort_sub_dir in distortion_map.items():
        out_dir = '{0}/{1}'.format(model_out_dir, distort_sub_dir)
        util.ensure_dir(out_dir)
        #if not glob.glob(out_dir + '/model*'):
        cmd = 'python ./learn.py {0} {1} {2} --model_dir {3}'.format(
            options, option_model, option_distort, out_dir)
        print(cmd)
        subproc = subprocess.Popen(cmd, env=os.environ, shell=True)
        subproc.communicate()
    util_plot.plot_metrics(model_out_dir)
def saveModules(where):
    with modulesLock:
        for i in ActiveModules:
            #Iterate over all of the resources in a module and save them as json files
            #under the URL urld module name for the filename.
            for resource in ActiveModules[i]:
                #Make sure there is a directory at where/module/
                util.ensure_dir(os.path.join(where, url(i), url(resource)))
                #Open a file at /where/module/resource
                with open(os.path.join(where, url(i), url(resource)), "w") as f:
                    #Make a json file there and prettyprint it
                    json.dump(ActiveModules[i][resource], f, sort_keys=True,
                              indent=4, separators=(',', ': '))

            #Now we iterate over the existing resource files in the filesystem and delete those that correspond to
            #modules that have been deleted in the ActiveModules workspace thing.
            for i in util.get_immediate_subdirectories(os.path.join(where, url(i))):
                if unurl(i) not in ActiveModules:
                    os.remove(os.path.join(where, url(i), i))

        for i in util.get_immediate_subdirectories(where):
            #Look in the modules directory, and if the module folder is not in ActiveModules
            #We assume the user deleted the module so we should delete the save file for it.
            #Note that we URL url file names for the module filenames and foldernames.
            if unurl(i) not in ActiveModules:
                shutil.rmtree(os.path.join(where, i))

        with open(os.path.join(where, '__COMPLETE__'), 'w') as f:
            f.write("By this string of contents quite arbitrary, I hereby mark this dump as consistant!!!")
def main(args):
    filenames = get_filenames_from_dir(args.input_dir)
    if args.transformed_dir:
        ensure_dir(args.transformed_dir)
    if args.recognized_dir:
        ensure_dir(args.recognized_dir)
    for filename in filenames:
        print(f'Transforming file {filename}')
        src_img = Image.open(filename)
        img = image.transform_image(src_img, args.transform_k)
        if args.transformed_dir and not args.recognized_dir:
            img.save(change_filename_dir(filename, args.transformed_dir))
        if args.recognized_dir:
            print(f'Recognizing file {filename}')
            rect = image.recognize(img, rect_size=12, step_percent=0.475)
            if args.transformed_dir:
                image.util.draw_rectangle(img, rect, stroke=2, color=(0, 255, 0))
                img.save(change_filename_dir(filename, args.transformed_dir))
            image.util.draw_rectangle(src_img, rect, stroke=2, color=(0, 255, 0))
            src_img.save(change_filename_dir(filename, args.recognized_dir))
    print('Finished ' + ('recognition' if args.recognized_dir else 'transformation'))
def my_scorer(clf, X_val, y_true_val):
    log_name = extract_clf_name(clf)
    metric = s.SCORER_METRIC
    fold_log_dirp = s.SCORER_FOLD_LOG_DIRP
    util.ensure_dir(fold_log_dirp)
    fold_model_dirp = s.SCORER_FOLD_MODEL_DIRP
    util.ensure_dir(fold_model_dirp)

    # do all the work and return some of the metrics
    y_pred_val = clf.predict(X_val)
    results = get_metrics(y_true=y_true_val, y_pred=y_pred_val)

    model_save_fp = None
    if fold_model_dirp:
        model_save_fp = save_model(clf, fold_model_dirp, log_name=log_name)

    if fold_log_dirp:
        log_metrics_and_params(
            clf,
            results,
            fold_log_dirp,
            y_true=y_true_val,
            y_pred=y_pred_val,
            model_save_fp=model_save_fp,
            log_name=log_name,
        )

    return results[metric]
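my_scorer follows scikit-learn's callable scorer convention, estimator plus validation data in, a single float out, so it can be handed straight to cross-validation helpers via scoring=. A minimal usage sketch under that assumption; the LinearSVC, X, and y names here are illustrative placeholders, not part of the original code:

```python
# Hypothetical usage: any callable with signature (estimator, X, y) -> float
# works as a scikit-learn scorer, which is exactly what my_scorer provides.
from sklearn.model_selection import cross_val_score
from sklearn.svm import LinearSVC

clf = LinearSVC()  # placeholder estimator; any sklearn classifier would do
fold_scores = cross_val_score(clf, X, y, cv=5, scoring=my_scorer)  # X, y assumed loaded elsewhere
print(fold_scores.mean())
```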
def __init__(self, name, password, seed, testnet=False):
    dirname = "wallets"
    util.ensure_dir(dirname)
    self.name = name
    self.fname = dirname + "/" + name
    self.password = password
    self.seed = seed
    self.testnet = testnet
def gen_release_package(dir):
    infos = get_file_infos(dir)
    new_dir = dir + "\\..\\" + os.path.basename(dir) + "_filelist\\"
    if os.path.exists(new_dir):
        shutil.rmtree(new_dir)
    util.ensure_dir(new_dir)
    gen_list_file(infos, new_dir)
    copy_all_files(dir, new_dir, infos)
def saveAll():
    #This dumps the contents of the active modules in ram to a subfolder of
    #the moduledir named after the current unix time
    dn = os.path.join(directories.persistdir, str(time.time()))
    #Ensure dir does not make the last path component
    util.ensure_dir(os.path.join(dn, "dummy"))
    savefiles(dn)
    #We only want 1 backup(for now at least) so clean up old ones.
    util.deleteAllButHighestNumberedNDirectories(directories.persistdir, 2)
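util.ensure_dir itself never appears in these snippets, and its behavior evidently varies between projects: the comment above says it does not create the last path component (hence the "dummy" suffix), while other snippets pass a directory directly and some use its return value. A minimal sketch of the dirname-based variant this snippet assumes; treat it as illustrative, not as the canonical helper:

```python
import os

def ensure_dir(file_path):
    """Create the parent directory of file_path if it does not already exist.

    Only the dirname is created, which is why callers append a throwaway last
    component (os.path.join(dn, "dummy")) when they want the directory itself.
    Other projects in this collection use a variant that creates and returns
    the given directory directly, so this is one plausible implementation.
    """
    directory = os.path.dirname(file_path)
    if directory and not os.path.exists(directory):
        os.makedirs(directory)
    return file_path
```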
def main(args):
    ensure_dir(args.results_dir)
    wbx = whitebox(args)
    total_accu, defense_aacu, adv, gt, reform = wbx.eval_()
    _save_image(args.results_dir, adv, 'adv')
    _save_image(args.results_dir, gt, 'gt')
    _save_image(args.results_dir, reform, 'reform')
    print(total_accu)
    print(defense_aacu)
def prompt_for_logstash_certs(context, cert_dir):
    if check_cert_presence(cert_dir):
        print(f"Using keys and certs for Logstash found in {cert_dir}.")
        context['skip_logstash'] = False
    else:
        do_logstash = prompt("Would you like to set up Logstash (with SSL beats input)? (Y/n)",
                             "^[yYnN]?$")
        if do_logstash and do_logstash.lower() != 'y':
            context['skip_logstash'] = True
            return
        else:
            context['skip_logstash'] = False
        print("Provide the following information to generate self-signed certificates: ")
        ca_name = prompt("Certificate Authority Name", default='Logstash CA')
        # Character class fixed: '.-0' was an accidental range; the CN needs
        # letters, digits, dots and dashes.
        url = prompt("CN - Common Name for Logstash",
                     regex='^[A-Za-z0-9.-]+$',
                     default='logstash.my-domain.com')
        country = prompt("C - Country Code", regex='[A-Z]{1,4}', default='US')
        state = prompt("ST - State", regex='[A-Z]{1,4}', default='CA')
        loc = prompt("L - Location", regex='[A-Za-z 0-9-_.]+', default='San Francisco')
        org = prompt("O - Org", regex='[A-Za-z 0-9-_.]+', default='Acme')
        org_unit = prompt("OU - Org Unit", regex='[A-Za-z 0-9-_.]+', default='Computers')
        # ensure_dir() creates the parent directory, so append a dummy file name.
        ensure_dir(os.path.join(cert_dir, 'afile'))
        subprocess.run([
            os.path.join(dirname, 'templates', '6_logstash', 'ssl-gen.sh'),
            ca_name, url, country, state, loc, org, org_unit, cert_dir
        ], check=True)
        if not check_cert_presence(cert_dir):
            raise RuntimeError('certs failed to generate')
    try:
        shutil.rmtree(template_secrets_dir)
    except OSError:
        pass
    shutil.copytree(cert_dir, template_secrets_dir)
    context['logstash_beats_port'] = '8751'
def rasterize_perim(run_output, perim, year, name, raster=None):
    """! Convert a perimeter to a raster
    @param run_output Folder to save perimeter to
    @param perim Perimeter to convert to raster
    @param year Year to find reference raster for projection
    @param name Name of fire to use for file name
    @param raster Specific name of file name to output to
    @return Perimeter that was rasterized
    @return Path to raster output
    """
    prj = os.path.join(run_output, os.path.basename(perim).replace('.shp', '_NAD1983.shp'))
    ensure_dir(os.path.dirname(prj))
    ref_NAD83 = osr.SpatialReference()
    ref_NAD83.SetWellKnownGeogCS('NAD83')
    #~ try:
    Project(perim, prj, ref_NAD83)
    del ref_NAD83
    r = find_best_raster(Extent(prj).XCenter, year)
    prj_utm = os.path.join(
        run_output,
        os.path.basename(perim).replace('.shp', os.path.basename(r)[9:14] + '.shp'))
    Delete(prj_utm)
    zone = GetSpatialReference(r)
    Project(perim, prj_utm, zone)
    del zone
    cellsize = GetCellSize(r)
    size = 0.0
    dataSource = ogr.GetDriverByName('ESRI Shapefile').Open(prj_utm, gdal.GA_ReadOnly)
    layer = dataSource.GetLayer()
    for feature in layer:
        geom = feature.GetGeometryRef()
        area = geom.GetArea()
        size += area / (cellsize * cellsize)
        del geom
    del feature
    del layer
    del dataSource
    if size < 1:
        # this is less than one cell in area so don't use perimeter
        perim = None
        raster = None
    else:
        if not raster:
            raster = os.path.join(run_output, name + '.tif')
        Rasterize(prj_utm, raster, r)
    return perim, raster
def main():
    # load data
    X, y = dh.load_data(s.DATA_FP, n_features=s.NUM_FEATURES, memmapped=False)
    # # # TESTING
    # print("Warning TESTING")
    # X, y = X[0:1000, :], y[0:1000]
    # logging.warning("TESTING with {}".format(X.shape))

    util.ensure_dir(s.OPT_DIRP)

    X = dh.do_memmap(X)

    select_model(X, y)

    util.send_text_message("{}: Ended run and wrote all to {}".format(
        str(datetime.datetime.now()), s.OPT_DIRP))
def do_command(args):
    ensure_dir(args.output)
    for fname in os.listdir(args.input):
        if fname.endswith(".json"):
            with open(os.path.join(args.input, fname)) as f:
                doc = json.load(f)
            relations = doc['relations']
            for relation in relations:
                doc['relations'] = [relation]
                b1, e1 = relation['subject']['doc_char_begin'], relation['subject']['doc_char_end']
                b2, e2 = relation['object']['doc_char_begin'], relation['object']['doc_char_end']
                logger.info("Saving %s with %d relations", doc['doc_id'], len(doc['relations']))
                with open(os.path.join(args.output, "{}-{}-{}-{}-{}.json".format(
                        doc['doc_id'], b1, e1, b2, e2)), 'w') as f:
                    json.dump(doc, f)
def find_in_context_all(lemma: str) -> str:
    lemma_dir = CONTEXTS_DIR + lemma
    ensure_dir(lemma_dir)
    for i in range(5):
        rating_level = i + 1
        doc = read_tks_file(get_tokenized_file_name(rating_level))
        file_name = get_context_file_name(lemma, rating_level)
        if not os.path.exists(file_name):
            with open(file_name, "w", encoding=UTF_8) as context_file:
                for line_num, tokenized_review in enumerate(doc, start=1):
                    if count_lemma_in_tokens(lemma, tokenized_review) != 0:
                        print(f"{line_num}:\t{tokens_to_str(tokenized_review)}",
                              file=context_file)
    return lemma
def aws_download():
    logging.info('Downloading AWS Files')
    ensure_dir(paths['aws_path'])
    file_count = download_new_files()
    if file_count == 0:
        timeout = default_timeout
        for i in range(download_retries):
            logging.info('No new files found. Checking again in {} minutes.'.format(
                timeout / 60))
            sleep(timeout)
            timeout = timeout * 2
            file_count = download_new_files()
        if file_count == 0:
            logging.warning('No new files downloaded. Aborting process.')
            exit()
def build_libuv(arch, out):
    if platform.system() == "Darwin":
        build_libuv_mac()
    elif platform.system() == "Linux":
        build_libuv_linux(arch)
    elif platform.system() == "Windows":
        build_libuv_windows(arch)
    else:
        print("Unsupported platform: " + platform.system())
        sys.exit(1)

    # Copy the build library to the build directory for Mac and Linux where we
    # support building for multiple architectures.
    if platform.system() != "Windows":
        ensure_dir(os.path.dirname(out))
        shutil.copyfile(os.path.join(LIB_UV_DIR, "out", "Release", "libuv.a"), out)
def create_and_start(self):
    util.debug("Starting and restoring wallet {}".format(self))
    util.rm_file_if_exists(self.fname)
    util.ensure_dir("~/.electrum")  # TODO not sure why I have to do this, but it crashes otherwise

    # start daemon
    # TODO messy; assumes it takes 5 seconds; for some reason shell_blocking won't work
    util.shell_expect("electrum daemon start")
    util.sleep(5)
    util.shell_expect(self._args("electrum daemon start"))
    util.sleep(5)

    util.shell_blocking(
        self._with_password(self._args("electrum restore -o \"{}\"".format(self.seed))))
    util.shell_blocking(self._args("electrum daemon load_wallet"))

    util.debug("Started and restored wallet {}".format(self))
def do_job(self):
    dicoms_out_dir = self.export_property.output_dir + '/' + self.export_property.export_style
    ensure_dir(dicoms_out_dir)

    if self.export_property.export_style == "EcliseTPS-CTImage":
        self.image_style = 'template'

    if self.image_style == 'template':
        self.slice_dataset = self.slice_template
        # self.set_magical_phantom_info( self.slice_dataset)  #get the wrong files
        self.set_magical_phantom_descript_info(self.slice_dataset)
        self.set_magical_phantom_uids(self.slice_dataset)
        #the up two functions get the right files
    elif self.image_style == 'minimum':
        self.slice_dataset = self.dicom_minimum

    if len(self.image_sets) == 1:
        self.image_3d = self.image_sets[0]
        self.gen_dicom_files(dicoms_out_dir, self.image_3d,
                             self.dicom_image_set_info, self.slice_dataset)
        print "**********Dicom CT Exported Successfully*****************************\n"
        print self.image_style
        content = self.export_property.export_style + " Exported Successfully"
        message_box(message=content, title="Exported Successfully",
                    severity='information')
    elif len(self.image_sets) > 1:
        #Do multiple image sets export
        print "in the multiple image sets export"
        pass
def _package(self):
    ensure_dir(self.j('{release_dir}'))
    pkg_root = self.j('{prefix_dir}')

    # Create the package listing file.
    files = list(file_list(self.j('{prefix_dir}')))
    pkg_list_fn = self.j('{prefix_dir}', 'share', 'xyz', '{variant_name}')
    self.ensure_dir(os.path.dirname(pkg_list_fn))
    with open(pkg_list_fn, 'w') as pkg_list_f:
        pkg_list_f.write('{variant_name}\n'.format(**self.config))
        if not self.group_only:
            pkg_list_f.write("Source Version: {}\n".format(
                git_ver('{source_dir}'.format(**self.config))))
            pkg_list_f.write("XYZ Version: {}\n".format(git_ver('.')))
        pkg_list_f.write('\n')
        for fn in files:
            pkg_list_f.write('{} {}\n'.format(sha256_file(self.j('{prefix_dir}', fn)), fn))

    logger.info("Creating tar.gz %s/%s -> %s", os.getcwd(), pkg_root,
                self.config['release_file'])
    tar_gz('{release_file}'.format(**self.config), pkg_root)
def run(self):
    url = "http://localhost/"
    txt = self.download_filelist(url + "filelist.txt")
    lines = self.parse_filelist(txt)
    if not lines:
        raise Exception("parse error")

    downfiles = []
    downsize = 0
    for file, size, md5 in lines:
        local = os.getcwd() + '\\.app\\' + file
        util.ensure_dir(local)
        remote = url + md5 + ".bin"
        if self.check_diff(local, size, md5):
            downfiles.append((local, remote))
            downsize += size

    self.callback(EDownload.START, downsize)
    for local, remote in downfiles:
        self.download_file(remote, local)
    self.callback(EDownload.END, 0)
def __init__(self, build=None, host=None, jobs=1):
    detected_build = self._detect_build()
    if build is None:
        build = detected_build
        logger.info("Detected build: {}".format(build))
    if host is None:
        host = build
    if build != detected_build:
        logger.warning("Provided build {} does not match detected build {}.".format(
            build, detected_build))
    self.rules_dir = os.path.join(os.path.dirname(__file__), 'rules')
    self.build_platform = build
    self.host = host
    self.packaging_dir = ''
    self.source_path = os.path.join(self.packaging_dir, 'source')
    self.build_path = os.path.join(self.packaging_dir, 'build')
    self.jobs = jobs
    self.packages = {}
    ensure_dir(self.source_path)
def __init__(self, data_dir, logaggfs_dir, master, log):
    # For storing state
    data_path = os.path.abspath(os.path.join(data_dir, 'logagg-data'))
    self.data_path = util.ensure_dir(data_path)

    # For log file that have been read
    archive_path = os.path.abspath(os.path.join(data_dir, 'logagg-archive'))
    self.archive_dir = util.ensure_dir(archive_path)

    self.master = master
    self.log = log

    # For remembering the state of files
    self.state = DiskDict(self.data_path)

    # Initialize logaggfs paths
    self.logaggfs = self._init_logaggfs_paths(logaggfs_dir)

    # Log fpath to thread mapping
    self.log_reader_threads = {}

    # Handle name to formatter fn obj map
    self.formatters = {}
    self.queue = queue.Queue(maxsize=self.QUEUE_MAX_SIZE)

    # Add initial files i.e. serverstats to state
    if not self.state['fpaths']:
        self.log.info('init_fpaths')
        self._init_fpaths()

    # Create nsq_sender
    self.log.info('init_nsq_sender')
    self._init_nsq_sender()
    #self.nsq_sender = util.DUMMY

    self._ensure_trackfiles_sync()
def _setup_logging(backup=False):
    util.ensure_dir('log/')

    # Also log to backup file with date.
    if backup:
        fh = logging.FileHandler('log/' + time.strftime('%y-%m-%d_%H-%M-%S') + '.log')
        fh.setLevel(logging.DEBUG)
        f_formatter = logging.Formatter(
            fmt='%(asctime)s %(name)-16s %(levelname)-8s %(message)s',
            datefmt='%H:%M:%S')
        fh.setFormatter(f_formatter)
        logging.getLogger('').addHandler(fh)

    # Also log to console.
    console = logging.StreamHandler()
    console.setLevel(CONSOLE_LOG_LEVEL)
    c_formatter = logging.Formatter(
        fmt='%(asctime)s %(name)-16s %(levelname)-8s %(message)s',
        datefmt='%H:%M:%S')
    console.setFormatter(c_formatter)
    logging.getLogger('').addHandler(console)
def makeSumAll(_):
    sum_all = Int(Raster(zone_tif) * 0.0)
    # Divide by sum of economic and social so that they sum to 10 and affes can make it go over 10
    div_by = weights['economic'] + weights['social']
    for c in categories:
        cur_dir = ensure_dir(os.path.join(RAMPART_OUT, "scored", s))
        cur_gdb = checkGDB(os.path.join(cur_dir, '{}{}'.format(c, suffix)))
        for_this = scores[c + '.' + s]
        for index in by_key:
            value = for_this[index]
            if value and not np.isnan(value):
                input = by_key[index]
                scored = calc(
                    os.path.join(cur_gdb, os.path.basename(input)),
                    lambda _: Int(1000000 * Con(
                        IsNull(Raster(input)), 0.0, Raster(input) * float(value))))

        def make_sum(_):
            arcpy.env.workspace = cur_gdb
            rasters = arcpy.ListRasters('*')
            #~ sum_raster = Con(IsNull(Raster(rasters[0])), 0.0, IsNull(Raster(rasters[0])) * 0.0)
            sum_raster = Int(Raster(zone_tif) * 0.0)
            for r in rasters:
                sum_raster += Raster(r)
            sum_raster = SetNull(0 == sum_raster, sum_raster)
            sum_raster = Int(sum_raster)
            sum_raster.save(_)

        sum_raster = calc(os.path.join(scored_gdb, '{}_{}{}'.format(c, s, suffix)), make_sum)
        result = calc(os.path.join(RAMPART_OUT, sum_raster.name + '.tif'),
                      lambda _: arcpy.CopyRaster_management(sum_raster, _))
        sum_all = Int(sum_all + Con(IsNull(sum_raster), 0, sum_raster) * weights[c] / div_by)
    sum_all = Int(sum_all)
    sum_all = SetNull(0 == sum_all, sum_all)
    sum_all.save(_)
def process_pic_one(path):
    image = cv2.imread(path)
    this_file = os.path.basename(path)
    pure_name = this_file.split(".")[0]
    cv2.imwrite(args.oraginal_dir + "/" + pure_name + ".jpg", image)

    # Additive white Gaussian noise
    im_g = gass_process(image)
    save_dir = args.goss_dir + "/"
    util.ensure_dir(save_dir)
    cv2.imwrite(args.goss_dir + "/" + pure_name + ".jpg", im_g)

    # Gaussian blur
    im_m = cv2.GaussianBlur(image, args.goss_m_kernel_size, args.goss_m_sigma)
    save_dir = args.m_dir + "/"
    util.ensure_dir(save_dir)
    cv2.imwrite(args.m_dir + "/" + pure_name + ".jpg", im_m)

    # JPEG compression
    save_dir = args.jpeg_dir + "/"
    util.ensure_dir(save_dir)
    cv2.imwrite(args.jpeg_dir + "/" + pure_name + ".jpg", image,
                [int(cv2.IMWRITE_JPEG_QUALITY), args.jpeg_com_radio])
def do_job(self):
    self.prepare_extracted_images()
    out_dir = self.export_property.output_dir + '/' + self.export_property.export_style
    ensure_dir(out_dir)

    if len(self.extracted_density_sets) != len(self.extracted_index_sets):
        message_box(message="The number of index is not equal to number of density!",
                    title="Index not match Density", severity='error')
        return

    #****************************************************
    #Write out Geant4 geometry files
    sets_number = len(self.extracted_density_sets)
    fn_prefix = self.export_property.file_name_prefix
    fn_sufix = self.export_property.file_name_suffix

    import csv
    for n in range(0, sets_number):
        out_file_name = out_dir + '/' + fn_prefix + str(n) + '.' + fn_sufix

        # write out the Data set
        index = self.extracted_index_sets[n]
        density = self.extracted_density_sets[n]
        image_info = self.g4_image_set_info

        out_nvoxels = [image_info.nvoxels_x, image_info.nvoxels_y, image_info.nvoxels_z]
        out_bound_x = [image_info.bound_x_min, image_info.bound_x_max]
        out_bound_y = [image_info.bound_y_min, image_info.bound_y_max]
        out_bound_z = [image_info.bound_z_min, image_info.bound_z_max]

        with open(out_file_name, 'w') as csvfile:
            self.write_g4geometry_header(csvfile)
            writer = csv.writer(csvfile, delimiter=' ', lineterminator='\n')
            writer.writerow(out_nvoxels)
            writer.writerow(out_bound_x)
            writer.writerow(out_bound_y)
            writer.writerow(out_bound_z)

            import numpy as np
            np_index = np.frombuffer(index.point_data.scalars.to_array(), dtype=np.int16)
            np_density = np.frombuffer(density.point_data.scalars.to_array(), dtype=np.float32)
            np_index.tofile(csvfile, sep=" ", format="%i")
            writer.writerow([])
            np_density.tofile(csvfile, sep=" ", format="%f")
        csvfile.close()

    print "**********Geant4 Geometry Exported Successfully*****************************\n"
    content = self.export_property.export_style + " Exported Successfully"
    message_box(message=content, title="Exported Successfully", severity='information')
import numpy as np
import pandas as pd
import json
import os
import util
import time

outputdir = 'output/PeMSD7(M)'
util.ensure_dir(outputdir)

dataurl = 'input/PeMSD7(M)/'
dataname = outputdir + '/PeMSD7(M)'

dataset = pd.read_csv(dataurl + 'PeMSD7_W_228.csv', header=None)
geo = []
for i in range(dataset.shape[0]):
    geo.append([i, 'Point', '[]'])
geo = pd.DataFrame(geo, columns=['geo_id', 'type', 'coordinates'])
geo.to_csv(dataname + '.geo', index=False)

rel = []
reldict = dict()
# dataset = pd.read_csv(dataurl+'PeMSD7_W_228.csv', header=None)
for i in range(dataset.shape[0]):
    for j in range(dataset.shape[1]):
        sid = i
        eid = j
        cost = dataset[i][j]
        if (sid, eid) not in reldict:
            reldict[(sid, eid)] = len(reldict)
            rel.append([len(reldict) - 1, 'geo', sid, eid, cost])
    group.add_argument('--flip_left_right', action='store_true', default=False,
                       help="Whether to randomly flip the training images horizontally.")
    parser.add_argument('--random_crop', type=int, default=0,
                        help="A percentage determining how much of a margin to randomly crop off the training images.")
    parser.add_argument('--random_scale', type=int, default=0,
                        help="A percentage determining how much to randomly scale up the size of the training images by.")
    parser.add_argument('--random_brightness', type=int, default=0,
                        help="A percentage determining how much to randomly multiply the training image input pixels up or down by.")
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = process_command_line()

    sess = tf.Session()

    # Set up the pre-trained graph.
    print("Using model directory {0} and model from {1}".format(args.model_dir, conf.DATA_URL))
    util.ensure_dir(args.model_dir)
    util.maybe_download_and_extract(data_url=conf.DATA_URL, dest_dir=args.incp_model_dir)
    model_filename = os.path.join(args.incp_model_dir, conf.MODEL_GRAPH_NAME)
    graph, bottleneck_tensor, jpeg_data_tensor, resized_image_tensor = (
        util.create_inception_graph(sess, model_filename))

    labels_list = None
    output_labels_file = os.path.join(args.model_dir, "output_labels.json")
    output_labels_file_lt20 = os.path.join(args.model_dir, "output_labels_lt20.json")
    d = os.path.dirname(output_labels_file_lt20)
    util.ensure_dir(d)

    # Look at the folder structure, and create lists of all the images.
    image_lists = util.create_image_lists(output_labels_file, output_labels_file_lt20,
                                          args.image_dir, args.testing_percentage,
                                          args.validation_percentage)
def do_make_task(args):
    """
    Build a table of documents and attach mentions and relations appropriately.
    """
    reader = csv.reader(args.input, delimiter="\t")
    documents = defaultdict(lambda: {"mentions": {}, "relations": []})
    mention_map = {}
    for row in reader:
        # Data is always cleaned.
        subj, relation, obj, prov, confidence, mention_weight, entity_weight, relation_weight = row

        if relation in TYPES:  # new mention!
            assert subj not in mention_map, "Seeing a duplicate mention definition!?: {}".format(row)
            doc_id, begin, end = parse_prov(prov)
            mention_map[subj] = doc_id
            mentions = documents[doc_id]["mentions"]
            mentions[subj] = {
                "id": subj,
                "gloss": obj,
                "type": relation,
                "doc_char_begin": begin,
                "doc_char_end": end
            }
        elif relation == "canonical_mention":
            doc_id = mention_map[subj]
            mentions = documents[doc_id]["mentions"]
            if subj not in mentions or obj not in mentions:
                logger.warning("Couldn't find subject/object for canonical_mention: %s", row)
                continue
            other = mentions[obj]
            mentions[subj]["entity"] = {
                "id": other["id"],
                "gloss": other["gloss"],
                "link": other["link"],
                "doc_char_begin": other["doc_char_begin"],
                "doc_char_end": other["doc_char_end"],
            }
        elif relation == "link":
            doc_id = mention_map[subj]
            mentions = documents[doc_id]["mentions"]
            if subj not in mentions:
                logger.warning("Couldn't find subject for link: %s", row)
                continue
            mentions[subj]["link"] = obj
        else:
            doc_id, begin, end = parse_prov(prov)
            mentions = documents[doc_id]["mentions"]
            relations = documents[doc_id]["relations"]
            if subj not in mentions or obj not in mentions:
                logger.warning("Couldn't find subject/object for relation: %s", row)
                continue
            relations.append({
                "subject": mentions[subj],
                "relation": relation,
                "object": mentions[obj],
                "doc_char_begin": begin,
                "doc_char_end": end,
                "confidence": float(confidence),
                "mention_weight": float(mention_weight),
                "entity_weight": float(entity_weight),
                "relation_weight": float(relation_weight),
            })
    logger.info("Using %d documents", len(documents))

    ensure_dir(args.output)
    for doc_id, doc in documents.items():
        relations = doc['relations']
        doc['doc_id'] = doc_id
        doc['sentences'] = query_doc(doc_id, args.sentence_table)
        assert len(doc['sentences']) > 0, "Couldn't find document {}".format(doc_id)

        for relation in relations:
            doc['relations'] = [relation]
            b1, e1 = relation['subject']['doc_char_begin'], relation['subject']['doc_char_end']
            b2, e2 = relation['object']['doc_char_begin'], relation['object']['doc_char_end']
            logger.info("Saving %s with %d relations", doc_id, len(doc['relations']))
            with open(os.path.join(args.output, "{}-{}-{}-{}-{}.json".format(
                    doc['doc_id'], b1, e1, b2, e2)), 'w') as f:
                json.dump(doc, f)
"/home/gilles/repos/cbrole/static/DETECT_LSVC_en_detect_3_CBROLE_150428295005/cv_pipelines/f_classif+maxabsscaler+linearsvc/150428295005_grid_search.joblibpkl", # the alternative "role_modelfp": # "/home/gilles/repos/cbrole/static/VOTING_en_3_CBROLE_150426243867/cv_pipelines/maxabsscaler+votingclassifier/150426243867_grid_search.joblibpkl", "/home/gilles/repos/cbrole/static/VOTING_en_3_CBROLE_150426243867/cv_pipelines/mutual_info_classif+maxabsscaler+votingclassifier/150426243867_grid_search.joblibpkl", # the alternative }, "nl": { "detect_modelfp": "/home/gilles/repos/cbrole/static/LINEARSVC_3_CBEVENT149968157182_nl/cv_pipelines/f_classif+linearsvc/149968157182_grid_search.joblibpkl", "role_modelfp": "/home/gilles/repos/cbrole/static/LINEARSVC_3_CBROLE149744106886_cbrole_nl/cv_pipelines/f_classif+linearsvc/149744106886_grid_search.joblibpkl", }, } METRIC = ["fscore", "precision", "recall", "acc"] detect_label = 1 util.ensure_dir( "/home/gilles/repos/cbrole/static/CASCADE_{}".format(LANGUAGE)) # load heldout X, y DATA_FP = s.langspec[LANGUAGE]["DATA_FP"] # X, y = dh.load_data(DATA_FP, n_features=s.langspec[LANGUAGE]['NUM_FEATURES'], memmapped=False) run_dir = os.path.dirname( os.path.dirname(os.path.dirname(langspec[LANGUAGE]["role_modelfp"]))) NUM_FEATURES_POSTSPLIT = json.load( open(os.path.join(run_dir, "holdinout_split_indices.json"), "rt"))["num_features"] X_in, y_in = dh.load_data( "{}/holdin.svm".format(run_dir), n_features=NUM_FEATURES_POSTSPLIT, memmapped=False, ) X_out, y_out = dh.load_data(
#!/usr/bin/env python

import util
import os
import subprocess

source_dir = "xyz_tiles_sequences"
destination_dir = "xyz_tiles_video"

zooms = util.listdir(source_dir)
for z in zooms:
    z_path = os.path.join(source_dir, z)
    cols = util.listdir(z_path)
    for c in cols:
        c_path = os.path.join(z_path, c)
        rows = util.listdir(c_path)
        for r in rows:
            sequence_path = os.path.join(c_path, r)
            video_destination_dir = os.path.join(destination_dir, z, c)
            util.ensure_dir(video_destination_dir)
            destination_path = os.path.join(video_destination_dir, r + ".mp4")
            cmd = '/usr/bin/env ffmpeg -f image2 -i "' + sequence_path + '/%5d.jpg" -vcodec libx264 -b 300k "' + destination_path + '"'
            print cmd
            subprocess.call(cmd, shell=True)
## Root directory for GIS scripts
HOME_DIR = os.path.dirname(os.path.realpath(__import__("__main__").__file__))
sys.path.append(HOME_DIR)
sys.path.append(os.path.join(HOME_DIR, 'fbp_convert'))
import shared
#~ reload(shared)
from shared import *
import fuelconversion
#~ reload(fuelconversion)
from util import ensure_dir
from FuelLookup import *
import pandas as pd
import numpy as np

## base directory for GIS data
GIS_BASE = ensure_dir(r'C:\FireGUARD\data\GIS')
## intermediate data that doesn't get used anywhere else
RAMPART_TMP = ensure_dir(os.path.join(GIS_BASE, 'intermediate', 'rampart'))
## generated data that gets used in FireGUARD
RAMPART_OUT = ensure_dir(os.path.join(GIS_BASE, 'generated', 'rampart'))
## file that defines what to generate scores for and how to do so
SCORES = os.path.join(HOME_DIR, 'scores.csv')


def find_min(start, extent_min):
    x = start
    for p in xrange(9):
        n = pow(10, 10 - p)
        while x + n < extent_min:
            x += n
    return x
def get_context_file_name(lemma: str, rating_level: int) -> str:
    return CONTEXT_FILE_NAME_F.format(lemma, lemma, rating_level)


def find_in_context_all(lemma: str) -> str:
    lemma_dir = CONTEXTS_DIR + lemma
    ensure_dir(lemma_dir)
    for i in range(5):
        rating_level = i + 1
        doc = read_tks_file(get_tokenized_file_name(rating_level))
        file_name = get_context_file_name(lemma, rating_level)
        if not os.path.exists(file_name):
            with open(file_name, "w", encoding=UTF_8) as context_file:
                for line_num, tokenized_review in enumerate(doc, start=1):
                    if count_lemma_in_tokens(lemma, tokenized_review) != 0:
                        print(f"{line_num}:\t{tokens_to_str(tokenized_review)}",
                              file=context_file)
    return lemma


if __name__ == "__main__":
    ensure_dir(CONTEXTS_DIR)
    for group in LEMMA_GROUPS_LIST:
        lemmas = read_lemmas_file(group)
        with mp.Pool() as pool:
            for word in pool.imap_unordered(find_in_context_all, lemmas):
                print(f"search for \"{word}\" complete")
""" Entry point to verbphysics system. author: mbforbes """ # IMPORTS # ----------------------------------------------------------------------------- # Logging first this was a fun bug. import logging import util util.ensure_dir('log/') logging.basicConfig( level=logging.DEBUG, format='%(asctime)s %(name)-16s %(levelname)-8s %(message)s', datefmt='%m-%d %H:%M:%S', filename='log/latest.log', filemode='w') base_logger = logging.getLogger(__name__) # builtins import argparse import sys import time # 3rd party import factorgraph as fg # local import attrgraph
def __iter__(self):
    ensure_dir(paths['bin_path'])
    self.bin_iter = iter(listdir(paths['bin_path']))
    self.last_bin = None
    return self
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = process_command_line()

    if not os.path.exists(args.events_xml):
        print('Need to specify events XML --events_xml option')

    sess = tf.Session()

    # Set up the pre-trained graph.
    print("Using model directory {0} and model from {1}".format(args.model_dir, conf.DATA_URL))
    util.ensure_dir(args.model_dir)
    util.maybe_download_and_extract(data_url=conf.DATA_URL, dest_dir=args.incp_model_dir)
    model_filename = os.path.join(args.incp_model_dir, conf.MODEL_GRAPH_NAME)
    graph, bottleneck_tensor, jpeg_data_tensor, resized_image_tensor = (
        util.create_inception_graph(sess, model_filename))

    labels_list = None
    output_labels_file = os.path.join(args.model_dir, "output_labels.json")
    output_labels_file_lt20 = os.path.join(args.model_dir, "output_labels_lt20.json")
    d = os.path.dirname(output_labels_file_lt20)
    util.ensure_dir(d)
    util.ensure_dir(args.bottleneck_dir)

    # load the labels list, needed to create the model; exit if it's not there
    if gfile.Exists(output_labels_file):
        with open(output_labels_file, 'r') as lfile:
#!/usr/bin/env python

# find $1 -name "*.MOV" -exec sh -c 'ffmpeg -y -i {} -vf "scale=910:512" -vf "crop=512:512:199:0" -c:v libx264 -preset medium -b:v 1000k -an -f mp4 delivery_video_test/`basename {}`.mp4' \;

import util
import os
import subprocess

source_dir = "source_video_tiles"
destination_dir = "source_video_tiles_sequences"

source_filenames = filter(lambda f: f != ".DS_Store", os.listdir(source_dir))
for source_filename in source_filenames:
    basename = os.path.splitext(source_filename)[0]
    source_path = source_dir + "/" + source_filename
    tile_destination_dir = destination_dir + "/" + basename + "/"
    util.ensure_dir(tile_destination_dir)
    destination_path = tile_destination_dir + "%05d.jpg"
    cmd = '/usr/bin/env ffmpeg -i "' + source_path + '" -an -f image2 "' + destination_path + '"'
    print cmd
    subprocess.call(cmd, shell=True)
def ensure_dir(self, *args):
    ensure_dir(self.j(*args))
def make_house_sparrow_or_sparrow(seed=None, rawdir=None,
                                  webdir=os.environ['SPARROW_FLICKR_WEB']):
    """Make house sparrow or sparrow task.

    Uses Flickr API.

    Args:
        seed: Hashable seed.
        rawdir (str): Directory to store results before randomization.
        webdir (str): Directory to store randomized and normalized task.
    """
    if seed is not None:
        random.seed(seed)
    options = dict(per_page=100, content_type=1, sort='relevance',
                   query_type='text', pages=(1, 2))
    sparrows = util.get_flickr_photos(query='sparrow', **options)
    house_sparrows = util.get_flickr_photos(query='house sparrow', **options)

    for i, sparrow in enumerate(sparrows):
        sparrow['query'] = 'sparrow'
        sparrow['result_ind'] = i
        # Write intermediary results.
        if rawdir is not None:
            util.ensure_dir(os.path.join(rawdir, 'images', 'sparrow'))
            sparrow['rawpath'] = 'images/sparrow/{}.jpg'.format(sparrow['id'])
            urllib.urlretrieve(sparrow['url'], os.path.join(rawdir, sparrow['rawpath']))

    for i, sparrow in enumerate(house_sparrows):
        sparrow['query'] = 'house sparrow'
        sparrow['result_ind'] = i
        # Write intermediary results.
        if rawdir is not None:
            util.ensure_dir(os.path.join(rawdir, 'images', 'house_sparrow'))
            sparrow['rawpath'] = 'images/house_sparrow/{}.jpg'.format(sparrow['id'])
            urllib.urlretrieve(sparrow['url'], os.path.join(rawdir, sparrow['rawpath']))

    # Write more intermediary results.
    if rawdir is not None:
        with open(os.path.join(rawdir, 'query_sparrow.csv'), 'w') as f:
            writer = csv.DictWriter(f, fieldnames=sparrows[0].keys())
            writer.writerows(sparrows)
        with open(os.path.join(rawdir, 'query_house_sparrow.csv'), 'w') as f:
            writer = csv.DictWriter(f, fieldnames=house_sparrows[0].keys())
            writer.writerows(house_sparrows)

    all_sparrows = sparrows + house_sparrows
    random.shuffle(all_sparrows)
    webdir_images = os.path.join(webdir, 'images')
    util.ensure_dir(webdir_images)
    sparrows_out = []
    for i, sparrow in enumerate(all_sparrows):
        d = dict(id=i, data=sparrow)
        d['data']['path'] = 'images/{}.jpg'.format(d['data']['id'])
        urllib.urlretrieve(d['data']['url'], os.path.join(webdir, d['data']['path']))
        sparrows_out.append(d)
    with open(os.path.join(webdir, 'data.json'), 'w') as f:
        json.dump(dict(data=sparrows_out), f)
def procesar():
    g = geocoders.Google("ABQIAAAAtGLFHYz6bfKeWA7GGQ8fzRSfYWwldeQTn-MMsG6oDuo7Kf7ifBSD9Yv-SCgMoxscszNjCTLqX9vU2g")
    # Connect to the database
    db, cursor = util.conectar()
    fecha = datetime.datetime.today().strftime("%Y%m%d-%H-%M")
    file_error = open(util.ensure_dir("./error/geocoders/log-error-geocoder-%s.txt" % (fecha)), "w")
    file_exito = open(util.ensure_dir("./exito/geocoders/log-exito-geocoder-%s.txt" % (fecha)), "w")
    file_etapa = open("log-etapa-geocoder.txt", "a")
    etapa = "2010-06%"
    # sql = "SELECT DISTINCT mm002_ventas_contacts_c.mm002_vente4f9ontacts_ida " \
    #       "FROM mm002_ventas, mm002_ventas_contacts_c " \
    #       "WHERE mm002_ventas.id=mm002_ventas_contacts_c.mm002_vent6709_ventas_idb AND " \
    #       "mm002_ventas.deleted =0 AND mm002_ventas.fecha_venta like '%s'" % (etapa)
    sql = "SELECT contacts_cstm.id_c FROM contacts_cstm WHERE contacts_cstm.estado_localizacion_c = '' or contacts_cstm.estado_localizacion_c is null"
    # Record the period that was used to filter the sales
    file_etapa.write("%s\n" % (etapa))
    try:
        cursor.execute(sql)
    except:
        file_error.write(sql + "\n")
    # Record the query that was run
    file_exito.write(sql + "\n")
    # Fetch the query results
    datos = cursor.fetchall()
    # Initialize counters
    count_ok = 0
    count_default = 0
    count_not_cordoba = 0
    count_mas1 = 0
    count_error = 0
    count_done = 0
    print "Cantidad de datos = %s" % len(datos)
    iter = 0
    for dato in datos:
        iter += 1
        id_contacto = dato[0]
        sql = (
            "SELECT contacts.primary_address_street , "
            "IFNULL(IF(contacts_cstm.domicilio_uno_numero_c REGEXP '^[0-9]+' = 0, -1, contacts_cstm.domicilio_uno_numero_c), -1) AS altura, "
            "IFNULL(contacts.primary_address_city , 'NO-CITY' ) AS ciudad, "
            "contacts_cstm.estado_localizacion_c AS estado, "
            "IFNULL(contacts.primary_address_state , 'NO-STATE' ) AS provincia "
            "FROM contacts, contacts_cstm "
            "WHERE contacts.deleted=0 AND contacts.id = '" + str(id_contacto) + "' "
            "AND contacts.id=contacts_cstm.id_c "
        )
        try:
            cursor.execute(sql)
        except:
            file_error.write(sql + "\n")
        # Fetch the rows holding the address
        direcciones = cursor.fetchall()
        if len(direcciones) > 1:
            count_mas1 += 1
            file_error.write("Se encontraron %s direcciones para %s\n" % (len(direcciones), id_contacto))
            continue
        elif len(direcciones) == 1:
            direccion = direcciones[0]
            # Check that the city is CORDOBA
            ciudad = (direccion[2].split("(")[0].strip()).replace(u"\xd1", "N").replace(u"\xf1", "n")
            provincia = (direccion[4].strip()).replace(u"\xd1", "N").replace(u"\xf1", "n")
            if (ciudad.upper() == "NO-CITY") or (ciudad.upper() != "CORDOBA"):
                count_not_cordoba += 1
                continue
            if provincia.upper() == "NO-STATE":
                # count_not_cordoba += 1
                provincia = ""
                # continue
            # Check that the status is neither OK nor DEFAULT
            estado = direccion[-1]
            if estado in ["OK", "DEFAULT"]:
                file_exito.write("Contacto: %s - Estado Localizacion: %s\n" % (id_contacto, estado))
                count_done += 1
                continue
            # Get street, number, province and country
            if direccion[0] != None:
                calle = direccion[0].replace(u"\xd1", "N").replace(u"\xf1", "n")
                if int(direccion[1]) != -1:
                    altura = int(direccion[1])
                else:
                    altura = ""
            else:
                calle = "Av. Colon"
                altura = 4045
            pais = "Argentina"
            # Pick a random wait time
            sleep_time = random.randint(0, 30)
            print "BUSCO: %s %s, %s, %s, %s" % (calle, altura, provincia, ciudad, pais)
            print "ANALIZADOS %s REGISTROS DE %s. SLEEPING %s seconds." % (iter, len(datos), sleep_time)
            time.sleep(sleep_time)
            # Try to resolve the address to (lat, long)
            try:
                place, (lat, long) = g.geocode("%s %s, %s, %s, %s" % (calle, altura, provincia, ciudad, pais))
                estado_localizacion = "OK"
                count_ok += 1
            except Exception, e:
                print "Error: geocode -> %s" % (e)
                # If the error is due to more than one match, default to Colon 4045
                # if "Didn't find exactly one placemark" in str(e):
                place, (lat, long) = "Av. Colon 4045", COLON_4045
                estado_localizacion = "DEFAULT"
                count_default += 1
                file_error.write(
                    "Error: geocode: %s for ID=%s - Dir: %s\n - Estado: %s" % (
                        e,
                        id_contacto,
                        ", ".join([calle.encode("ascii", "replace"), str(altura), ciudad, provincia, pais]),
                        estado_localizacion,
                    )
                )
            # If it matched only "Cordoba" or a place outside Cordoba, fall back to COLON 4045
            if (
                "CORDOBA" == ciudad and place == u"C\xf3rdoba, Cordoba, Argentina"
                or u"C\xf3rdoba, Cordoba, Argentina" not in place
            ):
                print "Place not found!...searching new address"
                place, (lat, long) = "Av. Colon 4045", COLON_4045
                estado_localizacion = "DEFAULT"
                count_default += 1
                print "New Place = %s - (%s,%s)" % (place, lat, long)
            # Logging and UPDATE against the database
            file_exito.write(
                "Contacto: %s - Dir: %s - (%s,%s)\n" % (id_contacto, place.encode("ascii", "replace"), lat, long)
            )
            print "%s: Dir.: %s - (%s,%s)\n\n" % (estado_localizacion, place, lat, long)
            sql = (
                "UPDATE contacts_cstm SET latitud_c = '%s', longitud_c='%s', estado_localizacion_c = '%s' "
                "WHERE contacts_cstm.id_c='%s' " % (lat, long, estado_localizacion, id_contacto)
            )
            # print sql
            # continue
            try:
                # Execute the SQL command
                cursor.execute(sql)
                # Commit your changes in the database
                db.commit()
            except:
                # Rollback in case there is any error
                db.rollback()
        else:
            file_error.write("Error: ID contacto= %s - Direcciones= %s\n" % (id_contacto, str(direcciones)))
            print "Se encontraron %s direcciones para %s\n" % (len(direcciones), id_contacto)
            # print sql
            continue
def m2c_generator(max_num_sample):
    ''' m2c Generator

        Input : a testing sample index
        Output : Chord Label (n, 16)
                 Monophony Melody Label (n, 2)
                 BPM float

        Average Elasped Time for one sample : 0.16 sec
    '''
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cpu_device = torch.device('cpu')

    # Load Data
    chord_dic = pd.read_pickle(CONFIG_ALL['data']['chord_dic'])

    # prepare features
    all_files = find_files(CONFIG_ALL['data']['test_dir'], '*.mid')
    input_dic = []
    for i_file in all_files:
        _ = midi_feature(i_file, sampling_fac=2)
        _ = np.reshape(_, (1, _.shape[0], _.shape[1]))
        input_dic.append({'midi': i_file, 'm_embed': _})
    print 'Total Number of files : ', len(input_dic)

    # training
    model = BiRNN(CONFIG_ALL['model']['input_size'],
                  CONFIG_ALL['model']['lstm_hidden_size'],
                  CONFIG_ALL['model']['fc_hidden_size'],
                  CONFIG_ALL['model']['num_layers'],
                  CONFIG_ALL['model']['num_classes_cf'],
                  CONFIG_ALL['model']['num_classes_c'], device).to(device)

    # Load Model
    path = os.path.join(CONFIG_ALL['model']['log_dir'],
                        CONFIG_ALL['model']['exp_name'], 'models/',
                        CONFIG_ALL['model']['eval_model'])
    model.load_state_dict(torch.load(path))

    # Test the model
    with torch.no_grad():
        while True:
            test_idx = yield
            if test_idx >= max_num_sample or test_idx < 0:
                print "Invalid sample index"
                continue

            m_embedding = input_dic[test_idx]['m_embed']
            out_cf, out_c = model(torch.tensor(m_embedding, dtype=torch.float).to(device))
            out_c = out_c.data.cpu().numpy()
            _, pred_cf = torch.max(out_cf.data, 1)
            pred_cf = pred_cf.data.cpu().numpy()

            i_out_tn1 = -1
            i_out_tn2 = -1
            i_out_tn3 = -1
            i_out_t = -1

            predicted = []
            c_threshold = 0.825
            f_threshold = 0.35
            #ochord_threshold = 1.0

            for idx, i_out in enumerate(out_c):
                # Seventh chord
                #T_chord_label = [0, 1, 2, 3, 4, 5, 102, 103, 104]
                #D_chord_label = [77, 78, 79, 55, 56, 57]
                #R_chord_label = [132]

                # Triad Chord
                T_chord_label = [0, 1, 37]
                D_chord_label = [20, 28]
                R_chord_label = [48]
                O_chord_label = [
                    i for i in range(0, 48)
                    if not (i in T_chord_label) or (i in D_chord_label) or (i in R_chord_label)
                ]

                # Bean Search for repeated note
                if pred_cf[idx] == 0:
                    L = np.argsort(-np.asarray([i_out[i] for i in T_chord_label]))
                    if i_out_tn1 == T_chord_label[L[0]] and i_out_tn2 == T_chord_label[L[0]]:
                        i_out_t = T_chord_label[L[1]]
                    else:
                        i_out_t = T_chord_label[L[0]]
                elif pred_cf[idx] == 1:
                    i_out_t = D_chord_label[np.argmax([i_out[i] for i in D_chord_label])]
                elif pred_cf[idx] == 3:
                    L = np.argsort(-np.asarray([i_out[i] for i in O_chord_label]))
                    if i_out_tn1 == O_chord_label[L[0]] and i_out_tn2 == O_chord_label[L[0]]:
                        i_out_t = O_chord_label[L[1]]
                    else:
                        i_out_t = O_chord_label[L[0]]
                else:
                    i_out_t = 48

                predicted.append(i_out_t)
                i_out_tn2 = i_out_tn1
                i_out_tn1 = i_out_t
                i_out_last = i_out

            # Write file to midi
            midi_original = pretty_midi.PrettyMIDI(input_dic[test_idx]['midi'])
            midi_chord = pro_chordlabel_to_midi(
                predicted,
                chord_dic,
                inv_beat_resolution=CONFIG_ALL['data']['chord_resolution'],
                constant_tempo=midi_original.get_tempo_changes()[1])
            midi_chord.instruments[0].name = "Predicted_w_func"
            midi_original.instruments.append(midi_chord.instruments[0])

            out_path = os.path.join('eval_test/', str(test_idx) + '.mid')
            ensure_dir(out_path)
            midi_original.write(out_path)
            print "Write Files to : ", out_path

            out_mc = midi_to_list(midi_original, predicted)

            yield {
                'melody': out_mc['melody'],
                'chord': out_mc['chord'],
                'BPM': float(midi_original.get_tempo_changes()[1])
            }
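m2c_generator is a coroutine-style generator: it receives the sample index through the bare `test_idx = yield` and hands the result back through the final `yield {...}`, so a caller has to prime it with next() and then alternate send and next. A short, hypothetical driver sketch (the max_num_sample value and the index 0 are illustrative only):

```python
# Hypothetical driver for the coroutine above.
gen = m2c_generator(max_num_sample=10)
next(gen)               # prime: run up to the first `test_idx = yield`
result = gen.send(0)    # feed sample index 0, receive {'melody', 'chord', 'BPM'}
print(result['BPM'])
next(gen)               # advance past the result yield before sending the next index
```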
def read_scores_from_file(file_name: str) -> Dict[str, float]:
    scores_dict = dict()
    with open(file_name, "r", encoding=UTF_8) as scores_file:
        for i, line in enumerate(scores_file):
            if i == 0:
                # skip the header row
                continue
            line_split = line.strip().split(",")
            word = line_split[0]
            score = float(line_split[1])
            scores_dict[word] = score
    return scores_dict


if __name__ == "__main__":
    ensure_dir(OUTPUT_DIR)
    all_lemmas_set = set()
    all_lemmas = list()
    all_automated_scores = list()
    all_manual_scores = list()
    for group in LEMMA_GROUPS_LIST:
        # read scores from automated analysis
        automated_scores_file_name = get_search_results_file_name(group)
        automated_scores = read_scores_from_file(automated_scores_file_name)
        automated_scores_list = list()

        # read scores from manual analysis
        manual_scores_file_name = get_manual_scores_file_name(group)
        manual_scores = read_scores_from_file(manual_scores_file_name)
        manual_scores_list = list()

        # print paired scores into the individual output file, and put scores into
        # all_lemmas, all_automated_scores, all_manual_scores
        output_file_name = OUTPUT_FILE_NAME_F.format(group)