def create_bias_fieldmap(self):
    if os.path.exists(os.path.join(self.fieldmap_dir(), 'phase.nii.gz')):
        return False
    fieldmap_dirs = sorted(glob("{}/*gre_field_mapping*".format(self._raw_path)))
    magnitude_dir = fieldmap_dirs[0]
    phase_dir = fieldmap_dirs[1]
    mag1_dir = os.path.join(magnitude_dir, 'mag1')
    mag2_dir = os.path.join(magnitude_dir, 'mag2')
    if not os.path.exists(mag1_dir):
        second_fieldmap_files = sorted(glob("{}/*_1*.dcm".format(magnitude_dir)))
        all_fieldmap_files = glob("{}/*.dcm".format(magnitude_dir))
        first_fieldmap_files = sorted(list(set(all_fieldmap_files) - set(second_fieldmap_files)))
        os.mkdir(mag1_dir)
        os.mkdir(mag2_dir)
        for file in first_fieldmap_files:
            shutil.move(file, mag1_dir)
        for file in second_fieldmap_files:
            shutil.move(file, mag2_dir)
    if not os.path.exists(self.fieldmap_dir()):
        os.mkdir(self.fieldmap_dir())
    self.__dcm_convert__(mag1_dir, self.fieldmap_dir(), 'mag1', '20')
    self.__dcm_convert__(mag2_dir, self.fieldmap_dir(), 'mag2', '20')
    self.__dcm_convert__(phase_dir, self.fieldmap_dir(), 'phase', '20')
    return True
def glob(*patterns, exclude=None, parent=None):
    '''Wrapper for `glob2.glob()` that accepts an arbitrary number of
    patterns and matches them. The paths are normalized with `normpath()`.
    If called from within a module, relative patterns are assumed relative
    to the module's parent directory. If *exclude* is specified, it must be
    a string or a list of strings that is/contains glob patterns or
    filenames to be removed from the result before returning.'''
    if not parent and module:
        parent = module.project_dir
    result = []
    for pattern in patterns:
        if not isabs(pattern):
            pattern = join(parent, pattern)
        result += glob2.glob(normpath(pattern))
    if isinstance(exclude, str):
        exclude = [exclude]
    if exclude is not None:
        for pattern in exclude:
            if not isabs(pattern):
                pattern = join(parent, pattern)
            if not isglob(pattern):
                print('>>>', normpath(pattern), result)
                result.remove(normpath(pattern))
            else:
                for item in glob2.glob(normpath(pattern)):
                    result.remove(item)
    return result
def test_non_glob(self):
    # Test without patterns.
    assert_equals(glob2.glob(__file__, True), [(__file__, ())])
    assert_equals(glob2.glob(__file__), [__file__])
def test_non_glob(self):
    # Test without patterns.
    assert glob2.glob(__file__, True) == [(__file__, ())]
    assert glob2.glob(__file__) == [__file__]
def find_pairs(basedir):
    homogs = glob2.glob(os.path.join(basedir, '**/*homog*.prn*'))
    cands = list()
    for homog in homogs:
        n = os.path.basename(os.path.splitext(homog)[0]).split('_')[-1]
        cand = glob2.glob(os.path.join(basedir, '**/*candidate*' + n + '.*'))
        cands.append(cand[0])
    return zip(cands, homogs)
def run_searchlight(op, subjectdir, conf, output_dir, TR=2):
    mask_name = conf.mask_name
    conditions = conf.conditions_to_compare
    flavor = conf.flavor
    study_path = op.study_dir()
    subcode = subjectdir.subcode()
    for condition in conditions:
        did_run = True
        output = _opj(output_dir, '*{}*'.format(conf.get_cond_prefix(condition)))
        if conf.num_of_permutations > 0:
            output = "{}_perm{}".format(output, conf.num_of_permutations)
        if len(glob(output)) == 0:
            did_run = False
    if did_run:
        print "already ran all sl for {}".format(output_dir)
        return
    fds = conf.get_ds(study_path, subcode, conf, mask_name, flavor, TR)
    print fds.summary()
    warp = glob(_opj(study_path, 'sub{:0>3d}'.format(subcode), '**',
                     conf.mvpa_tasks[0], 'reg',
                     'example_func2standard_warp.nii.gz'))[0]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for pair in conditions:
        permute = AttributePermutator('condition', limit='chunks')
        print conf.num_of_permutations + 1
        for j in xrange(conf.num_of_permutations + 1):
            prefix = conf.get_cond_prefix(pair)
            cond_ds = fds[np.array([c in pair for c in fds.sa['condition']])]
            if j > 0:
                cond_ds = permute(cond_ds)
                prefix = "{}_perm{}".format(prefix, j)
            print prefix
            output_basename = os.path.join(output_dir, prefix)
            if len(glob(output_basename + "*")) > 0:
                print "sl already ran {}".format(j)
                continue
            kwa = {'voxel_indices': conf.get_neighbourhood_strategy(cond_ds)}
            qe = IndexQueryEngine(**kwa)
            # init the searchlight with the queryengine
            sl = Searchlight(conf.get_sl_measure(), queryengine=qe, roi_ids=None,
                             enable_ca=['roi_sizes', 'roi_feature_ids'])
            print "starting sl {}".format(datetime.now())
            sl_map = sl(cond_ds)
            print "finished sl {}".format(datetime.now())
            pickle.dump(sl_map, open("{}_sl_map.p".format(output_basename), "wb"))
            acc_results = map2nifti(sl_map, imghdr=fds.a.imghdr)
            acc_nii_filename = '{}-acc.nii.gz'.format(output_basename)
            acc_results.to_filename(acc_nii_filename)
            # do_searchlight(cond_ds, k, os.path.join(output_dir, prefix))
            out_filename = acc_nii_filename.replace('.nii.gz', '_mni.nii.gz')
            apply_warp(acc_nii_filename, warp, out_filename)
def issue_no_mako_warnings():
    set_has_mako = set([])
    set_no_python_mako = set([])
    for path in glob2.glob('./scenarios/**/*.mako'):
        set_has_mako.add(os.path.dirname(path))
    for path in glob2.glob('./scenarios/**/python.mako'):
        set_no_python_mako.add(os.path.dirname(path))
    print 'The following dont have a python.mako file. Look into it!'
    print set_has_mako.difference(set_no_python_mako)
def test_non_glob(self):
    # Ensure that a certain codepath (when the basename is globbed
    # with ** as opposed to the dirname) does not cause
    # the root directory to be part of the result.
    # -> b/ is NOT in the result!
    assert_equals(glob2.glob(__file__, True), [(__file__, ())])
    assert_equals(glob2.glob(__file__), [__file__])
def __dcm_convert__(self, source_directory, target_directory, target_filename,
                    rename_prefix, erase=False):
    cmd = "dcm2nii -o {} {} > /dev/null".format(target_directory, source_directory)
    os.system(cmd)
    nii_file = glob("{}/{}*".format(target_directory, rename_prefix))[0]
    os.rename(nii_file, os.path.join(target_directory, '{}.nii.gz'.format(target_filename)))
    if erase:
        for file_name in glob("{}/*".format(target_directory)):
            if target_filename not in file_name:
                os.remove(file_name)
def live():
    """Run livereload server"""
    from livereload import Server
    server = Server(app)
    map(server.watch, glob2.glob("application/pages/**/*.*"))      # pages
    map(server.watch, glob2.glob("application/macros/**/*.html"))  # macros
    map(server.watch, glob2.glob("application/static/**/*.*"))     # public assets
    server.serve(port=PORT)
def glob(patterns, parent=None, excludes=(), include_dotfiles=False):
    """
    Wrapper for :func:`glob2.glob` that accepts an arbitrary number of
    patterns and matches them. The paths are normalized with :func:`norm`.

    Relative patterns are automatically joined with *parent*. If the
    parameter is omitted, it defaults to the currently executed build
    script's project directory.

    If *excludes* is specified, it must be a string or a list of strings
    that is/contains glob patterns or filenames to be removed from the
    result before returning.

    .. note::

        Every file listed in *excludes* will only remove **one** item from
        the result list that was generated from *patterns*. Thus, if you
        want to exclude some files with a pattern except for a specific
        file that would also match that pattern, simply list that file
        another time in the *patterns*.

    :param patterns: A list of glob patterns or filenames.
    :param parent: The parent directory for relative paths.
    :param excludes: A list of glob patterns or filenames.
    :param include_dotfiles: If True, ``*`` and ``**`` can also capture
        file or directory names starting with a dot.
    :return: A list of filenames.
    """
    argspec.validate("patterns", patterns, {"type": [list, tuple]})
    argspec.validate("excludes", excludes, {"type": [list, tuple]})
    argspec.validate("parent", parent, {"type": [None, str]})
    if not parent:
        parent = getcwd()
    result = []
    for pattern in patterns:
        if not isabs(pattern):
            pattern = join(parent, pattern)
        result += glob2.glob(norm(pattern))
    for pattern in excludes:
        if not isabs(pattern):
            pattern = join(parent, pattern)
        pattern = norm(pattern)
        if not isglob(pattern):
            result.remove(pattern)
        else:
            for item in glob2.glob(pattern):
                result.remove(item)
    return result
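# A minimal usage sketch for the wrapper above. The project root and the
# file patterns are hypothetical placeholders, not part of the original code.
sources = glob(
    ["src/**/*.py"],                 # patterns to include
    parent="/path/to/project",       # assumed project root
    excludes=["src/**/test_*.py"],   # drop unit-test modules from the result
)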
def create(ctx, filename, recursive, dry_run, debug):
    """Either '-f' option or subcommand required."""
    if ctx.invoked_subcommand:
        return 'defer to subcommand'
    if not filename:
        # click.echo('error: Missing option "-f".')
        click.echo(create.get_help(ctx))
        sys.exit(0)
    realpath = os.path.realpath(filename)
    manifests = []
    if os.path.isfile(filename):
        manifests.extend([realpath])
    if os.path.isdir(filename):
        if recursive:
            manifests.extend([f for f in glob2.glob(realpath + '/**/*.j2')])
            manifests.extend([f for f in glob2.glob(realpath + '/**/*.yml')])
            manifests.extend([f for f in glob2.glob(realpath + '/**/*.yaml')])
            manifests = [f for f in manifests if os.path.isfile(f)]
        else:
            manifests.extend([realpath + '/' + f for f in os.listdir(realpath)
                              if os.path.isfile(realpath + '/' + f)
                              and f.endswith(('.j2', '.yaml', '.yml'))])
    if not manifests:
        click.echo('no manifest files found')
        sys.exit(0)
    if debug:
        click.echo(manifests)
    for manifest in manifests:
        definitions = None
        t = jinja2.Environment(loader=jinja2.FileSystemLoader(
            os.path.dirname(os.path.realpath(manifest))))
        t.filters['json_dump'] = json_dump
        definitions = t.get_template(os.path.basename(manifest)).render()
        if debug:
            print definitions if definitions else ''
        for definition in yaml.load_all(definitions):
            # import ipdb; ipdb.set_trace()
            if not dry_run:
                resp, status = kreate(definition)
def __get_coverage_reports(self):
    self.log.info('getting coverage reports from {0}'.format(
        self.config['ARTIFACTS_DIR']))
    coverage_reports = []
    src_dir = self.config['ARTIFACTS_DIR']
    xml_coverage_glob_pattern = os.path.join(
        src_dir, '**/**/**/shippable/codecoverage/**/*.xml')
    self.log.debug('Looking for coverage reports in {0}'.format(
        xml_coverage_glob_pattern))
    xml_coverage_report_filenames = glob2.glob(xml_coverage_glob_pattern)
    csv_coverage_glob_pattern = os.path.join(
        src_dir, '**/**/**/shippable/codecoverage/**/*.csv')
    self.log.debug('Looking for coverage reports in {0}'.format(
        csv_coverage_glob_pattern))
    csv_coverage_report_filenames = glob2.glob(csv_coverage_glob_pattern)
    coverage_report_filenames = (
        xml_coverage_report_filenames + csv_coverage_report_filenames)
    if len(coverage_report_filenames) > 0:
        self.log.debug('Found {0} coverage reports'.format(
            len(coverage_report_filenames)))
        self.log.debug(coverage_report_filenames)
        for coverage_report_filename in coverage_report_filenames:
            current_filesize_bytes = os.path.getsize(coverage_report_filename)
            if (self.total_report_filesize_bytes + current_filesize_bytes
                    >= self.config['MAX_USER_REPORT_SIZE_BYTES']):
                continue
            self.total_report_filesize_bytes += current_filesize_bytes
            with open(coverage_report_filename) as report_file:
                coverage_reports.append({
                    'content': report_file.read(),
                    'filename': coverage_report_filename
                })
        self.log.info('got coverage reports: {0}'.format(coverage_reports))
    else:
        self.log.info('No coverage reports to upload')
    return coverage_reports
def resolve_contents(self, env=None, force=False):
    """Returns contents, with globbed patterns resolved to actual filenames.

    Set ``force`` to ignore any cache, and always re-resolve glob patterns.
    """
    env = self._get_env(env)
    # TODO: We cache the values, which in theory is problematic, since
    # due to changes in the env object, the result of the globbing may
    # change. Not to mention that a different env object may be passed
    # in. We should find a fix for this.
    if getattr(self, '_resolved_contents', None) is None or force:
        l = []
        for item in self.contents:
            if isinstance(item, basestring):
                # We only go through glob() if this actually is a
                # pattern; this means that invalid filenames will
                # remain in the content set, and only raise an error
                # at a later point in time.
                # TODO: This is possibly a good place to check for
                # a file's existence though; currently, when in debug
                # mode, no error would be raised at all, and simply a
                # broken url sent to the browser.
                if has_magic(item):
                    path = env.abspath(item)
                    for f in glob.glob(path):
                        l.append(f[len(path) - len(item):])
                else:
                    l.append(item)
            else:
                l.append(item)
        self._resolved_contents = l
    return self._resolved_contents
def build_package():
    """Copies all the included files to the build directory."""
    # Erase and rebuild the build directory
    if os.path.exists(BUILD_DIR):
        shutil.rmtree(BUILD_DIR)
    mkdir(BUILD_DIR)
    mkdir(FILES_DIR)
    for include in INCLUDES:
        # Glob all the files!
        files = glob(include)
        for file in files:
            # Filter out ignored file types
            if split_compound_ext(file)[1].lower() in IGNORE_TYPES:
                continue
            # Copy the file
            newfile = os.path.join(FILES_DIR, file)
            newdir = os.path.dirname(newfile)
            if not os.path.exists(newdir):
                mkdir(newdir)
            shutil.copy(file, newfile)
def main(globstr, beat_subdivisions, fs, quantized, wrap, save_img, debug):
    for filepath in glob.glob(globstr):
        try:
            data = pm.PrettyMIDI(filepath)
            b = data.get_beats()
            beats = interpolate_between_beats(b, beat_subdivisions)
            if quantized:
                quantize(data, beats)
            if not fs:
                cur_fs = 1. / beats[1]
                while cur_fs > wrap:
                    cur_fs = cur_fs * 0.5
            else:
                cur_fs = fs
            print("{}, {}".format(filepath, cur_fs))
            # proll = data.get_piano_roll(fs=fs, times=beats)
            proll = data.get_piano_roll(fs=cur_fs).astype(int)
            if np.isnan(proll).any():
                print("{} had NaN cells".format(filepath))
            # automatically appends .npy to the filename
            np.save(filepath, proll)
            # save image
            if save_img:
                plt.imsave(filepath + '_o.png', proll)
                plt.imsave(filepath + '_f.png', np.flipud(proll))
        except:
            print filepath, sys.exc_info()[0]
            if debug:
                traceback.print_exc()
            continue
def get_module_string(tests_path):
    path = get_module_path()
    tests_path = os.path.abspath(os.path.join(path, os.path.pardir, tests_path))
    test_files = glob2.glob(tests_path)
    relative_test_files = [get_ws_relative_route(test_file) for test_file in test_files]
    module_strings = [".".join(test_file)[:-3] for test_file in relative_test_files]
    return module_strings
def load(__builtin__, profile_path, folder=None):
    import os
    profile_path = os.path.abspath(os.path.expanduser(profile_path))
    log_header = '[IProfile] Importing profile:'
    files = None
    if os.path.isdir(profile_path):
        from glob2 import glob
        startup = os.path.join(profile_path, 'startup')
        if os.path.isdir(startup):
            if folder:
                path = '{}/{}/**/*.py'.format(startup, folder)
            else:
                path = '{}/**/*.py'.format(startup)
            files = glob(path)
    elif os.path.isfile(profile_path):
        files = [profile_path]
    if files:
        import imp
        for startup_file in files:
            module = imp.load_source('imported_module', startup_file)
            valid_items = (
                (x, y) for x, y in module.__dict__.items()
                if y and not x.startswith('__')
            )
            for name, mod in valid_items:
                setattr(__builtin__, name, mod)
    else:
        print(
            '{} Nothing was imported from "{}{}"'.format(
                log_header, profile_path,
                '/{}'.format(folder) if folder else '')
        )
def get_file_listing(paths, files, extensions):
    """Generate a listing with all files that should be checked."""
    result = []
    if files:
        result += files
    # pylint: disable=E1101
    for path in paths:
        if extensions:
            for extension in extensions:
                result += glob2.glob("%s/**/*.%s" % (path, extension))
        else:
            result += glob2.glob("%s/**/*" % path)
    # pylint: enable=E1101
    return result
def matched_paths(base: str,
                  include_patterns: t.Union[t.List['str'], str] = ["**", "**/.*"],
                  exclude_patterns: t.List[str] = None) -> t.List[str]:
    """
    All matching paths in the base directory and its child directories.

    :param base: base directory
    :param include_patterns: patterns that match the paths that should be included
    :param exclude_patterns: patterns that match the paths that should be excluded
    :return: matching paths
    """
    typecheck_locals(base=str, include_patterns=List(Str()) | Str(),
                     exclude_patterns=Optional(List(Str())))
    if isinstance(include_patterns, list):
        ret = []
        for pattern in include_patterns:
            ret.extend(matched_paths(base, pattern, exclude_patterns))
        return ret
    cwd = os.getcwd()
    os.chdir(abspath(base))
    import glob2, globster
    names = glob2.glob(include_patterns)
    if exclude_patterns:
        exclude_globster = globster.Globster(exclude_patterns)
        names = [x for x in names if not exclude_globster.match(x)]
    names = list(map(abspath, names))
    # restore the original working directory before returning
    os.chdir(cwd)
    return names
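# A hypothetical call to the helper above; the directory layout and patterns
# shown here are placeholders used only to illustrate the interface.
paths = matched_paths("project",
                      include_patterns=["**/*.py", "**/*.cfg"],
                      exclude_patterns=["build/**"])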
def get_spec_modules(self):
    """Get modules to test."""
    files = glob("**/%s*.py" % self.spec_file_prefix)
    for fn in files:
        fn = fn.replace(os.path.sep, ".")
        mn = re.sub(r"\.py$", "", fn)
        yield importlib.import_module(mn)
def test_only_directories(self):
    # Return directories only
    assert sorted(glob2.glob('**/', True)) == [
        ('a/', ('a',)),
        ('a/foo/', ('a/foo',)),
        ('b/', ('b',)),
    ]
def load(self):
    self._assets = []
    gl_path = glob.glob(os.path.join(self.pre, self._path))
    for _file in gl_path:
        filename = os.path.relpath(_file, self.pre)
        self.add(FileAsset(filename, pre=self.pre,
                           environment=self.environment,
                           storage=self.storage))
    return super(GlobAsset, self).load()
def test_parent_dir(self):
    # Make sure ".." can be used
    os.chdir(path.join(self.basedir, 'b'))
    assert sorted(glob2.glob('../a/**/*.py', True)) == [
        ('../a/bar.py', ('', 'bar')),
        ('../a/foo/hello.py', ('foo', 'hello')),
    ]
def construct_response(scenario_name):
    # load up response data
    data = json.load(open('scenario.cache', 'r'))
    lookup = TemplateLookup(directories=['./scenarios'])
    for path in glob2.glob('./scenarios/**/request.mako'):
        if path != scenario_name:
            continue
        event_name = path.split('/')[-2]
        template = Template("${response}")
        try:
            response = data[event_name].get('response', {})
            text = template.render(response=response).strip()
            response = json.loads(text)
            del response["links"]
            for key, value in list(response.items()):
                response = value[0]
                type = key
            resource = balanced.Resource()
            object_type = resource.registry[type]
            object_instance = object_type()
            for key, value in list(response.items()):
                setattr(object_instance, key, value)
            text = template.render(response=object_instance)
        except KeyError:
            text = ''
    return text
def __get_test_reports(self):
    self.log.info('Getting test reports from {0}'.format(
        self.config['ARTIFACTS_DIR']))
    test_reports = []
    src_dir = self.config['ARTIFACTS_DIR']
    test_results_glob_pattern = os.path.join(
        src_dir, '**/**/**/shippable/testresults/**/*.xml')
    self.log.debug('Looking for test results in {0}'.format(
        test_results_glob_pattern))
    test_report_filenames = glob2.glob(test_results_glob_pattern)
    if len(test_report_filenames) > 0:
        self.log.debug('Found {0} test reports'.format(
            len(test_report_filenames)))
        self.log.debug(test_report_filenames)
        for test_report_filename in test_report_filenames:
            with open(test_report_filename) as test_file:
                test_reports.append({
                    'content': test_file.read(),
                    'filename': test_report_filename
                })
        self.log.info('Test reports parsed : {0}'.format(test_reports))
    else:
        self.log.info('No test reports to upload')
    return test_reports
def find_files(path, patterns):
    if not isinstance(patterns, (list, tuple, set)):
        patterns = [patterns]
    files = []
    for pattern in patterns:
        files += glob2.glob(path + os.sep + pattern)
    return list(set(files))
def test_only_directories(self):
    # Return directories only
    assert_equals(glob2.glob('**/', True), [
        ('a/', ('a',)),
        ('b/', ('b',)),
        ('a/foo/', ('a/foo',))
    ])
def load(__builtin__, folder):
    from iprofile.settings.registry import settings
    import os
    files = None
    settings.read(ignore_errors=True)
    log_header = '[IProfile] Importing folder:'
    folder = os.path.join(
        settings.get('path'), settings.get('last'), 'startup', folder
    )
    if os.path.isdir(folder):
        from glob2 import glob
        import imp
        files = glob('{}/**/*.py'.format(folder))
        for startup_file in files:
            module = imp.load_source('imported_module', startup_file)
            valid_items = (
                (x, y) for x, y in module.__dict__.items()
                if y and not x.startswith('__')
            )
            for name, mod in valid_items:
                setattr(__builtin__, name, mod)
    else:
        print(
            '{} Nothing was imported from "{}"'.format(log_header, folder)
        )
def test(self):
    self.makedirs('dir1', 'dir22')
    self.touch(
        'dir1/a-file', 'dir1/b-file',
        'dir22/a-file', 'dir22/b-file')
    assert glob2.glob('dir?/a-*', True) == [
        ('dir1/a-file', ('1', 'file'))
    ]
""" Created on Wed May 1 13:51:16 2019 @author: wevonosky """ import numpy as np import glob2 as glob import os import matplotlib.pyplot as plt #Where to find the data base = 'C:/Users/sreynolds/Desktop/D2T4S4_clean_v2/' filePath = "C:\\Users\\sreynolds\\Desktop\\shiftFiles\\" #Load in the file paths files = glob.glob(base + '*.npy') #**********initializing all variables to be used*********** #variable for plt.figure fig = plt.figure() #first image variable ax1 = fig.add_subplot(122) #second image variable ax2 = fig.add_subplot(121) #list to hold first image coordinates of fiducials coordsIm1 = [] #list to hold second imag fiducial coordinates coordsIm2 = [] #count of how many fiducials are on first image fidCountIm1 = 0 #count of how many fiducials are on the second image
path_pattern = os.path.join(dir_to_validate, '*.csv')
mergedPath = os.path.join(dir_to_validate, 'MERGED.csv')
dataFrames = []
headers = ['type', 'name', 'input_id', 'time', 'team', 'meas_id']
types = {
    'type': str,
    'name': str,
    'input_id': str,
    'time': numpy.float64,
    'team': str,
    'meas_id': str
}

for file_path in filter(lambda fn: not fn.endswith('MERGED.csv'),
                        glob2.glob(path_pattern)):
    print('Processing file: %s' % file_path)
    df = pandas.read_csv(file_path, header=0, names=headers,
                         low_memory=False, dtype=types)
    valid = True
    valid &= validate_column(df, 'type', '^(?:QUEUE|START|END)$', file_path)
    valid &= validate_column(
        df, 'name', '^(?:Tokenize|Collect|ComputeScalar|ComputeCosine)\d*$',
        file_path)
    valid &= validate_column(
        df, 'input_id', '^(?:Pride|Sense)[1-6](?:_(?:Pride|Sense)[1-6])?$',
        file_path)
    valid &= validate_column(df, 'meas_id', '^(?:Pride)[1-6]_(?:Sense)[1-6]$',
# This script was intended to see if timestamps match between files relative
# to the same date but that contain different variables
import sys
import glob2
import netCDF4 as nc
import argparse

DEFAULT_PATH = '/media/degas/model/ECMWF/ERA5/'

# PARSE ARGUMENTS
parser = argparse.ArgumentParser(
    description='Compare times between pairs of files of different variables.')
parser.add_argument('variables', nargs=2, metavar=('VAR'),
                    help='Variables to be compared. (Should refer to the beginning of filenames)')
parser.add_argument('-p', '--path', help='Root path to find variables',
                    default=DEFAULT_PATH)
args = parser.parse_args()

d1 = glob2.glob(args.path + '**/' + args.variables[0] + '*.nc')
d2 = glob2.glob(args.path + '**/' + args.variables[1] + '*.nc')
d1.sort()
d2.sort()

if len(d1) != len(d2):
    print("They don't have the same amount of files")
    sys.exit()

for f in range(len(d1)):
    print(f"Testing [{f+1}/{len(d1)}] {d1[f]} {d2[f]}")
    dd1 = nc.Dataset(d1[f])
    dd2 = nc.Dataset(d2[f])
    if not (dd1.variables['time'][:] == dd2.variables['time'][:]).all():
        print(f"\033[1;31m{d1[f]} and {d2[f]} times differ\033[m")
    if not (dd1.variables['time'][:] >= 0).all():
def process_logs(settings_file):
    """
    Convert all lcm logs in a directory to rosbags.

    Args:
        settings_file (str): Path to a JSON file with all settings for the operation
    """
    # Load settings
    with open(settings_file, 'r') as f:
        dir_settings = json.load(f)
    dir_lcm_logs = os.path.expanduser(dir_settings["dir_lcm_logs"])
    if not os.path.isdir(dir_lcm_logs):
        sys.stderr.write("%s is not a directory." % dir_lcm_logs)
        sys.exit(1)
    dir_rosbags = os.path.expanduser(dir_settings["dir_rosbags"])
    if os.path.exists(dir_rosbags):
        if not os.path.isdir(dir_rosbags):
            sys.stderr.write("%s exists but it's not a directory." % dir_rosbags)
            sys.exit(1)
        elif os.listdir(dir_rosbags) and not dir_settings["override"]:
            sys.stderr.write("The given directory, %s, exists but it's not empty.\n"
                             "Set 'override' to true if this is intended." % dir_rosbags)
            sys.exit(1)
    else:
        os.makedirs(dir_rosbags)
    log_list = [f for f in glob2.glob(os.path.join(dir_lcm_logs, '**/*.log'))]
    num_logs = len(log_list)
    print("Converting %r logs from %s to %s" % (num_logs, dir_lcm_logs, dir_rosbags))

    # Helper function for parallel processing
    def getArgsForParallelMap(logname):
        log_path = os.path.join(dir_lcm_logs, logname)
        rosbag_path = os.path.join(dir_rosbags, os.path.splitext(logname)[0] + '.bag')
        csv_path = os.path.join(dir_lcm_logs, os.path.splitext(logname)[0] + '_poses_centered.csv')
        rosbag_path = rosbag_path.replace(dir_lcm_logs, dir_rosbags)
        os.makedirs(os.path.split(rosbag_path)[0])
        return (log_path, rosbag_path, csv_path, dir_settings)

    tasks = [getArgsForParallelMap(log) for log in log_list]
    threads = Pool()
    # Shows progress.
    for _ in tqdm(threads.imap_unordered(process_one_log, tasks), total=len(tasks)):
        pass
    threads.close()
    threads.join()

    # for i, log_name in enumerate(log_list):
    #     # print overall progress
    #     progress = (i+1)*100./num_logs
    #     print("%d%%\t" % progress)
    #     log_path = os.path.join(dir_lcm_logs, log_name)
    #     rosbag_path = os.path.join(dir_rosbags, os.path.splitext(log_name)[0] + '.bag')
    #     print dir_rosbags
    #     csv_path = os.path.join(dir_lcm_logs, os.path.splitext(log_name)[0] + '_poses_centered.csv')
    #     rosbag_path = rosbag_path.replace(dir_lcm_logs, dir_rosbags)
    #     os.makedirs(os.path.split(rosbag_path)[0])
    #     print rosbag_path
    #     print log_path
    #     print csv_path
    #     process_one_log((log_path, rosbag_path, csv_path, dir_settings))

    print("Done writing logs to %s ." % dir_rosbags)
def main(self):
    self.log.info('Start')
    self.log.debug('Started: ' + os.path.abspath(__file__))
    self.log.debug('Setting SIGTERM, SIGINT handlers')
    signal.signal(signal.SIGTERM, self.exit_handler)
    signal.signal(signal.SIGINT, self.exit_handler)

    # Read cam configs
    cam_cfg_dir = os.path.join(self.cfg_dir, self.cfg['cam_cfg_mask'])
    self.log.debug('Configs search path: ' + cam_cfg_dir)
    cam_cfg_list = glob2.glob(
        os.path.join(self.cfg_dir, self.cfg['cam_cfg_mask']))
    cam_cfg_list.remove(self.cfg_file)
    self.log.debug('Found configs: %s' % cam_cfg_list)
    if len(cam_cfg_list) == 0:
        self.log.critical('No cam config found. Exit')
        sys.exit(0)
    for cur_cam_cfg in cam_cfg_list:
        self.log.debug('Read cam config: ' + cur_cam_cfg)
        tmp_cfg = Config(open(cur_cam_cfg))
        cur_cam_cfg_active_flag = True
        try:
            tmp_cfg['active']
        except AttributeError:
            self.log.debug('active flag not found')
        else:
            cur_cam_cfg_active_flag = tmp_cfg['active']
        if cur_cam_cfg_active_flag:
            self.cam_cfg.append(tmp_cfg)
            self.cam_cfg_resolver_dict.clear()
            merger = ConfigMerger(resolver=self.configs_resolver)
            merger.merge(self.cam_cfg[-1], self.cfg)
            for key in self.cam_cfg_resolver_dict:
                self.cam_cfg[-1][key] = self.cam_cfg_resolver_dict[key]
            self.log.debug('Loaded settings for: ' + self.cam_cfg[-1]['name'])
        else:
            self.log.debug('Cam config is skipped due active flag: ' + cur_cam_cfg)
    # End Read cam configs

    # Cleaner
    self.cfg['cleaner_max_removes_per_run'] = self.replacer(
        str(self.cfg['cleaner_max_removes_per_run']), 0)
    schedule.every(self.cfg['cleaner_run_every_minutes']).minutes.do(
        self.cleaner)
    # End Cleaner

    # PIDs full path
    for iterator, cam in enumerate(self.cam_cfg):
        try:
            pid_streamer = cam['pid_streamer']
        except AttributeError:
            self.log.debug('pid_streamer not found for cam: ' + cam['name'])
            try:
                pid_streamer = self.cfg['pid_streamer']
            except AttributeError:
                self.log.critical("Can't find pid_streamer in config")
                sys.exit(1)
        try:
            pid_capturer = cam['pid_capturer']
        except AttributeError:
            self.log.debug('pid_capturer not found for cam: ' + cam['name'])
            try:
                pid_capturer = self.cfg['pid_capturer']
            except AttributeError:
                self.log.critical("Can't find pid_capturer in config")
                sys.exit(1)
        self.cam_streamer_pid.append(
            self.replacer(os.path.join(self.cfg['pid_dir'], pid_streamer),
                          iterator))
        self.cam_capturer_pid.append(
            self.replacer(os.path.join(self.cfg['pid_dir'], pid_capturer),
                          iterator))
    # End PIDs full path

    self.kill_cams_process()
    self.write_main_pid()

    while self.main_loop_active_flag:
        for iterator, cam in enumerate(self.cam_cfg):
            if len(self.cam_streamer) == iterator:
                # Create cam cap dir only if cap_cmd is not False
                try:
                    cap_cmd = self.cam_cfg[iterator]['cap_cmd']
                except AttributeError:
                    cap_cmd = None
                    self.log.debug('Capture command not found')
                if cap_cmd is not False:
                    cap_dir_cam = self.replacer(self.cfg['cap_dir_cam'], iterator)
                    if not os.path.exists(cap_dir_cam):
                        try:
                            os.makedirs(cap_dir_cam)
                        except OSError:
                            self.log.critical(
                                'Failed to create directory: ' + cap_dir_cam)
                            sys.exit(1)
                # End Create cam cap dir
                self.cam_streamer_start_flag.append(True)
                self.cam_streamer.append(None)
                self.cam_streamer_start_time.append(0)
                self.cam_streamer_poll_flag.append(False)
                self.cam_capturer.append(None)
                self.cam_capturer_start_flag.append(False)
                self.cam_capturer_check_flag.append(False)
            else:
                if self.cam_streamer[iterator].poll() is None:
                    self.log.debug('Streamer "%s" is alive' % cam['name'])
                else:
                    self.log.warn('Streamer "%s" is dead (exit code: %s)'
                                  % (cam['name'],
                                     self.cam_streamer[iterator].returncode))
                    self.cam_streamer_start_flag[iterator] = True

            # Capturer alive check
            if self.cam_capturer_check_flag[iterator]:
                if self.cam_capturer[iterator].poll() is None:
                    self.log.debug('Capturer "%s" is alive' % cam['name'])
                else:
                    self.log.warn('Capturer "%s" is dead (exit code: %s)'
                                  % (cam['name'],
                                     self.cam_capturer[iterator].returncode))
                    self.cam_streamer_poll_flag[iterator] = True
                    self.cam_capturer_check_flag[iterator] = False
            # End Capturer alive check

            # Run streamer
            if self.cam_streamer_start_flag[iterator]:
                self.log.info('Run "%s" streamer in background' % cam['name'])
                self.cam_streamer[iterator] = self.bg_run(
                    cam['cmd'].strip(), self.cam_streamer_pid[iterator])
                self.cam_streamer_start_time[iterator] = time.time()
                self.cam_streamer_poll_flag[iterator] = True
                self.cam_streamer_start_flag[iterator] = False
            # End Run streamer

            # Poll streamer
            if self.cam_streamer_poll_flag[iterator]:
                cap_url = self.cfg['cap_url']
                cap_url = self.replacer(cap_url, iterator)
                self.log.debug('Getting HTTP status: ' + cap_url)
                http_code = 0
                try:
                    http_code = requests.head(cap_url, timeout=1).status_code
                except requests.exceptions.RequestException:
                    self.log.warn('Failed to connect: ' + cap_url)
                if http_code != 0:
                    self.log.info('Checked "%s", status: %s'
                                  % (cam['name'], http_code))
                    if http_code == 200:
                        self.cam_capturer_start_flag[iterator] = True
                        self.cam_streamer_poll_flag[iterator] = False
                start_time_delta = time.time() - self.cam_streamer_start_time[iterator]
                if self.cam_streamer_poll_flag[iterator]:
                    if start_time_delta > cam['max_start_seconds']:
                        self.log.warn('Time outed waiting data from: ' + cam['name'])
                        self.log.info('Kill: ' + cam['name'])
                        self.kill_cam_processes(iterator, cam_reset_flag=True)
                        self.cam_streamer_start_flag[iterator] = True
                    else:
                        self.log.info('Attempt "%s": [%i/%i]'
                                      % (cam['name'], start_time_delta,
                                         cam['max_start_seconds']))
            # End Poll streamer

            # Run capturer
            if self.cam_capturer_start_flag[iterator]:
                if (self.cam_capturer[iterator] is not None
                        and self.cam_capturer[iterator].poll() is None):
                    self.log.warn('Capturer "%s" is STILL alive' % cam['name'])
                else:
                    cap_cmd = None
                    try:
                        cap_cmd = self.cam_cfg[iterator]['cap_cmd']
                    except AttributeError:
                        self.log.debug(
                            'Capture command not found in cam config. Using global')
                        try:
                            cap_cmd = self.cfg['cap_cmd']
                        except AttributeError:
                            self.log.critical('Capture command not found. Exit')
                            self.exit_handler(None, None, log_signal=False,
                                              exit_code=1)
                    if cap_cmd is not False:
                        cap_cmd = self.replacer(cap_cmd, iterator)
                        self.log.info('Run "%s" capturer in background' % cam['name'])
                        self.cam_capturer[iterator] = self.bg_run(
                            cap_cmd, self.cam_capturer_pid[iterator])
                        self.cam_capturer_check_flag[iterator] = True
                    else:
                        self.log.info('Capturer "%s" is turned off' % cam['name'])
                    self.cam_capturer_start_flag[iterator] = False
            # End Run capturer

        schedule.run_pending()
        time.sleep(1)
    self.log.info('Finish')
'V0'].calibration_SerialNumber
    ds_out.title = 'Oceanographic mooring data deployment of {platform_code} at latitude {geospatial_lat_max:3.1f} longitude {geospatial_lon_max:3.1f} depth {geospatial_vertical_max:3.0f} (m) instrument {instrument} serial {instrument_serial_number}'

    # add creating and history entry
    ds_out.setncattr("date_created", datetime.utcnow().strftime(ncTimeFormat))

    # update the history attribute
    try:
        hist = ds.history + "\n"
    except AttributeError:
        hist = ""
    # keep the history so we know where it came from
    ds_out.setncattr(
        'history', hist + datetime.utcnow().strftime("%Y-%m-%d")
        + " calculate DOX2 from file " + os.path.basename(netCDFfile))

    ds.close()
    ds_out.close()

    return out_file


if __name__ == "__main__":
    files = []
    for f in sys.argv[1:]:
        files.extend(glob(f))
    for f in files:
        extract_sbe43(f)
fig1 = figure('Ip adapta', figsize=(6, 3))
ax7 = fig1.add_subplot(121)
ax8 = fig1.add_subplot(122)

### Panel A and B: axonal currents recording in an example cell
date = 20191114
retina = 'C'
cell = 1
cell_name = '%i %s %i' % (date, retina, cell)

### Loading the data
# path_to_cell = glob2.glob('data/RGC data/' + str(int(date)) + '*' + '/retina ' + str(retina) + '/cell ' + str(int(cell)))[0]
path_to_cell = glob2.glob('/Users/sarah/Documents/Data/Martijn Sierksma/'
                          + str(int(date)) + '*' + '/retina ' + str(retina)
                          + '/cell ' + str(int(cell)))[0]

### -60 mV
abf60 = pyabf.ABF(path_to_cell + '/VC threshold adaptation/2019_11_14_0044.abf')
fs60 = abf60.dataRate * Hz  # sampling rate
dt60 = 1. / fs60
t = dt60 * arange(len(abf60.sweepY))
I = []
V = []
for sweepNumber in abf60.sweepList:
    abf60.setSweep(sweepNumber)
    I.append(abf60.sweepY)
build_exe['include_msvcr'] = True
build_exe['icon'] = 'pathomx/static/icon.ico'

# FIXME: The following is a hack to correctly copy all files required for
# numpy, scipy and nmrglue on Windows. At present cx_Freeze misses a number of
# the .pyd files. The fix is to copy *all* of them regardless if they're used.
# This means bigger binaries (.msi) but they work.
import os, glob2, numpy, scipy, nmrglue

explore_dirs = [
    os.path.dirname(numpy.__file__),
    os.path.dirname(scipy.__file__),
    os.path.dirname(nmrglue.__file__),
]

files = []
for d in explore_dirs:
    files.extend(glob2.glob(os.path.join(d, '**', '*.pyd')))

# Now we have a list of .pyd files; iterate to build a list of tuples into
# include files containing the source path and the basename
for f in files:
    build_all['include_files'].append((f, os.path.basename(f)))

shortcut_table = [
    ("DesktopShortcut",          # Shortcut
     "DesktopFolder",            # Directory_
     "Pathomx",                  # Name
     "TARGETDIR",                # Component_
     "[TARGETDIR]Pathomx.exe",   # Target
     None,                       # Arguments
     None,                       # Description
     None,                       # Hotkey
def __init__(
        self,
        test_train='train',
        data_id: Union[str, int] = '0',
        fraction=1,
        Y_field=None,
        shuffle=True,
        split: float = 0.1,
):
    self.test_train = test_train
    self.data_id = data_id
    self.Y_field = Y_field
    self.split = split if self.test_train == 'train' else 0
    self.shuffle = shuffle if self.test_train == 'train' else False
    self.fraction = fraction if self.test_train == 'train' else 1

    self.csv_filename = f"{settings['dir']['data']}/train.csv"
    self.csv_data = pd.read_csv(self.csv_filename).set_index(
        'image_id', drop=True).astype('category')
    self.csv_data['grapheme'] = self.csv_data['grapheme'].cat.codes.astype('category')
    self.image_filenames = sorted(glob2.glob(
        f"{settings['dir']['data']}/{test_train}_image_data_{data_id}.parquet"))

    self.X: Dict[AnyStr, np.ndarray] = {"train": np.ndarray((0,)), "valid": np.ndarray((0,))}
    self.Y: Dict[AnyStr, Union[pd.DataFrame, Dict]] = {"train": pd.DataFrame(), "valid": pd.DataFrame()}
    self.ID: Dict[AnyStr, np.ndarray] = {"train": np.ndarray((0,)), "valid": np.ndarray((0,))}

    for filename in self.image_filenames:
        raw = {}
        raw['train'], raw['valid'] = pd.read_parquet(filename), None
        if self.fraction < 1:
            raw['train'], discard = train_test_split(
                raw['train'], train_size=self.fraction,
                shuffle=self.shuffle, random_state=0)
            del discard
        if self.split != 0:
            raw['train'], raw['valid'] = train_test_split(
                raw['train'], test_size=self.split,
                shuffle=self.shuffle, random_state=0)
        if raw['valid'] is None:
            raw['valid'] = pd.DataFrame(columns=raw['train'].columns)

        # Attempt to save memory by doing transform_X() within the loop
        # X can be transformed before np.concatenate, but multi-output Y
        # must be done after pd.concat()
        for key, value in raw.items():
            X = self.transform_X(value)
            if len(self.X[key]) == 0:
                self.X[key] = X
            else:
                self.X[key] = np.concatenate([self.X[key], self.transform_X(value)])
            self.Y[key] = pd.concat([self.Y[key], value[['image_id']]])
            self.ID[key] = np.concatenate([self.ID[key], value['image_id'].values])
        del X, raw
        gc.collect()

    self.Y = {key: self.transform_Y(value) for key, value in self.Y.items()}
    pass
import glob2
import datetime

filename = datetime.datetime.now()
outfile = glob2.glob("file" + "*.txt")


# def create_file():
"""This function creates a file named after the current daytime"""
with open((filename.strftime("%Y-%m-%d-%H:%M:%S") + ".txt"), "w") as file:
    for f in outfile:
        with open(f) as infile:
            file.write(infile.read() + "\n")

# create_file(lst)  # here we're calling the function
print('Preparing new target...')
target_data = pd.read_csv(DATA_INPUT_TARGET, sep=',', header=0)
target_data['ImageId'], target_data['ClassId'] = target_data['ID'].str.rsplit('_', 1).str
target_data = pd.pivot_table(target_data[['ImageId', 'ClassId', 'Label']],
                             values='Label', index='ImageId',
                             columns='ClassId', aggfunc=np.sum).reset_index()
target_data.to_csv(DATA_OUTPUT_TARGET, sep=';', header=True, index=False)

print('Preparing new test data...')
stats_m1, stats_m2 = [], []
test_dcm_list = glob2.glob(os.path.join(DATA_INPUT_TEST, '**/*.dcm'))
for file in tqdm(test_dcm_list):
    idx = file.split('/')[-1][:-4]  # get index of the image
    # ignore corrupted images
    if idx == 'ID_6431af929':
        continue
    img_mean, img2_mean = convert_image(filename=file, views=VIEWS,
                                        sz=target_sz, is_test=True,
                                        add_contrast=False)
'''
Created on Feb 7, 2017

@author: mstirling
'''
import preprocess_ETL  # @UnresolvedImport
import pandas as pd, numpy as np
import glob2
import shutil
import os

in_folder = 'C:\Users\mstirling\Desktop\Shared\RW\CTR Files\RW_ECR_Release_13/ctr_ouput/'.replace('\\', '/')
out_folder = in_folder + 'mtm_compare_k2_underlying/'
out_file = 'k2_underlying.csv'

try:
    os.stat(out_folder[:-1])
except:
    os.mkdir(out_folder[:-1])

# concatenate
with open(out_folder + out_file, 'wb') as wfd:
    for f in glob2.glob(in_folder + 'K2_CSM_*.csv'):
        print f
        with open(f, 'rb') as fd:
            shutil.copyfileobj(fd, wfd, 1024 * 1024 * 10)

print 'done.'
print 'to ' + out_folder + out_file
from multiprocessing import Pool
import multiprocessing
from functools import reduce

try:
    from tqdm import tqdm  # long waits are not fun XD
except:
    print('TQDM does make much nicer wait bars...')
    tqdm = lambda x: x

from WordsMap import *

lac = thulac.thulac(seg_only=True)

json_path = './json/**/'
file_list = glob(json_path + "*.json")
print(str(len(file_list)) + ' json file loaded.')

with codecs.open('country.txt', 'r', 'utf-8') as f:
    countries = [line.strip() for line in f]

# alias mode ----------------------------------------------
countries_set = set(countries)
# must ensure that alias names in alias.txt are aligned with the country.txt
with codecs.open('country_alias.txt', 'r', 'utf-8') as f:
    alias = [line.strip().split(',') for line in f]
alias.extend([[]] * (len(countries) - len(alias)))

alias_map = {}
for i, l in enumerate(alias):
    for alia in l:
def main():
    files = glob("file[0-9].txt")
    for afile in files:
        print(afile)
def check_overlinking(m, files):
    pkg_name = m.get_value('package/name')
    errors = []
    run_reqs = [req.split(' ')[0] for req in m.meta.get('requirements', {}).get('run', [])]
    # sysroots and whitelists are similar, but the subtle distinctions are important.
    sysroots = glob(os.path.join(m.config.build_prefix, '**', 'sysroot'))
    whitelist = []
    if 'target_platform' in m.config.variant and m.config.variant['target_platform'] == 'osx-64':
        if not len(sysroots):
            sysroots = ['/usr/lib', '/opt/X11', '/System/Library/Frameworks']
        whitelist = ['/opt/X11/',
                     '/usr/lib/libSystem.B.dylib',
                     '/usr/lib/libcrypto.0.9.8.dylib',
                     '/usr/lib/libobjc.A.dylib',
                     '/System/Library/Frameworks/Accelerate.framework/*',
                     '/System/Library/Frameworks/AGL.framework/*',
                     '/System/Library/Frameworks/AppKit.framework/*',
                     '/System/Library/Frameworks/ApplicationServices.framework/*',
                     '/System/Library/Frameworks/AudioToolbox.framework/*',
                     '/System/Library/Frameworks/AudioUnit.framework/*',
                     '/System/Library/Frameworks/AVFoundation.framework/*',
                     '/System/Library/Frameworks/CFNetwork.framework/*',
                     '/System/Library/Frameworks/Carbon.framework/*',
                     '/System/Library/Frameworks/Cocoa.framework/*',
                     '/System/Library/Frameworks/CoreAudio.framework/*',
                     '/System/Library/Frameworks/CoreFoundation.framework/*',
                     '/System/Library/Frameworks/CoreGraphics.framework/*',
                     '/System/Library/Frameworks/CoreMedia.framework/*',
                     '/System/Library/Frameworks/CoreBluetooth.framework/*',
                     '/System/Library/Frameworks/CoreMIDI.framework/*',
                     '/System/Library/Frameworks/CoreMedia.framework/*',
                     '/System/Library/Frameworks/CoreServices.framework/*',
                     '/System/Library/Frameworks/CoreText.framework/*',
                     '/System/Library/Frameworks/CoreVideo.framework/*',
                     '/System/Library/Frameworks/CoreWLAN.framework/*',
                     '/System/Library/Frameworks/DiskArbitration.framework/*',
                     '/System/Library/Frameworks/Foundation.framework/*',
                     '/System/Library/Frameworks/GameController.framework/*',
                     '/System/Library/Frameworks/GLKit.framework/*',
                     '/System/Library/Frameworks/ImageIO.framework/*',
                     '/System/Library/Frameworks/IOBluetooth.framework/*',
                     '/System/Library/Frameworks/IOKit.framework/*',
                     '/System/Library/Frameworks/IOSurface.framework/*',
                     '/System/Library/Frameworks/OpenAL.framework/*',
                     '/System/Library/Frameworks/OpenGL.framework/*',
                     '/System/Library/Frameworks/Quartz.framework/*',
                     '/System/Library/Frameworks/QuartzCore.framework/*',
                     '/System/Library/Frameworks/Security.framework/*',
                     '/System/Library/Frameworks/StoreKit.framework/*',
                     '/System/Library/Frameworks/SystemConfiguration.framework/*',
                     '/System/Library/Frameworks/WebKit.framework/*']
    whitelist += m.meta.get('build', {}).get('missing_dso_whitelist', [])
    for f in files:
        path = os.path.join(m.config.host_prefix, f)
        if not is_obj(path):
            continue
        warn_prelude = "WARNING ({},{})".format(pkg_name, f)
        err_prelude = "  ERROR ({},{})".format(pkg_name, f)
        info_prelude = "   INFO ({},{})".format(pkg_name, f)
        msg_prelude = err_prelude if m.config.error_overlinking else warn_prelude
        needed = inspect_linkages(path, resolve_filenames=True, recurse=False)
        for needed_dso in needed:
            if needed_dso.startswith(m.config.host_prefix):
                in_prefix_dso = os.path.normpath(needed_dso.replace(m.config.host_prefix + '/', ''))
                n_dso_p = "Needed DSO {}".format(in_prefix_dso)
                and_also = " (and also in this package)" if in_prefix_dso in files else ""
                pkgs = list(which_package(in_prefix_dso, m.config.host_prefix))
                in_pkgs_in_run_reqs = [pkg for pkg in pkgs if pkg.quad[0] in run_reqs]
                in_whitelist = any([glob2.fnmatch.fnmatch(in_prefix_dso, w) for w in whitelist])
                if in_whitelist:
                    print_msg(errors, '{}: {} found in the whitelist'.
                              format(info_prelude, n_dso_p))
                elif len(in_pkgs_in_run_reqs) == 1 and m.config.verbose:
                    print_msg(errors, '{}: {} found in {}{}'.format(
                        info_prelude, n_dso_p, in_pkgs_in_run_reqs[0], and_also))
                elif len(in_pkgs_in_run_reqs) == 0 and len(pkgs) > 0:
                    print_msg(errors, '{}: {} found in {}{}'.format(
                        msg_prelude, n_dso_p, [p.quad[0] for p in pkgs], and_also))
                    print_msg(errors, '{}: .. but {} not in reqs/run, i.e. it is overlinked'
                                      ' (likely) or a missing dependency (less likely)'.
                              format(msg_prelude, [p.quad[0] for p in pkgs]))
                elif len(in_pkgs_in_run_reqs) > 1:
                    print_msg(errors, '{}: {} found in multiple packages in run/reqs: {}{}'
                              .format(warn_prelude, in_prefix_dso,
                                      [p.quad[0] for p in in_pkgs_in_run_reqs], and_also))
                else:
                    if in_prefix_dso not in files:
                        print_msg(errors, '{}: {} not found in any packages'.format(
                            msg_prelude, in_prefix_dso))
                    elif m.config.verbose:
                        print_msg(errors, '{}: {} found in this package'.format(
                            info_prelude, in_prefix_dso))
            elif needed_dso.startswith(m.config.build_prefix):
                print_msg(errors, "ERROR: {} found in build prefix; should never happen".format(
                    needed_dso))
            else:
                # A system or ignored dependency. We should be able to find it in one of the CDT or
                # compiler packages on linux or in a sysroot folder on other OSes. These usually
                # start with '$RPATH/' which indicates pyldd did not find them, so remove that now.
                if needed_dso.startswith('$RPATH/'):
                    needed_dso = needed_dso.replace('$RPATH/', '')
                in_whitelist = any([glob2.fnmatch.fnmatch(needed_dso, w) for w in whitelist])
                if in_whitelist:
                    n_dso_p = "Needed DSO {}".format(needed_dso)
                    print_msg(errors, '{}: {} found in the whitelist'.
                              format(info_prelude, n_dso_p))
                elif m.config.verbose and len(sysroots):
                    # Check if we have a CDT package.
                    dso_fname = os.path.basename(needed_dso)
                    sysroot_files = []
                    for sysroot in sysroots:
                        sysroot_files.extend(glob(os.path.join(sysroot, '**', dso_fname)))
                    if len(sysroot_files):
                        # Removing config.build_prefix is only *really* for Linux, though we could
                        # use CONDA_BUILD_SYSROOT for macOS. We should figure out what to do about
                        # /opt/X11 too.
                        # Find the longest suffix match.
                        rev_needed_dso = needed_dso[::-1]
                        match_lens = [len(os.path.commonprefix([s[::-1], rev_needed_dso]))
                                      for s in sysroot_files]
                        idx = max(range(len(match_lens)), key=match_lens.__getitem__)
                        in_prefix_dso = os.path.normpath(sysroot_files[idx].replace(
                            m.config.build_prefix + '/', ''))
                        n_dso_p = "Needed DSO {}".format(in_prefix_dso)
                        pkgs = list(which_package(in_prefix_dso, m.config.build_prefix))
                        if len(pkgs):
                            print_msg(errors, '{}: {} found in CDT/compiler package {}'.
                                      format(info_prelude, n_dso_p, pkgs[0]))
                        else:
                            print_msg(errors, '{}: {} not found in any CDT/compiler package,'
                                              ' nor the whitelist?!'.
                                      format(msg_prelude, n_dso_p))
                    else:
                        print_msg(errors, "{}: {} not found in sysroot, is this binary repackaging?"
                                          " .. do you need to use install_name_tool/patchelf?".
                                  format(msg_prelude, needed_dso))
                else:
                    print_msg(errors, "{}: did not find - or even know where to look for: {}".
                              format(msg_prelude, needed_dso))
    if len(errors):
        sys.exit(1)
def cleaner(self):
    self.log.debug('Cleaner started')
    clean_flag = False
    store_file_list = None
    if int(self.cfg['cleaner_store_max_gb']) != 0:
        store_file_list = glob2.glob(
            os.path.join(self.cfg['cap_dir'], '**'))
        self.log.debug('Found files: %s' % store_file_list)
        store_file_total_size_bytes = 0
        for store_file in store_file_list:
            store_file_total_size_bytes += os.path.getsize(store_file)
        store_file_total_size_gigabytes = 1.0 * store_file_total_size_bytes / 1024 / 1024 / 1024
        self.log.debug('Store files size, Gb: %f' % store_file_total_size_gigabytes)
        if store_file_total_size_gigabytes > float(self.cfg['cleaner_store_max_gb']):
            self.log.info(
                'Current store size / Configured max store size, Gb: %.3f/%.3f'
                % (store_file_total_size_gigabytes,
                   self.cfg['cleaner_store_max_gb']))
            clean_flag = True
    if int(self.cfg['cleaner_store_keep_free_gb']) != 0:
        store_stat = os.statvfs(self.cfg['cap_dir'])
        store_free_gb = 1.0 * store_stat.f_bavail * store_stat.f_frsize / 1024 / 1024 / 1024
        self.log.debug('Store free space, Gb: %f' % store_free_gb)
        if store_free_gb < float(self.cfg['cleaner_store_keep_free_gb']):
            self.log.info(
                'Current store free space / Configured keep store free space, Gb: %.3f/%.3f'
                % (store_free_gb, self.cfg['cleaner_store_keep_free_gb']))
            clean_flag = True
    if clean_flag:
        self.log.info('Clean is active')
        if store_file_list is None:
            store_file_list = glob2.glob(
                os.path.join(self.cfg['cap_dir'], '**'))
        store_file_list_sorted = SortedDict()
        for store_file in store_file_list:
            store_file_list_sorted.update(
                {os.path.getmtime(store_file): store_file})
        self.log.debug(
            'Sorted files list (with last modification date): %s'
            % store_file_list_sorted)
        self.log.debug(
            'Sorted files list (by last modification date): %s'
            % store_file_list_sorted.values())
        removes = 0
        for file_name in store_file_list_sorted.values():
            if os.path.isfile(file_name):
                file_size = os.path.getsize(file_name)
                self.log.info('Remove file: ' + file_name)
                os.remove(file_name)
                if file_size > int(self.cfg['cleaner_force_remove_file_less_bytes']):
                    removes += 1
                else:
                    self.log.warn(
                        'Removed "%s" file with the "%s" bytes size'
                        % (file_name, file_size))
                if removes == int(self.cfg['cleaner_max_removes_per_run']):
                    self.log.debug('Max removes reached: '
                                   + self.cfg['cleaner_max_removes_per_run'])
                    break
    self.log.debug('Cleaner finished')
def __init__(self, **kwargs):
    """!
    In this initialization pytorch dataloader function there is some
    checking for all the available arguments.

    \warning Input dataset dir should have the following structure:
    ./dataset_dir
        ./train
            ./uid
                mixture_wav
                clean_wavs
        ./test
        ./val
    """
    self.kwargs = kwargs
    self.dataset_dirpath = self.get_arg_and_check_validness(
        'input_dataset_p', known_type=str,
        extra_lambda_checks=[lambda x: os.path.lexists(x)])
    self.dataset_samples_folders = glob2.glob(
        os.path.join(self.dataset_dirpath, '*'))
    self.n_items = len(self.dataset_samples_folders)
    self.get_top = self.get_arg_and_check_validness('get_top')
    if isinstance(self.get_top, int):
        self.n_items = min(self.n_items, self.get_top)
        self.dataset_samples_folders = \
            self.dataset_samples_folders[:self.n_items]
    self.n_jobs = self.get_arg_and_check_validness(
        'n_jobs', known_type=int,
        extra_lambda_checks=[lambda x: x <= psutil.cpu_count()])
    self.batch_size = self.get_arg_and_check_validness(
        'batch_size', known_type=int,
        extra_lambda_checks=[lambda x: x <= self.n_items])
    # The following names are considered:
    # 'bpd'                -> Binary Phase Difference Mask
    # 'ds'                 -> Binary Dominating Source Mask
    # 'rpd'                -> Continuous Raw Phase Difference Mask
    # 'mixture_wav'        -> The wav of the mixture on mic 1 (1d numpy)
    # 'bpd_sources_wavs'   -> Wavs for K sources reconstructed by
    #                         applying bpd mask and istft (2d (K rows) numpy)
    #                         recorded on mic 1
    # 'clean_sources_wavs' -> Wavs for K clean sources
    #                         (2d (K rows) numpy) recorded on mic 1
    self.return_items = self.get_arg_and_check_validness(
        'return_items', known_type=list,
        choices=['mixture_wav', 'clean_sources_wavs',
                 'mic1_wav_downsampled', 'clean_sources_wavs_downsampled',
                 'mixture_wav_norm', 'clean_sources_wavs_norm'])
    self.n_batches = int(self.n_items / self.batch_size)
    self.n_sources = self.infer_n_sources()
import glob2
import cv2

filelist = glob2.glob("*.jpg")
for file in filelist:
    print("filename = %s" % file)
    img = cv2.imread(file, 1)  # parameter 2: 1=colour, 0=grayscale, -1=colour with transparent capability
    img_resized = cv2.resize(img, (100, 100))
    cv2.imshow("Hey", img_resized)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    cv2.imwrite("resized_" + file, img_resized)

# cv2.imshow("Galaxy", img)  # Title, image object
# cv2.imshow("Galaxy resized", resized_image)
# cv2.imshow("Galaxy resized ratio", resized_image_ratio)
# cv2.imwrite("Galaxy_resized_ratio.jpg", resized_image_ratio)
# cv2.waitKey(0)  # Functionality to close the window - 0=closes when user presses any button
#                 # 2000 = 2000 milliseconds
# cv2.destroyAllWindows()
import glob2
import json

all_files = glob2.glob('data/**/*.json')

flight_reports = []
altitude_reports = []
speed_reports = []
messages = 0

for file in all_files:
    with open(file, 'r') as f:
        data = json.loads(f.readline())
        aircrafts = data.get('aircraft')
        for aircraft in aircrafts:
            # Flights
            flight = aircraft.get('hex')
            flight_reports.append(flight)
            # Altitudes
            altitude = aircraft.get('altitude', 0)
            if type(altitude) is int:
                altitude_reports.append(altitude)
            # Speed
            speed = aircraft.get('speed', 0)
            speed_reports.append(speed)
            messages += aircraft.get('messages', 0)

print('Number of aircraft seen: %s' % len(set(flight_reports)))
average_altitute = int(sum(set(altitude_reports)) / len(set(altitude_reports)))
print('Average altitude seen: %s' % average_altitute)
print('Highest speed seen: %s' % max(speed_reports))
def convert_many(self, mask):
    for fl in glob2.glob(mask):
        self.convert_one(fl)
def get_all_ref(folder_path):
    all_ref_folder = glob(os.path.join(folder_path, '*'))
    all_hard_truth_num = 0
    all_gra_truth_num = 0
    all_hard_segments_num = 0
    all_gra_segments_num = 0
    all_normal_segments_num = 0
    all_write_segments_num = 0
    for i in all_ref_folder:
        if cmp(i.split(os.sep)[-1], '2005') == 0:
            continue
        all_xml_in_this_folder = glob(os.path.join(i, '*.xml'))
        for xml in all_xml_in_this_folder:
            with open(xml) as f:
                lines = f.readlines()
            frame_num = int(lines[1].split('"')[-2])
            hard_truth, gra_truth = get_labels_TRECViD(xml)
            all_hard_truth_num += len(hard_truth)
            all_gra_truth_num += len(gra_truth)
            normal_segments = []
            hard_segments = []
            gra_segments = []
            end = 15
            hard_truth_index = 0
            gra_truth_index = 0
            flag_hard = False
            flag_gra = False
            while end < frame_num:
                if hard_truth_index < len(hard_truth) and if_overlap_hard(
                        end - 15, end,
                        hard_truth[hard_truth_index][0],
                        hard_truth[hard_truth_index][1]):
                    hard_segments.append([end - 15, end])
                    flag_hard = True
                elif gra_truth_index < len(gra_truth) and if_overlap_segment(
                        end - 15, end,
                        gra_truth[gra_truth_index][0],
                        gra_truth[gra_truth_index][1]):
                    gra_segments.append([end - 15, end])
                    flag_gra = True
                else:
                    normal_segments.append([end - 15, end])
                end += 8
                if hard_truth_index < len(hard_truth) and flag_hard == True \
                        and end - 15 >= hard_truth[hard_truth_index][1]:
                    flag_hard = False
                    hard_truth_index += 1
                if gra_truth_index < len(gra_truth) and flag_gra == True \
                        and end - 15 - gra_truth[gra_truth_index][1] > -5:
                    flag_gra = False
                    gra_truth_index += 1
            write_hard = [str(line_hard[0]) + '\t' + str(line_hard[1]) + '\t2\n'
                          for line_hard in hard_segments]
            write_gra = [str(line_gra[0]) + '\t' + str(line_gra[1]) + '\t1\n'
                         for line_gra in gra_segments]
            write_normal = [str(line_normal[0]) + '\t' + str(line_normal[1]) + '\t0\n'
                            for line_normal in normal_segments
                            if random.random() < 0.05]
            all_write_segments_num += len(write_normal)
            write = []
            write.extend(write_hard)
            write.extend(write_gra)
            write.extend(write_normal)
            with open('/home/C3D/ref2_test/' + xml.split('.')[0].split(os.sep)[-1] + '.txt', 'w') as f:
                f.writelines(write)
            all_hard_segments_num += len(hard_segments)
            all_gra_segments_num += len(gra_segments)
            all_normal_segments_num += len(normal_segments)
    print 'a'
    print 'a'
def create_records(audio_path, output_path, dataset):
    """Pre-processes the raw audio and generates TFRecords.

    This function computes the mfcc features, encodes string transcripts
    into integers, and generates sequence examples for each utterance.
    Multiple sequence records are then written into TFRecord files.

    Parameters
    ----------
    audio_path:
        Path to dataset.
    output_path:
        Where to write .tfrecords.
    dataset:
        Either 'librispeech' or 'commonvoice'. Determines which dataset
        format to parse.
    """
    assert os.path.exists(audio_path), \
        f'Invalid audio path: {audio_path}. Path doesn\'t exist.'
    assert dataset.lower() in ['librispeech', 'commonvoice'], \
        f'Invalid dataset parameter: {dataset}. Must be one of "librispeech", "commonvoice"'
    dataset = dataset.lower()

    size_json = defaultdict(int)
    partitions = itertools.chain.from_iterable([
        glob2.glob(os.path.join(audio_path, pattern))
        for pattern in ['dev*', 'train*', 'test*']
    ])
    for partition in sorted(partitions):
        if dataset == 'librispeech':
            if os.path.isfile(partition):
                continue
            print('Processing ' + partition)
            feats, transcripts, utt_len = process_librispeech_data(partition)
            write_suffix = partition.split(os.path.sep)[-1]
        elif dataset == 'commonvoice':
            if os.path.isdir(partition) or any(
                    e in partition for e in ['invalidated', 'other']):
                continue
            print('Processing ' + partition)
            feats, transcripts, utt_len = process_common_voice_data(partition)
            write_suffix, _ = os.path.splitext(os.path.basename(partition))
        else:
            raise NotImplementedError

        sorted_utts = sorted(utt_len, key=utt_len.get)
        # bin into groups of 100 frames.
        max_t = int(utt_len[sorted_utts[-1]] / 100)
        min_t = int(utt_len[sorted_utts[0]] / 100)

        # Create destination directory
        write_dir = os.path.join(output_path, write_suffix)
        if os.path.exists(write_dir):
            shutil.rmtree(write_dir)
        os.makedirs(write_dir)

        if 'train' in os.path.basename(partition):
            # Create multiple TFRecords based on utterance length for training
            writer = {}
            count = {}
            print('Processing training files...')
            for i in range(min_t, max_t + 1):
                filename = os.path.join(write_dir, 'train' + '_' + str(i) + '.tfrecords')
                writer[i] = tf.io.TFRecordWriter(filename)
                count[i] = 0
            for utt in tqdm(sorted_utts, desc='Writing TFRecords'):
                example = make_example(utt_len[utt], feats[utt].tolist(), transcripts[utt])
                index = int(utt_len[utt] / 100)
                writer[index].write(example)
                count[index] += 1
            for i in range(min_t, max_t + 1):
                writer[i].close()
            print(count)
            # Remove bins which have fewer than 20 utterances
            for i in range(min_t, max_t + 1):
                if count[i] < 20:
                    os.remove(os.path.join(write_dir, 'train' + '_' + str(i) + '.tfrecords'))
                    count[i] = 0
            # Save dataset size (sum the per-bin utterance counts, not the bin indices)
            size_json['train_size'] = sum(count.values())
        else:
            # Create single TFRecord for dev and test partition
            filename = os.path.join(write_dir, os.path.basename(write_dir) + '.tfrecords')
            print('Creating', filename)
            record_writer = tf.io.TFRecordWriter(filename)
            for utt in tqdm(sorted_utts, desc='Writing TFRecords'):
                example = make_example(utt_len[utt], feats[utt].tolist(), transcripts[utt])
                record_writer.write(example)
            record_writer.close()
            partition_size = len(sorted_utts)
            partition_name = os.path.basename(partition)
            if 'dev' in partition_name:
                size_json['validation_size'] = partition_size
            elif 'test' in partition_name:
                size_json['test_size'] = partition_size
            else:
                raise ValueError(f'Invalid partition {partition_name}')

    # Merge the newly computed sizes into any existing size.json in the output directory
    json_path = os.path.join(output_path, 'size.json')
    loaded_json = defaultdict(int)
    if os.path.exists(json_path):
        with open(json_path, 'r') as f:
            loaded_json = {k: int(v) for k, v in json.loads(f.read()).items()}
    for key in ['train_size', 'validation_size', 'test_size']:
        if size_json[key]:
            loaded_json[key] = size_json[key]
    with open(json_path, 'w') as f:
        json.dump(loaded_json, f)
    print(f'Processed partitions: {dict(size_json)}')
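The length-bucketing step is the core idea above and is easy to lose in the dataset-specific detail. The sketch below isolates it under stated assumptions: `serialize(utt)` is a made-up stand-in for `make_example`, and the output file layout is illustrative only.

import tensorflow as tf

def write_bucketed_records(utt_len, serialize, out_dir):
    """Group utterances into 100-frame length buckets, one TFRecord file per bucket."""
    writers = {}
    for utt, n_frames in utt_len.items():
        bucket = n_frames // 100
        if bucket not in writers:
            writers[bucket] = tf.io.TFRecordWriter(f'{out_dir}/train_{bucket}.tfrecords')
        writers[bucket].write(serialize(utt))
    for w in writers.values():
        w.close()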
def main(command, argument, argument2, paths_to_mutate, backup, runner, tests_dir, test_time_multiplier, test_time_base, swallow_output, use_coverage, dict_synonyms, cache_only, version, suspicious_policy, untested_policy, pre_mutation, post_mutation, use_patch_file, paths_to_exclude, simple_output): """return exit code, after performing an mutation test run. :return: the exit code from executing the mutation tests :rtype: int """ if version: print("mutmut version {}".format(__version__)) return 0 if use_coverage and use_patch_file: raise click.BadArgumentUsage( "You can't combine --use-coverage and --use-patch") valid_commands = ['run', 'results', 'apply', 'show', 'junitxml', 'html'] if command not in valid_commands: raise click.BadArgumentUsage( '{} is not a valid command, must be one of {}'.format( command, ', '.join(valid_commands))) if command == 'results' and argument: raise click.BadArgumentUsage( 'The {} command takes no arguments'.format(command)) dict_synonyms = [x.strip() for x in dict_synonyms.split(',')] if command in ('show', 'diff'): if not argument: print_result_cache() return 0 if argument == 'all': print_result_cache(show_diffs=True, dict_synonyms=dict_synonyms, print_only_filename=argument2) return 0 if os.path.isfile(argument): print_result_cache(show_diffs=True, only_this_file=argument) return 0 print(get_unified_diff(argument, dict_synonyms)) return 0 if use_coverage and not exists('.coverage'): raise FileNotFoundError( 'No .coverage file found. You must generate a coverage file to use this feature.' ) if command == 'results': print_result_cache() return 0 if command == 'junitxml': print_result_cache_junitxml(dict_synonyms, suspicious_policy, untested_policy) return 0 if command == 'html': create_html_report(dict_synonyms) return 0 if command == 'apply': do_apply(argument, dict_synonyms, backup) return 0 if paths_to_mutate is None: paths_to_mutate = guess_paths_to_mutate() if not isinstance(paths_to_mutate, (list, tuple)): paths_to_mutate = [x.strip() for x in paths_to_mutate.split(',')] if not paths_to_mutate: raise click.BadOptionUsage( '--paths-to-mutate', 'You must specify a list of paths to mutate. Either as a command line argument, or by setting paths_to_mutate under the section [mutmut] in setup.cfg' ) tests_dirs = [] for p in tests_dir.split(':'): tests_dirs.extend(glob(p, recursive=True)) for p in paths_to_mutate: for pt in tests_dir.split(':'): tests_dirs.extend(glob(p + '/**/' + pt, recursive=True)) del tests_dir current_hash_of_tests = hash_of_tests(tests_dirs) os.environ[ 'PYTHONDONTWRITEBYTECODE'] = '1' # stop python from creating .pyc files using_testmon = '--testmon' in runner output_legend = { "killed": "🎉", "timeout": "⏰", "suspicious": "🤔", "survived": "🙁", "skipped": "🔇", } if simple_output: output_legend = { key: key.upper() for (key, value) in output_legend.items() } print(""" - Mutation testing starting - These are the steps: 1. A full test suite run will be made to make sure we can run the tests successfully and we know how long it takes (to detect infinite loops for example) 2. Mutants will be generated and checked Results are stored in .mutmut-cache. Print found mutants with `mutmut results`. Legend for output: {killed} Killed mutants. The goal is for everything to end up in this bucket. {timeout} Timeout. Test suite took 10 times as long as the baseline so were killed. {suspicious} Suspicious. Tests took a long time, but not long enough to be fatal. {survived} Survived. This means your tests need to be expanded. {skipped} Skipped. Skipped. 
""".format(**output_legend)) if runner is DEFAULT_RUNNER: try: import pytest except ImportError: runner = 'python -m unittest' baseline_time_elapsed = time_test_suite( swallow_output=not swallow_output, test_command=runner, using_testmon=using_testmon, current_hash_of_tests=current_hash_of_tests, ) if hasattr(mutmut_config, 'init'): mutmut_config.init() if using_testmon: copy('.testmondata', '.testmondata-initial') # if we're running in a mode with externally whitelisted lines covered_lines_by_filename = None coverage_data = None if use_coverage or use_patch_file: covered_lines_by_filename = {} if use_coverage: coverage_data = read_coverage_data() check_coverage_data_filepaths(coverage_data) else: assert use_patch_file covered_lines_by_filename = read_patch_data(use_patch_file) if command != 'run': raise click.BadArgumentUsage("Invalid command {}".format(command)) mutations_by_file = {} paths_to_exclude = paths_to_exclude or '' if paths_to_exclude: paths_to_exclude = [ path.strip() for path in paths_to_exclude.split(',') ] config = Config( total=0, # we'll fill this in later! swallow_output=not swallow_output, test_command=runner, covered_lines_by_filename=covered_lines_by_filename, coverage_data=coverage_data, baseline_time_elapsed=baseline_time_elapsed, backup=backup, dict_synonyms=dict_synonyms, using_testmon=using_testmon, cache_only=cache_only, tests_dirs=tests_dirs, hash_of_tests=current_hash_of_tests, test_time_multiplier=test_time_multiplier, test_time_base=test_time_base, pre_mutation=pre_mutation, post_mutation=post_mutation, paths_to_mutate=paths_to_mutate, ) parse_run_argument(argument, config, dict_synonyms, mutations_by_file, paths_to_exclude, paths_to_mutate, tests_dirs) config.total = sum( len(mutations) for mutations in mutations_by_file.values()) print() print('2. Checking mutants') progress = Progress(total=config.total, output_legend=output_legend) try: run_mutation_tests(config=config, progress=progress, mutations_by_file=mutations_by_file) except Exception as e: traceback.print_exc() return compute_exit_code(progress, e) else: return compute_exit_code(progress) finally: print() # make sure we end the output with a newline # Close all active multiprocessing queues to avoid hanging up the main process close_active_queues()
                               step=step):
        if flatten:
            xw.append(window.flatten())
        else:
            xw.append(window)
        yw.append(label)
    xw = np.array(xw)
    yw = np.array(yw).reshape(-1, 1)
    return xw, yw


if __name__ == '__main__':
    # 1. read folders "220617", "260617", ...
    # loop = tqdm(glob('./data/raw/*'))
    # loop = tqdm(glob('F:/datasets/shl/shl-3users/*'))
    loop = tqdm(list(glob('F:/datasets/shl_user1_hips/user1/*')))
    for d in loop:
        y_file = "Label.txt"
        # modes = ["Hand_Motion.txt", "Torso_Motion.txt"]
        modes = ["Hips_Motion.txt"]
        for x_file in modes:
            # Load X, Y
            loop.set_description("Loading")
            X_path, Y_path = join(d, x_file), join(d, y_file)
            # X = pd.DataFrame(np.loadtxt(X_path), columns=["Time(ms)"] + feature_columns)
            X = pd.read_csv(X_path, sep=' ', header=None,
                            names=["Time(ms)"] + feature_columns)
            X.set_index(pd.to_datetime(X['Time(ms)'], unit='ms'), inplace=True)
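The head of the windowing loop above is truncated, so the generator it iterates over is not visible. The sketch below shows one plausible shape for it under stated assumptions: fixed-width windows advanced by `step` rows, each labelled by the most frequent sample label. Names and the majority-vote rule are assumptions, not taken from the original.

import numpy as np

def sliding_window(X, y, width, step):
    """Yield (window, label) pairs of `width` rows, advancing by `step` rows."""
    for start in range(0, len(X) - width + 1, step):
        window = X[start:start + width]
        labels = y[start:start + width]
        # label the window by the most frequent (non-negative integer) sample label
        label = np.bincount(labels.astype(int)).argmax()
        yield window, label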
def find_files():
    """find all markdown files"""
    md_files = glob2.glob('*.md')
    return md_files
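A brief usage note: glob2 supports recursive '**' patterns, so the same idea extends to markdown files in sub-directories. The path below is illustrative.

import glob2

all_md = glob2.glob('docs/**/*.md')  # every .md file under docs/, recursively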
# their positions
from __future__ import print_function
import numpy as np
import yt
import caesar
import ipdb
from glob2 import glob

# directory = '/ufrc/narayanan/desika.narayanan/gizmo_runs/simba/m25n512/output/Groups//'
directory = '/orange/narayanan/desika.narayanan/gizmo_runs/simba/m25n512_filtered/output/Groups/'
NGALAXIES_MAX = 10000
TESTING = False
outfile = '/ufrc/narayanan/desika.narayanan/pd_runs/simba/m25n512/simba_m25n512.galaxies_pos_for_pd.npz'
DRIVER_FORMAT = False

MEMBERS = np.sort(glob('%s/caesar*.hdf5' % (directory)))

pos = {}
ngalaxies = {}
for nh in range(NGALAXIES_MAX):
    pos['galaxy' + str(nh)] = {}

if TESTING:
    MEMBERS = [MEMBERS[-1]]

for file in MEMBERS:
    # this gets snaps in 3 digit format
    if DRIVER_FORMAT:
        # if the caesar files were written with driver
        snapnum = file[file.find('caesar_') + 8:file.find('_z')]
    else:
        # else they were written with the CLI
        snapnum = file[file.find('caesar_') + 16:file.find('.hdf5')]
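The snapshot number is recovered above by index arithmetic on the caesar filename. As a hedged alternative sketch (the exact filename layouts are assumptions, not confirmed by the script), a regular expression can pull the digits out regardless of which tool wrote the file:

import re

def snapnum_from_filename(path):
    """Return the first run of digits following 'caesar', or None if absent."""
    m = re.search(r'caesar[_A-Za-z]*_?(\d+)', path)
    return m.group(1) if m else None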
def scanFile(pyfile, repodir): repodir = os.path.dirname(repodir) sys.path.extend(glob2.glob(os.path.join(repodir, "**", "**"))) fid = open(pyfile, 'rb') lines = fid.readlines() lines = map(lambda x: x.strip(), lines) quotinBig = False quotinSmall = False val = [] for i, line in enumerate(lines): if (line.strip().startswith('"""') or line.strip().endswith('"""') ) and line.find('IMPORTERATOR') == -1: if not quotinBig: quotinBig = True else: quotinBig = False if line.strip().startswith("'''") or line.strip().endswith("'''"): if not quotinSmall: quotinSmall = True else: quotinSmall = False if line.find('import') != -1 and not (quotinBig or quotinSmall): value = parseImport(line, i) for j in range(len(value)): if value[j][0]: print value[j] val.extend(list([value[j]])) # return for statement in val: # print 'importing {}'.format(statement[0]) # imported = __import__(statement[0]) imported = importlib.import_module(statement[0], package=None) linenums = getLineNums(statement, val) if len(linenums) > 1: print 'import {} on line number {} seems to be imported multiple times'.format( statement[0], statement[2] + 1) copylines = getLines(lines, linenums) findthiswith = [] if statement[1][0] and not statement[1][1]: lookdeeper = True findthis = statement[1][0] elif not statement[1][0] and statement[1][1]: lookdeeper = False findthis = statement[1][1] else: lookdeeper = True findthis = statement[0] # checker = 'ccrs' if lookdeeper: findthiswith = [ o[0] for o in inspect.getmembers(imported) if ((callable(o[1])) and not o[0].startswith('_')) ] breakmain = False FOUND = False for line in copylines: #need to add more logic here TODO if import after ''' or ignore everything after a # thats not in quotes. if line.startswith("'''") or line.startswith('#'): continue if findthiswith: searchstring = findthis + '.' # print searchstring if searchstring in line: # if findthis == checker: # print searchstring FOUND = True breakmain = True break for thing in findthiswith: searchstring = findthis + '.' + thing + '(' searchstring2 = findthis + '.' + thing + '.' # if findthis == checker: # print searchstring,searchstring2 # print searchstring if searchstring in line or searchstring2 in line: FOUND = True breakmain = True break if breakmain: break else: searchstring = findthis + '(' searchstring2 = findthis + '.' searchstring3 = findthis + '.' + thing + '.' # if findthis == checker: # print searchstring if searchstring in line or searchstring2 in line or searchstring3 in line: FOUND = True break if not FOUND: print 'import {}.{} on line {} does not seem to be used.'.format( statement[0], statement[1][1], statement[2] + 1)
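The scanner above works directly on raw source lines and string matching. For comparison only (this is an alternative sketch, not the method implemented above), the standard `ast` module can collect imported names and the names actually referenced, which sidesteps the quoting and comment bookkeeping:

import ast

def unused_imports(pyfile):
    """Return {imported_name: lineno} for top-level imports never referenced by name."""
    with open(pyfile) as f:
        tree = ast.parse(f.read())
    imported, used = {}, set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for a in node.names:
                # 'import a.b.c' binds the top-level name 'a' unless aliased
                imported[(a.asname or a.name).split('.')[0]] = node.lineno
        elif isinstance(node, ast.ImportFrom):
            for a in node.names:
                imported[a.asname or a.name] = node.lineno
        elif isinstance(node, ast.Name):
            used.add(node.id)  # attribute roots also appear as Name nodes
    return {name: line for name, line in imported.items() if name not in used}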
import glob2
from datetime import datetime

filenames = glob2.glob(
    "/home/digger/Classes/Python_Mega/Exercises/Files/S03L72/*.txt")

with open(
        "/home/digger/Classes/Python_Mega/Exercises/Files/S03L72/" +
        datetime.now().strftime("%Y-%m%d-%H-%M-%S-%f") + ".txt", "w") as file:
    for filename in filenames:
        with open(filename, "r") as f:
            file.write(f.read() + "\n")
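One caveat worth noting: the merged output is itself a .txt in the same folder, so a second run of the script would pick it up again. A minimal guard (the output name is illustrative) is to filter it out of the glob results before merging:

import glob2

out_name = "merged.txt"  # illustrative output filename
filenames = [p for p in glob2.glob("*.txt") if not p.endswith(out_name)]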
def CreateMongoDataBase(): scrapedCounties = {} ## https://inkplant.com/code/state-latitudes-longitudes statesOfUSA = { 'AK': { "StateName": 'Alaska', "Latitude": 61.370716, "Longitude": -152.404419 }, 'AL': { "StateName": 'Alabama', "Latitude": 32.806671, "Longitude": -86.791130 }, 'AR': { "StateName": 'Arkansas', "Latitude": 34.969704, "Longitude": 92.373123 }, 'AZ': { "StateName": 'Arizona', "Latitude": 33.729759, "Longitude": -111.431221 }, 'CA': { "StateName": 'California', "Latitude": 36.116203, "Longitude": -119.681564 }, 'CO': { "StateName": 'Colorado', "Latitude": 39.059811, "Longitude": -105.311104 }, 'CT': { "StateName": 'Connecticut', "Latitude": 41.597782, "Longitude": -72.755371 }, 'DC': { "StateName": 'District of Columbia', "Latitude": 38.897438, "Longitude": -77.026817 }, 'DE': { "StateName": 'Delaware', "Latitude": 39.318523, "Longitude": -75.507141 }, 'FL': { "StateName": 'Florida', "Latitude": 27.766279, "Longitude": -81.686783 }, 'GA': { "StateName": 'Georgia', "Latitude": 33.040619, "Longitude": -83.643074 }, 'HI': { "StateName": 'Hawaii', "Latitude": 21.094318, "Longitude": -157.498337 }, 'IA': { "StateName": 'Iowa', "Latitude": 42.011539, "Longitude": -93.210526 }, 'ID': { "StateName": 'Idaho', "Latitude": 44.240459, "Longitude": -114.478828 }, 'IL': { "StateName": 'Illinois', "Latitude": 40.349457, "Longitude": -88.986137 }, 'IN': { "StateName": 'Indiana', "Latitude": 39.849426, "Longitude": -86.258278 }, 'KS': { "StateName": 'Kansas', "Latitude": 38.526600, "Longitude": -96.726486 }, 'KY': { "StateName": 'Kentucky', "Latitude": 37.668140, "Longitude": -84.670067 }, 'LA': { "StateName": 'Louisiana', "Latitude": 31.169546, "Longitude": -91.867805 }, 'MA': { "StateName": 'Massachusetts', "Latitude": 42.230171, "Longitude": -71.530106 }, 'MD': { "StateName": 'Maryland', "Latitude": 39.063946, "Longitude": -76.802101 }, 'ME': { "StateName": 'Maine', "Latitude": 44.693947, "Longitude": -69.381927 }, 'MI': { "StateName": 'Michigan', "Latitude": 43.326618, "Longitude": -84.536095 }, 'MN': { "StateName": 'Minnesota', "Latitude": 45.694454, "Longitude": -93.900192 }, 'MO': { "StateName": 'Missouri', "Latitude": 38.456085, "Longitude": -92.288368 }, 'MS': { "StateName": 'Mississippi', "Latitude": 32.741646, "Longitude": -89.678696 }, 'MT': { "StateName": 'Montana', "Latitude": 46.921925, "Longitude": -110.454353 }, 'NC': { "StateName": 'North Carolina', "Latitude": 35.630066, "Longitude": -79.806419 }, 'ND': { "StateName": 'North Dakota', "Latitude": 47.528912, "Longitude": -99.784012 }, 'NE': { "StateName": 'Nebraska', "Latitude": 41.125370, "Longitude": -98.268082 }, 'NH': { "StateName": 'New Hampshire', "Latitude": 43.452492, "Longitude": -71.563896 }, 'NJ': { "StateName": 'New Jersey', "Latitude": 40.298904, "Longitude": -74.521011 }, 'NM': { "StateName": 'New Mexico', "Latitude": 34.840515, "Longitude": -106.248482 }, 'NV': { "StateName": 'Nevada', "Latitude": 38.313515, "Longitude": -117.055374 }, 'NY': { "StateName": 'New York', "Latitude": 42.165726, "Longitude": -74.948051 }, 'OH': { "StateName": 'Ohio', "Latitude": 40.388783, "Longitude": -82.764915 }, 'OK': { "StateName": 'Oklahoma', "Latitude": 35.565342, "Longitude": -96.928917 }, 'OR': { "StateName": 'Oregon', "Latitude": 44.572021, "Longitude": -122.070938 }, 'PA': { "StateName": 'Pennsylvania', "Latitude": 40.590752, "Longitude": -77.209755 }, 'RI': { "StateName": 'Rhode Island', "Latitude": 41.680893, "Longitude": -71.511780 }, 'SC': { "StateName": 'South Carolina', "Latitude": 33.856892, "Longitude": 
-80.945007 }, 'SD': { "StateName": 'South Dakota', "Latitude": 44.299782, "Longitude": -99.438828 }, 'TN': { "StateName": 'Tennessee', "Latitude": 33.040619, "Longitude": -86.692345 }, 'TX': { "StateName": 'Texas', "Latitude": 31.054487, "Longitude": -97.563461 }, 'UT': { "StateName": 'Utah', "Latitude": 40.150032, "Longitude": -111.862434 }, 'VA': { "StateName": 'Virginia', "Latitude": 37.769337, "Longitude": -78.169968 }, 'VT': { "StateName": 'Vermont', "Latitude": 44.045876, "Longitude": -72.710686 }, 'WA': { "StateName": 'Washington', "Latitude": 47.400902, "Longitude": -121.490494 }, 'WI': { "StateName": 'Wisconsin', "Latitude": 44.268543, "Longitude": -89.616508 }, 'WV': { "StateName": 'West Virginia', "Latitude": 38.491226, "Longitude": -80.954453 }, 'WY': { "StateName": 'Wyoming', "Latitude": 42.755966, "Longitude": -107.302490 } } filePath = "" print('CreateMongoDataBase') # Scrap the counties and store in the dictionary for lookup website_url = requests.get( "https://en.wikipedia.org/w/index.php?title=User:Michael_J/County_table&oldid=368803236" ).text wikiBaseURL = "https://en.wikipedia.org" Soup = BeautifulSoup(website_url, 'lxml') CountyGeoLocTbl = Soup.find('table', {'class': 'wikitable sortable'}) # Perform lookup from the scraped data for the geo # locations and other facts # skip the first row of the scraped data as it is the header. for tr in CountyGeoLocTbl.find_all('tr')[1:]: tds = tr.find_all('td') # Removed the superscripts e.g '[4]' with '' by usage of Regex ScrapedCounty = re.compile('[a-zA-Z ]{6,}', re.I).findall(tds[3].text.strip()) CountyWikiLink = wikiBaseURL + tds[3].a.get('href') # Check for blanks / '' in scraped county if (len(ScrapedCounty) > 0): ScrapedCounty = ScrapedCounty[0].rstrip().lstrip() else: ScrapedCounty = tds[3].text.strip() # if ( tds[1].text in statesOfUSA and isinstance(statesOfUSA[tds[1].text],dict)): data = { "StateShortName": tds[1].text, "CountyName": ScrapedCounty, "Population": tds[5].text, "TotalArea": tds[11].text, "Latitude": tds[12].text.strip(), "Longitude": tds[13].text.strip(), "CountyWikiLink": CountyWikiLink, "StateLatitude": statesOfUSA[tds[1].text]["Latitude"], "StateLongitude": statesOfUSA[tds[1].text]["Longitude"], } # # Add to dictiory for quick retrival scrapedCounties[data["StateShortName"] + data["CountyName"]] = data # if (tds[1].text =='NJ'): # print(ScrapedCounty) xlsFilesOnly = glob(filePath + "*.xls") # parse all xls file(s) only StateList = [] for xlsfile in xlsFilesOnly: yearReported = xlsfile[:4] wb = xlrd.open_workbook(xlsfile, ragged_rows=True) if (wb != None): sh = wb.sheet_by_name('OutcomesFactorsSubRankings') CountyList = [] if (sh != None): for row_index in range(sh.nrows): HealthyCounty = {} if (row_index > 2): StateShortName = sh.cell(row_index, 13).value CountyName = sh.cell(row_index, 2).value StateName = sh.cell(row_index, 1).value QualityofLife = { "Z-Score": sh.cell(row_index, 3).value, "Rank": sh.cell(row_index, 4).value, } HealthBehaviours = { "Z-Score": sh.cell(row_index, 5).value, "Rank": sh.cell(row_index, 6).value, } ClinicalCare = { "Z-Score": sh.cell(row_index, 7).value, "Rank": sh.cell(row_index, 8).value, } EconomicFactors = { "Z-Score": sh.cell(row_index, 9).value, "Rank": sh.cell(row_index, 10).value, } PhysicalEnvironment = { "Z-Score": sh.cell(row_index, 11).value, "Rank": sh.cell(row_index, 12).value, } # Make sure we have the county exists in dictionary if (StateShortName + CountyName in scrapedCounties): c = scrapedCounties[StateShortName + CountyName] # Perform lookup from the 
scraped data for the geo # Populate the county dictionary HealthyCounty = { "CountyName": CountyName, "County FIPS": sh.cell(row_index, 0).value, "QualityofLife": QualityofLife, "HealthBehaviours": HealthBehaviours, "ClinicalCare": ClinicalCare, "EconomicFactors": EconomicFactors, "PhysicalEnvironment": PhysicalEnvironment, "Population": c["Population"], "TotalArea": c["TotalArea"], "Latitude": c["Latitude"], "Longitude": c["Longitude"], "CountyWikiLink": c["CountyWikiLink"], "StateLatitude": c["StateLatitude"], "StateLongitude": c["StateLongitude"] } County = {"County": HealthyCounty} # Add only when we have found the facts on the county CountyList.append(County) if (row_index == sh.nrows - 1): State = { "StateName": StateName, "StateShortName": StateShortName, "Year": yearReported, "FIPS": sh.cell(row_index, 0).value, "Counties": CountyList } StateList.append(State) #Creating a json file to display the jsonified data jsonfile = "StateCountyData" + '.json' with open(jsonfile, 'w') as f: json.dump(StateList, f, indent=4) jsonfile = "ScrappedStateCountyData" + '.json' with open(jsonfile, 'w') as f: json.dump(scrapedCounties, f, indent=4) #### Connection for local host conn = 'mongodb://*****:*****@ds255332.mlab.com:55332/healthi_db' # client = pymongo.MongoClient(conn,ConnectTimeoutMS=30000) # db = client.get_default_database() #create list of categories Category = [ "QualityofLife", "EconomicFactors", "ClinicalCare", "HealthBehaviours", "PhysicalEnvironment" ] #Create a dictionary with the list Category. dropdown = {"cat": Category} #drop/create collection Category db.Category.drop() category = db.Category #insert into Category collection category.insert(dropdown) #drop/create collection State. db.State.drop() state = db.State #insert into State collection result = state.insert_many(StateList) print("Multiple States {0}".format(result.inserted_ids))
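The database-loading pattern at the end of CreateMongoDataBase() (drop the collection, then bulk-insert the built documents) is shown below in isolation as a minimal sketch; the connection string and database name are placeholders, and `insert_many` is the modern pymongo call corresponding to the deprecated `insert` used for the Category collection above.

import pymongo

def load_states(state_list, uri='mongodb://localhost:27017', db_name='healthi_db'):
    """Drop and repopulate the State collection; returns the new document ids."""
    client = pymongo.MongoClient(uri)
    db = client[db_name]
    db.State.drop()                          # start from an empty collection
    result = db.State.insert_many(state_list)
    return result.inserted_ids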
import glob2
from datetime import datetime

all_files = glob2.glob("*.txt")
all_files.sort()
print(all_files)


def merge_files(files, archive_name):
    with open(archive_name + ".txt", "w") as myfile:
        for file in files:
            with open(file, "r") as content_file:
                # myfile.write(content_file.read() + "\n")
                # readline() copies only the first line of each source file;
                # the commented-out read() above copies whole files
                content = content_file.readline()
                myfile.write("{}\n".format(content))


# note: %s (epoch seconds) is a glibc strftime extension and may not work on all platforms
merge_files(all_files, datetime.now().strftime("%Y-%d-%m-%H-%M-%S-%s"))
# datetime.now().strftime("%Y-%d-%m-%H-%M-%S-%s")
# 2018-18-11-20-50-24-1542581424

# 1. Consider using the glob2 third party library to generate a list of filenames
#    to iterate through.
# 2. Use a with statement to create a new text file and then iterate through the
#    file list inside that with statement and open and read existing file contents
#    in each iteration and write them to the new text file.
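Following the numbered instructions above literally (whole-file read() rather than a single readline()), a compact version might look like the sketch below; the archive name is illustrative.

import glob2

def merge_all(pattern, archive_name):
    paths = sorted(glob2.glob(pattern))
    with open(archive_name + ".txt", "w") as out:
        for path in paths:
            with open(path, "r") as src:
                out.write(src.read() + "\n")

merge_all("*.txt", "archive")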