def DownloadUpdate(self, file):
    """Download one update file from the SVN server into the temp directory.

    The file is fetched from ``self.SVNPathAddress`` and stored below
    ``self.UpdateTempDir`` under the same relative path.  A failed first
    attempt is retried once after a two second pause.

    Args:
        file: repository-relative path of the file to download.

    Returns:
        1 on success (the URL is appended to ``self.DownloadedFiles``),
        0 when both attempts fail (the URL is appended to
        ``self.DownloadFailedFiles``).
    """
    self.log('Downloading: %s' % file)
    dirfile = os.path.join(self.UpdateTempDir, file)
    dirname = os.path.dirname(dirfile)
    if not os.path.isdir(dirname):
        try:
            os.makedirs(dirname)
        except OSError:
            # Keep going: the download below fails and reports the file.
            self.log('Error creating directory: ' + dirname)

    url = self.SVNPathAddress + urllib.quote(file)
    for attempt in range(2):
        if attempt:
            # Give the server a moment before the single retry.
            time.sleep(2)
        try:
            if re.search(r"\.xbt", url):
                # Texture bundles: read the size from the SVN log view first.
                log_page = urllib2.urlopen(url + "?view=log").read()
                self.totalsize = int(
                    re.findall("File length: ([0-9]*)", log_page)[0])
            urllib.urlretrieve(url.decode("utf-8"), dirfile.decode("utf-8"))
        except Exception:
            # Any failure (network, parsing, disk) counts as a failed attempt.
            continue
        self.DownloadedFiles.append(urllib.unquote(url))
        return 1

    self.log("Download failed: %s" % url)
    self.DownloadFailedFiles.append(urllib.unquote(url))
    return 0
def export_data(input_path, output_path, output_format):
    """Export a QIIME 2 result to ``output_path`` and report success.

    Args:
        input_path: path of the artifact or visualization to load.
        output_path: destination file or directory.
        output_format: name of the format to export as, or ``None`` to use
            the result's own format.  Not allowed for visualizations; in
            that case an error is printed and the click context exits
            with status 1.
    """
    import qiime2.util
    import qiime2.sdk
    # ``import distutils`` alone does not load the ``dir_util`` submodule.
    import distutils.dir_util

    result = qiime2.sdk.Result.load(input_path)
    if output_format is None:
        if isinstance(result, qiime2.sdk.Artifact):
            output_format = result.format.__name__
        else:
            output_format = 'Visualization'
        result.export_data(output_path)
    else:
        if isinstance(result, qiime2.sdk.Visualization):
            error = '--output-format cannot be used with visualizations'
            click.secho(error, fg='red', bold=True, err=True)
            click.get_current_context().exit(1)
        else:
            source = result.view(qiime2.sdk.parse_format(output_format))
            if os.path.isfile(str(source)):
                if os.path.isfile(output_path):
                    os.remove(output_path)
                else:
                    # create directory (recursively) if it doesn't exist yet;
                    # a bare file name has no directory part to create
                    parent = os.path.dirname(output_path)
                    if parent:
                        os.makedirs(parent, exist_ok=True)
                qiime2.util.duplicate(str(source), output_path)
            else:
                distutils.dir_util.copy_tree(str(source), output_path)

    output_type = 'file' if os.path.isfile(output_path) else 'directory'
    success = 'Exported %s as %s to %s %s' % (input_path, output_format,
                                              output_type, output_path)
    click.secho(success, fg='green')
def on_login_succeed(self):
    '''Callback run after a successful login.

    Saves the login window state and config, scans the bundled and the
    per-user plugin directories, draws the main screen, starts every
    plugin listed in the session config and finally marks the controller
    as logged in.
    '''
    self._save_login_dimensions()
    self.config.save(self.config_path)

    manager = get_pluginmanager()
    manager.scan_directory('plugins')

    user_plugins = self.config_dir.join('plugins')
    if not os.path.exists(user_plugins):
        os.makedirs(user_plugins)
    manager.scan_directory(user_plugins)

    self.draw_main_screen()

    # Make sure the option exists before iterating over it.
    self.session.config.get_or_set('l_active_plugins', [])
    for plugin_name in self.session.config.l_active_plugins:
        manager.plugin_start(plugin_name, self.session)
        # hack: where do we start this? how do we generalize for other
        # extensions?
        if plugin_name == "music":
            extension.get_and_instantiate('listening to',
                                          self.window.content)

    self.set_default_extensions_from_config()
    self._sync_emesene1()
    self.logged_in = True

    checker = self.network_checker
    if checker is None:
        return
    checker.set_new_session(self.session)
def buildInstaller():
    """Run makensis on kylo.nsi with the build's version/path defines.

    Creates the distribution directory first when it is missing.
    """
    logger = build_util.getLogger('build_Installer')

    makensis = os.path.normpath(Settings.config.get("env", "NSIS"))
    nsi_file = os.path.normpath(
        os.path.join(module_path, "installer", "kylo.nsi"))
    version = build_util.VersionFormat()
    dist_dir = Settings.prefs.dist_dir

    # Values handed to the NSIS script as /D<name>=<value> defines.
    nsis_defs = {
        "APP_DIR": Settings.prefs.kylo_build_dir,
        "BUILD_ID": Settings.config.get("App", "BuildID"),
        "WIN_VERSION": version.win,
        "FULL_VERSION": version.full,
        "FILENAME_VERSION": version.full.replace(".", "_"),
        "VERSION_MAJOR": version.ints[0],
        "VERSION_MINOR": version.ints[1],
        "DISPLAY_VERSION": version.display,
        "OUT_FILE_DIR": dist_dir,
        "LOCALE": "en-US",
    }

    # Create dist_dir if it doesn't exist
    if not os.path.exists(dist_dir):
        logger.info("Creating dist directory: %s", dist_dir)
        os.makedirs(dist_dir)

    defines = ["/D%s=%s" % pair for pair in nsis_defs.iteritems()]
    args = [makensis] + defines + [nsi_file]

    logger.debug("Running: **" + " ".join(args))
    build_util.runSubprocess(args, logger)
def processJarSection(self, jarinfo, jardir):
    '''Internal method called by makeJar to actually process a section
    of a jar.mn file.

    For the 'jar' output format this makes sure the directory of the
    target .jar exists, opens the archive in append mode and wraps it in
    an output helper.
    '''
    is_jar = self.outputFormat == 'jar'

    # chromebasepath is used for chrome registration manifests
    # {0} is getting replaced with chrome/ for chrome.manifest, and with
    # an empty string for jarfile.manifest
    base = '{0}' + os.path.basename(jarinfo.name)
    if is_jar:
        base = 'jar:' + base + '.jar!'
    chromebasepath = base + '/'

    jarfile = os.path.join(jardir, jarinfo.base, jarinfo.name)
    jf = None
    if is_jar:
        # jar
        jarfilepath = jarfile + '.jar'
        try:
            os.makedirs(os.path.dirname(jarfilepath))
        except OSError as error:
            # An already existing directory is fine, anything else is not.
            if error.errno != errno.EEXIST:
                raise
        jf = ZipFile(jarfilepath, 'a', lock=True)
        outHelper = self.OutputHelper_jar(jf)
def __enter__(self):
    """Open FILE_NAME under a file lock and return the unpickled state.

    Writers take an exclusive lock and keep the file open; readers take
    a shared lock and the file is closed again before returning.  An
    empty file yields a fresh default state.
    """
    # Create directory if it doesn't exist
    parent = os.path.dirname(FILE_NAME)
    try:
        os.makedirs(parent)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise

    # Open file with a lock and create it if it doesn't exist
    if self._write is True:
        access, mode, lock_kind = os.O_RDWR, "rb+", fcntl.LOCK_EX
    else:
        access, mode, lock_kind = os.O_RDONLY, "rb", fcntl.LOCK_SH
    descriptor = os.open(FILE_NAME, os.O_CREAT | access)
    self._file = os.fdopen(descriptor, mode)

    # Acquire a file lock
    fcntl.flock(self._file.fileno(), lock_kind)

    try:
        self.data = pickle.load(self._file)
    except EOFError:
        # Nothing stored yet: start from an empty state.
        self.data = {
            'jobs': [],
            'schedules': [],
            'workers': deque(),
            'next_job_id': 1,
            'next_schedule_id': 1,
            'next_worker_id': 1
        }

    if self._write is False:
        self._file.close()
    return self.data
def generate_report(task, db):
    """Write a text report for ``task`` into ``<task directory>/reports``.

    The report lists the project parameters (every key of ``task`` except
    the bookkeeping ones), the database statistics returned by
    ``db.show_stats()`` and the action/status/message history.

    Args:
        task: mapping with at least ``directory``, ``action``, ``status``,
            ``msg`` and ``date`` (a sequence of datetimes).
        db: object exposing ``show_stats()`` returning a string.

    Returns:
        True once the report has been written.
    """
    stamp = dt.now().strftime('%d-%m-%Y_%H-%M')
    directory = os.path.join(task['directory'], 'reports')
    if not os.path.exists(directory):
        os.makedirs(directory)
    filename = "%s/%s.txt" % (directory, stamp)

    # Bookkeeping keys that are reported in the history section or not at
    # all.  "creation_date" is excluded here, which made the former
    # special-case formatting of that key unreachable; it was removed.
    skipped = ("action", "status", "msg", "date", "creation_date", "_id")

    with open(filename, 'w') as f:
        f.write("\n======PROJECT PARAMS======\n")
        for k, v in task.items():
            if k in skipped:
                continue
            try:
                f.write(str(k) + ": " + str(v) + "\n")
            except Exception:
                # Deliberate best effort: a value that cannot be rendered
                # must not abort the whole report.
                continue
        f.write(db.show_stats())

        f.write("\n\n======HISTORY OF THE PROJECT======\n")
        date_list = [n.strftime('%d-%m-%Y %H-%M-%S') for n in task["date"]]
        history = zip(task["action"], task["status"], task["msg"], date_list)
        for msg in history:
            f.write("\n-" + str(msg))

    print("Your report is ready!\nCheck here: %s" % (filename))
    return True
def buildApp():
    """Assemble the Windows application directory.

    Builds the renamed stub executable, copies the Mozilla runtime DLLs
    that are present and syncs the XULRunner tree into the application
    directory.
    """
    build_dir = Settings.prefs.build_dir
    xul_dir = Settings.prefs.xul_dir
    app_dir = os.path.join(build_dir, "application")

    # ----------------------
    # Under Windows, we're just going to grab the xulrunner-stub.exe and
    # run Resource Hacker to rename it to "Kylo", set the icon, and set
    # some version info, etc.
    reshack_temp_dir = os.path.join(build_dir, "stub")
    if not os.path.exists(reshack_temp_dir):
        os.makedirs(reshack_temp_dir)

    build_stub.main(Settings.config.get('App', 'Version'),
                    Settings.config.get('App', 'BuildID'),
                    temp_dir=reshack_temp_dir,
                    stub_dir=app_dir)

    # ----------------------
    # We also need mozilla DLLS
    for dll_name in ("mozcrt19.dll", "mozutils.dll", "gkmedias.dll"):
        dll_path = os.path.join(xul_dir, dll_name)
        if not os.path.isfile(dll_path):
            continue
        shutil.copy2(dll_path, app_dir)

    # ----------------------
    # Now let's grab the XULRunner directory and drop it in to our working
    # application directory
    xulrunner_dir = os.path.join(app_dir, "xulrunner")
    if not os.path.exists(xulrunner_dir):
        os.makedirs(xulrunner_dir)

    build_util.syncDirs(xul_dir, xulrunner_dir,
                        exclude=["xulrunner-stub.exe"])
def _exec(self, args, run_in_check_mode=False, check_rc=True): if not self.module.check_mode or (self.module.check_mode and run_in_check_mode): cmd = self.executable + args if self.glbl: cmd.append('--global') if self.production: cmd.append('--production') if self.ignore_scripts: cmd.append('--ignore-scripts') if self.name: cmd.append(self.name_version) if self.registry: cmd.append('--registry') cmd.append(self.registry) #If path is specified, cd into that path and run the command. cwd = None if self.path: self.path = os.path.abspath(os.path.expanduser(self.path)) if not os.path.exists(self.path): os.makedirs(self.path) if not os.path.isdir(self.path): self.module.fail_json(msg="path %s is not a directory" % self.path) cwd = self.path rc, out, err = self.module.run_command(cmd, check_rc=check_rc, cwd=cwd) return out return ''
def consume_in_thread(self):
    """Runs the ZmqProxy service.

    Creates the IPC directory when needed, registers the PULL socket on
    the configured TCP address and then starts the parent's consumer.
    Errors are logged and the original exception is re-raised.
    """
    ipc_dir = CONF.rpc_zmq_ipc_dir
    consume_in = "tcp://%s:%s" % \
        (CONF.rpc_zmq_bind_address, CONF.rpc_zmq_port)
    consumption_proxy = InternalContext(None)

    try:
        os.makedirs(ipc_dir)
    except os.error:
        # An existing directory is fine; anything else is fatal.
        if not os.path.isdir(ipc_dir):
            with excutils.save_and_reraise_exception():
                LOG.error(_LE("Required IPC directory does not exist at"
                              " %s") % (ipc_dir, ))
    try:
        self.register(consumption_proxy, consume_in, zmq.PULL)
    except zmq.ZMQError:
        # Report a permission problem only when the directory really is
        # not accessible (the check used to be inverted).
        if not os.access(ipc_dir, os.X_OK):
            with excutils.save_and_reraise_exception():
                LOG.error(_LE("Permission denied to IPC directory at"
                              " %s") % (ipc_dir, ))
        with excutils.save_and_reraise_exception():
            LOG.error(_LE("Could not create ZeroMQ receiver daemon. "
                          "Socket may already be in use."))

    super(ZmqProxy, self).consume_in_thread()
def create_directory(dir):
    """Create ``dir`` (with parents) unless it exists, logging the outcome."""
    if not os.path.exists(dir):
        os.makedirs(dir)
        logger.info('Directory created: ' + dir, __name__)
        return
    logger.info('Directory exists: ' + dir, __name__)
def cat_counter_references(counter_references=None, target_dir=curdir,
                           path_to_bowtie2='bowtie2', logger=None, **kwargs):
    """Merge all counter-references into one bowtie2 index.

    Every valid counter-reference is dumped with ``bowtie2-inspect`` into
    ``<target_dir>/counter_references.fa`` and a combined index is built
    from that file with ``bowtie2-build``.

    Args:
        counter_references: references to combine, or None to do nothing.
        target_dir: directory receiving the FASTA file and the index.
        path_to_bowtie2: bowtie2 executable; ``-inspect`` / ``-build`` are
            appended to it to find the helper tools.
        logger: passed through to ``validate_references``.

    Returns:
        The index prefix, or None when no counter-references were given
        or the index build reported an error.
    """
    if counter_references is None:
        return
    try:
        makedirs(target_dir, mode=0o755)
    except OSError:
        # Usually the directory already exists; a real problem shows up
        # when the FASTA file is opened below.
        pass

    debug('Validating counter-references and building counter-reference index')
    valid_references = validate_references(
        references=counter_references,
        target_dir=target_dir,
        path_to_bowtie2=path_to_bowtie2,
        logger=logger,
        environ_key='SOT_DEFAULT_COUNTER_REFERENCES')

    crefs_fa_path = join(target_dir, 'counter_references.fa')
    # Close the FASTA file before bowtie2-build reads it.
    with open(crefs_fa_path, 'w') as crefs_fa:
        for ref in valid_references:
            Popen([path_to_bowtie2 + '-inspect', ref],
                  stdout=crefs_fa).wait()

    # The index prefix is a fixed name next to the FASTA file; the list of
    # references itself cannot be used as a path component.
    crefs_index = join(target_dir, 'counter_references')
    # bowtie2-build needs the FASTA *path*, not the file object.
    args = [path_to_bowtie2 + '-build', crefs_fa_path, crefs_index]
    P = Popen(args, stderr=PIPE)
    stderr = P.communicate()[1]
    if stderr.startswith('Error'):
        critical(stderr)
        critical('No counter-references will be used.')
        return None
    return crefs_index
def populate():
    """Sort the files of the current directory into per-extension folders.

    Every regular file with an extension is moved into a sub-folder named
    after that extension (``report.txt`` -> ``txt/report.txt``); folders
    are created as needed.  Files without an extension and existing
    directories are left untouched.
    """
    path = os.getcwd()

    # Group the file names by extension (without the leading dot).
    by_extension = {}
    for entry in os.listdir(path):
        if not isfile(join(path, entry)):
            continue  # directories stay where they are
        extn = os.path.splitext(entry)[1][1:]
        if not extn:
            # No extension: the folder would collide with the file itself.
            continue
        print(extn)
        by_extension.setdefault(extn, []).append(entry)

    for extn, names in by_extension.items():
        folder = join(path, extn)
        if not os.path.isdir(folder):
            os.makedirs(folder)
        for name in names:
            os.rename(join(path, name), join(folder, name))
def validate_references(references=None, path_to_bowtie2='bowtie2',
                        logger=None, environ_key='SOT_DEFAULT_REFERENCES',
                        target_dir=curdir, **kwargs):
    """Resolve every reference to a usable bowtie2 index.

    References that already have an index are kept; existing files
    without one are indexed with ``fasta_to_bowtie2``.  References that
    cannot be resolved are reported and dropped.

    Args:
        references: iterable of reference names/paths; when None they are
            read (whitespace separated) from ``environ[environ_key]``.
        path_to_bowtie2: bowtie2 executable used for lookups and builds.
        logger: unused in this function.
        environ_key: environment variable consulted when ``references``
            is None.
        target_dir: directory that receives newly built indexes.

    Returns:
        List of bowtie2 index paths (empty when nothing was specified).
    """
    try:
        makedirs(target_dir, mode=0o755)
    except OSError:
        # The default target is the current directory, which always
        # exists; only fail when the directory is really missing.
        if not exists(target_dir):
            raise

    debug('Validating references')
    new_references = []
    if references is None:
        if environ_key in environ:
            references = environ[environ_key].split()
        else:
            critical('no reference genomes specified')
            return []

    for r in references:
        bowtie2_index = find_bowtie2_index(r, path_to_bowtie2=path_to_bowtie2)
        if bowtie2_index is not None:
            new_references.append(bowtie2_index)
            continue
        if exists(r):
            debug('Attempting to build bowtie2 index from %s' % r)
            new_index = fasta_to_bowtie2(r, target_dir=target_dir,
                                         path_to_bowtie2=path_to_bowtie2)
            if new_index is not None:
                new_references.append(new_index)
                continue
            critical('Failed to build bowtie2 index.')
        critical('bowtie2 could not find the index for %s', r)
        critical('we will not align to %s', r)
    return new_references
def main():
    """Scrape product pages and reviews for one Sephora category.

    Pages of the category listing are visited one after another until a
    page yields no products.  Products whose main page and reviews are
    both already stored in the category directory are skipped, so an
    aborted run can be resumed.
    """
    path_to_chromedriver = "/Users/xuanzhang/Developer/python/scrapping/chromedriver"
    browser0 = webdriver.Chrome(executable_path=path_to_chromedriver)
    website = "http://www.sephora.com/"

    # categories to work for now
    categories = ['face-wash-facial-cleanser',
                  'facial-toner-skin-toner',
                  'night-cream',
                  'eye-cream-dark-circles',
                  'moisturizer-skincare',
                  'bb-cream-cc-cream',
                  'face-serum']
    category = categories[-1]

    # if no such directory for the html, create one
    category_path = '../scraping/sephora/' + category
    if not os.path.isdir(category_path):
        os.makedirs(category_path)

    # in case aborted, check what pages have been scraped
    files = [f for f in os.listdir(category_path) if f[-4:] == 'html']
    product_list = [re.findall(r'(P\w+)', f)[0] for f in files]

    page_number = 0
    try:
        # The former loop condition was an always-truthy object, so the
        # loop never ended; stop at the first page without products.
        while True:
            page_number += 1
            # pageSize = -1 s.t. each page has 300 items
            url = (website + category +
                   '?pageSize=-1&currentPage=' + str(page_number))
            browser0.get(url)

            # get item list by tag sku-item, and go through the list
            items = browser0.find_elements_by_class_name('sku-item')
            if not items:
                break

            for item in items:
                product_id = item.get_attribute('data-product_id')
                product_path = category_path + '/' + product_id
                have_main = product_id in product_list
                have_reviews = product_id + '_reviews' in product_list

                # if product page is already scraped, skip to next product
                if have_main and have_reviews:
                    continue

                time.sleep(.5)
                # open a new browser window for the product page
                browser1 = webdriver.Chrome(
                    executable_path=path_to_chromedriver)
                try:
                    browser1.get(item.get_attribute('href'))
                    # scrape main content or reviews, whichever is missing
                    if have_main:
                        flag1 = True
                    else:
                        flag1 = scrap_maincontent(browser1, product_path)
                    if have_reviews:
                        flag2 = True
                    else:
                        flag2 = scrap_reviews(browser1, product_path,
                                              product_id)
                finally:
                    # one window per product: always release it
                    browser1.quit()
                print('%s %s %s' % (product_id, flag1, flag2))
    finally:
        browser0.quit()
def convert_mp3_to_wav(filename, sample_frequency):
    """Convert an mp3 into a mono, resampled wav using the ``lame`` tool.

    A mono mp3 is first written to a ``tmp`` folder next to the source and
    then decoded into a ``wave`` folder next to the source; both folders
    are created when missing.

    Args:
        filename: '/'-separated path of the source; anything not ending
            in '.mp3' is ignored.
        sample_frequency: target sample rate in Hz.

    Returns:
        Path of the wav file, or None when ``filename`` is not an mp3.
    """
    if filename[-4:] != '.mp3':
        return

    parts = filename.split('/')
    stem = parts[-1][0:-4]

    # Rebuild the directory prefix exactly as given (with trailing '/').
    base = '/' if filename[0] == '/' else ''
    base += ''.join(part + '/' for part in parts[:-1])
    tmp_dir = base + 'tmp'
    wave_dir = base + 'wave'
    for folder in (wave_dir, tmp_dir):
        if not os.path.exists(folder):
            os.makedirs(folder)

    mono_mp3 = tmp_dir + '/' + stem + '.mp3'
    wav_file = wave_dir + '/' + stem + '.wav'
    rate_khz = "{0:.1f}".format(float(sample_frequency) / 1000.0)

    # Step 1: downmix to mono.
    os.system('lame -a -m m {0} {1}'.format(quote(filename),
                                            quote(mono_mp3)))
    # Step 2: decode to wav at the requested rate.
    os.system('lame --decode {0} {1} --resample {2}'.format(
        quote(mono_mp3), quote(wav_file), rate_khz))
    return wav_file
def clean_destination(self):
    """Validate the ``destination`` field and create it inside the jail.

    The destination is stripped, has every ".." removed and is made
    absolute.  The jail is looked up from the ``jail`` field when it is
    not set on the form yet.  The resulting path below the jail root must
    not exceed 88 characters; it is created when missing.

    Returns:
        The normalised destination path (relative to the jail root).
    """
    # NOTE(review): "create" is read here but not used below.
    create = self.cleaned_data.get("create")
    raw_dest = self.cleaned_data.get("destination")
    dest = os.path.abspath(raw_dest.strip().replace("..", ""))

    if not self.jail:
        jail_name = self.cleaned_data.get("jail")
        if jail_name:
            self.jail = Jails.objects.get(jail_host=jail_name)

    if not self.jail:
        raise forms.ValidationError(
            _("This shouldn't happen, but the jail could not be found")
        )

    full = "%s/%s%s" % (self.jc.jc_path, self.jail.jail_host, dest)
    if len(full) > 88:
        raise forms.ValidationError(
            _("The full path cannot exceed 88 characters")
        )

    if not os.path.exists(full):
        os.makedirs(full)
    return dest
def save_template(data=None):
    """\
    Returns an out file name and template description for saving a template

    A dialog asks for the template name, author, description and
    instructions, pre-filled from ``data`` when given.

    Returns a tuple ``(path, template)``: ``path`` is the .wgt file below
    the application's template directory, or a false value when the
    dialog was cancelled, the name was empty or the template directory
    could not be created.
    """
    dlg = templates_ui.TemplateInfoDialog(None, -1, "")
    if data is not None:
        dlg.template_name.SetValue(
            misc.wxstr(os.path.basename(os.path.splitext(data.filename)[0])))
        dlg.author.SetValue(misc.wxstr(data.author))
        dlg.description.SetValue(misc.wxstr(data.description))
        dlg.instructions.SetValue(misc.wxstr(data.instructions))

    ret = None
    retdata = Template()
    if dlg.ShowModal() == wx.ID_OK:
        ret = dlg.template_name.GetValue().strip()
        retdata.author = dlg.author.GetValue()
        retdata.description = dlg.description.GetValue()
        retdata.instructions = dlg.instructions.GetValue()
        if not ret:
            wx.MessageBox(_("Can't save a template with an empty name"),
                          _("Error"), wx.OK | wx.ICON_ERROR)
    dlg.Destroy()

    if ret:
        d = os.path.join(config._get_appdatapath(), '.wxglade', 'templates')
        if not os.path.exists(d):
            try:
                os.makedirs(d)
            except (OSError, IOError) as e:
                print(_("ERROR creating %s: %s") % (d, e))
                return None, retdata
        ret = os.path.join(d, ret + '.wgt')
    # The success path used to fall off the end and return None.
    return ret, retdata
def doCopy(self, res):
    """Copy the source tree into the work directory.

    On non-posix platforms the copy runs in a thread via
    ``shutil.copytree`` and the deferred fires with 0 on success or -1 on
    failure.  On posix a ``cp -R -P -p`` child process is started and its
    deferred is returned.
    """
    # now copy tree to workdir
    basedir = self.builder.basedir
    fromdir = os.path.join(basedir, self.srcdir)
    todir = os.path.join(basedir, self.workdir)

    if runtime.platformType != "posix":
        def succeeded(_):
            return 0  # rc=0

        def failed(f):
            self.sendStatus(
                {'header': 'exception from copytree\n' + f.getTraceback()})
            return -1  # rc=-1

        d = threads.deferToThread(shutil.copytree, fromdir, todir)
        d.addCallbacks(succeeded, failed)
        return d

    parent = os.path.dirname(todir)
    if not os.path.exists(parent):
        os.makedirs(parent)
    if os.path.exists(todir):
        # I don't think this happens, but just in case..
        log.msg(
            "cp target '%s' already exists -- cp will not do what you think!"
            % todir)

    command = ['cp', '-R', '-P', '-p', fromdir, todir]
    c = runprocess.RunProcess(self.builder, command, basedir,
                              sendRC=False, timeout=self.timeout,
                              maxTime=self.maxTime,
                              logEnviron=self.logEnviron, usePTY=False)
    self.command = c
    d = c.start()
    d.addCallback(self._abandonOnFailure)
    return d
def configure(self):
    """Collect the ATLAS configure options and enter the build directory.

    Adds the 64-bit flag, optional throttling/LAPACK/-fPIC options to
    'configopts', then creates the 'obj' directory and changes into it.
    """
    # configure for 64-bit build
    self.updatecfg('configopts', "-b 64")

    if self.getcfg('ignorethrottling'):
        # ignore CPU throttling check
        # this is not recommended, it will disturb the measurements done
        # by ATLAS; used for the EasyBuild demo, to avoid requiring root
        # privileges
        self.updatecfg('configopts', '-Si cputhrchk 0')

    # if LAPACK is found, instruct ATLAS to provide a full LAPACK library
    # ATLAS only provides a few LAPACK routines natively
    if self.getcfg('full_lapack'):
        lapack = get_software_root('LAPACK')
        if not lapack:
            self.log.error("netlib's LAPACK library not available,"
                           " required to build ATLAS with a full LAPACK library.")
        else:
            self.updatecfg(
                'configopts',
                ' --with-netlib-lapack=%s/lib/liblapack.a' % lapack)

    # enable building of shared libraries (requires -fPIC)
    if self.getcfg('sharedlibs') or self.toolkit().opts['pic']:
        self.log.debug("Enabling -fPIC because we're building shared ATLAS libs, or just because.")
        self.updatecfg('configopts', '-Fa alg -fPIC')

    # ATLAS only wants to be configured/built in a separate dir
    try:
        objdir = "obj"
        os.makedirs(objdir)
        os.chdir(objdir)
    except OSError as err:
        self.log.error("Failed to create obj directory to build in: %s" % err)
def _get_cache_dir():
    """Return the directory used for caches, creating it when needed.

    Order of preference: the ``caches`` folder inside the config directory
    for portable installs, the ``CALIBRE_CACHE_DIRECTORY`` environment
    variable, then the platform's conventional cache location.  Whenever
    the platform location cannot be determined, decoded or created, the
    config ``caches`` folder is returned instead.
    """
    confcache = os.path.join(config_dir, u'caches')
    if isportable:
        return confcache
    if 'CALIBRE_CACHE_DIRECTORY' in os.environ:
        return os.path.abspath(os.environ['CALIBRE_CACHE_DIRECTORY'])

    if iswindows:
        w = plugins['winutil'][0]
        try:
            candidate = os.path.join(
                w.special_folder_path(w.CSIDL_LOCAL_APPDATA),
                u'%s-cache' % __appname__)
        except ValueError:
            return confcache
    elif isosx:
        candidate = os.path.join(
            os.path.expanduser(u'~/Library/Caches'), __appname__)
    else:
        candidate = os.environ.get('XDG_CACHE_HOME', u'~/.cache')
        candidate = os.path.join(os.path.expanduser(candidate), __appname__)
        if isinstance(candidate, bytes):
            try:
                candidate = candidate.decode(filesystem_encoding)
            except ValueError:
                candidate = confcache

    if not os.path.exists(candidate):
        try:
            os.makedirs(candidate)
        except Exception:
            # Best effort: fall back to the config cache, but let
            # KeyboardInterrupt/SystemExit propagate.
            candidate = confcache
    return candidate
def populate_dir(path, files):
    """Create ``path`` if needed and write ``files`` into it.

    Args:
        path: target directory; missing parents are created.
        files: mapping of file name -> text content.  Existing files with
            the same name are overwritten.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Fine when the directory is already there (also covers a
        # concurrent creation); otherwise report the real error.
        if not os.path.isdir(path):
            raise
    for name, content in files.items():
        with open(os.path.join(path, name), "w") as fp:
            fp.write(content)
def _create_output_dir(self, path, type_): try: if not os.path.exists(path): os.makedirs(path) except EnvironmentError, err: raise DataError("Creating %s file directory '%s' failed: %s" % (type_.lower(), path, err.strerror))
def extract(self, file, dir):
    """Extract every file of the zip archive ``file`` below ``dir``.

    ``dir`` is created when missing (unless it is a bare drive such as
    ``C:``) and ``self._createstructure`` is asked to prepare the folder
    layout before the members are written.  With ``self.verbose`` set,
    each member name is printed.

    NOTE(review): member names are joined to ``dir`` as-is, so an archive
    from an untrusted source can write outside ``dir`` ("zip slip").
    """
    if not dir.endswith(':') and not os.path.exists(dir):
        os.mkdir(dir)

    zf = zipfile.ZipFile(file)
    try:
        # create directory structure to house files
        self._createstructure(file, dir)

        # extract files to directory structure
        for name in zf.namelist():
            if self.verbose:
                print("Extracting %s" % name)
            if name.endswith('/'):
                continue  # directory entry, nothing to write

            target = os.path.join(dir, name)
            parent = os.path.dirname(target)
            try:
                os.makedirs(parent)
            except OSError:
                # Normally the folder exists already; a real failure
                # surfaces when the file is opened below.
                pass
            with open(target, 'wb') as outfile:
                outfile.write(zf.read(name))
    finally:
        zf.close()
def save_data(self, filename, data):
    '''
    Saves the data structure using pickle. If the addon data path does
    not exist it will be automatically created. This save function has
    the same restrictions as the pickle module.

    Args:
        filename (string): name of the file you want to save data to. This
        file will be saved in your addon's profile directory.

        data (data object/string): you want to save.

    Returns:
        True on success
        False on failure
    '''
    profile_path = self.get_profile()
    try:
        os.makedirs(profile_path)
    except OSError:
        # Usually the profile directory already exists; a real problem
        # surfaces when the file is opened below.
        pass
    save_path = os.path.join(profile_path, filename)
    try:
        # The context manager makes sure the data is flushed and the
        # handle released even when pickling fails.
        with open(save_path, 'wb') as handle:
            pickle.dump(data, handle)
        return True
    except pickle.PickleError:
        return False
def make_dir(dir):
    '''
    Make sure a directory exists, creating it (and its parents) if not.

    dir: absolute path to directory to create
    '''
    if os.path.isdir(dir):
        return
    os.makedirs(dir)
def generatePara(filename):
    """Write one ``Para_<n>.ini`` per size entry into the run directory.

    The template config ``filename`` is read, the run directory name is
    derived from its chiN/miuN/Beta values, and for every entry of the
    JSON list stored in the file named by ``listName`` a copy of the
    config with that entry's values is written.  Files listed in FILELIST
    are symlinked into the run directory when missing.

    Returns:
        (run directory, list of the .ini file names written).
    """
    config = ConfigParser.ConfigParser()
    config.optionxform = str  # keep option names case-sensitive
    config.read(filename)

    section = 'modelABrc'
    written = []
    sizes = []
    values = {}
    for key in KEYS:
        values[key] = config.get(section, key)

    run_dir = 'c' + values['chiN'] + 'm' + values['miuN'] + 'b' + values['Beta']
    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    # Each line is parsed as JSON; the last line wins.
    with open(values['listName'], 'r') as listing:
        for line in listing.readlines():
            sizes = json.loads(line)

    # Continue numbering after the parameter files already present.
    count = len(glob.glob(os.path.join(run_dir, 'Para*.ini')))
    for entry in sizes:
        # NOTE(review): `sz` is not defined in this function; presumably a
        # module-level list of option names -- confirm.
        for name in sz:
            config.set(section, name, entry[name])
        ini_name = 'Para_' + str(count) + '.ini'
        count += 1
        with open(run_dir + '//' + ini_name, 'wb') as out:
            config.write(out)
        written.append(ini_name)

    here = os.path.realpath('.')
    for shared in FILELIST:
        link = os.path.join(run_dir, shared)
        if not os.path.isfile(link):
            os.symlink(os.path.join(here, shared), link)
    return run_dir, written
def initDb(self):
    """Create the database directory if needed and the two tables."""
    db_dir = os.path.dirname(self.jdoc_db_path)
    if not os.path.exists(db_dir):
        os.makedirs(db_dir)

    statements = [
        "create table jdoc(class_name varchar(100), package varchar(200), homeid int )",
        "create table homelinks( id INTEGER PRIMARY KEY, url varchar(200))",
    ]
    self.dbm.update(statements)
def download_entire_dataset(dataset_name, num_data, labels, method, cache_dir):
    """Fetch the train/valid/test splits of a dataset and cache them.

    Args:
        dataset_name: Dataset to be downloaded.
        num_data: Number of leading samples to keep; a negative value
            keeps everything.
        labels: Target labels for regression.
        method: Key into `preprocess_method_dict` selecting the
            preprocessor.
        cache_dir: Directory the three parts are saved to (created when
            missing).

    Returns:
        The downloaded dataset parts.
    """
    print('Downloading {}...'.format(dataset_name))
    preprocessor = preprocess_method_dict[method]()

    # Select the first `num_data` samples from the dataset.
    if num_data >= 0:
        target_index = numpy.arange(num_data)
    else:
        target_index = None
    molnet = D.molnet.get_molnet_dataset(dataset_name, preprocessor,
                                         labels=labels,
                                         target_index=target_index)
    dataset_parts = molnet['dataset']

    # Cache the downloaded dataset.
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    part_names = ['train', 'valid', 'test']
    for index, part in enumerate(part_names):
        target = os.path.join(cache_dir,
                              dataset_part_filename(part, num_data))
        NumpyTupleDataset.save(target, dataset_parts[index])
    return dataset_parts
def create_dir(path):
    """Create ``path`` with parents; an already existing path is accepted."""
    try:
        makedirs(path)
    except OSError as err:
        already_exists = err.errno == EEXIST
        if not already_exists:
            raise
# -*- coding: utf-8 -*- from util.dataset import load_data_v5 from keras.models import Sequential from keras.layers import Dense from keras.layers import SimpleRNN as RNN from keras.utils import np_utils import numpy as np import os nb_epoch = 100 batch_size = 2048 # fix random seed for reproducibility np.random.seed(7) # define the raw dataset if not os.path.exists('models'): os.makedirs('models') def rnn(in_array, target_array): # create and fit the model model = Sequential() model.add(RNN(512, input_shape=(in_array.shape[1], 1))) model.add(Dense(target_array.shape[1], activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) return model if __name__ == '__main__': in_array, target_array, test_in_array, test_target_array = load_data_v5( in_array, target_array, test_in_array, test_target_array)
def main():
    """Fine-tune and/or evaluate BERT on SQuAD from the command line.

    Parses the arguments, sets up the device (single/multi GPU, CPU or
    distributed), optionally trains the model -- saving the weights to
    ``<output_dir>/train_epoch<N>.json`` after every epoch -- and
    optionally writes predictions for ``--predict_file`` to
    ``predictions.json`` / ``nbest_predictions.json`` in the output
    directory.

    Raises:
        ValueError: for inconsistent arguments or a non-empty output
            directory.
    """
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--bert_model", default=None, type=str, required=True,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                             "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model checkpoints will be written.")

    ## Other parameters
    parser.add_argument("--train_file", default=None, type=str,
                        help="SQuAD json for training. E.g., train-v1.1.json")
    parser.add_argument("--predict_file", default=None, type=str,
                        help="SQuAD json for predictions. E.g., dev-v1.1.json or test-v1.1.json")
    parser.add_argument("--max_seq_length", default=384, type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. Sequences "
                             "longer than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument("--doc_stride", default=128, type=int,
                        help="When splitting up a long document into chunks, how much stride to take between chunks.")
    parser.add_argument("--max_query_length", default=64, type=int,
                        help="The maximum number of tokens for the question. Questions longer than this will "
                             "be truncated to this length.")
    parser.add_argument("--do_train", default=False, action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_predict", default=False, action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--train_batch_size", default=32, type=int,
                        help="Total batch size for training.")
    parser.add_argument("--predict_batch_size", default=8, type=int,
                        help="Total batch size for predictions.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs", default=3.0, type=float,
                        help="Total number of training epochs to perform.")
    # '%' must be doubled: argparse %-formats help strings.
    parser.add_argument("--warmup_proportion", default=0.1, type=float,
                        help="Proportion of training to perform linear learning rate warmup for. E.g., 0.1 = 10%% "
                             "of training.")
    parser.add_argument("--n_best_size", default=20, type=int,
                        help="The total number of n-best predictions to generate in the nbest_predictions.json "
                             "output file.")
    parser.add_argument("--max_answer_length", default=30, type=int,
                        help="The maximum length of an answer that can be generated. This is needed because the start "
                             "and end predictions are not conditioned on one another.")
    parser.add_argument("--verbose_logging", default=False, action='store_true',
                        help="If true, all of the warnings related to data processing will be printed. "
                             "A number of warnings are expected for a normal SQuAD evaluation.")
    parser.add_argument("--no_cuda", default=False, action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--optimize_on_cpu', default=False, action='store_true',
                        help="Whether to perform optimization and keep the optimizer averages on CPU")
    parser.add_argument('--fp16', default=False, action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--loss_scale', type=float, default=128,
                        help='Loss scaling, positive power of 2 values can improve fp16 convergence.')
    parser.add_argument('--do_lower_case', action="store_true", default=True,
                        help="Lowercase the input")

    args = parser.parse_args()

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
        if args.fp16:
            logger.info("16-bits training currently not supported in distributed training")
            args.fp16 = False  # (see https://github.com/pytorch/pytorch/pull/13496)
    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits trainiing: {}".format(
        device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
            args.gradient_accumulation_steps))

    # Per-step batch size; the full batch is reached through accumulation.
    args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_predict:
        raise ValueError("At least one of `do_train` or `do_predict` must be True.")

    if args.do_train:
        if not args.train_file:
            raise ValueError(
                "If `do_train` is True, then `train_file` must be specified.")
    if args.do_predict:
        if not args.predict_file:
            raise ValueError(
                "If `do_predict` is True, then `predict_file` must be specified.")

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        raise ValueError("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    os.makedirs(args.output_dir, exist_ok=True)

    tokenizer = BertTokenizer.from_pretrained(args.bert_model)

    train_examples = None
    num_train_steps = None
    if args.do_train:
        train_examples = read_squad_examples(
            input_file=args.train_file, is_training=True)
        num_train_steps = int(
            len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)

    # Prepare model
    model = BertForQuestionAnswering.from_pretrained(args.bert_model)
    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
                                                          output_device=args.local_rank)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer
    if args.fp16:
        param_optimizer = [(n, param.clone().detach().to('cpu').float().requires_grad_())
                           for n, param in model.named_parameters()]
    elif args.optimize_on_cpu:
        param_optimizer = [(n, param.clone().detach().to('cpu').requires_grad_())
                           for n, param in model.named_parameters()]
    else:
        param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.0}
    ]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=args.learning_rate,
                         warmup=args.warmup_proportion,
                         t_total=num_train_steps)

    global_step = 0
    if args.do_train:
        train_features = convert_examples_to_features(
            examples=train_examples,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            doc_stride=args.doc_stride,
            max_query_length=args.max_query_length,
            is_training=True)
        logger.info("***** Running training *****")
        logger.info(" Num orig examples = %d", len(train_examples))
        logger.info(" Num split examples = %d", len(train_features))
        logger.info(" Batch size = %d", args.train_batch_size)
        logger.info(" Num steps = %d", num_train_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
        all_start_positions = torch.tensor([f.start_position for f in train_features], dtype=torch.long)
        all_end_positions = torch.tensor([f.end_position for f in train_features], dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids,
                                   all_start_positions, all_end_positions)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)

        model.train()
        for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
            for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
                if n_gpu == 1:
                    batch = tuple(t.to(device) for t in batch)  # multi-gpu does scattering it-self
                input_ids, input_mask, segment_ids, start_positions, end_positions = batch
                loss = model(input_ids, segment_ids, input_mask, start_positions, end_positions)
                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.fp16 and args.loss_scale != 1.0:
                    # rescale loss for fp16 training
                    # see https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html
                    loss = loss * args.loss_scale
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps
                loss.backward()
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16 or args.optimize_on_cpu:
                        if args.fp16 and args.loss_scale != 1.0:
                            # scale down gradients for fp16 training
                            for param in model.parameters():
                                if param.grad is not None:
                                    param.grad.data = param.grad.data / args.loss_scale
                        is_nan = set_optimizer_params_grad(param_optimizer, model.named_parameters(), test_nan=True)
                        if is_nan:
                            logger.info("FP16 TRAINING: Nan in gradients, reducing loss scaling")
                            args.loss_scale = args.loss_scale / 2
                            model.zero_grad()
                            continue
                        optimizer.step()
                        copy_optimizer_params_to_model(model.named_parameters(), param_optimizer)
                    else:
                        optimizer.step()
                    model.zero_grad()
                    global_step += 1
            # One checkpoint per epoch, numbered by the epoch index and
            # stored inside the output directory.
            checkpoint_file = os.path.join(
                args.output_dir, "train_epoch{}.json".format(epoch))
            torch.save(model.state_dict(), checkpoint_file)

    if args.do_predict:
        eval_examples = read_squad_examples(
            input_file=args.predict_file, is_training=False)
        eval_features = convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            doc_stride=args.doc_stride,
            max_query_length=args.max_query_length,
            is_training=False)

        logger.info("***** Running predictions *****")
        logger.info(" Num orig examples = %d", len(eval_examples))
        logger.info(" Num split examples = %d", len(eval_features))
        logger.info(" Batch size = %d", args.predict_batch_size)

        all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
        all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)
        if args.local_rank == -1:
            eval_sampler = SequentialSampler(eval_data)
        else:
            eval_sampler = DistributedSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.predict_batch_size)

        model.eval()
        all_results = []
        logger.info("Start evaluating")
        for input_ids, input_mask, segment_ids, example_indices in tqdm(eval_dataloader, desc="Evaluating"):
            if len(all_results) % 1000 == 0:
                logger.info("Processing example: %d" % (len(all_results)))
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            with torch.no_grad():
                batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask)
            for i, example_index in enumerate(example_indices):
                start_logits = batch_start_logits[i].detach().cpu().tolist()
                end_logits = batch_end_logits[i].detach().cpu().tolist()
                eval_feature = eval_features[example_index.item()]
                unique_id = int(eval_feature.unique_id)
                all_results.append(RawResult(unique_id=unique_id,
                                             start_logits=start_logits,
                                             end_logits=end_logits))
        output_prediction_file = os.path.join(args.output_dir, "predictions.json")
        output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json")
        write_predictions(eval_examples, eval_features, all_results,
                          args.n_best_size, args.max_answer_length,
                          args.do_lower_case, output_prediction_file,
                          output_nbest_file, args.verbose_logging)
# Validate the chromedriver
def chromedriver_path():
    """Return the chromedriver location.

    The first command-line argument wins when it is present and non-empty;
    otherwise the bundled ``ChromeDriver/chromedriver`` path is returned.
    """
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0]:
        return cli_args[0]
    return str(Path('ChromeDriver/chromedriver'))


# Define the location
chromedriver = chromedriver_path()
os.environ["webdriver.chrome.driver"] = chromedriver

# ======================================================
# save your files
save_path = Path("Data/scraped/individual")
os.makedirs(save_path, exist_ok=True)

# Collect every index file whose name ends in "csv" from the indexes folder.
save_dir = Path("Data/scraped/indexes")
curr_csvs = [
    os.path.join(save_dir, entry)
    for entry in os.listdir(save_dir)
    if entry.endswith("csv")
]

for file in curr_csvs:
    # Create your df
    df = pd.read_csv(file)

    # How many organizations have websites?
    n_websites = sum(df["org_url"].notna())
def check_data_dir_exists():
    """Create the query and user data directories if they do not exist yet.

    The directories are read from ``Config().QUERY_DATA_DIR`` and
    ``Config().USER_DATA_DIR`` (a fresh ``Config()`` per lookup, in that order).
    """
    for setting_name in ("QUERY_DATA_DIR", "USER_DATA_DIR"):
        os.makedirs(getattr(Config(), setting_name), exist_ok=True)
def _make_build_dir(build_dir):
    """Create ``build_dir`` and then call ``write_delete_marker_file`` on it.

    ``os.makedirs`` is invoked without ``exist_ok``, so an already existing
    ``build_dir`` raises instead of being silently reused.
    """
    os.makedirs(build_dir)
    write_delete_marker_file(build_dir)
def _config_i3():
    """Download the i3 configuration gist and install it for the root user.

    The file is fetched as ``i3.conf`` (presumably into the current working
    directory -- depends on ``_download``) and then moved to
    ``/root/.config/i3/config``, creating the directory when needed.
    """
    gist_url = "https://gist.githubusercontent.com/taesiri/ea3f5c6154ebd31e0c2092606a236a22/raw/6f2314ea55704c6c940aa1a4f8eb89a9a5453577/config"
    downloaded_name = "i3.conf"

    _download(gist_url, downloaded_name)
    os.makedirs('/root/.config/i3/', exist_ok=True)
    shutil.move(downloaded_name, "/root/.config/i3/config")
templates = json.load(f) f.close() with open('data/types.json', 'r') as f: wp_types = json.load(f) f.close() with open('data/dotcom_faces.json', 'r') as f: faces = json.load(f) f.close() template = templates[0] homedir = os.environ['HOME'] try: working_dir = '{}/Downloads/renders/{}px'.format(homedir, template['name']) os.makedirs(working_dir, exist_ok=True) except FileExistsError: pass i = 0 ad = OfferSpotlight(drawBot, template) COLORS = ['_a', '_b', '_c'] copy = wp_types[2]['headline'] cta = "Learn more" for t in wp_types[2]['data']: c = COLORS[randint(0, 2)] frame_path = template['name'] + c + '@2x' face = faces[randint(0, len(faces) - 1)] ad.render(frame_path, face, copy, cta, t)
test_size = batch_size

# flag to determine whether or not you want to load the file from a checkpoint
load_checkpoint = 0
np.seterr(all='ignore')

# Define save directories
# Both directory names share one long run tag built from the hyper-parameters;
# assemble it once instead of repeating the concatenation four times.
_run_name_parts = [
    opt.model,
    '_vAN', str(opt.priorWeight_nsteps),
    '_vAT', str(opt.netWeights_psteps),
    '_', str(numRestart), 'start_', data_use,
    '_pre', str(num_pretrain_steps),
    '_CA', str(closest_anchor_flag),
    '_M', str(M),
    '_z', str(z_dim),
    '_A', str(opt.num_anchor),
    '_batch', str(opt.batch_size),
    '_rw', str(opt.recon_weight),
    '_pol1', str(opt.post_l1_weight),
    '_poR', str(opt.post_TO_weight),
    '_poC', str(opt.post_cInfer_weight),
    '_prl1', str(opt.prior_l1_weight),
    '_prR', str(opt.prior_weight),
    '_prC', str(opt.prior_cInfer_weight),
    '_g', str(opt.gamma),
    '_lr', str(opt.lr),
]
if alternate_steps_flag != 0:
    # Alternating schedule: the step counts become part of the run tag.
    _run_name_parts += ['_nst', str(num_net_steps), 'pst', str(num_psi_steps)]
_run_name = ''.join(_run_name_parts)

save_folder = _run_name + '/'
sample_dir = _run_name + '_samples/'

for _out_dir in (save_folder, sample_dir):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

# Start log file
logging.basicConfig(filename=save_folder + 'output.log', level=logging.DEBUG)

# Initialize the networks and datasets
if data_use == 'concen_circle':
    # Load data for concentric circle dataset
    nTrain = 400
    noise_std = 0.01
    learn_anchor_flag = 1
    # Random (x_dim, z_dim) matrix, saved next to the run outputs.
    mapMat = np.random.uniform(-1, 1, (x_dim, z_dim))
    sio.savemat(
        save_folder + 'mapMat_circleHighDim_nonLinear_z' + str(z_dim) + '_x' + str(x_dim) + '.mat',
        {'mapMat': mapMat},
    )
    from fullyConnectedModel import Encoder
def __init__(self):
    """Remember the poster folder (``userdata/posters``) and create it if absent."""
    posters_dir = os.path.join('userdata', 'posters')
    self.poster_folder = posters_dir

    already_there = os.path.exists(posters_dir)
    if not already_there:
        os.makedirs(posters_dir)
def _process_requirements(requirements, cmd, cwd, saltenv, user):
    '''
    Process the requirements argument

    requirements
        ``None``, a comma separated string, or a list of requirement files.
        Entries starting with ``salt://`` are resolved through
        ``_get_cached_requirements``.
    cmd
        Command list; ``['--requirement', <path>]`` is appended in place for
        every requirement.
    cwd
        Directory used to resolve requirement files. When ``None`` and a
        ``user`` is given, it is derived from the first requirement's path.
    saltenv
        Passed through to ``_get_cached_requirements``.
    user
        When set, every requirement file (and the files it chains to) is
        copied into a temporary directory owned by that user.

    Returns ``(cleanup_requirements, None)`` on success, or ``(None, ret)``
    with an error dictionary when a ``salt://`` file cannot be found.
    Raises ``TypeError`` when ``requirements`` is neither a string nor a list.
    '''
    cleanup_requirements = []

    if requirements is not None:
        if isinstance(requirements, string_types):
            requirements = [r.strip() for r in requirements.split(',')]
        elif not isinstance(requirements, list):
            raise TypeError('requirements must be a string or list')

        treq = None

        for requirement in requirements:
            logger.debug('TREQ IS: %s', str(treq))
            if requirement.startswith('salt://'):
                cached_requirements = _get_cached_requirements(
                    requirement, saltenv)
                if not cached_requirements:
                    ret = {
                        'result': False,
                        'comment': 'pip requirements file \'{0}\' not found'.format(
                            requirement)
                    }
                    return None, ret
                requirement = cached_requirements

            if user:
                # Need to make a temporary copy since the user will, most
                # likely, not have the right permissions to read the file
                if not treq:
                    treq = tempfile.mkdtemp()

                __salt__['file.chown'](treq, user, None)

                # Remember where we are so the chdir below can be undone.
                current_directory = os.path.abspath(os.curdir)

                logger.info(
                    '_process_requirements from directory,' +
                    '%s -- requirement: %s', cwd, requirement)

                if cwd is None:
                    r = requirement
                    c = cwd

                    requirement_abspath = os.path.abspath(requirement)
                    cwd = os.path.dirname(requirement_abspath)
                    requirement = os.path.basename(requirement)

                    logger.debug(
                        '\n\tcwd: %s -> %s\n\trequirement: %s -> %s\n',
                        c, cwd, r, requirement)

                # The chain is resolved relative to cwd; always restore the
                # process working directory, even if resolution raises.
                os.chdir(cwd)
                try:
                    reqs = _resolve_requirements_chain(requirement)
                finally:
                    os.chdir(current_directory)

                logger.info('request files: {0}'.format(str(reqs)))

                for req_file in reqs:
                    req_filename = os.path.basename(req_file)

                    logger.debug('TREQ N CWD: %s -- %s -- for %s',
                                 str(treq), str(cwd), str(req_filename))
                    source_path = os.path.join(cwd, req_filename)
                    target_path = os.path.join(treq, req_filename)

                    logger.debug('S: %s', source_path)
                    logger.debug('T: %s', target_path)

                    target_base = os.path.dirname(target_path)

                    if not os.path.exists(target_base):
                        os.makedirs(target_base, mode=0o755)
                        __salt__['file.chown'](target_base, user, None)

                    if not os.path.exists(target_path):
                        logger.debug('Copying %s to %s', source_path, target_path)
                        __salt__['file.copy'](source_path, target_path)

                    logger.debug(
                        'Changing ownership of requirements file \'{0}\' to '
                        'user \'{1}\''.format(target_path, user))

                    __salt__['file.chown'](target_path, user, None)

            # Point pip at the private copy when one was made.
            req_args = os.path.join(treq, requirement) if treq else requirement
            cmd.extend(['--requirement', req_args])

        cleanup_requirements.append(treq)

    logger.debug('CLEANUP_REQUIREMENTS: %s', str(cleanup_requirements))
    return cleanup_requirements, None
if np.isnan(prob): prob = -np.inf return prob # Parse command line opts = parse_commandline() if opts.doNoGW170817: baseplotDir = os.path.join(opts.plotDir,'standard_candles','GRB_GW','no') else: baseplotDir = os.path.join(opts.plotDir,'standard_candles','GRB_GW','all') plotDir = os.path.join(baseplotDir,opts.analysis_type) plotDir = os.path.join(plotDir,"%.2f"%opts.errorbudget) if not os.path.isdir(plotDir): os.makedirs(plotDir) color1 = 'cornflowerblue' color2 = 'coral' color3 = 'palegreen' color4 = 'pink' color5 = 'cyan' color_names = [color1, color2, color3, color4, color5] pickle_samples = opts.pickle_samples.split(",") data_struct = {} for pickle_sample in pickle_samples: pickle_sample_split = pickle_sample.split("/") f = open(pickle_sample, 'r') if "GRB_GW" in pickle_sample:
def create_padded_primer_products(orthoExon_path, padded_primer_product_path,
                                  unpadded_primer_product_path,
                                  alternate_sp_fn, paragon_fn):
    """Write unpadded and padded primer-product FASTA files per locus.

    Args:
        orthoExon_path: Directory holding ``<locus>.full.fasta`` files. It is
            concatenated directly with file names, so it must end with a
            path separator.
        padded_primer_product_path: Output directory prefix for
            ``<locus>.padded.fasta`` files (removed and recreated).
        unpadded_primer_product_path: Output directory prefix for
            ``<locus>.fasta`` files (removed and recreated).
        alternate_sp_fn: JSON file mapping a species to the list of alternate
            species tried when that species lacks a locus.
        paragon_fn: Whitespace-delimited table with a header row; must
            provide ``loci_ID``, ``amp_start``, ``amp_end``,
            ``ampInsert_start``, ``ampInsert_end`` and ``amp_len``.

    Returns:
        ``(df, fasta)``: the annotated table (lower-cased column names) and
        the nested dict ``fasta[locus][record_id] -> SeqRecord``.

    Note:
        Relies on the module-level names ``full_species_list`` and
        ``consensus``, which are defined outside this function.
    """
    # create handles for all .fasta files in fasta directory
    fasta_fn = {
        name.split('.full.fasta')[0]: orthoExon_path + name
        for name in os.listdir(orthoExon_path)
        if ((".full.fasta" in name) and (".full.fasta.fai" not in name))
    }

    # read and parse fasta files for each species
    fasta = {}
    for ortho in fasta_fn.keys():
        fasta[ortho] = {
            seq_record.id: seq_record
            for seq_record in SeqIO.parse(
                fasta_fn[ortho], "fasta", alphabet=IUPAC.ambiguous_dna)
        }

    # load alternate_sp
    with open(alternate_sp_fn, 'r') as f:
        alternate_sp = json.load(f)

    # load paragon data
    with open(paragon_fn) as f:
        lines = f.readlines()
    df = pd.DataFrame([line.strip().split() for line in lines[1:]],
                      columns=lines[0].strip().split())

    # parse ints
    df.amp_start = df.amp_start.astype(int)
    df.amp_end = df.amp_end.astype(int)
    df.ampInsert_start = df.ampInsert_start.astype(int)
    df.ampInsert_end = df.ampInsert_end.astype(int)
    df.amp_len = df.amp_len.astype(int)
    df.index = df.loci_ID

    # add columns
    df["species"] = df.loci_ID.apply(
        lambda ortho: [req.id for req in fasta[ortho].values()])
    df["consensus"] = df.loci_ID.apply(
        lambda ortho: consensus([req.seq for req in fasta[ortho].values()]))
    df["left_primer"] = df.apply(
        lambda req: req["consensus"][req["amp_start"]:req["ampInsert_start"]],
        axis=1)
    df["insert"] = df.apply(
        lambda req: req["consensus"][req["ampInsert_start"]:req["ampInsert_end"]],
        axis=1)
    df["right_primer"] = df.apply(
        lambda req: req["consensus"][req["ampInsert_end"]:req["amp_end"]],
        axis=1)

    # set column names lowercase
    df.columns = map(str.lower, df.columns)

    # output unpadded primer products
    shutil.rmtree(unpadded_primer_product_path, ignore_errors=True)
    os.makedirs(unpadded_primer_product_path, exist_ok=True)
    for ortho in df.loci_id:
        with open(unpadded_primer_product_path + ortho + ".fasta", 'w') as f:
            start = df.loc[ortho]["ampinsert_start"]
            end = df.loc[ortho]["ampinsert_end"]
            for sp in full_species_list:
                if sp in df.loc[ortho]["species"]:
                    seqReq = fasta[ortho][sp][start:end]
                    f.write(seqReq.format("fasta"))

    # output padded primer products
    shutil.rmtree(padded_primer_product_path, ignore_errors=True)
    os.makedirs(padded_primer_product_path, exist_ok=True)
    for ortho in df.loci_id:
        with open(padded_primer_product_path + ortho + ".padded.fasta", 'w') as f:
            start = df.loc[ortho]["ampinsert_start"]
            end = df.loc[ortho]["ampinsert_end"]
            for sp in full_species_list:
                if sp in df.loc[ortho]["species"]:
                    seqReq = fasta[ortho][sp][start:end]
                else:
                    for alt_sp in alternate_sp[sp]:
                        if alt_sp in fasta[ortho].keys():
                            seqReq = fasta[ortho][alt_sp][start:end]
                            break
                    else:
                        # No alternate species carries this locus. Skip the
                        # species instead of re-writing the previous
                        # iteration's sequence under this species' name (or
                        # failing on an unbound variable on the first pass).
                        continue
                # The slice is a fresh record, so relabelling it is safe.
                seqReq.description = ""
                seqReq.id = sp
                seqReq.name = sp
                f.write(seqReq.format("fasta"))

    return df, fasta
fp.write('f' + ' ' + str(face[0] + 1) + ' ' + str(face[1] + 1) + ' ' + str(face[2] + 1) + '\n') # add empty line to be sure fp.write('\n') if __name__ == '__main__': parser = argparse.ArgumentParser(description='Process some integers.') parser.add_argument('input', type=str, help='The input directory containing OFF files.') parser.add_argument('output', type=str, help='The output directory for OBJ files.') args = parser.parse_args() if not os.path.exists(args.input): print('Input directory does not exist.') exit(1) if not os.path.exists(args.output): os.makedirs(args.output) print('Created output directory.') else: print('Output directory exists; potentially overwriting contents.') for filename in os.listdir(args.input): filepath = args.input + '/' + filename vertices, faces = read_off(filepath) print('Read %s.' % filepath) filepath = args.output + '/' + filename[:-4] + '.obj' write_obj(filepath, vertices, faces) print('Wrote %s.' % filepath)
def test_002_mkdir(self):
    """A directory created under ``self.mnt`` must be the only entry in both trees."""
    os.makedirs(self.mnt / "mydir")

    def relative_entries(root):
        # Direct children of ``root`` expressed relative to it.
        return [child.relative_to(root) for child in root.iterdir()]

    mnt_dirs = relative_entries(self.mnt)
    src_dirs = relative_entries(self.src)

    self.assertEqual(mnt_dirs, src_dirs)
    self.assertEqual(mnt_dirs, [Path("mydir")])
else: logs.append("Scanning directories...") dirslist = find_long_dirs(source_folder, destination_folder, dirslist) logs.append("Scanning directories complete.\n\nExtracting files...") extract_files(source_folder, destination_folder, source_folder) logs.append("Files extraction complete.\n\nDeleting empty remaining directories...") delete_dirs(dirslist) logs.append("Empty directories deletion complete.\nCreating restore files") try: os.makedirs((default_restore_folder)) except Exception as e: logs.append(str(e)) write_list_in_file(dirslist, chained_directories_list_file, "w+") write_list_in_file(klist, keys_list_file, "w+") write_list_in_file(vlist, values_list_file, "w+") logs.append("Restore files created. Do not delete or move restore files/folder and program file") logs.append("\nYou can restore your files by executing the program again.") except Exception as e: logs.append("Severe error.\n"+str(e)) finally:
# For every English source file, create or refresh the per-language JSON file
# next to it (".../en/US.json" -> ".../<lang>/<lang>.json").
for file in files:
    parts = file.split("/")
    for lang in languages:
        # Read through context managers so the handles are closed promptly.
        with io.open(file, 'r') as _src:
            translations = json.load(_src)
        fname = file.replace("/en/US.json", "/" + lang + "/" + lang + ".json")
        iterate = True
        if os.path.isfile(fname):
            with io.open(fname, 'r') as _existing:
                i18n_translations = json.load(_existing)
        else:
            # No translation yet: start from the English keys with empty values.
            i18n_translations = translations
            for k, v in i18n_translations.iteritems():
                i18n_translations[k] = ""
            iterate = False
        if iterate:
            for k, v in translations.iteritems():
                ok = True
                try:
                    i18n_translations[k]
                    # Blank the entry selected by blastKey so it gets redone.
                    if blastKey == k:
                        i18n_translations[k] = ""
                except KeyError:
                    ok = False
        try:
            os.makedirs("/".join(parts[:-2]) + "/" + lang)
        except OSError:
            # Best effort: the language directory normally exists already.
            pass
        # Close the output explicitly so the JSON is flushed to disk.
        with open(fname, 'w') as _out:
            json.dump(i18n_translations, _out, indent=4, sort_keys=True)
def main():
    """Run inference on a dataset with a trained result and save the images.

    Parses the command line, loads the result's stored config and checkpoint,
    runs the model on every input image of ``cfg.input_data``, writes the
    outputs (plus inputs/targets when ``cfg.save_all`` is set) in every
    format listed in ``cfg.format``, and prints per-image and average
    metrics for images that have a target.
    """
    # Parse the command line arguments
    cfg = parse_args(description='Performs inference on a dataset using the specified training result.')

    # Initialize the PyTorch device
    device = init_device(cfg)

    # Open the result
    result_dir = get_result_dir(cfg)
    if not os.path.isdir(result_dir):
        error('result does not exist')
    print('Result:', cfg.result)

    # Load the result config
    result_cfg = load_config(result_dir)
    cfg.features = result_cfg.features
    cfg.transfer = result_cfg.transfer
    cfg.model = result_cfg.model
    main_feature = get_main_feature(cfg.features)
    num_main_channels = len(get_dataset_channels(main_feature))

    # Initialize the dataset
    data_dir = get_data_dir(cfg, cfg.input_data)
    image_sample_groups = get_image_sample_groups(data_dir, cfg.features)

    # Initialize the model
    model = get_model(cfg)
    model.to(device)

    # Load the checkpoint
    checkpoint = load_checkpoint(result_dir, device, cfg.num_epochs, model)
    epoch = checkpoint['epoch']
    print('Epoch:', epoch)

    # Initialize the transfer function
    transfer = get_transfer_function(cfg)

    # Iterate over the images
    print()
    output_dir = os.path.join(cfg.output_dir, cfg.input_data)
    metric_sum = {metric: 0. for metric in cfg.metric}
    metric_count = 0
    model.eval()

    # Inference function
    def infer(input, exposure):
        x = input.clone()

        # Apply the transfer function
        if transfer:
            color = x[:, 0:num_main_channels, ...]
            if main_feature == 'hdr':
                color *= exposure
            color = transfer.forward(color)
            x[:, 0:num_main_channels, ...] = color

        # Pad the output
        shape = x.shape
        x = F.pad(x, (0, round_up(shape[3], model.alignment) - shape[3],
                      0, round_up(shape[2], model.alignment) - shape[2]))

        # Run the inference
        if main_feature == 'sh1':
            # Iterate over x, y, z
            x = torch.cat([model(torch.cat((x[:, i:i+3, ...], x[:, 9:, ...]), 1)) for i in [0, 3, 6]], 1)
        else:
            x = model(x)

        # Unpad the output
        x = x[:, :, :shape[2], :shape[3]]

        # Sanitize the output
        x = torch.clamp(x, min=0.)

        # Apply the inverse transfer function
        if transfer:
            x = transfer.inverse(x)
            if main_feature == 'hdr':
                x /= exposure
        else:
            x = torch.clamp(x, max=1.)

        return x

    # Saves an image in different formats
    def save_images(path, image, image_srgb, suffix=main_feature):
        if suffix == 'sh1':
            # Iterate over x, y, z
            for i, axis in [(0, 'x'), (3, 'y'), (6, 'z')]:
                save_images(path, image[:, i:i+3, ...], image_srgb[:, i:i+3, ...], 'sh1' + axis)
            return

        image = tensor_to_image(image)
        image_srgb = tensor_to_image(image_srgb)
        filename_prefix = path + '.' + suffix + '.'

        # Transform to original range exactly once. Doing this inside the
        # format loop re-applied it for every additional HDR format, so the
        # second and later files were written with a wrong value range.
        if main_feature in {'sh1', 'nrm'}:
            image_orig_range = image * 2. - 1.  # [0..1] -> [-1..1]
        else:
            image_orig_range = image

        for format in cfg.format:
            if format in {'exr', 'pfm', 'hdr'}:
                save_image(filename_prefix + format, image_orig_range)
            else:
                save_image(filename_prefix + format, image_srgb)

    with torch.no_grad():
        for group, input_names, target_name in image_sample_groups:
            # Create the output directory if it does not exist
            output_group_dir = os.path.join(output_dir, os.path.dirname(group))
            if not os.path.isdir(output_group_dir):
                os.makedirs(output_group_dir)

            # Load metadata for the images if it exists
            tonemap_exposure = 1.
            metadata = load_image_metadata(os.path.join(data_dir, group))
            if metadata:
                tonemap_exposure = metadata['exposure']
                save_image_metadata(os.path.join(output_dir, group), metadata)

            # Load the target image if it exists
            if target_name:
                target = load_image_features(os.path.join(data_dir, target_name), main_feature)
                target = image_to_tensor(target, batch=True).to(device)
                target_srgb = transform_feature(target, main_feature, 'srgb', tonemap_exposure)

            # Iterate over the input images
            for input_name in input_names:
                print(input_name, '...', end='', flush=True)

                # Load the input image
                input = load_image_features(os.path.join(data_dir, input_name), cfg.features)

                # Compute the autoexposure value
                exposure = autoexposure(input) if main_feature == 'hdr' else 1.

                # Infer
                input = image_to_tensor(input, batch=True).to(device)
                output = infer(input, exposure)

                input = input[:, 0:num_main_channels, ...]  # keep only the main feature
                input_srgb = transform_feature(input, main_feature, 'srgb', tonemap_exposure)
                output_srgb = transform_feature(output, main_feature, 'srgb', tonemap_exposure)

                # Compute metrics
                metric_str = ''
                if target_name and cfg.metric:
                    for metric in cfg.metric:
                        value = compare_images(output_srgb, target_srgb, metric)
                        metric_sum[metric] += value
                        if metric_str:
                            metric_str += ', '
                        metric_str += f'{metric}={value:.4f}'
                    metric_count += 1

                # Save the input and output images
                output_name = input_name + '.' + cfg.result
                if cfg.num_epochs:
                    output_name += f'_{epoch}'
                if cfg.save_all:
                    save_images(os.path.join(output_dir, input_name), input, input_srgb)
                save_images(os.path.join(output_dir, output_name), output, output_srgb)

                # Print metrics
                if metric_str:
                    metric_str = ' ' + metric_str
                print(metric_str)

            # Save the target image if it exists
            if cfg.save_all and target_name:
                save_images(os.path.join(output_dir, target_name), target, target_srgb)

    # Print summary
    if metric_count > 0:
        metric_str = ''
        for metric in cfg.metric:
            value = metric_sum[metric] / metric_count
            if metric_str:
                metric_str += ', '
            metric_str += f'{metric}_avg={value:.4f}'
        print()
        print(f'{cfg.result}: {metric_str} ({metric_count} images)')
#fname = "/Users/kylebradley/Desktop/Kyle/Batch2/" + ext #for fname in fnames: content = open(fname).read() #print content #dot = fname.rfind('.') #fname = fname[:dot] dot = fname.rfind('.') fname = fname[:dot] slash = fname.rfind('/') fname = fname[(slash + 1):dot] #print fname if not os.path.exists(seg_dir): os.makedirs(seg_dir) print "current filename: ", fname soup = BeautifulSoup(content,'lxml') basic = [] for links in soup.find_all("dependencies"): if links.get("type") == "enhanced-dependencies": basic.append(links) parse = [] for links in soup.find_all("parse"): parse.append(links) tokens = soup.find_all("tokens")
def go(self):
    """
    Go through all the bootstrap steps

    Installs the autostart entry, verifies that osquery is present and waits
    for the daemon to answer. Returns True once every step succeeded and
    False as soon as osquery is missing, the daemon does not respond after
    ten attempts, or the daemon reports a permission error.
    """
    platform = Platform.current()
    self.c.log("Bootstrap", "go", "Bootstrapping Flock Agent", always=True)

    platform_unknown = platform == Platform.UNKNOWN

    # Step 1: autostart entry
    if platform_unknown:
        self.c.log(
            "Bootstrap",
            "go",
            "Unknown platform: Unable to make sure Flock Agent starts automatically",
        )
    else:
        self.c.log(
            "Bootstrap", "go", "Making sure Flock Agent starts automatically"
        )

        if platform == Platform.MACOS:
            autorun_dir = os.path.expanduser("~/Library/LaunchAgents")
            autorun_filename = "media.firstlook.flock-agent.plist"
            resource = os.path.join("autostart/macos", autorun_filename)
            src_filename = self.c.get_resource_path(resource)
        elif platform == Platform.LINUX:
            autorun_dir = appdirs.user_config_dir("autostart")
            autorun_filename = "media.firstlook.flock-agent.desktop"
            resource = os.path.join("autostart/linux", autorun_filename)
            src_filename = self.c.get_resource_path(resource)

        os.makedirs(autorun_dir, exist_ok=True)
        autorun_target = os.path.join(autorun_dir, autorun_filename)
        shutil.copy(src_filename, autorun_target)

    # Step 2: osquery binaries
    if platform_unknown:
        self.c.log(
            "Bootstrap",
            "go",
            "Unknown platform: Unable to make sure osquery is installed",
        )
    else:
        self.c.log("Bootstrap", "go", "Making sure osquery is installed")

        missing_message = None
        if platform == Platform.MACOS:
            if not (
                os.path.exists("/usr/local/bin/osqueryd")
                and os.path.exists("/usr/local/bin/osqueryi")
            ):
                missing_message = '<b>Osquery is not installed.</b><br><br>You can either install it with Homebrew, or download it from <a href="https://osquery.io/downloads">https://osquery.io/downloads</a>. Install osquery and then run Flock again.'
        elif platform == Platform.LINUX:
            if not (
                os.path.exists("/usr/bin/osqueryd")
                and os.path.exists("/usr/bin/osqueryi")
            ):
                missing_message = '<b>Osquery is not installed.</b><br><br>To add the osquery repository to your system and install the osquery package, follow the instructions at <a href="https://osquery.io/downloads">https://osquery.io/downloads</a> under "Alternative Install Options".<br><br>For Debian, Ubuntu, or Mint, follow the "Debian Linux" instructions, and for Fedora, Red Hat, or CentOS, follow the "RPM Linux" instructions.<br><br>Install osquery and then run Flock again.'

        if missing_message is not None:
            Alert(self.c, missing_message, contains_links=True).launch()
            return False

    # Step 3: daemon reachability (up to ten pings, one second apart)
    self.c.log("Bootstrap", "go", "Making sure the Flock Agent daemon is running")
    try:
        for _ in range(10):
            try:
                self.c.daemon.ping()
            except DaemonNotRunningException:
                self.c.log("Bootstrap", "go", "Failed to connect to daemon ...")
                time.sleep(1)
            else:
                break
        else:
            # Every attempt failed.
            self.c.gui.daemon_not_running()
            return False
    except PermissionDeniedException:
        self.c.gui.daemon_permission_denied()
        return False

    self.c.log("Bootstrap", "go", "Bootstrap complete")
    return True
parser.add_argument('--eval_episodes', type=int, default=10, help='Number of evaluation steps for one trained agent')
parser.add_argument('--trials', type=int, default=10, help='Number of trials evaluating the agent')
parser.add_argument('--output', type=str, required=True, help='Path to the folder where all results will be stored at')
args = parser.parse_args()

# set logging level according to --debug
# (--debug selects the verbose DEBUG level; the default is INFO. The two
# levels were swapped before, which made --debug *reduce* the output.)
logging.basicConfig()
debug_level = logging.DEBUG if args.debug else logging.INFO
logging.getLogger().setLevel(debug_level)

# Create log dir & monitor training so that episode rewards are logged
os.makedirs(args.logs, exist_ok=True)

agent_name = args.agent

# Create agent from experiment configuration
if agent_name == 'Random':
    agent_type = BaselineHeuristic
    policy = RandomPolicy
elif agent_name == 'FirstFit_1':
    agent_type = BaselineHeuristic
    policy = FirstFitPolicy
elif agent_name == 'FirstFit_2':
    agent_type = BaselineHeuristic
    policy = FirstFitPolicy2
Authors: Seth Gottlieb {{content}} """ if __name__ == "__main__": blog_data = feedparser.parse("feed.atom") template = Template(MD_TEMPLATE) for entry in blog_data["entries"]: status = entry["blogger_status"] content_type = entry["blogger_type"] if status == "LIVE" and content_type == "POST": context = {} context["path"] = entry["blogger_filename"] context["title"] = entry["title"] context["publish_date"] = entry["published"] context["update_date"] = entry["updated"] if len(entry["content"]) > 1: print("more than one content") context["content"] = html2markdown.convert(entry["content"][0]["value"]) if "tags" in entry.keys(): context["tag_str"] = ", ".join([tag["term"] for tag in entry["tags"]]) file_path = "content/" + context["path"].replace(".html", ".md") os.makedirs(os.path.dirname(file_path), exist_ok=True) with open(file_path, "w") as f: f.write(template.render(**context))
def make_dir(path):
    """Ensure the parent directory of ``path`` exists.

    Args:
        path: File path whose containing directory should exist.

    Returns:
        True when the directory had to be created, False when it was already
        present or when ``path`` has no directory component (a bare file
        name lives in the current directory, so there is nothing to create).
    """
    dir_path = os.path.dirname(path)
    # os.makedirs('') raises, so treat an empty dirname as "already exists".
    if not dir_path or os.path.exists(dir_path):
        return False
    os.makedirs(dir_path)
    return True
q.y_val = t0 * t2 * t5 + t1 * t3 * t4 #y q.z_val = t1 * t2 * t4 - t0 * t3 * t5 #z return q # connect to the AirSim simulator client = airsim.CarClient() client.confirmConnection() car_controls = airsim.CarControls() # upload map numpy map = np.load(args.map) # create experiments directories experiment_dir = os.path.join(os.path.expanduser('~'), 'Documents\AirSim', datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")) images_dir = os.path.join(experiment_dir, 'images') os.makedirs(images_dir) # create txt file airsim_rec = open(os.path.join(experiment_dir,"airsim_rec.txt"),"w") airsim_rec.write("TimeStamp\tPOS_X\tPOS_Y\tPOS_Z\tRoll\tPitch\tYaw\tRPM\tSpeed\tImageFile\n") initial_pose = client.simGetVehiclePose() idx = 0 while True: # create position and orientation vectors pose = copy.deepcopy(initial_pose) pose.position.x_val = pose.position.x_val + np.random.uniform(low=(map_x_range[0]-args.start_x)/100.0, high=(map_x_range[1]-args.start_x)/100.0) pose.position.y_val = pose.position.y_val + np.random.uniform(low=(map_y_range[0]-args.start_y)/100.0, high=(map_y_range[1]-args.start_y)/100.0) alpha = np.random.uniform(low=-math.pi, high=math.pi)
def setUp(self):
    """Create the ``HashPathStorage`` under test and ensure ``MEDIA_ROOT`` exists."""
    self.storage = HashPathStorage()

    # make sure the profile upload folder exists
    media_root = settings.MEDIA_ROOT
    if os.path.exists(media_root):
        return
    os.makedirs(media_root)
for dicts in feature_factory_manager.feature_factory_dict.values(): for factory in dicts.values(): factory.logger = None feature_factory_manager.logger = None with open(f"{output_dir}/feature_factory_manager.pickle", "wb") as f: pickle.dump(feature_factory_manager, f) ff_for_transformer.fit(df) ff_for_transformer.logger = None with open(f"{output_dir}/feature_factory_manager_for_transformer.pickle", "wb") as f: pickle.dump(ff_for_transformer, f) if __name__ == "__main__": if not is_debug: for _ in tqdm(range(wait_time)): time.sleep(1) output_dir = f"../output/{os.path.basename(__file__).replace('.py', '')}/{dt.now().strftime('%Y%m%d%H%M%S')}/" os.makedirs(output_dir, exist_ok=True) for lr in [1e-3]: for dropout in [0.5]: if is_debug: batch_size = 8 else: batch_size = 512 params = {"embed_dim": 256, "max_seq": 100, "batch_size": batch_size, "num_warmup_steps": 1000, "lr": lr, "dropout": dropout} main(params, output_dir=output_dir)
def main(argv):
    """Parse the runner's command line, optionally build the project, and run tests.

    Depending on the options this either generates lcov HTML, starts a
    Python / IPython / Unix shell with the build on the path, or runs the
    test (or benchmark) suite from ``build/test``.  The function always
    terminates the process through ``sys.exit`` (or replaces it via ``exec``).

    :param argv: list of command-line arguments (without the program name).
    """
    parser = ArgumentParser(usage=__doc__.lstrip())
    parser.add_argument("--verbose", "-v", action="count", default=1,
                        help="more verbosity")
    parser.add_argument(
        "--no-build", "-n", action="store_true", default=False,
        help="do not build the project (use system installed version)")
    parser.add_argument("--build-only", "-b", action="store_true",
                        default=False,
                        help="just build, do not run any tests")
    parser.add_argument("--doctests", action="store_true", default=False,
                        help="Run doctests in module")
    parser.add_argument(
        "--coverage", action="store_true", default=False,
        help=("report coverage of project code. HTML output goes "
              "under build/coverage"))
    parser.add_argument(
        "--gcov", action="store_true", default=False,
        help=("enable C code coverage via gcov (requires GCC). "
              "gcov output goes to build/**/*.gc*"))
    parser.add_argument("--lcov-html", action="store_true", default=False,
                        help=("produce HTML for C code coverage information "
                              "from a previous run with --gcov. "
                              "HTML output goes to build/lcov/"))
    parser.add_argument("--mode", "-m", default="fast",
                        help="'fast', 'full', or something that could be "
                             "passed to nosetests -A [default: fast]")
    parser.add_argument(
        "--submodule", "-s", default=None,
        help="Submodule whose tests to run (cluster, constants, ...)")
    parser.add_argument("--pythonpath", "-p", default=None,
                        help="Paths to prepend to PYTHONPATH")
    parser.add_argument("--tests", "-t", action='append',
                        help="Specify tests to run")
    parser.add_argument("--python", action="store_true",
                        help="Start a Python shell with PYTHONPATH set")
    parser.add_argument("--ipython", "-i", action="store_true",
                        help="Start IPython shell with PYTHONPATH set")
    parser.add_argument("--shell", action="store_true",
                        help="Start Unix shell with PYTHONPATH set")
    parser.add_argument("--debug", "-g", action="store_true",
                        help="Debug build")
    parser.add_argument("--show-build-log", action="store_true",
                        help="Show build output rather than using a log file")
    parser.add_argument("--bench", action="store_true",
                        help="Run benchmark suite instead of test suite")
    parser.add_argument("args", metavar="ARGS", default=[], nargs=REMAINDER,
                        help="Arguments to pass to Nose, Python or shell")
    args = parser.parse_args(argv)

    if args.lcov_html:
        # generate C code coverage output
        lcov_generate()
        sys.exit(0)

    if args.pythonpath:
        # Insert in reverse so the first path given ends up first on sys.path.
        for p in reversed(args.pythonpath.split(os.pathsep)):
            sys.path.insert(0, p)

    if args.gcov:
        gcov_reset_counters()

    if not args.no_build:
        site_dir = build_project(args)
        sys.path.insert(0, site_dir)
        os.environ['PYTHONPATH'] = site_dir

    # Copy so the parsed namespace is not mutated; drop a leading '--'.
    extra_argv = args.args[:]
    if extra_argv and extra_argv[0] == '--':
        extra_argv = extra_argv[1:]

    if args.python:
        if extra_argv:
            # Don't use subprocess, since we don't want to include the
            # current path in PYTHONPATH.
            import types
            sys.argv = extra_argv
            with open(extra_argv[0], 'r') as f:
                script = f.read()
            # types.ModuleType is what imp.new_module returned; the imp
            # module itself no longer exists on Python 3.12+.
            sys.modules['__main__'] = types.ModuleType('__main__')
            ns = dict(__name__='__main__',
                      __file__=extra_argv[0])
            exec_(script, ns)
            sys.exit(0)
        else:
            import code
            code.interact()
            sys.exit(0)

    if args.ipython:
        import IPython
        IPython.embed(user_ns={})
        sys.exit(0)

    if args.shell:
        shell = os.environ.get('SHELL', 'sh')
        print("Spawning a Unix shell...")
        # execvp searches PATH, so the bare 'sh' fallback can be resolved;
        # an absolute $SHELL behaves exactly as with execv.
        os.execvp(shell, [shell] + extra_argv)
        sys.exit(1)

    if args.coverage:
        dst_dir = os.path.join(ROOT_DIR, 'build', 'coverage')
        fn = os.path.join(dst_dir, 'coverage_html.js')
        # Only wipe the directory when it looks like earlier coverage output.
        if os.path.isdir(dst_dir) and os.path.isfile(fn):
            shutil.rmtree(dst_dir)
        extra_argv += ['--cover-html',
                       '--cover-html-dir=' + dst_dir]

    test_dir = os.path.join(ROOT_DIR, 'build', 'test')

    if args.build_only:
        sys.exit(0)
    elif args.submodule:
        modname = PROJECT_MODULE + '.' + args.submodule
        try:
            __import__(modname)
            if args.bench:
                test = sys.modules[modname].bench
            else:
                test = sys.modules[modname].test
        except (ImportError, KeyError, AttributeError) as e:
            print("Cannot run tests for %s (%s)" % (modname, e))
            sys.exit(2)
    elif args.tests:
        def fix_test_path(x):
            # fix up test path: make the file part relative to test_dir,
            # keeping any ':'-separated suffix untouched
            p = x.split(':')
            p[0] = os.path.relpath(os.path.abspath(p[0]),
                                   test_dir)
            return ':'.join(p)

        tests = [fix_test_path(x) for x in args.tests]

        def test(*a, **kw):
            # list(...) so the concatenation also works when the caller
            # omits extra_argv and the tuple default is used.
            extra_argv = list(kw.pop('extra_argv', ()))
            extra_argv = extra_argv + tests[1:]
            kw['extra_argv'] = extra_argv
            from numpy.testing import Tester
            if args.bench:
                return Tester(tests[0]).bench(*a, **kw)
            else:
                return Tester(tests[0]).test(*a, **kw)
    else:
        __import__(PROJECT_MODULE)
        if args.bench:
            test = sys.modules[PROJECT_MODULE].bench
        else:
            test = sys.modules[PROJECT_MODULE].test

    # Run the tests under build/test
    try:
        shutil.rmtree(test_dir)
    except OSError:
        pass
    try:
        os.makedirs(test_dir)
    except OSError:
        pass

    shutil.copyfile(os.path.join(ROOT_DIR, '.coveragerc'),
                    os.path.join(test_dir, '.coveragerc'))

    cwd = os.getcwd()
    try:
        os.chdir(test_dir)
        if args.bench:
            result = test(args.mode,
                          verbose=args.verbose,
                          extra_argv=extra_argv)
        else:
            result = test(args.mode,
                          verbose=args.verbose,
                          extra_argv=extra_argv,
                          doctests=args.doctests,
                          coverage=args.coverage)
    finally:
        # Always restore the working directory, even if the run raised.
        os.chdir(cwd)

    # The test callable may return a plain bool or a result object.
    if isinstance(result, bool):
        sys.exit(0 if result else 1)
    elif result.wasSuccessful():
        sys.exit(0)
    else:
        sys.exit(1)
parser.add_argument('--lr', type=float, default=0.001, metavar='LR', help='learning rate (default: 0.001)') parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser.add_argument('--save', default='./models', type=str, metavar='PATH', help='path to save prune model (default: current directory)') parser.add_argument('--arch', default='resnet', type=str, help='architecture to use') parser.add_argument('--scratch',default='', type=str, help='the PATH to the pruned model') args = parser.parse_args() device = torch.device('cuda:0') if not os.path.exists(args.save): os.makedirs(args.save) #数据加载及处理 train_transform = transforms.Compose([ transforms.Resize(224), transforms.RandomResizedCrop(224,scale=(0.6,1.0),ratio=(0.8,1.0)), transforms.RandomHorizontalFlip(), torchvision.transforms.ColorJitter(brightness=0.5, contrast=0, saturation=0, hue=0), torchvision.transforms.ColorJitter(brightness=0, contrast=0.5, saturation=0, hue=0), transforms.ToTensor(), transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]) ]) test_transform = transforms.Compose([ transforms.Resize(224), transforms.RandomResizedCrop(224,scale=(1.0,1.0),ratio=(1.0,1.0)),
def main(params: dict, output_dir: str):
    """Build the SAKT datasets, train the model, and write its artifacts.

    Reads the merged training pickle, computes features, splits rows into
    train/validation by user, trains ``SAKTModel`` for ``epochs`` epochs and
    writes validation predictions, model weights and parameters under
    ``output_dir``.  Several switches (``is_debug``, ``load_pickle``,
    ``is_make_feature_factory``, ``epochs``, ``device``) are not parameters;
    they are looked up from the enclosing module.

    :param params: hyper-parameters; the keys read here are ``max_seq``,
        ``batch_size``, ``embed_dim``, ``dropout``, ``lr`` and
        ``num_warmup_steps``.
    :param output_dir: directory that receives the csv/pth/json/pickle outputs.
    """
    import mlflow

    print("start params={}".format(params))
    model_id = "all"
    logger = get_logger()
    df = pd.read_pickle("../input/riiid-test-answer-prediction/train_merged.pickle")
    if is_debug:
        df = df.head(30000)
    df["prior_question_had_explanation"] = df["prior_question_had_explanation"].fillna(-1)
    column_config = {
        ("content_id", "content_type_id"): {"type": "category"},
        "user_answer": {"type": "leakage_feature"},
        "answered_correctly": {"type": "leakage_feature"},
        "part": {"type": "category"},
        "prior_question_elapsed_time_bin300": {"type": "category"},
        "duration_previous_content_bin300": {"type": "category"},
        "prior_question_had_explanation": {"type": "category"},
        "rating_diff_content_user_id": {"type": "numeric"},
        "qq_table2_mean": {"type": "numeric"},
        "qq_table2_min": {"type": "numeric"}
    }

    # Features are only computed when the datasets are not loaded from pickle.
    if not load_pickle or is_debug:
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"]["DurationPreviousContent"] = DurationPreviousContent()
        feature_factory_dict["user_id"]["ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_dict["user_id"]["PreviousAnswer2"] = PreviousAnswer2(groupby="user_id",
                                                                             column="content_id",
                                                                             is_debug=is_debug,
                                                                             model_id=model_id,
                                                                             n=300)
        feature_factory_dict["user_id"]["UserContentRateEncoder"] = UserContentRateEncoder(rate_func="elo",
                                                                                           column="user_id")
        feature_factory_dict["user_id"]["QuestionQuestionTableEncoder2"] = \
            QuestionQuestionTableEncoder2(
                model_id=model_id,
                is_debug=is_debug,
                past_n=100,
                min_size=300
            )
        feature_factory_manager = FeatureFactoryManager(feature_factory_dict=feature_factory_dict,
                                                        logger=logger,
                                                        split_num=1,
                                                        model_id=model_id,
                                                        load_feature=not is_debug,
                                                        save_feature=not is_debug)

        print("all_predict")
        df = feature_factory_manager.all_predict(df)
        # Keep only the model inputs; -99 is mapped to -1 before use.
        df = df[["user_id", "content_id", "content_type_id", "part", "user_answer", "answered_correctly",
                 "prior_question_elapsed_time_bin300", "duration_previous_content_bin300",
                 "prior_question_had_explanation", "rating_diff_content_user_id",
                 "qq_table2_mean", "qq_table2_min"]].replace(-99, -1)
        df["qq_table2_mean"] = df["qq_table2_mean"].fillna(0.65)
        df["qq_table2_min"] = df["qq_table2_min"].fillna(0.6)
        print(df.head(10))

    print("data preprocess")
    # Per-user split: ~1% of users go entirely to validation, for the rest
    # the last 5% of their rows do.  Fixed seed keeps the split reproducible.
    val_idx = []
    np.random.seed(0)
    for _, w_df in df[df["content_type_id"] == 0].groupby("user_id"):
        if np.random.random() < 0.01:
            # all val
            val_idx.extend(w_df.index.tolist())
        else:
            train_num = int(len(w_df) * 0.95)
            val_idx.extend(w_df[train_num:].index.tolist())

    ff_for_transformer = FeatureFactoryForTransformer(column_config=column_config,
                                                      dict_path="../feature_engineering/",
                                                      sequence_length=params["max_seq"],
                                                      logger=logger)
    ff_for_transformer.make_dict(df=df)
    n_skill = len(ff_for_transformer.embbed_dict[("content_id", "content_type_id")])

    if not load_pickle or is_debug:
        df["is_val"] = 0
        # Single .loc call: the chained form df["is_val"].loc[...] = 1 may
        # write to a temporary and leave df unchanged.
        df.loc[val_idx, "is_val"] = 1
        # Explicit copy so the column assignments below do not target a
        # slice of df.
        w_df = df[df["is_val"] == 0].copy()
        # Number each user's rows in blocks of max_seq counted from the end,
        # then fold the block number into the user id.
        w_df["group"] = (w_df.groupby("user_id")["user_id"].transform("count") -
                         w_df.groupby("user_id").cumcount()) // params["max_seq"]
        w_df["user_id"] = w_df["user_id"].astype(str) + "_" + w_df["group"].astype(str)

        group = ff_for_transformer.all_predict(w_df)
        dataset_train = SAKTDataset(group,
                                    n_skill=n_skill,
                                    max_seq=params["max_seq"])
        del w_df
        gc.collect()

    # A fresh factory instance is used for the validation pass.
    ff_for_transformer = FeatureFactoryForTransformer(column_config=column_config,
                                                      dict_path="../feature_engineering/",
                                                      sequence_length=params["max_seq"],
                                                      logger=logger)
    if not load_pickle or is_debug:
        group = ff_for_transformer.all_predict(df[df["content_type_id"] == 0])
        dataset_val = SAKTDataset(group,
                                  is_test=True,
                                  n_skill=n_skill,
                                  max_seq=params["max_seq"])

    os.makedirs("../input/feature_engineering/model097_all", exist_ok=True)
    if not is_debug and not load_pickle:
        with open("../input/feature_engineering/model097_all/train.pickle", "wb") as f:
            pickle.dump(dataset_train, f)
        with open("../input/feature_engineering/model097_all/val.pickle", "wb") as f:
            pickle.dump(dataset_val, f)

    if not is_debug and load_pickle:
        # NOTE(review): pickle.load executes arbitrary code; these files are
        # presumably only ever produced by the branch above - confirm.
        with open("../input/feature_engineering/model097_all/train.pickle", "rb") as f:
            dataset_train = pickle.load(f)
        with open("../input/feature_engineering/model097_all/val.pickle", "rb") as f:
            dataset_val = pickle.load(f)
        print("loaded!")

    # NOTE(review): a stray `1/0` debugging stop used to sit here and made
    # everything below unreachable; it was removed so training can run.
    dataloader_train = DataLoader(dataset_train, batch_size=params["batch_size"], shuffle=True, num_workers=1)
    dataloader_val = DataLoader(dataset_val, batch_size=params["batch_size"], shuffle=False, num_workers=1)

    # dropout is taken from params rather than from a same-named global.
    model = SAKTModel(n_skill, embed_dim=params["embed_dim"], max_seq=params["max_seq"],
                      dropout=params["dropout"])

    # Biases and LayerNorm parameters are excluded from weight decay.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=params["lr"],
                      weight_decay=0.01,
                      )
    num_train_optimization_steps = int(len(dataloader_train) * epochs)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=params["num_warmup_steps"],
                                                num_training_steps=num_train_optimization_steps)
    criterion = nn.BCEWithLogitsLoss()

    model.to(device)
    criterion.to(device)

    for epoch in range(epochs):
        loss, acc, auc, auc_val = train_epoch(model, dataloader_train, dataloader_val,
                                              optimizer, criterion, scheduler, device)
        print("epoch - {} train_loss - {:.3f} auc - {:.4f} auc-val: {:.4f}".format(epoch, loss, auc, auc_val))

    # Final validation pass; eval mode so dropout is disabled for scoring.
    model.eval()
    preds = []
    labels = []
    with torch.no_grad():
        for item in tqdm(dataloader_val):
            x = item["x"].to(device).long()
            target_id = item["target_id"].to(device).long()
            part = item["part"].to(device).long()
            label = item["label"].to(device).float()
            elapsed_time = item["elapsed_time"].to(device).long()
            duration_previous_content = item["duration_previous_content"].to(device).long()
            prior_question_had_explanation = item["prior_q"].to(device).long()
            user_answer = item["user_answer"].to(device).long()
            rate_diff = item["rate_diff"].to(device).float()
            qq_table_mean = item["qq_table_mean"].to(device).float()
            qq_table_min = item["qq_table_min"].to(device).float()

            output = model(x, target_id, part, elapsed_time,
                           duration_previous_content, prior_question_had_explanation, user_answer,
                           rate_diff, qq_table_mean, qq_table_min)

            # Only the last position of each sequence is scored.
            preds.extend(torch.nn.Sigmoid()(output[:, -1]).view(-1).data.cpu().numpy().tolist())
            labels.extend(label[:, -1].view(-1).data.cpu().numpy().tolist())

    auc_transformer = roc_auc_score(labels, preds)
    print("single transformer: {:.4f}".format(auc_transformer))
    df_oof = pd.DataFrame()
    print(len(dataloader_val))
    print(len(preds))
    df_oof["predict"] = preds
    df_oof["target"] = labels

    df_oof.to_csv(f"{output_dir}/transformers1.csv", index=False)

    if not is_debug:
        mlflow.start_run(experiment_id=10, run_name=os.path.basename(__file__))

        for key, value in params.items():
            mlflow.log_param(key, value)
        mlflow.log_metric("auc_val", auc_transformer)
        mlflow.end_run()

    torch.save(model.state_dict(), f"{output_dir}/transformers.pth")
    del model
    torch.cuda.empty_cache()
    with open(f"{output_dir}/transformer_param.json", "w") as f:
        json.dump(params, f)

    if is_make_feature_factory:
        # feature factory
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"]["DurationPreviousContent"] = DurationPreviousContent(is_partial_fit=True)
        feature_factory_dict["user_id"]["ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_manager = FeatureFactoryManager(feature_factory_dict=feature_factory_dict,
                                                        logger=logger,
                                                        split_num=1,
                                                        model_id="all",
                                                        load_feature=not is_debug,
                                                        save_feature=not is_debug)

        ff_for_transformer = FeatureFactoryForTransformer(column_config=column_config,
                                                          dict_path="../feature_engineering/",
                                                          sequence_length=params["max_seq"],
                                                          logger=logger)
        df = pd.read_pickle("../input/riiid-test-answer-prediction/train_merged.pickle")
        if is_debug:
            df = df.head(10000)
        df = df.sort_values(["user_id", "timestamp"]).reset_index(drop=True)
        feature_factory_manager.fit(df)
        df = feature_factory_manager.all_predict(df)
        # Loggers are detached before the objects are pickled.
        for dicts in feature_factory_manager.feature_factory_dict.values():
            for factory in dicts.values():
                factory.logger = None
        feature_factory_manager.logger = None
        with open(f"{output_dir}/feature_factory_manager.pickle", "wb") as f:
            pickle.dump(feature_factory_manager, f)

        ff_for_transformer.fit(df)
        ff_for_transformer.logger = None
        with open(f"{output_dir}/feature_factory_manager_for_transformer.pickle", "wb") as f:
            pickle.dump(ff_for_transformer, f)
def run(self):
    """
    Called when the process initializes.

    Waits for the bin/<skyline_app>.d log management to finish, then loops
    forever: checks Redis, reports the app as up, resolves (or inserts) this
    host's id in the hosts table, makes sure the check directory exists,
    waits for a metric check file and hands the first one (sorted by name)
    to ``spin_process`` workers.  Workers still alive after 20 seconds are
    terminated and the check file is passed to ``fail_check``.
    """

    # Log management to prevent overwriting
    # Allow the bin/<skyline_app>.d to manage the log
    if os.path.isfile(skyline_app_logwait):
        try:
            logger.info('removing %s' % skyline_app_logwait)
            os.remove(skyline_app_logwait)
        except OSError:
            logger.error('error :: failed to remove %s, continuing' % skyline_app_logwait)

    # Wait at most 5 seconds for the log lock file to disappear.
    now = time()
    log_wait_for = now + 5
    while now < log_wait_for:
        if os.path.isfile(skyline_app_loglock):
            sleep(.1)
            now = time()
        else:
            now = log_wait_for + 1

    logger.info('starting %s run' % skyline_app)
    if os.path.isfile(skyline_app_loglock):
        logger.error('error :: bin/%s.d log management seems to have failed, continuing' % skyline_app)
        try:
            os.remove(skyline_app_loglock)
            logger.info('log lock file removed')
        except OSError:
            logger.error('error :: failed to remove %s, continuing' % skyline_app_loglock)
    else:
        logger.info('bin/%s.d log management done' % skyline_app)

    # See if I am known in the DB, if so, what are my variables
    # self.populate mysql
    # What is my host id in the Skyline panorama DB?
    #   - if not known - INSERT hostname INTO hosts
    # What are the known apps?
    #   - if returned make a dictionary
    # What are the known algorithms?
    #   - if returned make a dictionary

    while True:
        now = time()

        # Make sure Redis is up
        try:
            self.redis_conn.ping()
            if ENABLE_PANORAMA_DEBUG:
                logger.info('debug :: connected to Redis')
        except Exception:
            logger.error('error :: cannot connect to redis at socket path %s' % (
                settings.REDIS_SOCKET_PATH))
            sleep(30)
            # Rebuild the connection before retrying the whole iteration.
            self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
            continue

        # Report app up
        try:
            self.redis_conn.setex(skyline_app, 120, now)
            logger.info('updated Redis key for %s up' % skyline_app)
        except Exception:
            logger.error('error :: failed to update Redis key for %s up' % skyline_app)

        if ENABLE_PANORAMA_DEBUG:
            # Make sure mysql is available
            mysql_down = True
            while mysql_down:
                query = 'SHOW TABLES'
                results = self.mysql_select(query)

                if results:
                    mysql_down = False
                    logger.info('debug :: tested database query - OK')
                else:
                    logger.error('error :: failed to query database')
                    sleep(30)

        if ENABLE_PANORAMA_DEBUG:
            try:
                query = 'SELECT id, test FROM test'
                result = self.mysql_select(query)
                logger.info('debug :: tested mysql SELECT query - OK')
                logger.info('debug :: result: %s' % str(result))
                logger.info('debug :: result[0]: %s' % str(result[0]))
                logger.info('debug :: result[1]: %s' % str(result[1]))
                # Works
                # 2016-06-10 19:07:23 :: 4707 :: result: [(1, u'test1')]
            except Exception:
                # format_exc() returns the traceback text; print_exc()
                # returns None and would log "mysql error - None".
                logger.error(
                    'error :: mysql error - %s' %
                    traceback.format_exc())
                logger.error('error :: failed to SELECT')

        # self.populate the database metadata tables
        # What is my host id in the Skyline panorama DB?
        # NOTE(review): both queries are built by string interpolation;
        # this_host is presumably the local hostname rather than external
        # input - confirm, or use parameters if mysql_select supports them.
        host_id = False
        query = 'select id FROM hosts WHERE host=\'%s\'' % this_host
        results = self.mysql_select(query)
        if results:
            host_id = results[0][0]
            logger.info('host_id: %s' % str(host_id))
        else:
            logger.info('failed to determine host id of %s' % this_host)

        #   - if not known - INSERT hostname INTO host
        if not host_id:
            logger.info('inserting %s into hosts table' % this_host)
            query = 'insert into hosts (host) VALUES (\'%s\')' % this_host
            host_id = self.mysql_insert(query)
            if host_id:
                logger.info('new host_id: %s' % str(host_id))

        if not host_id:
            logger.error(
                'error :: failed to determine populate %s into the hosts table' %
                this_host)
            sleep(30)
            continue

        # Like loop through the panorama dir and see if anyone has left you
        # any work, etc
        # Make sure check_dir exists and has not been removed
        if settings.ENABLE_PANORAMA_DEBUG:
            logger.info('debug :: checking check dir exists - %s' % settings.PANORAMA_CHECK_PATH)
        # os.path.exists() returns False rather than raising, so the result
        # has to be tested for the directory to ever be recreated.
        if not os.path.exists(settings.PANORAMA_CHECK_PATH):
            logger.error('error :: check dir did not exist - %s' % settings.PANORAMA_CHECK_PATH)
            try:
                # 0o755 is the octal mode; int('0755') would be decimal 755.
                os.makedirs(settings.PANORAMA_CHECK_PATH, 0o755)
            except OSError:
                logger.error('error :: failed to create check dir - %s' % settings.PANORAMA_CHECK_PATH)
                logger.info(traceback.format_exc())
                sleep(30)
                continue
            logger.info('check dir created - %s' % settings.PANORAMA_CHECK_PATH)

        """
        Determine if any metric has been added to add
        """
        while True:
            metric_var_files = False
            try:
                metric_var_files = [f for f in listdir(settings.PANORAMA_CHECK_PATH) if isfile(join(settings.PANORAMA_CHECK_PATH, f))]
            except Exception:
                logger.error('error :: failed to list files in check dir')
                logger.info(traceback.format_exc())

            if not metric_var_files:
                logger.info('sleeping 20 no metric check files')
                sleep(20)

            # Discover metric anomalies to insert
            metric_var_files = False
            try:
                metric_var_files = [f for f in listdir(settings.PANORAMA_CHECK_PATH) if isfile(join(settings.PANORAMA_CHECK_PATH, f))]
            except Exception:
                logger.error('error :: failed to list files in check dir')
                logger.info(traceback.format_exc())

            if metric_var_files:
                break

        # Only the first file in sorted order is processed per iteration.
        metric_var_files_sorted = sorted(metric_var_files)
        metric_check_file = '%s/%s' % (settings.PANORAMA_CHECK_PATH, str(metric_var_files_sorted[0]))

        logger.info('assigning anomaly for insertion - %s' % str(metric_var_files_sorted[0]))

        # Spawn processes
        pids = []
        pid_count = 0
        for i in range(1, settings.PANORAMA_PROCESSES + 1):
            try:
                p = Process(target=self.spin_process, args=(i, metric_check_file))
                pids.append(p)
                pid_count += 1
                logger.info('starting %s of %s spin_process/es' % (str(pid_count), str(settings.PANORAMA_PROCESSES)))
                p.start()
            except Exception:
                logger.error('error :: to start spin_process')
                logger.info(traceback.format_exc())
                continue

        # Send wait signal to zombie processes
        # for p in pids:
        #     p.join()
        # Self monitor processes and terminate if any spin_process has run
        # for longer than 20 seconds
        p_starts = time()
        while time() - p_starts <= 20:
            if any(p.is_alive() for p in pids):
                # Just to avoid hogging the CPU
                sleep(.1)
            else:
                # All the processes are done, break now.
                time_to_run = time() - p_starts
                logger.info(
                    '%s :: %s spin_process/es completed in %.2f seconds' % (
                        skyline_app, str(settings.PANORAMA_PROCESSES),
                        time_to_run))
                break
        else:
            # We only enter this if we didn't 'break' above.
            logger.info('%s :: timed out, killing all spin_process processes' % (skyline_app))
            for p in pids:
                p.terminate()
                p.join()

            # The file name is split at the first '.': the part before it is
            # used as the timestamp, the rest (minus '.txt') as the metric
            # name, whose dots become directory separators.
            check_file_name = os.path.basename(str(metric_check_file))
            if settings.ENABLE_PANORAMA_DEBUG:
                logger.info('debug :: check_file_name - %s' % check_file_name)
            check_file_timestamp = check_file_name.split('.', 1)[0]
            if settings.ENABLE_PANORAMA_DEBUG:
                logger.info('debug :: check_file_timestamp - %s' % str(check_file_timestamp))
            check_file_metricname_txt = check_file_name.split('.', 1)[1]
            if settings.ENABLE_PANORAMA_DEBUG:
                logger.info('debug :: check_file_metricname_txt - %s' % check_file_metricname_txt)
            check_file_metricname = check_file_metricname_txt.replace('.txt', '')
            if settings.ENABLE_PANORAMA_DEBUG:
                logger.info('debug :: check_file_metricname - %s' % check_file_metricname)
            check_file_metricname_dir = check_file_metricname.replace('.', '/')
            if settings.ENABLE_PANORAMA_DEBUG:
                logger.info('debug :: check_file_metricname_dir - %s' % check_file_metricname_dir)

            metric_failed_check_dir = '%s/%s/%s' % (failed_checks_dir, check_file_metricname_dir, check_file_timestamp)

            fail_check(skyline_app, metric_failed_check_dir, str(metric_check_file))
def migrate_mappings(old_version, new_version, output, old_root, new_root):
    """Migrate the joined SRG mappings from ``old_version`` to ``new_version``.

    Reads the matcher output under ``<output>/<old>_to_<new>``, carries the
    old ``joined.tsrg`` entries over to their matched names, runs the fix-up
    passes and writes ``joined.tsrg`` into ``new_root``.  Errors reported by
    the passes go to ``migrate_errors.txt`` and, when there are any, new
    class names go to ``new_classes.txt`` in the migration directory.

    Prints a message and returns early when no mapping data could be read.

    :param old_version: version string the existing mappings belong to.
    :param new_version: version string to migrate to.
    :param output: directory holding the per-version and migration data.
    :param old_root: directory containing the old ``joined.tsrg``.
    :param new_root: directory for the new ``joined.tsrg``; created if missing.
    """
    files = [
        'forced.txt', 'class_suggestions.txt', 'field_suggestions.txt',
        'method_suggestions.txt', 'new_unmapped_fixed.txt'
    ]
    #sides = ['client', 'server']
    sides = ['joined']
    mappings = {}

    if not os.path.exists(new_root):
        os.makedirs(new_root)

    #==========================================================================
    #   Read the mapping files from Depigifer, or Magidots.
    #==========================================================================
    migrate_root = os.path.join(output,
                                '%s_to_%s' % (old_version, new_version))
    for side in sides:
        for name in files:
            pig = os.path.join(migrate_root, 'pig/%s_%s' % (side, name))
            magi = os.path.join(migrate_root, '%s_%s' % (side, name))
            # The file under pig/ wins when it exists.
            if os.path.isfile(pig):
                read_match(pig, mappings)
            else:
                read_match(magi, mappings)

    if not mappings:
        print('Failed to read any mapping data!')
        return

    # Class names in the jar with the '.class' suffix stripped, excluding
    # anything from minecraftforge.  The context manager closes the archive
    # even if listing fails.
    with ZipFile(os.path.join(output, '%s/joined_a.jar' % new_version)) as jar:
        known_classes = [
            n[:-6] for n in jar.namelist()
            if n.endswith('.class') and 'minecraftforge' not in n
        ]

    #==========================================================================
    #   Read the old srg file, and the new obf to notch mappings.
    #   Generates the initial new SRG from the matcher output and this data.
    #==========================================================================
    from SRGSorter import dump_tsrg, load_srg_file
    old_srg = load_srg_file(os.path.join(old_root, 'joined.tsrg'))
    o_to_n = load_srg_file(
        os.path.join(output, '%s/joined_o_to_n.tsrg' % new_version))

    srg = {'PK:': {}, 'CL:': {}, 'FD:': {}, 'MD:': {}}
    srg['PK:'] = old_srg['PK:']
    # Carry over every old entry whose key has a match, under the new key.
    for pt in ['CL:', 'FD:', 'MD:']:
        for k, v in old_srg[pt].items():
            if k in mappings:
                srg[pt][mappings[k]] = v

    rg_idx_max = find_max_rg(old_srg, old_root)
    new_classes = {}
    # Entries that do not need to follow SRG naming, so we don't 'unfix' them below.
    obf_whitelist = []
    with open(os.path.join(output, '%s/joined_a_meta.json' % new_version),
              'r') as meta_f:
        meta = json.load(meta_f)
    # Remove Forge's annotations, I should filter this in MappingToy..
    meta = {k: v for k, v in meta.items() if 'minecraftforge' not in k}

    # The error log is only needed by the passes below; closing it here
    # guarantees its contents are flushed before the function returns.
    with open(os.path.join(migrate_root, 'migrate_errors.txt'), 'wb') as err_f:
        add_new_classes(o_to_n, srg, new_classes, known_classes)
        fix_enums(obf_whitelist, srg, meta)
        fix_method_names(obf_whitelist, srg, meta)
        rg_idx_max = fix_override_methods(rg_idx_max, meta, srg, err_f,
                                          obf_whitelist, o_to_n)
        rg_idx_max = fix_unobfed_names(rg_idx_max, known_classes, new_classes,
                                       srg, o_to_n, obf_whitelist)
        fix_inner_class_shuffle(srg)
        rg_idx_max = update_constructors(rg_idx_max, old_root, new_root, srg,
                                         meta)
        rg_idx_max = create_new_entries(rg_idx_max, srg, o_to_n, meta, err_f)

    if new_classes:
        with open(os.path.join(migrate_root, 'new_classes.txt'), 'wb') as f:
            for cls in sorted(new_classes.values()):
                if 'C_' not in cls:
                    continue
                if '$' not in cls:
                    f.write(('%s\n' % cls).encode())
                else:
                    # Inner classes are listed only when the part after the
                    # last '$' is not a plain integer.
                    try:
                        int(cls.rsplit('$', 1)[1])
                    except ValueError:
                        f.write(('%s\n' % cls).encode())

    dump_tsrg(srg, os.path.join(new_root, 'joined.tsrg'))