def setup(self):
    self.xform_writer = csv.writer(open(XFORM_FILENAME, 'w+b'))
    self.xform_writer.writerow(XFORM_HEADER)
    self.case_writer = csv.writer(open(CASE_FILE_NAME, 'w+b'))
    self.case_writer.writerow(CASE_HEADER)
    self.forms_accessor = FormAccessors(self.domain)
    self.case_accessors = CaseAccessors(self.domain)
def track_users(ids):
    """ Track users by id, writing to jsons folder. """
    print 'tracking', len(ids), 'users'
    outf = io.open(make_output_file(), mode='wt', encoding='utf8')
    count = 0
    for tweet in twutil.collect.track_user_ids(ids):
        try:
            outf.write(json.dumps(tweet, ensure_ascii=False, encoding='utf8'))
            outf.write(u'\n')
            outf.flush()
            count += 1
            if count > 100000:
                outf.close()
                outf = io.open(make_output_file(), mode='wt', encoding='utf8')
                count = 0
        except:
            e = sys.exc_info()
            print 'skipping error', e[0]
            print traceback.format_exc()
            twutil.collect.reinit()
            outf.close()
            track_users(ids)
    outf.close()
def main(dir):
    h = MyHTMLParser()
    getcharset_pattern = r'(?i)content=.*?charset=(.*?)["\']'
    for currentpath, folders, files in os.walk(dir):
        for f in files:
            if f.endswith('.html'):
                filename = os.path.join(currentpath, f)
                fr = open(filename, 'r')
                fc = fr.read()
                finds = re.findall(getcharset_pattern, fc)
                if len(finds) > 0:
                    htmlCharset = finds[0]
                else:
                    htmlCharset = ''
                if htmlCharset == 'gb2312':
                    fc = re.sub(getcharset_pattern,
                                'content="text/html; charset=utf-8"', fc)
                    shutil.move(filename, filename + "~")
                    with io.open(filename, 'w', encoding='utf-8') as fw:
                        fw.write(fc.decode('gbk'))
                elif htmlCharset == 'utf-8':
                    pass
                elif htmlCharset == 'iso-8859-1':
                    fc = io.open(filename, 'r', encoding='iso-8859-1').read()
                    fc = re.sub(getcharset_pattern,
                                'content="text/html; charset=utf-8"', fc)
                    shutil.move(filename, filename + "~")
                    with io.open(filename, 'w', encoding='utf-8') as fw:
                        fw.write(h.unescape(fc))
def stage_data(self):
    # Write script
    remote_script = os.path.join(self.config.solver_dir, "process_synthetics.py")
    with io.open(utilities.get_script_file("process_synthetics"), "r") as fh:
        script_string = fh.readlines()
    script_string.insert(0, "#!{}\n".format(self.config.python_exec))
    self.remote_machine.write_file(remote_script, "".join(script_string))

    # Copy over pickle file.
    info = {"lowpass": self.iteration_info["lowpass"],
            "highpass": self.iteration_info["highpass"],
            "event_list": self.event_info.keys()}
    tmp_pickle = "tmp_pickle.p"
    remote_pickle = os.path.join(self.config.solver_dir, "info.p")
    with io.open(tmp_pickle, "wb") as fh:
        cPickle.dump(info, fh)
    self.remote_machine.put_file(tmp_pickle, remote_pickle)
    os.remove(tmp_pickle)

    # Copy sbatch file.
    remote_sbatch = os.path.join(self.config.solver_dir, "process_synthetics.sbatch")
    with io.open(utilities.get_template_file("sbatch"), "r") as fh:
        sbatch_string = fh.read().format(**self.sbatch_dict)
    self.remote_machine.write_file(remote_sbatch, sbatch_string)
def newoverwrite(s, filename, verbose=False):
    """Useful for not forcing re-compiles and thus playing nicely with the build
    system. This is accomplished by not writing the file if the existing contents
    are exactly the same as what would be written out.

    Parameters
    ----------
    s : str
        String contents of the file to possibly write.
    filename : str
        Path to file.
    verbose : bool, optional
        Prints an extra message when the file is written.

    """
    if os.path.isfile(filename):
        with io.open(filename, 'rb') as f:
            old = f.read()
        if s == old:
            return
    else:
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
    with io.open(filename, 'wb') as f:
        f.write(s.encode())
    if verbose:
        print(" wrote " + filename)
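# A minimal usage sketch for newoverwrite, with hypothetical paths and content:
# the target file is only touched when the generated text actually changes, so
# downstream build tools do not see a fresh mtime on every run.
generated = "int answer = 42;\n"
newoverwrite(generated, "build/generated/answer.h", verbose=True)  # writes the file
newoverwrite(generated, "build/generated/answer.h", verbose=True)  # no-op, same content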
def test_can_report_when_system_locale_is_ascii(monkeypatch):
    import io
    read, write = os.pipe()
    read = io.open(read, 'r', encoding='ascii')
    write = io.open(write, 'w', encoding='ascii')
    monkeypatch.setattr(sys, 'stdout', write)
    reporting.default(u"☃")
def test_freezer_nested(self): cfg = self.given_a_file_in_test_dir('buildout.cfg', '''\ [buildout] extends= http://example.com/buildout.cfg ''') expected1 = '''\ [buildout] extends= external_buildouts/example.com_buildout.cfg ''' expected2 = '''\ # File managed by freeze command from buildout_helpers # Changes will be overwritten # ETAG: None # ORIGIN: http://example.com/buildout.cfg [buildout] extends= example.com_buildout2.cfg ''' with requests_mock.mock() as m: m.get('http://example.com/buildout.cfg', text='''[buildout] extends= buildout2.cfg ''') m.get('http://example.com/buildout2.cfg', text='''[buildout]''') freeze(Config(cfg)) abs_dir, _ = os.path.split(cfg) new_file_contents = open(os.path.join(abs_dir, 'external_buildouts', 'example.com_buildout.cfg'), 'r').read() old_file_contents = open(cfg, 'r').read() self.assertEqual(old_file_contents, expected1) self.assertEqual(new_file_contents, expected2)
def stemminig(self, doc):
    doc1 = doc
    suffFileDirec = "suffixes.txt"
    try:
        suffixFile = io.open(suffFileDirec, "r", encoding='utf-8').read()
    except UnicodeDecodeError:
        suffixFile = io.open(suffFileDirec, "r", encoding='latin-1').read()
    suffixList = suffixFile.split()
    doc.sort()
    wordList = doc
    i = 0
    while (i < len(wordList) - 1):
        j = i + 1
        while (j < len(wordList)):
            benchWord = wordList[i]
            checkWord = wordList[j]
            benchCharList = list(benchWord)
            checkCharList = list(checkWord)
            if (checkWord.startswith(benchWord)):
                for suffix in suffixList:
                    if checkWord.endswith(suffix):
                        print(benchWord + " = " + checkWord)
                        wordList[j] = benchWord
                        break
            j += 1
        i += 1
def test_install_pre_commit(tmpdir_factory):
    path = git_dir(tmpdir_factory)
    runner = Runner(path)
    ret = install(runner)
    assert ret == 0
    assert os.path.exists(runner.pre_commit_path)
    pre_commit_contents = io.open(runner.pre_commit_path).read()
    pre_commit_script = resource_filename('hook-tmpl')
    expected_contents = io.open(pre_commit_script).read().format(
        sys_executable=sys.executable,
        hook_type='pre-commit',
        pre_push='',
    )
    assert pre_commit_contents == expected_contents
    assert os.access(runner.pre_commit_path, os.X_OK)

    ret = install(runner, hook_type='pre-push')
    assert ret == 0
    assert os.path.exists(runner.pre_push_path)
    pre_push_contents = io.open(runner.pre_push_path).read()
    pre_push_tmpl = resource_filename('pre-push-tmpl')
    pre_push_template_contents = io.open(pre_push_tmpl).read()
    expected_contents = io.open(pre_commit_script).read().format(
        sys_executable=sys.executable,
        hook_type='pre-push',
        pre_push=pre_push_template_contents,
    )
    assert pre_push_contents == expected_contents
def export(self, ole, export_path):
    if not self.safe_makedir(export_path):
        return

    for stream in ole.listdir(streams=True, storages=True):
        try:
            stream_content = ole.openstream(stream).read()
        except Exception as e:
            self.log('warning', "Unable to open stream {0}: {1}".format(string_clean('/'.join(stream)), e))
            continue

        store_path = os.path.join(export_path, string_clean('-'.join(stream)))

        flash_objects = self.detect_flash(ole.openstream(stream).read())
        if len(flash_objects) > 0:
            self.log('info', "Saving Flash objects...")
            count = 1
            for header, flash_object in flash_objects:
                self.print_swf_header_info(header)
                save_path = '{0}-FLASH-Decompressed{1}'.format(store_path, count)
                with open(save_path, 'wb') as flash_out:
                    flash_out.write(flash_object)
                self.log('item', "Saved Decompressed Flash File to {0}".format(save_path))
                count += 1

        with open(store_path, 'wb') as out:
            out.write(stream_content)
        self.log('info', "Saved stream to {0}".format(store_path))

    ole.close()
def decrypt(key, ct_path="ciphertext.enc", savePT="plaintext.dec"):
    with open(ct_path, 'rb') as input:
        u = dill.load(input)
        ciphertext = dill.load(input)
    v = modexp(u, key.x, key.p)
    uv = str(u) + str(v)
    # Symmetric key used to decrypt the ciphertext with Blowfish
    k = SHA224.new(uv.encode('utf-8')).hexdigest().encode('utf-8')
    print("K: " + str(k))
    bs = Blowfish.block_size
    iv = ciphertext[:bs]
    # Remove IV
    ciphertext = ciphertext[bs:]
    print("CT-LEN:" + str(len(ciphertext)))
    cipher = Blowfish.new(k, Blowfish.MODE_CBC, iv)
    plaintext = cipher.decrypt(ciphertext)
    # Remove padding
    last_byte = plaintext[-1]
    plaintext = plaintext[:-(last_byte if type(last_byte) is int else ord(last_byte))]
    # Write the decrypted plaintext to file
    # plaintext = plaintext.decode(plaintext, key.iNumBits)
    io.open(savePT, "wb").write(plaintext)
    return plaintext
def merge_data_from_crawl():
    # To refactor: far too long
    category_file = sorted(os.listdir(os.getcwd() + "/data/data_from_crawl"))
    movies_category = [file_name for file_name in category_file
                       if not any(c.isdigit() for c in file_name)]
    for category in movies_category:
        if category.endswith('.json'):
            category = category[:-5]
        if category.endswith('die'):
            movies_with_part = [file_name for file_name in category_file
                                if (len(file_name) < 30 and category in file_name)]
        else:
            movies_with_part = [file_name for file_name in category_file
                                if category in file_name]
        if len(movies_with_part) > 1:
            all_part_data = {}
            for movies_part in movies_with_part:
                with open('data/data_from_crawl/' + movies_part) as part:
                    data = json.load(part)
                    for movie_title, movie_data in data.iteritems():
                        all_part_data[movie_title] = movie_data
            with io.open("data/data_to_use/" + movies_with_part[0], "w+", encoding='utf8') as all_part:
                output = json.dumps(all_part_data, ensure_ascii=False, encoding='utf8')
                all_part.write(unicode(output))
        if len(movies_with_part) == 1 and movies_with_part[0][0] != '.':
            with open('data/data_from_crawl/' + movies_with_part[0]) as part:
                data = json.load(part)
            with io.open("data/data_to_use/" + movies_with_part[0], "w+", encoding='utf8') as all_part:
                output = json.dumps(data, ensure_ascii=False, encoding='utf8')
                all_part.write(unicode(output))
def __init__(self, sensor_name, bus, addr=_DEFAULT_ADDRESS):
    '''Initializes the sensor with some default values.

    bus: The SMBus descriptor on which this sensor is attached.
    addr: The I2C bus address (default is 0x40).
    '''
    SensorBase.__init__(self, sensor_name=sensor_name)
    self._ior = io.open('/dev/i2c-' + str(bus), 'rb', buffering=0)
    self._iow = io.open('/dev/i2c-' + str(bus), 'wb', buffering=0)
    fcntl.ioctl(self._ior, _I2C_SLAVE, addr)
    fcntl.ioctl(self._iow, _I2C_SLAVE, addr)
    self._resolution = RESOLUTION_12BITS
    self._onchip_heater = _DISABLE_ONCHIP_HEATER
    self._otp_reload = _DISABLE_OTP_RELOAD
    self._use_temperature = True
    self._reset()
    self._reconfigure()
def edit_changes(file, data, msg="Update to version [% VERSION_DEB %]"):
    """ Prepend the most simplistic but syntactically correct debian changelog entry.
        No changelog body text. If an identical entry (except for the timestamp)
        is already there, we just update the timestamp.
    """
    entry_fmt = """-------------------------------------------------------------------
[% DATE_RPM %] - [% MAINTAINER_EMAIL %]

- """ + msg + """

"""
    entry = subst_variables(entry_fmt, data)
    txt = ''
    if os.path.isfile(file):
        txt = open(file, encoding="latin-1").read()
        txt2 = re.sub(r'^.*\n', '', txt)
        txt2 = re.sub(r'^.*\n', '', txt2)   # cut away first 2 lines.
        ent2 = re.sub(r'^.*\n', '', entry)
        ent2 = re.sub(r'^.*\n', '', ent2)   # cut away first 2 lines.
        if txt2.startswith(ent2):
            txt = txt2[len(ent2):]
    txt = entry + txt
    out = open(file, "w", encoding="latin-1")
    out.write(txt)
    out.close()
def batch_sort(input, output, buffer_size=32000, tempdirs=None): if tempdirs is None: tempdirs = [] if not tempdirs: tempdirs.append(gettempdir()) chunks = [] try: with io.open(input,mode='r',buffering=64*1024, encoding='utf8') as input_file: print(u"Opened input {0}".format(input)) input_iterator = iter(input_file) for tempdir in cycle(tempdirs): current_chunk = list(islice(input_iterator,buffer_size)) if not current_chunk: break current_chunk.sort(key=keyfunc) fname = '%06i' % len(chunks) output_chunk = io.open(os.path.join(tempdir,fname),mode='w+',buffering=64*1024, encoding='utf8') print(u"Writing tempfile {0}/{1}".format(tempdir, fname)) chunks.append(output_chunk) output_chunk.writelines(current_chunk) output_chunk.flush() output_chunk.seek(0) print(u"Writing outfile {0}".format(output)) with io.open(output,mode='w',buffering=64*1024, encoding='utf8') as output_file: output_file.writelines(merge(*chunks)) finally: for chunk in chunks: try: chunk.close() os.remove(chunk.name) except Exception: print(u"Exception when closing chunk") pass
def main():
    complrepldict = OrderedDict()

    # STEP 1.1: add all multi-word expressions from a dictionary of the language in question ('s ochtends --> 's_ochtends)
    alldutchwords = [line.strip() for line in open(nlwoordenbestand, mode="r", encoding="utf-8")]
    complrepldict.update(replacespaces(alldutchwords))

    # STEP 1.2: add all own rules (column 0 and 1 from user-generated TAB file)
    complrepldict.update(replaceown(ownreplacements, 0, 1))

    # STEP 1.3: save output to general replacement list
    with open(outputbestand, mode="w", encoding="utf-8") as fo:
        fo.write(unicode(json.dumps(complrepldict, ensure_ascii=False)))
    print "\nFinished writing", outputbestand
    print "YOU'RE READY WITH THE GENERAL REPLACEMENT LIST!\n"

    # STEP 2.1: Add rules that only have to be applied if a replacement according to a rule above has already taken
    # place. Example: Jan Smit is replaced by jan_smit (1.2); subsequent mentions of Smit (without Jan) should then
    # also be replaced by jan_smit.
    complrepldict2 = OrderedDict()
    complrepldict2.update(replaceownlistoutput(ownreplacements, 2, 1))

    # STEP 2.2: save output to second-mention-output file
    with open(outputbestand2, mode="w", encoding="utf-8") as fo:
        fo.write(unicode(json.dumps(complrepldict2, ensure_ascii=False)))
    print "\nFinished writing", outputbestand2
    print "YOU'RE READY WITH THE REPLACEMENT LIST FOR SECOND MENTIONS (e.g., LAST NAMES/FULL NAMES)!\n"

    # STEP 3.1: Add rules for replacements that only have to take place if some other expression occurs in the article
    complrepldict3 = OrderedDict()
    complrepldict3.update(replaceownindien(ownreplacements, 2, 1, 3))
    with open(outputbestand3, mode="w", encoding="utf-8") as fo:
        fo.write(unicode(json.dumps(complrepldict3, ensure_ascii=False)))
    print "\nFinished writing", outputbestand3
    print "YOU'RE READY WITH THE REPLACEMENT LIST FOR REPLACEMENTS IN CASE OF INDICATORS BEING PRESENT!\n"
def edit_debchangelog(file, data, msg="Update to version [% VERSION_DEB %]"):
    """ Prepend the most simplistic but syntactically correct debian changelog entry.
        No changelog body text. If an identical entry (except for the timestamp)
        is already there, we just update the timestamp.
    """
    entry_fmt = """[% PACKNAME %] ([% VERSION_DEB %]-[% BUILDRELEASE_DEB %]) stable; urgency=low

  * """ + msg + """

 -- [% MAINTAINER_NAME %] <[% MAINTAINER_EMAIL %]>  """
    entry = subst_variables(entry_fmt, data)
    txt = ''
    if os.path.isfile(file):
        txt = open(file, encoding="latin-1").read()
        if txt.startswith(entry):
            txt = txt[len(entry):]
            txt = re.sub(r'^.*', '', txt)               # zap the timestamp
            txt = re.sub(r'^[\s*\n]*', '', txt, re.M)   # zap leading newlines and whitespaces
    entry += data['DATE_DEB'] + "\n\n"
    txt = entry + txt
    out = open(file, "w", encoding="latin-1")
    out.write(txt)
    out.close()
def process_ebuild(ebuild, ops, arch_status=None, verbose=0, quiet=0,
                   dry_run=False, style='color-inline', manifest=False):
    """Process |ops| for |ebuild|

    Args:
      ebuild: The ebuild file to operate on & update in place
      ops: An iterable of operations (Op objects) to perform on |ebuild|
      arch_status: A dict mapping default arches to their stability; see the
          load_profile_data function for more details
      verbose: Be verbose; show various status messages
      quiet: Be quiet; only show errors
      dry_run: Do not make any changes to |ebuild|; show what would be done
      style: The diff style

    Returns:
      Whether any updates were processed
    """
    with io.open(ebuild, encoding='utf8') as f:
        updated, content = process_content(
            ebuild, f, ops, arch_status=arch_status, verbose=verbose,
            quiet=quiet, style=style)
        if updated and not dry_run:
            with io.open(ebuild, 'w', encoding='utf8') as f:
                f.writelines(content)
            if manifest:
                subprocess.check_call(['ebuild', ebuild, 'manifest'])
    return updated
def __init__(self):
    super(StreamingHttpServer, self).__init__(
        ('', HTTP_PORT),
        StreamingHttpHandler)
    with io.open('index.html', 'r') as f:
        self.index_template = f.read()
    with io.open('jsmpg.js', 'r') as f:
        self.jsmpg_content = f.read()
def testBasicIO(self):
    for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
        for enc in "ascii", "latin1", "utf8":  # , "utf-16-be", "utf-16-le":
            f = io.open(test_support.TESTFN, "w+", encoding=enc)
            f._CHUNK_SIZE = chunksize
            self.assertEquals(f.write(u"abc"), 3)
            f.close()
            f = io.open(test_support.TESTFN, "r+", encoding=enc)
            f._CHUNK_SIZE = chunksize
            self.assertEquals(f.tell(), 0)
            self.assertEquals(f.read(), u"abc")
            cookie = f.tell()
            self.assertEquals(f.seek(0), 0)
            self.assertEquals(f.read(2), u"ab")
            self.assertEquals(f.read(1), u"c")
            self.assertEquals(f.read(1), u"")
            self.assertEquals(f.read(), u"")
            self.assertEquals(f.tell(), cookie)
            self.assertEquals(f.seek(0), 0)
            self.assertEquals(f.seek(0, 2), cookie)
            self.assertEquals(f.write(u"def"), 3)
            self.assertEquals(f.seek(cookie), cookie)
            self.assertEquals(f.read(), u"def")
            if enc.startswith("utf"):
                self.multi_line_test(f, enc)
            f.close()
def write_manifest(self):
    with open(os.path.join("meta", "def.json"), "r") as tmpl:
        def_json = os.path.join(self.build_dir, "def.json")
        with open(def_json, "wt", encoding="utf-8", newline="\n") as f:
            f.write("\ufeff")
            f.write(re.sub(r"\{(?=[^\w\{])|(?<=[^\w\}])\}", r"\g<0>\g<0>",
                           tmpl.read()).format(**self.config))
def execute_nb(src, dst, allow_errors=False, timeout=1000, kernel_name=''):
    """
    Execute notebook in `src` and write the output to `dst`.

    Parameters
    ----------
    src, dst: str
        path to notebook
    allow_errors: bool
    timeout: int
    kernel_name: str
        defaults to value set in notebook metadata

    Returns
    -------
    dst: str
    """
    import nbformat
    from nbconvert.preprocessors import ExecutePreprocessor

    with io.open(src, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    ep = ExecutePreprocessor(allow_errors=allow_errors,
                             timeout=timeout,
                             kernel_name=kernel_name)
    ep.preprocess(nb, resources={})

    with io.open(dst, 'wt', encoding='utf-8') as f:
        nbformat.write(nb, f)
    return dst
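# A hypothetical call of execute_nb, assuming a notebook named "analysis.ipynb"
# exists next to this script: the notebook is run top to bottom and the executed
# copy is written alongside it.
executed = execute_nb('analysis.ipynb', 'analysis.executed.ipynb', timeout=600)
print(executed)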
def test_2to3_user_mode(self, test_env): settings = dict( name='foo', packages=['foo'], use_2to3=True, version='0.0', ) dist = Distribution(settings) dist.script_name = 'setup.py' cmd = develop(dist) cmd.user = 1 cmd.ensure_finalized() cmd.install_dir = site.USER_SITE cmd.user = 1 with contexts.quiet(): cmd.run() # let's see if we got our egg link at the right place content = os.listdir(site.USER_SITE) content.sort() assert content == ['easy-install.pth', 'foo.egg-link'] # Check that we are using the right code. fn = os.path.join(site.USER_SITE, 'foo.egg-link') with io.open(fn) as egg_link_file: path = egg_link_file.read().split()[0].strip() fn = os.path.join(path, 'foo', '__init__.py') with io.open(fn) as init_file: init = init_file.read().strip() expected = 'print("foo")' if six.PY3 else 'print "foo"' assert init == expected
def save_data_4_nn_k_words(dataset, path2OutDir, k_words=-1, tf_idf_vectorizer=None, data_name="cnn"):
    if os.path.isdir(path2OutDir) is False:
        os.makedirs(path2OutDir)
    if os.path.isdir(path2OutDir + "/content") is False:
        os.makedirs(path2OutDir + "/content")
    if os.path.isdir(path2OutDir + "/summary") is False:
        os.makedirs(path2OutDir + "/summary")

    fo_content = io.open(path2OutDir + "/content/{0}words_{1}line.content".format(k_words, len(dataset)),
                         "w", encoding='utf8')
    fo_sum = io.open(path2OutDir + "/summary/{0}words_{1}line.summary".format(k_words, len(dataset)),
                     "w", encoding='utf8')

    if k_words != -1:
        if tf_idf_vectorizer is None:
            with open("exsum/tf_idf_vectorizer_100_01.pickle", mode="rb") as f:
                tf_idf_vectorizer = pickle.load(f)
        print "Saving ", k_words, " sent ..."
        progress_bar = ProgressBar(len(dataset))
        for filename, content, highlights in dataset:
            selected_sents = select_k_words(content, tf_idf_vectorizer, k_words)
            highlights = filter_sent(highlights)
            fo_content.write(" ".join(selected_sents).replace("\n", " ") + u"\n")
            fo_sum.write(" ".join(highlights).replace("\n", " ") + u"\n")
            progress_bar.Increment()
    else:
        print "Saving all sent ..."
        progress_bar = ProgressBar(len(dataset))
        for filename, content, highlights in dataset:
            highlights = filter_sent(highlights)
            fo_content.write(" ".join(content).replace("\n", " ") + "\n")
            fo_sum.write(" ".join(highlights).replace("\n", " ") + "\n")
            progress_bar.Increment()

    fo_content.close()
    fo_sum.close()
def process_output_file(output_dict, output_filename):
    import json
    import datetime
    import io
    import sys
    global total_processed
    global first_sleep
    if 'errors' not in output_dict:
        if first_sleep:
            with io.open(output_filename, 'w', encoding='utf-8') as f:
                first_sleep = False
                for tweet in output_dict:
                    f.write(unicode(json.dumps(tweet, ensure_ascii=False)))
                    f.write(u"\n")
                    total_processed += 1
        else:
            with io.open(output_filename, 'a', encoding='utf-8') as f:
                for tweet in output_dict:
                    f.write(unicode(json.dumps(tweet, ensure_ascii=False)))
                    f.write(u"\n")
                    total_processed += 1
        timenow = datetime.datetime.today().strftime("%H:%M:%S")
        msg = "%s processed at %s, rows %d" % (output_filename, timenow, total_processed)
        print msg
        logging.info(msg)
        sys.stdout.flush()
    output_dict = []
def test_main(self): encoding = sys.getfilesystemencoding() # sys.stdout = StringIO() sys.stdout = io.BytesIO() sys.stdin = io.BytesIO(b'hello world') sys.stdin.buffer = sys.stdin _main([sys.argv[0], '-t', 'zh-CN']) self.assertEqual(u'你好世界\n'.encode(encoding), sys.stdout.getvalue()) sys.stdout = io.BytesIO() sys.stdin = io.BytesIO(u'你好'.encode(encoding)) sys.stdin.buffer = sys.stdin _main([sys.argv[0], '-t', 'en']) self.assertEqual(u'Hello\n'.encode(encoding), sys.stdout.getvalue()) sys.stdout = io.BytesIO() sys.stdin = io.BytesIO(b'hello world') sys.stdin.buffer = sys.stdin _main([sys.argv[0], '-t', 'zh-CN', '-o', 'utf-8']) self.assertEqual(u'你好世界\n'.encode('utf-8'), sys.stdout.getvalue()) sys.stdout = io.BytesIO() sys.stdin = io.BytesIO(u'你好'.encode('utf-8')) sys.stdin.buffer = sys.stdin _main([sys.argv[0], '-t', 'en', '-i', 'utf-8']) self.assertEqual(u'Hello\n'.encode(encoding), sys.stdout.getvalue()) sys.stdout = io.BytesIO() with open('for_test.tmp', 'w') as f: f.write('hello world') _main([sys.argv[0], '-t', 'zh-CN', f.name]) self.assertEqual(u'你好世界\n'.encode(encoding), sys.stdout.getvalue()) sys.stdout = io.BytesIO() with open('for_test.tmp', 'w') as f: f.write('hello world') _main([sys.argv[0], '-t', 'zh-CN', '-o', 'utf-8', f.name]) self.assertEqual(u'你好世界\n'.encode('utf-8'), sys.stdout.getvalue()) sys.stdout = io.BytesIO() with io.open('for_test.tmp', 'w', encoding=encoding) as f: f.write(u'你好') _main([sys.argv[0], '-t', 'en', f.name]) self.assertEqual(u'Hello\n'.encode(encoding), sys.stdout.getvalue()) sys.stdout = io.BytesIO() with io.open('for_test.tmp', 'w', encoding='utf-8') as f: f.write(u'你好') _main([sys.argv[0], '-t', 'en', '-i', 'utf-8', f.name]) self.assertEqual(u'Hello\n'.encode(encoding), sys.stdout.getvalue()) sys.stdout = io.BytesIO() with io.open('for_test.tmp', 'w', encoding='utf-8') as f: f.write(u'你好') with io.open('for_test_2.tmp', 'w', encoding='utf-8') as f2: f2.write(u'世界') _main([sys.argv[0], '-t', 'en', '-i', 'utf-8', f.name, f2.name]) self.assertEqual(u'Hello\nWorld\n'.encode(encoding), sys.stdout.getvalue())
def _get_redirects(self): """Determine which i/o streams to attach.""" input_streams, output_streams = [], [] # explicit redirects # input redirects infile_params = self.get('input').split(u':') if infile_params[0].upper() in (u'STDIO', u'STDIN'): if u'RAW' in (_x.upper() for _x in infile_params): input_streams.append(stdin.buffer) else: input_streams.append(stdin) else: if len(infile_params) > 1 and infile_params[0].upper() == u'FILE': infile = infile_params[1] else: infile = infile_params[0] if infile: try: input_streams.append(io.open(infile, 'rb')) except EnvironmentError as e: logging.warning(u'Could not open input file %s: %s', infile, e.strerror) # output redirects outfile_params = self.get('output').split(u':') if outfile_params[0].upper() in (u'STDIO', u'STDOUT'): if u'RAW' in (_x.upper() for _x in outfile_params): output_streams.append(stdout.buffer) else: output_streams.append(stdout) else: if len(outfile_params) > 1 and outfile_params[0].upper() == u'FILE': outfile_params = outfile_params[1:] outfile = outfile_params[0] append = len(outfile_params) > 1 and outfile_params[1].lower() == u'append' if outfile: try: output_streams.append(io.open(outfile, 'ab' if append else 'wb')) except EnvironmentError as e: logging.warning(u'Could not open output file %s: %s', outfile, e.strerror) # implicit stdio redirects # add stdio if redirected or no interface if stdin not in input_streams and stdin.buffer not in input_streams: if IS_CONSOLE_APP and not stdin.isatty(): # redirected on console; use bytes stream input_streams.append(stdin.buffer) elif IS_CONSOLE_APP and not self.interface: # no interface & on console; use unicode stream input_streams.append(stdin) # redirect output as well if input is redirected, but not the other way around # this is because (1) GW-BASIC does this from the DOS prompt # (2) otherwise we don't see anything - we quit after input closes # isatty is also false if we run as a GUI exe, so check that here if stdout not in output_streams and stdout.buffer not in output_streams: if IS_CONSOLE_APP and (not stdout.isatty() or not stdin.isatty()): output_streams.append(stdout.buffer) elif IS_CONSOLE_APP and not self.interface: output_streams.append(stdout) return { 'output_streams': output_streams, 'input_streams': input_streams, }
def file(self, opts, filename, encoding):
    """Return a context manager for writing to.

    If you set encoding to "binary" or False, the file is opened in binary
    mode and you should encode the data you write yourself.

    """
    if not filename or filename == '-':
        filename, mode = sys.stdout.fileno(), 'w'
    else:
        if filename not in self._seen_filenames:
            self._seen_filenames.add(filename)
            if opts.backup_suffix and os.path.exists(filename):
                shutil.copy(filename, filename + opts.backup_suffix)
            mode = 'w'
        else:
            mode = 'a'
    if encoding in (False, "binary"):
        f = io.open(filename, mode + 'b')
    else:
        f = io.open(filename, mode, encoding=encoding)
    try:
        yield f
    finally:
        f.close()
def do_apply(self):
    if not self.netconf.locked("dhcp") or self.netconf.ip4_changed:
        if self.netconf.ip4_changed:
            self.do_remove()

        dhcp_config, existing_subnet = self._read_dhcp_config()
        subnet = self._generate_subnet_config()

        # if subnet != self.existing_subnet:
        f = open(DHCP_CONFIG_FILE, "w")
        f.write(dhcp_config)
        f.write(subnet)
        f.close()

        cmd = [have("dhcpd3") or have("dhcpd"), "-pf",
               "/var/run/dhcpd-server/dhcp.pan1.pid", "pan1"]
        p = Popen(cmd)
        ret = p.wait()

        if ret == 0:
            dprint("dhcpd started correctly")
            f = open("/var/run/dhcp-server/dhcpd.pan1.pid", "r")
            self.pid = int(f.read())
            f.close()
            dprint("pid", self.pid)
            self.netconf.lock("dhcp")
        else:
            raise Exception("dhcpd failed to start. Check the system log for errors")
def test_close_flushes(self):
    f = io.open(test_support.TESTFN, "wb")
    f.write(b"xxx")
    f.close()
    f = io.open(test_support.TESTFN, "rb")
    self.assertEqual(f.read(), b"xxx")
    f.close()
print("You could try running under WSL.", file=sys.stderr) sys.exit(1) def dirty(): for extra in ([], ['--cached'],): cmd = ['git', 'diff-index', '--quiet', 'HEAD'] + extra try: check_call(cmd) except CalledProcessError as e: if e.returncode == 1: return '.dirty' else: raise return '' with open('README.md', 'r', encoding='utf-8') as fh: long_description = [] for line in fh: if not line.startswith('[PyPI]') and not line.startswith('`pip'): long_description.append(line) long_description = ''.join(long_description) if exists('PKG-INFO'): with open('PKG-INFO', 'r', encoding='utf-8') as fh: for line in fh: if line.startswith('Version: '): version = line.strip().split()[1] break else: version = datetime.utcnow().strftime('%Y.%m.%d') env_version = os.environ.get('ACCELERATOR_BUILD_VERSION')
def from_indexpath(cls, indexpath):
    with io.open(indexpath, 'rb') as file:
        return pickle.load(file)
def write_pattern_file(template_rendered):
    with io.open(os.path.join(DIR, 'pattern.py'), mode='w', encoding='utf-8') as fh:
        fh.write(template_rendered)
def read_template():
    with io.open(os.path.join(DIR, 'pattern_template.py'), mode='r', encoding='utf-8') as fh:
        return fh.read()
import folium
import pandas
import io

data = pandas.read_csv("Volcanoes.txt")
data_json = io.open('world.json', 'r', encoding='utf-8-sig').read()

map = folium.Map(location=[38.2, -99.1], zoom_start=6, tiles="Mapbox Bright")

fgv = folium.FeatureGroup(name="Volcanoes")

lat = list(data['LAT'])
lon = list(data['LON'])
elev = list(data['ELEV'])


def color_producer(elevation):
    if elevation < 1000:
        return 'green'
    elif 1000 <= elevation < 3000:
        return 'orange'
    else:
        return 'red'


for i, j, k in zip(lat, lon, elev):
    fgv.add_child(folium.CircleMarker(location=[i, j], radius=6, popup=k,
                                      fill_color=color_producer(k), color='grey',
                                      fill_opacity=0.7))

# fg.add_child(folium.GeoJson(data=open('world.json', 'r', encoding='utf-8-sig'),
#                             style_function=lambda x: {'fillColor': 'yellow'}))

fgp = folium.FeatureGroup(name="Population")
def __call__(self, url, filename=None):
    self.fetched = url
    fn = url_to_filename(url)
    with open(fn, 'r', encoding="utf8") as f:
        content = f.read()
    return content
pass def run(self): rcfile = os.path.abspath('.pylintrc') standaloneModules = [m for m in []] cli_options = ['-E'] if self.errorsonly else [] cli_options.append('--output-format={0}'.format(self.format)) errno = subprocess.call([sys.executable, '-m', 'pylint', '--rcfile={}'.format(rcfile), '--output-format=colorized'] + cli_options + ['pychroot'] + standaloneModules) raise SystemExit(errno) test_requirements = ['pytest'] if sys.hexversion < 0x03030000: test_requirements.append('mock') with open('README.rst', 'r', encoding='utf-8') as f: readme = f.read() with open('NEWS.rst', 'r', encoding='utf-8') as f: news = f.read() setup( name='pychroot', version=pkgdist.version(), description='a python library and cli tool that simplify chroot handling', long_description=readme + '\n\n' + news, author='Tim Harder', author_email='*****@*****.**', url='https://github.com/pkgcore/pychroot', license='BSD', packages=find_packages(), scripts=os.listdir('bin'),
def get_from_cache(url, cache_dir=None, force_download=False, proxies=None, etag_timeout=10, resume_download=False): """ Given a URL, look for the corresponding dataset in the local cache. If it's not there, download it. Then return the path to the cached file. """ if cache_dir is None: cache_dir = TRANSFORMERS_CACHE if sys.version_info[0] == 3 and isinstance(cache_dir, Path): cache_dir = str(cache_dir) if sys.version_info[0] == 2 and not isinstance(cache_dir, str): cache_dir = str(cache_dir) if not os.path.exists(cache_dir): os.makedirs(cache_dir) # Get eTag to add to filename, if it exists. if url.startswith("s3://"): etag = s3_etag(url, proxies=proxies) else: try: response = requests.head( url, allow_redirects=True, proxies=proxies, timeout=etag_timeout) if response.status_code != 200: etag = None else: etag = response.headers.get("ETag") except (EnvironmentError, requests.exceptions.Timeout): etag = None if sys.version_info[0] == 2 and etag is not None: etag = etag.decode('utf-8') filename = url_to_filename(url, etag) # get cache path to put the file cache_path = os.path.join(cache_dir, filename) # If we don't have a connection (etag is None) and can't identify the file # try to get the last downloaded one if not os.path.exists(cache_path) and etag is None: matching_files = fnmatch.filter(os.listdir(cache_dir), filename + '.*') matching_files = list( filter(lambda s: not s.endswith('.json'), matching_files)) if matching_files: cache_path = os.path.join(cache_dir, matching_files[-1]) if resume_download: incomplete_path = cache_path + '.incomplete' @contextmanager def _resumable_file_manager(): with open(incomplete_path, 'a+b') as f: yield f os.remove(incomplete_path) temp_file_manager = _resumable_file_manager if os.path.exists(incomplete_path): resume_size = os.stat(incomplete_path).st_size else: resume_size = 0 else: temp_file_manager = tempfile.NamedTemporaryFile resume_size = 0 if not os.path.exists(cache_path) or force_download: # Download to temporary file, then copy to cache dir once finished. # Otherwise you get corrupt cache entries if the download gets interrupted. with temp_file_manager() as temp_file: logger.info( "%s not found in cache or force_download set to True, downloading to %s", url, temp_file.name) # GET file object if url.startswith("s3://"): if resume_download: logger.warn( 'Warning: resumable downloads are not implemented for "s3://" urls') s3_get(url, temp_file, proxies=proxies) else: http_get(url, temp_file, proxies=proxies, resume_size=resume_size) # we are copying the file before closing it, so flush to avoid truncation temp_file.flush() # shutil.copyfileobj() starts at the current position, so go to the start temp_file.seek(0) logger.info("copying %s to cache at %s", temp_file.name, cache_path) with open(cache_path, 'wb') as cache_file: shutil.copyfileobj(temp_file, cache_file) logger.info("creating metadata file for %s", cache_path) meta = {'url': url, 'etag': etag} meta_path = cache_path + '.json' with open(meta_path, 'w') as meta_file: output_string = json.dumps(meta) if sys.version_info[0] == 2 and isinstance(output_string, str): # The beauty of python 2 output_string = unicode(output_string, 'utf-8') meta_file.write(output_string) logger.info("removing temp file %s", temp_file.name) return cache_path
""" description = "do nothing" user_options = [] def initialize_options(self): pass def finalize_options(self): pass def run(self): pass with open("README.rst", "r", encoding="utf-8") as readme: long_description = readme.read() setup( name='zopfli', version='0.1.4', author='Adam DePrince', author_email='*****@*****.**', maintainer='Cosimo Lupo', maintainer_email='*****@*****.**', description='Zopfli module for python', long_description=long_description, ext_modules=[ Extension('zopfli.zopfli', sources=[
def _resumable_file_manager():
    with open(incomplete_path, 'a+b') as f:
        yield f
    os.remove(incomplete_path)
# What packages are optional?
EXTRAS = {
    # "fancy feature": ["django"],
}

# The rest you shouldn"t have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for
# that!

HERE = os.path.abspath(os.path.dirname(__file__))

# Import the README and use it as the long-description.
# Note: this will only work if "README.md" is present in your MANIFEST.in file!
try:
    with io.open(os.path.join(HERE, "README.md"), encoding="utf-8") as f:
        LONG_DESCRIPTION = "\n" + f.read()
except FileNotFoundError:
    LONG_DESCRIPTION = DESCRIPTION

# Load the package"s __version__.py module as a dictionary.
ABOUT = {}
if not VERSION:
    PROJECT_SLUG = NAME.lower().replace("-", "_").replace(" ", "_")
    with open(os.path.join(HERE, PROJECT_SLUG, "__version__.py")) as f:
        exec(f.read(), ABOUT)  # pylint: disable=exec-used
else:
    ABOUT["__version__"] = VERSION


class UploadCommand(Command):
            'country_code', 'default', 'best', 'start_date', 'end_date', 'overlay',
            'available_projections', 'attribution', 'icon']:
        thing = properties.get(f)
        if thing is not None:
            converted[f] = thing
    for f in ['min_zoom', 'max_zoom']:
        thing = properties.get(f)
        if thing is not None:
            extent_obj[f] = thing
    if extent_obj:
        converted['extent'] = extent_obj
    return converted


parser = argparse.ArgumentParser(description='Generate legacy json output format from geojson format sources')
parser.add_argument('files', metavar='F', nargs='+', help='file(s) to process')
parser.add_argument('-b', dest='gen_bbox', action='store_true',
                    help='generate bounding boxes from polygons')
parser.add_argument('-t', dest='tms_only', action='store_true',
                    help='only include tile servers')
parser.add_argument('-r', dest='remove_polygons', action='store_true',
                    help='remove polygons from output, typically used together with -b')

args = parser.parse_args()

features = []
for file in args.files:
    with io.open(file, 'r') as f:
        features.append(convert_json_source(args, json.load(f)))

print(json.dumps(features, sort_keys=True, separators=(',', ':'), ensure_ascii=False).encode('utf-8'))
def handle(self, **options): target = options.pop('directory') # 先获取原内容 if PY_VER[0] == '2': old_file = open('config/default.py') else: old_file = open('config/default.py', encoding='utf-8') # if some directory is given, make sure it's nicely expanded top_dir = path.abspath(path.expanduser(target)) if not path.exists(top_dir): raise CommandError("Destination directory '%s' does not " "exist, please init first." % top_dir) if not path.exists(path.join(top_dir, 'manage.py')): raise CommandError("Current directory '%s' is not " "a django project dir, please init first. " "(bk-admin init ${app_code})" % top_dir) base_subdir = 'weixin_template' append_file_tuple = (('', 'requirements.txt'), ) # Setup a stub settings environment for template rendering if not settings.configured: settings.configure() django.setup() template_dir = path.join(blueapps.__path__[0], 'conf', base_subdir) run_ver = None conf_file = open(path.join(os.getcwd(), 'config', '__init__.py')) for line in conf_file.readlines(): if line.startswith('RUN_VER'): run_ver = line[11:-2] conf_file.close() prefix_length = len(template_dir) + 1 for root, dirs, files in os.walk(template_dir): relative_dir = root[prefix_length:] target_dir = path.join(top_dir, relative_dir) if not path.exists(target_dir): os.mkdir(target_dir) flag = root.endswith('sites') for dirname in dirs[:]: if (dirname.startswith('.') or # noqa dirname == '__pycache__' or # noqa (flag and dirname != run_ver)): dirs.remove(dirname) for filename in files: if filename.endswith(('.pyo', '.pyc', '.py.class', '.json')): # Ignore some files as they cause various breakages. continue old_path = path.join(root, filename) new_path = path.join(top_dir, relative_dir, filename) for old_suffix, new_suffix in self.rewrite_template_suffixes: if new_path.endswith(old_suffix): new_path = new_path[:-len(old_suffix)] + new_suffix break # Only rewrite once with io.open(old_path, 'rb') as template_file: content = template_file.read() w_mode = 'wb' for _root, _filename in append_file_tuple: if _root == relative_dir and _filename == filename: w_mode = 'ab' with io.open(new_path, w_mode) as new_file: new_file.write(content) try: shutil.copymode(old_path, new_path) self.make_writeable(new_path) except OSError: self.stderr.write( "Notice: Couldn't set permission bits on %s. You're " "probably using an uncommon filesystem setup. No " "problem." % new_path, self.style.NOTICE) # 修改文件 modify_default_file(old_file)
from io import open
from setuptools import setup, find_packages

# from pip.req import parse_requirements

with open("requirements.txt") as f:
    install_requires = f.read().strip().split("\n")

setup(
    name="fast_bert",
    version="1.6.5",
    description="AI Library using BERT",
    author="Kaushal Trivedi",
    author_email="*****@*****.**",
    license="Apache2",
    url="https://github.com/kaushaltrivedi/fast-bert",
    long_description=open("README.md", "r", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    keywords="BERT NLP deep learning google",
    packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
    install_requires=install_requires,
    classifiers=[
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    zip_safe=False,
)
def instantiate_from(self, filename):
    datadir = os.environ.get('FHIR_UNITTEST_DATADIR') or ''
    with io.open(os.path.join(datadir, filename), 'r', encoding='utf-8') as handle:
        js = json.load(handle)
        self.assertEqual("ResearchDefinition", js["resourceType"])
    return researchdefinition.ResearchDefinition(js)
browser.get(song['url'])

# Post a comment first
while True:
    easy2hide_notice = browser.find_element_by_css_selector('.easy2hide_notice')
    if easy2hide_notice:
        time.sleep(10)
        now_time = datetime.datetime.now()
        now_time_str = datetime.datetime.strftime(now_time, '%Y%m%d_%H%M%S')
        comment = browser.find_element_by_css_selector('#comment')
        comment.send_keys(now_time_str)
        comment.submit()
    else:
        break

indexFile = io.open(indexPath, 'a', encoding='utf-8')
indexFile.write('\n')
indexFile.close()

imgELs = browser.find_elements_by_css_selector('.wp-caption.aligncenter a')

# Iterate over the images
for imgEL in imgELs:
    # imgName = imgEL.get_attribute('title')  # image name
    imgURL = imgEL.get_attribute('href')
    imgPath = os.path.join(songPath, os.path.basename(imgURL))

    indexFile = io.open(indexPath, 'a', encoding='utf-8')
    indexFile.write(imgPath + '\n')
    indexFile.close()

    # check file
def read(filename):
    filename = os.path.join(os.path.dirname(__file__), filename)
    text_type = type(u"")
    with io.open(filename, mode="r", encoding='utf-8') as fd:
        return re.sub(text_type(r':[a-z]+:`~?(.*?)`'), text_type(r'``\1``'), fd.read())
catalog = '.'  # '/' - from the root, 'cat', '.' - in file's directory

patterns = {
    r'<=>': "$\\\\Leftrightarrow$",
    r'<=': "$\\\\Leftarrow$",
    r'=>': "$\\\\Rightarrow$",
    r'\|->': "\\\\mapsto ",
    r'->': '\\\\rightarrow ',
}

for (dirpath, dirnames, filenames) in os.walk(catalog):
    for filename in filenames:
        print('File under consideration', filename)
        name = re.search('\.(.+)$', filename)
        if name.group(1) == 'tex':
            import io
            with io.open(os.path.join(dirpath, filename), 'r',
                         encoding='utf-8', errors='replace') as file:
                text = file.read()
            print('- recognized and open an applicable file')
            for pattern, replacement in patterns.items():
                pattern = re.compile(pattern, flags=0)
                textnew = re.sub(pattern, replacement, text)
                text = textnew
                print('replacement', str(pattern), replacement)
            print(text)
            file = open(os.path.join(dirpath, filename + '.re.tex'), 'wb')
            file.write(text.encode('utf8'))
            file.close()
        else:
            print('File {} doesn\'t match a criteria and is rejected'.format(filename))
def write_predictions(all_examples, all_features, all_results, n_best_size, max_answer_length, do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, verbose_logging, version_2_with_negative, null_score_diff_threshold): """Write final predictions to the json file and log-odds of null if needed.""" logger.info("Writing predictions to: %s" % (output_prediction_file)) logger.info("Writing nbest to: %s" % (output_nbest_file)) example_index_to_features = collections.defaultdict(list) for feature in all_features: example_index_to_features[feature.example_index].append(feature) unique_id_to_result = {} for result in all_results: unique_id_to_result[result.unique_id] = result _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name "PrelimPrediction", [ "feature_index", "start_index", "end_index", "start_logit", "end_logit" ]) all_predictions = collections.OrderedDict() all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() for (example_index, example) in enumerate(all_examples): features = example_index_to_features[example_index] prelim_predictions = [] # keep track of the minimum score of null start+end of position 0 score_null = 1000000 # large and positive min_null_feature_index = 0 # the paragraph slice with min null score null_start_logit = 0 # the start logit at the slice with min null score null_end_logit = 0 # the end logit at the slice with min null score for (feature_index, feature) in enumerate(features): result = unique_id_to_result[feature.unique_id] start_indexes = _get_best_indexes(result.start_logits, n_best_size) end_indexes = _get_best_indexes(result.end_logits, n_best_size) # if we could have irrelevant answers, get the min score of irrelevant if version_2_with_negative: feature_null_score = result.start_logits[ 0] + result.end_logits[0] if feature_null_score < score_null: score_null = feature_null_score min_null_feature_index = feature_index null_start_logit = result.start_logits[0] null_end_logit = result.end_logits[0] for start_index in start_indexes: for end_index in end_indexes: # We could hypothetically create invalid predictions, e.g., predict # that the start of the span is in the question. We throw out all # invalid predictions. 
if start_index >= len(feature.tokens): continue if end_index >= len(feature.tokens): continue if start_index not in feature.token_to_orig_map: continue if end_index not in feature.token_to_orig_map: continue if not feature.token_is_max_context.get( start_index, False): continue if end_index < start_index: continue length = end_index - start_index + 1 if length > max_answer_length: continue prelim_predictions.append( _PrelimPrediction( feature_index=feature_index, start_index=start_index, end_index=end_index, start_logit=result.start_logits[start_index], end_logit=result.end_logits[end_index])) if version_2_with_negative: prelim_predictions.append( _PrelimPrediction( feature_index=min_null_feature_index, start_index=0, end_index=0, start_logit=null_start_logit, end_logit=null_end_logit)) prelim_predictions = sorted( prelim_predictions, key=lambda x: (x.start_logit + x.end_logit), reverse=True) _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name "NbestPrediction", ["text", "start_logit", "end_logit"]) seen_predictions = {} nbest = [] for pred in prelim_predictions: if len(nbest) >= n_best_size: break feature = features[pred.feature_index] if pred.start_index > 0: # this is a non-null prediction tok_tokens = feature.tokens[pred.start_index:( pred.end_index + 1)] orig_doc_start = feature.token_to_orig_map[pred.start_index] orig_doc_end = feature.token_to_orig_map[pred.end_index] orig_tokens = example.doc_tokens[orig_doc_start:( orig_doc_end + 1)] tok_text = " ".join(tok_tokens) # De-tokenize WordPieces that have been split off. tok_text = tok_text.replace(" ##", "") tok_text = tok_text.replace("##", "") # Clean whitespace tok_text = tok_text.strip() tok_text = " ".join(tok_text.split()) orig_text = " ".join(orig_tokens) final_text = get_final_text(tok_text, orig_text, do_lower_case, verbose_logging) if final_text in seen_predictions: continue seen_predictions[final_text] = True else: final_text = "" seen_predictions[final_text] = True nbest.append( _NbestPrediction( text=final_text, start_logit=pred.start_logit, end_logit=pred.end_logit)) # if we didn't include the empty option in the n-best, include it if version_2_with_negative: if "" not in seen_predictions: nbest.append( _NbestPrediction( text="", start_logit=null_start_logit, end_logit=null_end_logit)) # In very rare edge cases we could only have single null prediction. # So we just create a nonce prediction in this case to avoid failure. if len(nbest) == 1: nbest.insert( 0, _NbestPrediction( text="empty", start_logit=0.0, end_logit=0.0)) # In very rare edge cases we could have no valid predictions. So we # just create a nonce prediction in this case to avoid failure. 
if not nbest: nbest.append( _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) assert len(nbest) >= 1 total_scores = [] best_non_null_entry = None for entry in nbest: total_scores.append(entry.start_logit + entry.end_logit) if not best_non_null_entry: if entry.text: best_non_null_entry = entry probs = _compute_softmax(total_scores) nbest_json = [] for (i, entry) in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] output["start_logit"] = entry.start_logit output["end_logit"] = entry.end_logit nbest_json.append(output) assert len(nbest_json) >= 1 if not version_2_with_negative: all_predictions[example.qas_id] = nbest_json[0]["text"] else: # predict "" iff the null score - the score of best non-null > threshold score_diff = score_null - best_non_null_entry.start_logit - ( best_non_null_entry.end_logit) scores_diff_json[example.qas_id] = score_diff if score_diff > null_score_diff_threshold: all_predictions[example.qas_id] = "" else: all_predictions[example.qas_id] = best_non_null_entry.text all_nbest_json[example.qas_id] = nbest_json with open(output_prediction_file, "w") as writer: writer.write(json.dumps(all_predictions, indent=4) + "\n") with open(output_nbest_file, "w") as writer: writer.write(json.dumps(all_nbest_json, indent=4) + "\n") if version_2_with_negative: with open(output_null_log_odds_file, "w") as writer: writer.write(json.dumps(scores_diff_json, indent=4) + "\n") return all_predictions
from flask import Flask
from flask import request
from flask_cors import CORS
import numpy as np
import random
import io
import pickle
# Assumed import: the original fragment calls keras.models.load_model without
# showing where `keras` comes from.
from tensorflow import keras

app = Flask(__name__)
CORS(app)

model_1 = keras.models.load_model('/Users/kushalvajrala/ProgrammingProjects/DrakeLyricsGenerator/DrakeGenerator/model1')

path = '/Users/kushalvajrala/ProgrammingProjects/DrakeLyricsGenerator/DrakeGenerator/drake_dataset/drake_lyrics.txt'
with io.open(path, encoding="utf-8") as f:
    text = f.read().lower()
text = text.replace("\n", " ")  # We remove newline chars for nicer display

chars = sorted(list(set(text)))
# char_indices = dict((c, i) for i, c in enumerate(chars))
# indices_char = dict((i, c) for i, c in enumerate(chars))

diversity = 0.5

file_name1 = "chari"
file_name2 = "ichar"

outfile1 = open(file_name1, "rb")
outfile2 = open(file_name2, "rb")
# 'Development Status :: 4 - Beta'
# 'Development Status :: 5 - Production/Stable'
release_status = 'Development Status :: 5 - Production/Stable'
dependencies = [
    'google-api-core[grpc] >= 1.4.1, < 2.0.0dev',
]
extras = {
}

# Setup boilerplate below this line.

package_root = os.path.abspath(os.path.dirname(__file__))

readme_filename = os.path.join(package_root, 'README.rst')
with io.open(readme_filename, encoding='utf-8') as readme_file:
    readme = readme_file.read()

# Only include packages under the 'google' namespace. Do not include tests,
# benchmarks, etc.
packages = [
    package for package in setuptools.find_packages()
    if package.startswith('google')]

# Determine which namespaces are needed.
namespaces = ['google']
if 'google.cloud' in packages:
    namespaces.append('google.cloud')

setuptools.setup(
def write_predictions_extended( all_examples, all_features, all_results, n_best_size, max_answer_length, output_prediction_file, output_nbest_file, output_null_log_odds_file, orig_data_file, start_n_top, end_n_top, version_2_with_negative, tokenizer, verbose_logging): """ XLNet write prediction logic (more complex than Bert's). Write final predictions to the json file and log-odds of null if needed. Requires utils_squad_evaluate.py """ _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name "PrelimPrediction", [ "feature_index", "start_index", "end_index", "start_log_prob", "end_log_prob" ]) _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name "NbestPrediction", ["text", "start_log_prob", "end_log_prob"]) logger.info("Writing predictions to: %s", output_prediction_file) # logger.info("Writing nbest to: %s" % (output_nbest_file)) example_index_to_features = collections.defaultdict(list) for feature in all_features: example_index_to_features[feature.example_index].append(feature) unique_id_to_result = {} for result in all_results: unique_id_to_result[result.unique_id] = result all_predictions = collections.OrderedDict() all_nbest_json = collections.OrderedDict() scores_diff_json = collections.OrderedDict() for (example_index, example) in enumerate(all_examples): features = example_index_to_features[example_index] prelim_predictions = [] # keep track of the minimum score of null start+end of position 0 score_null = 1000000 # large and positive for (feature_index, feature) in enumerate(features): result = unique_id_to_result[feature.unique_id] cur_null_score = result.cls_logits # if we could have irrelevant answers, get the min score of irrelevant score_null = min(score_null, cur_null_score) for i in range(start_n_top): for j in range(end_n_top): start_log_prob = result.start_top_log_probs[i] start_index = result.start_top_index[i] j_index = i * end_n_top + j end_log_prob = result.end_top_log_probs[j_index] end_index = result.end_top_index[j_index] # We could hypothetically create invalid predictions, e.g., predict # that the start of the span is in the question. We throw out all # invalid predictions. if start_index >= feature.paragraph_len - 1: continue if end_index >= feature.paragraph_len - 1: continue if not feature.token_is_max_context.get( start_index, False): continue if end_index < start_index: continue length = end_index - start_index + 1 if length > max_answer_length: continue prelim_predictions.append( _PrelimPrediction( feature_index=feature_index, start_index=start_index, end_index=end_index, start_log_prob=start_log_prob, end_log_prob=end_log_prob)) prelim_predictions = sorted( prelim_predictions, key=lambda x: (x.start_log_prob + x.end_log_prob), reverse=True) seen_predictions = {} nbest = [] for pred in prelim_predictions: if len(nbest) >= n_best_size: break feature = features[pred.feature_index] # XLNet un-tokenizer # Let's keep it simple for now and see if we need all this later. 
# # tok_start_to_orig_index = feature.tok_start_to_orig_index # tok_end_to_orig_index = feature.tok_end_to_orig_index # start_orig_pos = tok_start_to_orig_index[pred.start_index] # end_orig_pos = tok_end_to_orig_index[pred.end_index] # paragraph_text = example.paragraph_text # final_text = paragraph_text[start_orig_pos: end_orig_pos + 1].strip() # Previously used Bert untokenizer tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] orig_doc_start = feature.token_to_orig_map[pred.start_index] orig_doc_end = feature.token_to_orig_map[pred.end_index] orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] tok_text = tokenizer.convert_tokens_to_string(tok_tokens) # Clean whitespace tok_text = tok_text.strip() tok_text = " ".join(tok_text.split()) orig_text = " ".join(orig_tokens) final_text = get_final_text( tok_text, orig_text, tokenizer.do_lower_case, verbose_logging) if final_text in seen_predictions: continue seen_predictions[final_text] = True nbest.append( _NbestPrediction( text=final_text, start_log_prob=pred.start_log_prob, end_log_prob=pred.end_log_prob)) # In very rare edge cases we could have no valid predictions. So we # just create a nonce prediction in this case to avoid failure. if not nbest: nbest.append( _NbestPrediction( text="", start_log_prob=-1e6, end_log_prob=-1e6)) total_scores = [] best_non_null_entry = None for entry in nbest: total_scores.append(entry.start_log_prob + entry.end_log_prob) if not best_non_null_entry: best_non_null_entry = entry probs = _compute_softmax(total_scores) nbest_json = [] for (i, entry) in enumerate(nbest): output = collections.OrderedDict() output["text"] = entry.text output["probability"] = probs[i] output["start_log_prob"] = entry.start_log_prob output["end_log_prob"] = entry.end_log_prob nbest_json.append(output) assert len(nbest_json) >= 1 assert best_non_null_entry is not None score_diff = score_null scores_diff_json[example.qas_id] = score_diff # note(zhiliny): always predict best_non_null_entry # and the evaluation script will search for the best threshold all_predictions[example.qas_id] = best_non_null_entry.text all_nbest_json[example.qas_id] = nbest_json with open(output_prediction_file, "w") as writer: writer.write(json.dumps(all_predictions, indent=4) + "\n") with open(output_nbest_file, "w") as writer: writer.write(json.dumps(all_nbest_json, indent=4) + "\n") if version_2_with_negative: with open(output_null_log_odds_file, "w") as writer: writer.write(json.dumps(scores_diff_json, indent=4) + "\n") with open(orig_data_file, "r", encoding='utf-8') as reader: orig_data = json.load(reader)["data"] qid_to_has_ans = make_qid_to_has_ans(orig_data) has_ans_qids = [k for k, v in qid_to_has_ans.items() if v] no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v] exact_raw, f1_raw = get_raw_scores(orig_data, all_predictions) out_eval = {} find_all_best_thresh_v2(out_eval, all_predictions, exact_raw, f1_raw, scores_diff_json, qid_to_has_ans) return out_eval
    update_line_numbers(filename)

    context = Context(
        mutation_id=mutation_id,
        filename=filename,
        dict_synonyms=dict_synonyms,
    )
    mutate_file(
        backup=backup,
        context=context,
    )
    if context.number_of_performed_mutations == 0:
        raise RuntimeError('No mutations performed.')


null_out = open(os.devnull, 'w')


class Config(object):
    def __init__(self, swallow_output, test_command, exclude_callback,
                 baseline_time_elapsed, test_time_multiplier, test_time_base,
                 backup, dict_synonyms, total, using_testmon, cache_only,
                 tests_dirs, hash_of_tests, pre_mutation, post_mutation):
        self.swallow_output = swallow_output
        self.test_command = test_command
        self.exclude_callback = exclude_callback
        self.baseline_time_elapsed = baseline_time_elapsed
        self.test_time_multipler = test_time_multiplier
        self.test_time_base = test_time_base
        self.backup = backup
        self.dict_synonyms = dict_synonyms
def read_squad_examples(input_file, is_training, version_2_with_negative): """Read a SQuAD json file into a list of SquadExample.""" with open(input_file, "r", encoding='utf-8') as reader: input_data = json.load(reader)["data"] def is_whitespace(c): if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: return True return False def is_english_or_number(c): return (ord(c) > 64 and ord(c) < 91) or (ord(c) < 123 and ord(c) > 96) examples = [] for entry in input_data: for paragraph in entry["paragraphs"]: paragraph_text = paragraph["context"] doc_tokens = [] char_to_word_offset = [] prev_is_whitespace = True for c in paragraph_text: if is_whitespace(c): continue doc_tokens.append(c) char_to_word_offset.append(len(doc_tokens) - 1) for qa in paragraph["qas"]: qas_id = qa["id"] question_text = qa["question"] start_position = None end_position = None orig_answer_text = None is_impossible = False if is_training: if (len(qa["answers"]) != 1) and (not is_impossible): raise ValueError( "For training, each question should have exactly 1 answer." ) answer = qa["answers"][0] orig_answer_text = answer["text"] answer_offset = answer["answer_start"] answer_length = len(orig_answer_text) if answer_offset > len(char_to_word_offset) - 1: logger.warning("样本错误: '%s' offfset vs. length'%s'", answer_offset, len(char_to_word_offset)) continue start_position = char_to_word_offset[answer_offset] end_position = answer_offset + answer_length - 1 if end_position > len(char_to_word_offset) - 1: logger.warning("样本错误: '%s' vs. '%s'", end_position, len(char_to_word_offset)) continue end_position = char_to_word_offset[answer_offset + answer_length - 1] # Only add answers where the text can be exactly recovered from the # document. If this CAN'T happen it's likely due to weird Unicode # stuff so we will just skip the example. # # Note that this means for training mode, every example is NOT # guaranteed to be preserved. actual_text = "".join( doc_tokens[start_position:(end_position + 1)]) cleaned_answer_text = "".join( whitespace_tokenize(orig_answer_text)) if actual_text.find(cleaned_answer_text) == -1: logger.warning("样本错误: '%s' vs. '%s'", actual_text, cleaned_answer_text) continue example = SquadExample( qas_id=qas_id, question_text=question_text, doc_tokens=doc_tokens, orig_answer_text=orig_answer_text, start_position=start_position, end_position=end_position, is_impossible=is_impossible) examples.append(example) return examples
def read(*names, **kwargs):
    return io.open(
        join(dirname(__file__), *names),
        encoding=kwargs.get("encoding", "utf8")
    ).read()
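# Hypothetical usage of read() from a setup.py in the same directory, assuming
# README.rst and CHANGELOG.rst sit next to this file:
long_description = "%s\n%s" % (read("README.rst"), read("CHANGELOG.rst"))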
import io
import os
import re

from setuptools import setup

# Get the version from huggingmolecules/__init__.py
# Adapted from https://stackoverflow.com/a/39671214
this_directory = os.path.dirname(os.path.realpath(__file__))
init_path = os.path.join(this_directory, 'huggingmolecules', '__init__.py')
version_matches = re.search(
    r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
    io.open(init_path, encoding='utf_8_sig').read(),
)
if version_matches is None:
    raise Exception('Could not determine huggingmolecules version from __init__.py')
__version__ = version_matches.group(1)

setup(
    name='huggingmolecules',
    version=__version__,
    packages=['huggingmolecules'],
    install_requires=[
        # 'torch==1.7.0',
        'scikit-learn>=0.23.2',
        'filelock>=3.0.12',
        'gdown>=3.12.2'
    ]
)
# Compile a db of {for value => dict terms that use that for value}
fors = defaultdict(set)
for key, anchors_ in anchors.items():
    for anchor in anchors_:
        for for_ in anchor["for"]:
            if for_ == "":
                continue
            fors[for_].add(key)
        if not anchor["for"]:
            fors["/"].add(key)
for key, val in fors.items():
    fors[key] = list(val)

if not config.dryRun:
    try:
        with io.open(config.scriptPath + "/spec-data/specs.json", 'w', encoding="utf-8") as f:
            f.write(unicode(json.dumps(specs, ensure_ascii=False, indent=2, sort_keys=True)))
    except Exception, e:
        die("Couldn't save spec database to disk.\n{0}", e)
        return
    try:
        with io.open(config.scriptPath + "/spec-data/headings.json", 'w', encoding="utf-8") as f:
            f.write(unicode(json.dumps(headings, ensure_ascii=False, indent=2, sort_keys=True)))
    except Exception, e:
        die("Couldn't save headings database to disk.\n{0}", e)
        return
    try:
        with io.open(config.scriptPath + "/spec-data/anchors.data", 'w', encoding="utf-8") as f:
            writeAnchorsFile(f, anchors)
    except Exception, e:
        die("Couldn't save anchor database to disk.\n{0}", e)
                  (object, InfoExtractor, SearchInfoExtractor))
        stop = False
        for b in bases:
            if b not in classes and b not in ordered_cls:
                if b.__name__ == 'GenericIE':
                    exit()
                classes.insert(0, b)
                stop = True
        if stop:
            break
        if all(b in ordered_cls for b in bases):
            ordered_cls.append(c)
            classes.remove(c)
            break
ordered_cls.append(_ALL_CLASSES[-1])

names = []
for ie in ordered_cls:
    name = ie.__name__
    src = build_lazy_ie(ie, name)
    module_contents.append(src)
    if ie in _ALL_CLASSES:
        names.append(name)

module_contents.append(
    '_ALL_CLASSES = [{0}]'.format(', '.join(names)))

module_src = '\n'.join(module_contents) + '\n'

with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
    f.write(module_src)
from io import open
from os import path

from setuptools import setup, find_packages

# set rootdir to the repository root directory
rootdir = path.abspath(path.dirname(__file__))

# read readme.md to long_description
with open(path.join(rootdir, "README.md"), encoding="utf-8") as readme:
    long_description = readme.read()

setup(
    # Required: project name
    name="torecsys",
    # Required: tag
    version="0.0.5.dev1",
    # Optional: short description
    description="Pure PyTorch Recommender System Module",
    # Optional: long description
    long_description=long_description,
    # Optional: long description type
    long_description_content_type="text/markdown",
    # Optional: project url
    url="https://github.com/p768lwy3/torecsys",
    # Optional: author
    author="Jasper Li",
    # Optional: author email
    author_email="*****@*****.**",
    # Classifier
    classifiers=[
        "Development Status :: 1 - Planning",
import io
import os

from google.cloud import vision
from google.cloud.vision import types

print(vision)

client = vision.ImageAnnotatorClient()

file_name = os.path.abspath('face.jpg')

with io.open(file_name, 'rb') as image_file:
    content = image_file.read()

image = types.Image(content=content)

# response = client.label_detection(image=image)
response = client.face_detection(image=image)
labels = response.label_annotations

print(response)
print("---------------------------------------------------")
print('Labels:')
for label in labels:
    print(label)