def save_model(model, model_name, models_path="models/"):
    # serialize model architecture to JSON, weights to HDF5
    model_file = models_path + model_name + ".json"
    weights_file = models_path + model_name + ".h5"
    just.write(json.loads(model.to_json()), model_file)
    model.save_weights(weights_file)
    print("Saved model to disk")

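# A load counterpart to save_model above, for round-tripping; a sketch
# assuming Keras is available (model_from_json and load_weights are
# standard Keras calls, but load_model itself is not part of the
# original snippet).
import json

import just
from keras.models import model_from_json


def load_model(model_name, models_path="models/"):
    # just.read returns the parsed JSON dict, so re-serialize it for Keras
    arch = json.dumps(just.read(models_path + model_name + ".json"))
    model = model_from_json(arch)
    model.load_weights(models_path + model_name + ".h5")
    return model
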
def _save(self, fname_or_obj, file_path_prefix, allow_overwrite=False, compression=None, **save_kwargs):
    if isinstance(fname_or_obj, str):
        fname = os.path.expanduser(fname_or_obj)
        if os.path.isdir(fname):
            raise ValueError("Cannot compress directory")
        elif os.path.isfile(fname):
            obj = just.bytes.read(fname)
        else:
            obj = fname_or_obj
    else:
        obj = fname_or_obj
    path = file_path_prefix
    if compression is not None:
        path += "." + compression
    if not allow_overwrite and just.exists(path):
        raise ValueError("Path exists, cannot save {!r}".format(path))
    if isinstance(obj, str):
        obj = bytes(obj, encoding="utf8")
    just.write(obj, path, unknown_type="bytes")
    return path

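# How _save dispatches, as a sketch: a string naming an existing file is
# read as bytes and copied, any other string is treated as content and
# utf8-encoded, and bytes pass through untouched. Saver is a made-up host
# class purely to exercise the method; the /tmp paths are illustrative.
class Saver:
    save = _save


s = Saver()
s.save("literal text content", "/tmp/demo_a")  # str -> encoded to utf8 bytes
s.save(b"\x00\x01", "/tmp/demo_b")             # bytes -> written as-is
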
def save(self):
    if hasattr(self.encoder_decoder, "X"):
        del self.encoder_decoder.X
    if hasattr(self.encoder_decoder, "y"):
        del self.encoder_decoder.y
    just.write(self.encoder_decoder, self.pkl_path)
    self.model.save(self.h5_path)

def extract_and_save(args):
    file_name, file_content = args
    url = file_content["url"]
    html = file_content["html"]
    parsed = parse_article(html, url)
    just.write(parsed, "~/.nostalgia_chrome/metadata/" + file_name.split("/")[-1])

def test_csv_iread():
    fname = "testobj.csv"
    obj = [['"a"', '"b"']] + [['"1"', '"2"']] * 99
    just.write(obj, fname)
    try:
        assert list(just.iread(fname)) == obj
    finally:
        os.remove(fname)

def test_newl_iread():
    fname = "testobj.newl"
    obj = ["1", "2"]
    just.write(obj, fname)
    try:
        assert list(just.iread(fname)) == obj
    finally:
        os.remove(fname)

def test_txt_iread():
    fname = "testobj.txt"
    obj = "1\n2\n3\n4\n5"
    just.write(obj, fname)
    try:
        assert list(just.iread(fname)) == obj.split("\n")
    finally:
        os.remove(fname)

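# Taken together, the three tests above show just.iread dispatching on
# file extension: .csv yields rows as lists, while .newl and .txt yield
# one string per line. A compact sketch reusing the tests' own values:
import os

import just

just.write([['"a"', '"b"']], "demo.csv")
just.write("1\n2", "demo.txt")
try:
    assert list(just.iread("demo.csv")) == [['"a"', '"b"']]
    assert list(just.iread("demo.txt")) == ["1", "2"]
finally:
    os.remove("demo.csv")
    os.remove("demo.txt")
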
def load_entry():
    import sys

    just.write("", "~/nostalgia_data/__init__.py")
    sys.path.append(os.path.expanduser("~/nostalgia_data/"))
    import nostalgia_entry

    return nostalgia_entry

def render_as_html():
    # remove base pandas from documentation
    original_ndf = replace_ndf_class()
    try:
        portray.as_html(overwrite=True)
    except KeyboardInterrupt:
        print("Exiting")
    finally:
        # put pandas class back in
        just.write(original_ndf, NDF_PATH)

def test_csv_iread_problem_lines():
    fname = "testobj.csv"
    obj = ["a"] + [['"a"', '"b"']]
    just.write(obj, fname)
    try:
        just.read(fname)
        assert False, "expected ValueError for malformed csv"
    except ValueError:
        pass
    finally:
        os.remove(fname)

def check_seen(name, value):
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    is_new = True
    res = just.read(path, no_exist=False)
    if res:
        if isinstance(value, tuple):
            value = list(value)
        is_new = res != value
    if is_new:
        just.write(value, path)
    return is_new

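# Illustrative call of check_seen, assuming slugify and just are imported
# as in the snippet; the "photos" name and the file list are invented:
if check_seen("photos", ["a.jpg", "b.jpg"]):
    print("input is new or changed, reprocessing")
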
def test_csv_iread_error():
    fname = "testobj.csv"
    obj = [['"a"', '"b"']] + [['"1"', '"2"', '"']] * 100
    just.write(obj, fname)
    try:
        list(just.iread(fname))
        assert False, "should not reach here"
    except ValueError:
        pass
    finally:
        os.remove(fname)

def ensure_access_token(self):
    if self.access_token:
        return
    now = int(time.time())
    if just.exists(self.ACCESS_TOKEN_FILE):
        access_token = just.read(self.ACCESS_TOKEN_FILE, unknown_type="json")
        if now > access_token["time"] + access_token["expires_in"]:
            log.info("Cached access token is expired")
            os.unlink(self.ACCESS_TOKEN_FILE)
        else:
            self.access_token = access_token
            return
    self.access_token = self.get_access_token()
    self.access_token["time"] = now
    just.write(self.access_token, self.ACCESS_TOKEN_FILE, unknown_type="json")

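# Shape of the cached token file read and written above (keys taken from
# the code, values illustrative); the cache counts as valid while
# now <= time + expires_in:
# {"access_token": "...", "expires_in": 28800, "time": 1577836800}
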
def __enter__(self):
    # initialize both so the branches below cannot hit a NameError
    good = None
    exception = None
    try:
        good = self.fn(self.args)
    except Exception as e:
        exception = e
    if self.env == "dev" and exception:
        try:
            print("load mocked", self.name, "ignoring exception:")
            print(exception)
            return just.read(self.name)
        except Exception:
            # do not raise this exception, but the original
            raise exception
    elif self.env == "dev" and good is not None:
        print("storing mocked", self.name)
        just.write(good, self.name)
        return good
    elif exception:
        raise exception
    return good

def _save(self, obj, file_path_prefix, allow_overwrite=False, compression=None, **save_kwargs):
    path = file_path_prefix
    if not path.endswith(".json"):
        path += ".json"
    if compression is not None:
        path += "." + compression
    if not allow_overwrite and just.exists(path):
        raise ValueError("Path exists, cannot save {!r}".format(path))
    try:
        just.write(obj, path)
    except AttributeError as e:
        print(e)
        raise ValueError("Ensure that file_path_prefix ends with .json")
    return path

def mockle(name, good=None, exception=""):
    name = get_path(name)
    if get_env() == "dev":
        if not exception:
            print("storing mocked", name)
            just.write(good, name)
            return good
        else:
            try:
                print("load mocked", name, "ignoring exception:")
                print(exception)
                return just.read(name)
            except Exception:
                # do not raise "this" exception, but the original
                raise exception
    elif exception == "":
        return good
    else:
        raise exception

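# Sketch of calling mockle (fetch_expensive_data and the cache name are
# made up): in "dev", a successful result is written to disk, and a later
# failing run falls back to that cached copy instead of raising.
try:
    result, exc = fetch_expensive_data(), ""
except Exception as e:
    result, exc = None, e
result = mockle("fetch_expensive_data.json", good=result, exception=exc)
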
def add_text():
    url = request.json["url"]
    print("url", url)
    if any(y in url for y in blocklist):
        print("blocked", [y for y in blocklist if y in url])
        return jsonify({})
    html = request.json["html"]
    html = lxml.html.tostring(lxml.html.fromstring(html.encode("utf8")))
    tree = make_tree(html, url)
    html = lxml.html.tostring(tree).decode("utf8")
    slugged_url = slug_url(url)
    t1 = time.time()
    # meta_path = BASE_PATH / "meta/v1/{}_{}.json".format(t1, slugged_url)
    # try:
    #     article = parse_article(html, url)
    #     metadata = article.to_dict(keys=ARTICLE_KEYS_TO_KEEP, skip_if_empty=True)
    # except Exception as e:
    #     metadata = {"error": str(e)}
    # metadata["creation_time"] = t1
    # metadata["slugged_url"] = slugged_url
    # with open(meta_path, "w") as f:
    #     json.dump(metadata, f, indent=4)
    # just.write(metadata, meta_path)
    html_path = BASE_PATH + "html/{}_{}.html.gz".format(t1, slugged_url)
    print("html_path", html_path)
    just.write(html, html_path)
    obj = {"path": str(html_path), "url": url, "time": str(time.time())}
    print("META_PATH", META_PATH)
    just.append(obj, META_PATH)
    last.append(html)
    last_urls.append(url)
    print("saved", url)
    return jsonify({"urls": list(last_urls)})

def main():
    conf_path = "~/nostalgia_data/config/fitbit/config.json"
    if not just.exists(conf_path):
        webbrowser.open("https://dev.fitbit.com/apps/new")
        webbrowser.open(
            "https://raw.githubusercontent.com/nostalgia-dev/nostalgia_fitbit/master/docs/fitbit_app.png"
        )
        client_id = getpass.getpass("Client ID: ")
        client_secret = getpass.getpass("Client Secret: ")
        info = {"client_id": client_id, "client_secret": client_secret}
        just.write(info, conf_path)
        print("Saved in:", conf_path)
    config = just.read(conf_path)
    if not config["client_id"] or not config["client_secret"]:
        msg = "Fill in a value for client_id and client_secret in '{}'".format(conf_path)
        raise ValueError(msg)
    fa = FitbitAuth(client_id=config["client_id"], client_secret=config["client_secret"])
    fa.ensure_access_token()
    try:
        f = Fitbit(access_token=fa.access_token["access_token"])
        print(json.dumps(f.profile, indent=2))
    except requests.exceptions.HTTPError as e:
        print(e.response.status_code)
        if e.response.status_code == 429:
            print(e.response.headers)
            return
        raise
    export = FitbitExport(f, profile=f.profile)
    export.sync_sleep()
    export.sync_heartrate_intraday()

usd_min = set(
    [x[0] for x in usm["symbols"]["$"]]
    + [x[0] for x in usm["currencies"]["dollar"]]
    + [x[0] for x in usm["currencies"]["bucks"]]
)
usm["keywords"] = {k: v for k, v in usm["keywords"].items() if v in usd_min}
del usm["keywords"]["Tongan"]
usm["symbols"] = {"$": usm["symbols"]["$"]}
usm["currencies"] = {
    "dollar": usm["currencies"]["dollar"],
    "bucks": usm["currencies"]["bucks"],
}
usm["abbrevs"] = [x for x in usm["abbrevs"] if x in usm["keywords"].values()]
usm["abbrevs"].remove("TOP")
just.write(usm, "data/money/us_min.json")

###

import just

usm = just.read("/Users/pascal/egoroot/natura/data/money/us.json")
eur_min = set(
    [x[0] for x in usm["symbols"]["$"]]
    + [x[0] for x in usm["currencies"]["dollar"]]
    + [x[0] for x in usm["currencies"]["bucks"]]
    + [x[0] for x in usm["symbols"]["€"]]
    + [x[0] for x in usm["currencies"]["euro"]]
)
usm["keywords"] = {k: v for k, v in usm["keywords"].items() if v in eur_min}
usm["symbols"] = {"$": usm["symbols"]["$"], "€": usm["symbols"]["€"]}
usm["currencies"] = {
    "dollar": usm["currencies"]["dollar"],
def save_processed_files(fnames, name):
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    just.write(fnames, path)

def save_newline_count(n, name):
    """Save the number of rows already processed in a file."""
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    return just.write(n, path)

def save_last_latest_file(latest_file, name):
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    return just.write(latest_file, path)

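# A matching loader for the three save_* helpers above, assuming slugify
# and just as in those snippets; load_seen itself is a sketch, not
# original code (the no_exist keyword mirrors its use in check_seen).
def load_seen(name, default=None):
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    return just.read(path, no_exist=default)
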
links = set()
query = "keras lstm language:python filename:*.py"
for i in range(1, 60):
    try:
        url = "https://github.com/search?p={}&q={}&ref=searchresults&type=Code&utf8=%E2%9C%93"
        driver.get(url.format(i, query))
        tree = lxml.html.fromstring(driver.page_source)
        page_links = [x for x in tree.xpath("//a/@href") if "/blob/" in x and "#" not in x]
        links.update(page_links)
        print(i, len(links))
    except KeyboardInterrupt:
        break

# visit and save source files
base = "https://github.com"
for num, link in enumerate(links):
    url = base + link
    html = requests.get(url).text
    tree = lxml.html.fromstring(html)
    xpath = '//*[@class="blob-code blob-code-inner js-file-line"]'
    contents = "\n".join([x.text_content() for x in tree.xpath(xpath)])
    # note that link conveniently starts with / like a webpath
    just.write(contents, "data" + link)
    print(num, len(contents))

other_options = []
# bigquery

import just
import portray

NDF_PATH = "nostalgia/ndf.py"


def replace_ndf_class():
    ndf_replace = "class NDF:"
    original = just.read(NDF_PATH)
    ndf = [x for x in original.split("\n") if x.startswith("class NDF")][0]
    just.write(original.replace(ndf, ndf_replace), NDF_PATH)
    return original


# remove base pandas from documentation
original_ndf = replace_ndf_class()
try:
    portray.on_github_pages()
except KeyboardInterrupt:
    print("Exiting")
finally:
    # put pandas class back in
    just.write(original_ndf, NDF_PATH)

def save(self):
    just.write(self.encoder_decoder, self.pkl_path)
    self.model.save(self.h5_path)

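# A plausible load counterpart to save above (a sketch: load_model comes
# from Keras, and just.read is assumed to unpickle the .pkl path the same
# way just.write pickled it):
def load(pkl_path, h5_path):
    from keras.models import load_model

    encoder_decoder = just.read(pkl_path)
    model = load_model(h5_path)
    return encoder_decoder, model
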
import gzip
import os

import just
import tqdm
from auto_extract import parse_article

from utils import KEYS_TO_KEEP

for x in tqdm.tqdm(just.glob("/home/pascal/.nostalgia/meta/v1/*.json")):
    print("processing", x)
    meta = just.read(x)
    if "extruct" in meta:
        print("skipping", x)
        continue
    # strip the ".json" suffix explicitly; rstrip(".json") would also eat
    # trailing j/s/o/n characters from the file stem
    html_path = "/home/pascal/.nostalgia/html/" + x.split("/")[-1][: -len(".json")] + ".html.gz"
    if os.path.exists(html_path):
        with gzip.GzipFile(html_path, "r") as f:
            html = f.read()
        article = parse_article(html, meta["url"])
        meta = article.to_dict(keys=KEYS_TO_KEEP, skip_if_empty=True)
        just.write(meta, x)
        os.system("touch '{}' -r '{}'".format(x, html_path))
        print("done", x)

def slug_url(url):
    pre_slug = re.sub(r"[-\s]+", "-", url)
    slugged_url = re.sub(r"[^\w\s-]", "", pre_slug).strip().lower()[-150:]
    return slugged_url


for x in tqdm.tqdm(just.glob("/home/pascal/.nostalgia_chrome/old/html/*.json")):
    ctime = os.path.getctime(x)
    with open(x) as f:
        print("processing", x)
        data = json.load(f)
    html = data["html"]
    url = data["url"]
    slugged_url = slug_url(url)
    article = parse_article(html, url)
    meta = article.to_dict(keys=KEYS_TO_KEEP, skip_if_empty=True)
    meta["creation_time"] = ctime
    meta["slugged_url"] = slugged_url
    html_path = "/home/pascal/.nostalgia_chrome/html/{}_{}.html.gz".format(ctime, slugged_url)
    with gzip.GzipFile(html_path, "w") as f:
        f.write(html.encode("utf8"))
    meta_path = "/home/pascal/.nostalgia_chrome/meta/v1/{}_{}.json".format(ctime, slugged_url)
    just.write(meta, meta_path)
    os.system("touch '{}' -r '{}'".format(html_path, x))
    os.system("touch '{}' -r '{}'".format(meta_path, x))
    just.remove(x)

def parse(self, response):
    base = "https://raw.githubusercontent.com"
    content = response.text.encode("utf-8")
    just.write(content, "data" + response.url[len(base):])

import just

ENTRY = "~/nostalgia_data/nostalgia_entry.py"

if not just.exists(ENTRY):
    just.write("", ENTRY)

from nostalgia.ndf import NDF

def replace_ndf_class():
    ndf_replace = "class NDF:"
    original = just.read(NDF_PATH)
    ndf = [x for x in original.split("\n") if x.startswith("class NDF")][0]
    just.write(original.replace(ndf, ndf_replace), NDF_PATH)
    return original