示例#1
0
def save_model(model, model_name, models_path="models/"):
    """Store a model as two files: its architecture and its weights.

    The architecture returned by ``model.to_json()`` is parsed and written to
    ``<models_path><model_name>.json``; the weights are written by
    ``model.save_weights`` to ``<models_path><model_name>.h5``. The two path
    parts are simply concatenated, so ``models_path`` has to carry its own
    trailing separator.
    """
    stem = models_path + model_name
    architecture = json.loads(model.to_json())
    just.write(architecture, stem + ".json")
    model.save_weights(stem + ".h5")
    print("Saved model to disk")
示例#2
0
 def _save(self,
           fname_or_obj,
           file_path_prefix,
           allow_overwrite=False,
           compression=None,
           **save_kwargs):
     """Write raw bytes to ``file_path_prefix`` and return the final path.

     ``fname_or_obj`` may be:

     * a string naming an existing file - that file's bytes are stored;
     * a string naming a directory - rejected with ``ValueError``;
     * any other string - stored as its UTF-8 encoding;
     * any other object - handed to ``just.write`` unchanged.

     When ``compression`` is given it is appended to the path as an extra
     extension. Unless ``allow_overwrite`` is true, an existing target
     raises ``ValueError``. ``save_kwargs`` is accepted but not used here.
     """
     payload = fname_or_obj
     if isinstance(fname_or_obj, str):
         expanded = os.path.expanduser(fname_or_obj)
         if os.path.isdir(expanded):
             raise ValueError("Cannot compress directory")
         if os.path.isfile(expanded):
             payload = just.bytes.read(expanded)

     if compression is None:
         target = file_path_prefix
     else:
         target = file_path_prefix + "." + compression

     if not allow_overwrite and just.exists(target):
         raise ValueError("Path exists, cannot save {!r}".format(target))

     if isinstance(payload, str):
         payload = bytes(payload, encoding="utf8")
     just.write(payload, target, unknown_type="bytes")
     return target
示例#3
0
 def save(self):
     """Persist the encoder/decoder and the model.

     The ``X`` and ``y`` attributes are removed from the encoder/decoder
     first (when present), so they are not part of what gets written to
     ``self.pkl_path``. The model is then saved to ``self.h5_path``.
     """
     for transient in ("X", "y"):
         if hasattr(self.encoder_decoder, transient):
             delattr(self.encoder_decoder, transient)
     just.write(self.encoder_decoder, self.pkl_path)
     self.model.save(self.h5_path)
示例#4
0
def extract_and_save(args):
    """Parse one stored page and write the result to the metadata folder.

    ``args`` is a ``(file_name, file_content)`` pair; ``file_content`` must
    provide the keys ``"url"`` and ``"html"``. The output file keeps the
    last ``/``-separated component of ``file_name``.
    """
    file_name, file_content = args
    url, html = file_content["url"], file_content["html"]
    parsed = parse_article(html, url)
    target = "~/.nostalgia_chrome/metadata/" + file_name.rsplit("/", 1)[-1]
    just.write(parsed, target)
示例#5
0
def test_csv_iread():
    """Rows written to a ``.csv`` file come back unchanged from ``just.iread``."""
    fname = "testobj.csv"
    header = [['"a"', '"b"']]
    rows = [['"1"', '"2"']] * 99
    obj = header + rows
    just.write(obj, fname)
    try:
        assert list(just.iread(fname)) == list(obj)
    finally:
        # always clean up the scratch file, even when the assertion fails
        os.remove(fname)
示例#6
0
def test_newl_iread():
    """Items written to a ``.newl`` file come back unchanged from ``just.iread``."""
    fname = "testobj.newl"
    obj = ["1", "2"]
    just.write(obj, fname)
    try:
        assert list(just.iread(fname)) == list(obj)
    finally:
        # always clean up the scratch file, even when the assertion fails
        os.remove(fname)
示例#7
0
def test_txt_iread():
    """``just.iread`` on a ``.txt`` file yields the text split on newlines."""
    fname = "testobj.txt"
    obj = "1\n2\n3\n4\n5"
    just.write(obj, fname)
    try:
        expected_lines = obj.split("\n")
        assert list(just.iread(fname)) == expected_lines
    finally:
        # always clean up the scratch file, even when the assertion fails
        os.remove(fname)
示例#8
0
def load_entry():
    """Import and return the ``nostalgia_entry`` module from the data folder.

    An empty ``__init__.py`` is written into ``~/nostalgia_data/`` and the
    expanded folder is appended to ``sys.path`` before the import.
    """
    import sys

    just.write("", "~/nostalgia_data/__init__.py")

    data_dir = os.path.expanduser("~/nostalgia_data/")
    sys.path.append(data_dir)

    import nostalgia_entry

    return nostalgia_entry
示例#9
0
def render_as_html():
    """Build the HTML documentation with the NDF class header simplified.

    ``replace_ndf_class()`` rewrites the file at ``NDF_PATH`` and hands back
    its previous content; that content is written back afterwards no matter
    how the build ends.
    """
    pristine_source = replace_ndf_class()
    try:
        portray.as_html(overwrite=True)
    except KeyboardInterrupt:
        print("Exiting")
    finally:
        # restore the file exactly as it was before the build
        just.write(pristine_source, NDF_PATH)
示例#10
0
def test_csv_iread_problem_lines():
    """Read back a CSV whose first row is a bare string instead of a list.

    A ``ValueError`` from ``just.read`` is tolerated; the scratch file is
    removed in every case.
    """
    fname = "testobj.csv"
    obj = ["a", ['"a"', '"b"']]
    just.write(obj, fname)
    try:
        just.read(fname)
    except ValueError:
        assert True
    finally:
        os.remove(fname)
示例#11
0
def check_seen(name, value):
    """Report whether ``value`` differs from what was stored under ``name``.

    The stored value lives in ``~/nostalgia_data/seen/<slug>.json``. When
    nothing truthy is stored, ``value`` counts as new. A tuple is converted
    to a list before it is compared with a stored value. A new value is
    written to the file, and the comparison result is returned.
    """
    path = "~/nostalgia_data/seen/" + slugify(name) + ".json"
    stored = just.read(path, no_exist=False)
    if stored:
        if isinstance(value, tuple):
            value = list(value)
        changed = stored != value
    else:
        changed = True
    if changed:
        just.write(value, path)
    return changed
示例#12
0
def test_csv_iread_error():
    """``just.iread`` must raise ``ValueError`` for the malformed CSV below.

    The data rows carry a third cell holding a lone double quote. The test
    previously passed even when no exception was raised (the "should not
    reach here" spot was silently reachable); it now fails explicitly in
    that case. The scratch file is removed in every case.
    """
    fname = "testobj.csv"
    obj = [['"a"', '"b"']] + [['"1"', '"2"', '"']] * 100
    just.write(obj, fname)
    try:
        list(just.iread(fname))
    except ValueError:
        # expected outcome
        pass
    else:
        # should not reach here
        raise AssertionError(
            "just.iread did not raise ValueError for malformed CSV")
    finally:
        os.remove(fname)
示例#13
0
 def ensure_access_token(self):
     """Make sure ``self.access_token`` is populated.

     Order of preference: the token already held in memory, then a cached
     token from ``self.ACCESS_TOKEN_FILE`` that has not passed
     ``time + expires_in``, and finally a fresh token from
     ``self.get_access_token()``. A fresh token is stamped with the current
     time and written to the cache file; an expired cache file is deleted.
     """
     if self.access_token:
         return

     now = int(time.time())
     if just.exists(self.ACCESS_TOKEN_FILE):
         cached = just.read(self.ACCESS_TOKEN_FILE, unknown_type="json")
         expired = now > cached['time'] + cached['expires_in']
         if not expired:
             self.access_token = cached
             return
         log.info('Cached access token is expired')
         os.unlink(self.ACCESS_TOKEN_FILE)

     self.access_token = self.get_access_token()
     self.access_token['time'] = now
     just.write(self.access_token,
                self.ACCESS_TOKEN_FILE,
                unknown_type="json")
示例#14
0
 def __enter__(self):
     """Run ``self.fn(self.args)`` with a stored fallback in the dev environment.

     * In ``"dev"``, a failing call falls back to the value stored under
       ``self.name``; if that cannot be read either, the original exception
       is raised rather than the read error.
     * In ``"dev"``, a successful non-``None`` result is stored under
       ``self.name`` before it is returned.
     * Outside ``"dev"``, the result is returned or the exception re-raised.

     Both ``good`` and ``exception`` are initialised up front: previously
     only one of them was ever bound, so the success path failed with
     ``UnboundLocalError`` and the result was never returned outside dev.
     """
     good = None
     exception = None
     try:
         good = self.fn(self.args)
     except Exception as e:
         exception = e

     if self.env == "dev":
         if exception is not None:
             try:
                 print("load mocked", self.name, "ignoring exception:")
                 print(exception)
                 return just.read(self.name)
             except Exception:
                 # do not raise this exception, but the original
                 raise exception
         if good is not None:
             print("storing mocked", self.name)
             just.write(good, self.name)
         return good

     if exception is not None:
         raise exception
     return good
示例#15
0
 def _save(self,
           obj,
           file_path_prefix,
           allow_overwrite=False,
           compression=None,
           **save_kwargs):
     """Write ``obj`` to a ``.json`` path and return that path.

     ``.json`` is appended to ``file_path_prefix`` when missing, followed by
     ``.<compression>`` when a compression is given. Unless
     ``allow_overwrite`` is true, an existing target raises ``ValueError``.
     An ``AttributeError`` from ``just.write`` is printed and turned into a
     ``ValueError``. ``save_kwargs`` is accepted but not used here.
     """
     if file_path_prefix.endswith(".json"):
         target = file_path_prefix
     else:
         target = file_path_prefix + ".json"
     if compression is not None:
         target = target + "." + compression

     if not allow_overwrite and just.exists(target):
         raise ValueError("Path exists, cannot save {!r}".format(target))

     try:
         just.write(obj, target)
     except AttributeError as e:
         print(e)
         raise ValueError("Ensure that file_path_prefix ends with .json")
     return target
示例#16
0
def mockle(name, good=None, exception=""):
    """Store or replay a value in the dev environment.

    Args:
        name: identifier passed through ``get_path`` to get the storage path.
        good: the successfully computed value, if there is one.
        exception: the exception that occurred, or any falsy value (the
            default ``""``, ``None``) when there was none.

    Returns:
        ``good`` when no exception was given (in dev it is stored first), or
        the previously stored value when an exception was given in dev.

    Raises:
        The given exception: outside dev, or in dev when no stored value
        could be read.

    "No exception" is now detected the same way in both environments;
    before, only ``""`` counted outside dev, so ``exception=None`` ended in
    ``raise None``. The fallback read also no longer intercepts
    ``KeyboardInterrupt`` / ``SystemExit``.
    """
    name = get_path(name)
    if get_env() != "dev":
        if not exception:
            return good
        raise exception

    if not exception:
        print("storing mocked", name)
        just.write(good, name)
        return good

    try:
        print("load mocked", name, "ignoring exception:")
        print(exception)
        return just.read(name)
    except Exception:
        # do not raise "this" exception, but the original
        raise exception
示例#17
0
def add_text():
    """Store the HTML posted for a URL and record it in the meta log.

    Reads ``url`` and ``html`` from the request JSON. URLs containing any
    ``blocklist`` entry are skipped with an empty JSON response. Otherwise
    the HTML is round-tripped through lxml, passed through ``make_tree``,
    written under ``BASE_PATH``, and an entry is appended to ``META_PATH``.
    The response lists the contents of ``last_urls``.
    """
    url = request.json["url"]
    print("url", url)

    blocked_by = [y for y in blocklist if y in url]
    if blocked_by:
        print("blocked", blocked_by)
        return jsonify({})

    raw_html = request.json["html"]
    normalised = lxml.html.tostring(
        lxml.html.fromstring(raw_html.encode("utf8")))
    tree = make_tree(normalised, url)
    html = lxml.html.tostring(tree).decode("utf8")

    slugged_url = slug_url(url)
    t1 = time.time()
    # NOTE: article-metadata extraction used to happen here; it is disabled
    # and only the HTML itself is stored.

    html_path = BASE_PATH + "html/{}_{}.html.gz".format(t1, slugged_url)
    print("html_path", html_path)
    just.write(html, html_path)

    obj = {"path": str(html_path), "url": url, "time": str(time.time())}
    print("META_PATH", META_PATH)
    just.append(obj, META_PATH)

    last.append(html)
    last_urls.append(url)
    print("saved", url)
    return jsonify({"urls": list(last_urls)})
示例#18
0
def main():
    """Set up Fitbit credentials if needed, authenticate and run the sync.

    On first use (no config file yet) the Fitbit app-registration page and a
    help image are opened in the browser and the client id/secret are asked
    for and saved. A config with an empty id or secret raises ``ValueError``.
    An HTTP 429 response while loading the profile prints the response
    headers and ends the run; any other HTTP error is re-raised.
    """
    conf_path = "~/nostalgia_data/config/fitbit/config.json"
    if not just.exists(conf_path):
        webbrowser.open("https://dev.fitbit.com/apps/new")
        webbrowser.open(
            "https://raw.githubusercontent.com/nostalgia-dev/nostalgia_fitbit/master/docs/fitbit_app.png"
        )
        client_id = getpass.getpass("Client ID: ")
        client_secret = getpass.getpass("Client Secret: ")
        just.write({"client_id": client_id, "client_secret": client_secret},
                   conf_path)
        print("Saved in:", conf_path)

    config = just.read(conf_path)
    if not (config["client_id"] and config["client_secret"]):
        raise ValueError(
            "Fill in a value for client_id and client_secret in '{}'".format(
                conf_path))

    fa = FitbitAuth(client_id=config['client_id'],
                    client_secret=config['client_secret'])
    fa.ensure_access_token()

    try:
        f = Fitbit(access_token=fa.access_token['access_token'])
        print(json.dumps(f.profile, indent=2))
    except requests.exceptions.HTTPError as e:
        status = e.response.status_code
        print(status)
        if status != 429:
            raise
        print(e.response.headers)
        return

    export = FitbitExport(f, profile=f.profile)
    export.sync_sleep()
    export.sync_heartrate_intraday()
示例#19
0
# Reduce the `usm` mapping (defined before this excerpt) to its US-dollar
# subset and write the result to data/money/us_min.json.
#
# usd_min gathers the first element of every entry listed under the "$"
# symbol and under the "dollar" and "bucks" currencies.
# NOTE(review): presumably x[0] is a currency code - confirm against the data.
usd_min = set([x[0] for x in usm['symbols']["$"]] +
              [x[0] for x in usm['currencies']["dollar"]] +
              [x[0] for x in usm['currencies']["bucks"]])

# keep only the keywords whose value is one of the collected elements
usm['keywords'] = {k: v for k, v in usm['keywords'].items() if v in usd_min}
# explicit exclusion; raises KeyError if "Tongan" did not survive the filter
del usm['keywords']["Tongan"]
usm['symbols'] = {"$": usm['symbols']['$']}
usm['currencies'] = {
    "dollar": usm['currencies']['dollar'],
    "bucks": usm['currencies']['bucks']
}
# keep only the abbreviations that still occur as a keyword value
usm['abbrevs'] = [x for x in usm['abbrevs'] if x in usm['keywords'].values()]
# explicit exclusion; raises ValueError if "TOP" is not in the list
usm['abbrevs'].remove("TOP")

# NOTE(review): this rebinds `usm` to whatever just.write returns, so the
# mapping itself is no longer reachable through this name afterwards.
usm = just.write(usm, "data/money/us_min.json")

###
import just
# Load the full mapping from an absolute, machine-specific path.
usm = just.read("/Users/pascal/egoroot/natura/data/money/us.json")

# eur_min gathers the first element of every entry listed under the "$" and
# "€" symbols and under the "dollar", "bucks" and "euro" currencies - despite
# its name it covers the dollar entries as well.
eur_min = set([x[0] for x in usm['symbols']["$"]] +
              [x[0] for x in usm['currencies']["dollar"]] +
              [x[0] for x in usm['currencies']["bucks"]] +
              [x[0] for x in usm['symbols']["€"]] +
              [x[0] for x in usm['currencies']["euro"]])

# keep only the keywords whose value is one of the collected elements
usm['keywords'] = {k: v for k, v in usm['keywords'].items() if v in eur_min}
usm['symbols'] = {"$": usm['symbols']['$'], "€": usm['symbols']["€"]}
usm['currencies'] = {
    "dollar": usm['currencies']['dollar'],
示例#20
0
def save_processed_files(fnames, name):
    """Write ``fnames`` to ``~/nostalgia_data/seen/<slug of name>.json``."""
    target = "".join(("~/nostalgia_data/seen/", slugify(name), ".json"))
    just.write(fnames, target)
示例#21
0
def save_newline_count(n, name):
    """Write the row count ``n`` to ``~/nostalgia_data/seen/<slug of name>.json``.

    Returns whatever ``just.write`` returns.
    """
    target = "".join(("~/nostalgia_data/seen/", slugify(name), ".json"))
    return just.write(n, target)
示例#22
0
def save_last_latest_file(latest_file, name):
    """Write ``latest_file`` to ``~/nostalgia_data/seen/<slug of name>.json``.

    Returns whatever ``just.write`` returns.
    """
    target = "".join(("~/nostalgia_data/seen/", slugify(name), ".json"))
    return just.write(latest_file, target)
示例#23
0
# Step 1: page through GitHub code-search results (pages 1..59) and collect
# the hrefs of matching files.
# NOTE(review): `driver` is not defined in this excerpt - presumably a browser
# automation driver created earlier; confirm.
links = set()
query = "keras lstm language:python filename:*.py"
for i in range(1, 60):
    try:
        url = "https://github.com/search?p={}&q={}&ref=searchresults&type=Code&utf8=%E2%9C%93"
        driver.get(url.format(i, query))
        tree = lxml.html.fromstring(driver.page_source)
        # keep only hrefs that contain "/blob/" and carry no "#" fragment
        page_links = [
            x for x in tree.xpath('//a/@href')
            if "/blob/" in x and "#" not in x
        ]
        links.update(page_links)
        print(i, len(links))
    except KeyboardInterrupt:
        # Ctrl-C stops the paging but keeps the links gathered so far
        break

# visit and save source files
base = "https://github.com"
for num, link in enumerate(links):
    url = base + link
    html = requests.get(url).text
    tree = lxml.html.fromstring(html)
    # the file's text is taken from the elements with exactly this class
    xpath = '//*[@class="blob-code blob-code-inner js-file-line"]'
    contents = "\n".join([x.text_content() for x in tree.xpath(xpath)])
    # note that link conveniently starts with / like a webpath
    just.write(contents, "data" + link)
    print(num, len(contents))

other_options = []
# bigquery
示例#24
0
import just
import portray

NDF_PATH = "nostalgia/ndf.py"


def replace_ndf_class():
    """Rewrite the NDF class header in ``NDF_PATH`` to a plain ``class NDF:``.

    The first line starting with ``class NDF`` is looked up (``IndexError``
    if there is none) and every occurrence of that line's text is replaced.
    Returns the file content as it was before the rewrite so the caller can
    restore it.
    """
    source = just.read(NDF_PATH)
    headers = [line for line in source.split("\n")
               if line.startswith("class NDF")]
    current_header = headers[0]
    patched = source.replace(current_header, "class NDF:")
    just.write(patched, NDF_PATH)
    return source


# remove base pandas from documentation: the NDF class header is simplified
# for the duration of the docs build and the previous file content is kept
original_ndf = replace_ndf_class()
try:
    portray.on_github_pages()
except KeyboardInterrupt:
    print("Exiting")
finally:
    # put pandas class back in - runs whether the build succeeded, failed or
    # was interrupted
    just.write(original_ndf, NDF_PATH)
示例#25
0
 def save(self):
     """Write the encoder/decoder to ``self.pkl_path`` and save the model to ``self.h5_path``."""
     encoder_decoder, pkl_path = self.encoder_decoder, self.pkl_path
     just.write(encoder_decoder, pkl_path)
     self.model.save(self.h5_path)
示例#26
0
import gzip
import os
import just
from auto_extract import parse_article
import tqdm
from urllib.parse import urlparse
import tldextract
from utils import KEYS_TO_KEEP

# Re-parse every stored page whose metadata lacks the "extruct" key and
# overwrite that metadata file with the fresh result.
for x in tqdm.tqdm(just.glob("/home/pascal/.nostalgia/meta/v1/*.json")):
    print("processing", x)
    meta = just.read(x)
    if "extruct" in meta:
        print("skipping", x)
        continue
    # Remove only the literal ".json" suffix. str.rstrip(".json") strips any
    # trailing run of the characters ".", "j", "s", "o", "n", which mangled
    # names such as "..._season.json" and made the HTML lookup miss.
    base_name = x.split("/")[-1]
    if base_name.endswith(".json"):
        base_name = base_name[:-len(".json")]
    html_path = "/home/pascal/.nostalgia/html/" + base_name + ".html.gz"
    if os.path.exists(html_path):
        with gzip.GzipFile(html_path, "r") as f:
            html = f.read()
        article = parse_article(html, meta["url"])
        meta = article.to_dict(keys=KEYS_TO_KEEP, skip_if_empty=True)
        just.write(meta, x)
        # Give the metadata file the HTML file's access/modification times
        # (what `touch -r` did) without building a shell command from paths.
        html_stat = os.stat(html_path)
        os.utime(x, ns=(html_stat.st_atime_ns, html_stat.st_mtime_ns))
        print("done", x)
示例#27
0
def slug_url(url):
    """Turn a URL into a lowercase slug of at most 150 characters.

    Runs of hyphens/whitespace collapse into a single hyphen, every character
    that is not a word character, whitespace or hyphen is dropped, and only
    the last 150 characters of the result are kept.
    """
    hyphenated = re.sub(r"[-\s]+", "-", url)
    cleaned = re.sub(r"[^\w\s-]", "", hyphenated)
    return cleaned.strip().lower()[-150:]


# Migrate every old combined JSON dump into a gzipped HTML file plus a
# metadata JSON file, carry the dump's timestamps over, then delete the dump.
for x in tqdm.tqdm(
        just.glob("/home/pascal/.nostalgia_chrome/old/html/*.json")):
    ctime = os.path.getctime(x)
    # Hold the source file open only while it is being read; the original
    # kept it open for the whole migration and shadowed its handle with the
    # gzip writer below.
    with open(x) as f:
        print("processing", x)
        data = json.load(f)
    html = data["html"]
    url = data["url"]
    slugged_url = slug_url(url)
    article = parse_article(html, url)
    meta = article.to_dict(keys=KEYS_TO_KEEP, skip_if_empty=True)
    meta["creation_time"] = ctime
    meta["slugged_url"] = slugged_url
    html_path = "/home/pascal/.nostalgia_chrome/html/{}_{}.html.gz".format(
        ctime, slugged_url)
    with gzip.GzipFile(html_path, "w") as gz:
        gz.write(html.encode("utf8"))
    meta_path = "/home/pascal/.nostalgia_chrome/meta/v1/{}_{}.json".format(
        ctime, slugged_url)
    just.write(meta, meta_path)
    # Copy the dump's access/modification times onto both new files (what
    # `touch -r` did) without building a shell command from file paths.
    source_stat = os.stat(x)
    for migrated in (html_path, meta_path):
        os.utime(migrated,
                 ns=(source_stat.st_atime_ns, source_stat.st_mtime_ns))
    just.remove(x)
示例#28
0
 def parse(self, response):
     """Save the response body under ``data/``.

     The target path is ``"data"`` followed by the response URL with as many
     leading characters removed as the raw.githubusercontent.com base URL is
     long.
     """
     payload = response.text.encode("utf-8")
     prefix_len = len("https://raw.githubusercontent.com")
     just.write(payload, "data" + response.url[prefix_len:])
示例#29
0
import just

# Location of the entry module; an empty one is created on first use so that
# it always exists afterwards.
ENTRY = "~/nostalgia_data/nostalgia_entry.py"
if not just.exists(ENTRY):
    just.write("", ENTRY)

from nostalgia.ndf import NDF
示例#30
0
def replace_ndf_class():
    """Rewrite the NDF class header in ``NDF_PATH`` to a plain ``class NDF:``.

    The first line starting with ``class NDF`` is looked up (``IndexError``
    if there is none) and every occurrence of that line's text is replaced.
    Returns the file content as it was before the rewrite so the caller can
    restore it.
    """
    source = just.read(NDF_PATH)
    headers = [line for line in source.split("\n")
               if line.startswith("class NDF")]
    current_header = headers[0]
    patched = source.replace(current_header, "class NDF:")
    just.write(patched, NDF_PATH)
    return source