def reader(fname, no_exist, read_func_name, unknown_type, ignore_exceptions):
    """Read a file, dispatching to a format module based on its extension.

    Parameters
    ----------
    fname : str
        Path to read; normalized via ``make_path``.
    no_exist :
        Returned as-is when the file does not exist (unless it is None).
    read_func_name : str
        Name of the function to fetch from the matched format module.
    unknown_type : str
        Fallback key into ``EXT_TO_MODULE``; the sentinel "RAISE" makes an
        unrecognized extension raise ``TypeError`` instead.
    ignore_exceptions : tuple of exception types or None
        When given, these exceptions are swallowed and None is returned.
    """
    fname = make_path(fname)
    if not os.path.isfile(fname) and no_exist is not None:
        return no_exist
    # Detect compression suffixes and strip them so the underlying data
    # extension (e.g. "json" in "data.json.gz") can be inspected.
    compression = []
    stripped_fname = fname
    for suffix, opener in EXT_TO_COMPRESSION.items():
        if fname.endswith(suffix):
            compression.append(opener)
            # NOTE(review): assumes keys are bare suffixes like "gz" — the
            # extra +1 also removes the preceding dot; confirm key format.
            stripped_fname = stripped_fname[:-(len(suffix) + 1)]
    ext = stripped_fname.split(".")[-1] if "." in stripped_fname[-6:] else None
    if ext not in EXT_TO_MODULE and unknown_type == "RAISE":
        raise TypeError("just does not yet cover '{}'".format(ext))
    reader_module = EXT_TO_MODULE.get(ext, None) or EXT_TO_MODULE[unknown_type]
    read_fn = getattr(reader_module, read_func_name)

    def _do_read():
        # Single copy of the read logic (was duplicated in both branches).
        if compression:
            # The compression callable actually returns a file handler >.<
            with compression[0](fname, "rb") as f:
                return read_fn(f)
        return read_fn(fname)

    if ignore_exceptions is None:
        return _do_read()
    try:
        return _do_read()
    except ignore_exceptions:
        return None
def writer(obj, fname, mkdir_no_exist, skip_if_exist, write_func_name, unknown_type):
    """Serialize ``obj`` to ``fname``, dispatching on the file's extension.

    Optionally creates the parent directory, skips existing files, and
    wraps the output in a compression handle when the suffix asks for one.
    Returns False when skipped, otherwise whatever the writer returns.
    """
    fname = make_path(fname)
    if skip_if_exist and os.path.isfile(fname):  # pragma: no cover
        return False
    if mkdir_no_exist:
        dname = os.path.dirname(fname)
        if dname not in {".", "..", ""}:
            mkdir(dname)
    # Strip any compression suffix so the data extension can be inspected.
    compression = []
    stripped_fname = fname
    for suffix, opener in EXT_TO_COMPRESSION.items():
        if fname.endswith(suffix):
            compression.append(opener)
            stripped_fname = stripped_fname[:-(len(suffix) + 1)]
    ext = stripped_fname.split(".")[-1] if "." in stripped_fname[-6:] else None
    if ext not in EXT_TO_MODULE and unknown_type == "RAISE":
        raise TypeError("just does not yet cover '{}'".format(ext))
    writer_module = EXT_TO_MODULE.get(ext, None) or EXT_TO_MODULE[unknown_type]
    write_fn = getattr(writer_module, write_func_name)
    if not compression:
        return write_fn(obj, fname)
    # The first matching compression callable actually returns a file handler >.<
    open_compressed = compression[0]
    with open_compressed(fname, "wb") as f:
        return write_fn(obj, f)
def __init__(self, hostname, username, password):
    """Validate and store IMAP credentials, and resolve the input directory.

    Raises whatever ``_validate_constructor_arguments`` raises on bad input.
    """
    self._validate_constructor_arguments(hostname, username, password)
    self.root = just.make_path("~/nostalgia_data/input/imap")
    self.hostname = hostname
    self.username = username
    self.password = password
def file_modified_since_last(fname, name):
    """Return ``fname``'s mtime if it differs from the recorded one, else None.

    The previously seen mtime is read from a per-``name`` JSON file under
    ``~/nostalgia_data/seen/`` (0 when no record exists yet).
    """
    seen_path = just.make_path("~/nostalgia_data/seen/" + slugify(name) + ".json")
    recorded_mtime = float(just.read(seen_path, no_exist=0))
    current_mtime = os.path.getmtime(fname)
    return current_mtime if current_mtime != recorded_mtime else None
def load_image_texts(cls, glob_pattern_s, nrows=None):
    """OCR image files matching glob pattern(s) into a cached DataFrame.

    Parameters
    ----------
    glob_pattern_s : str or list of str
        One glob pattern, or a list of patterns whose results are unioned.
    nrows : int or None
        When given, only the last ``nrows`` rows are processed/returned and
        nothing is persisted.

    Returns a DataFrame with columns text/path/title/time, merged with (and
    saved to) the on-disk cache keyed by pattern + class name.
    """
    import pytesseract
    from PIL import Image

    if isinstance(glob_pattern_s, list):
        fnames = set()
        for glob_pattern in glob_pattern_s:
            fnames.update(set(just.glob(glob_pattern)))
        glob_pattern = "_".join(glob_pattern_s)
    else:
        # BUG FIX: this branch previously globbed the unbound name
        # `glob_pattern` (NameError for single-string input); bind it first.
        glob_pattern = glob_pattern_s
        fnames = set(just.glob(glob_pattern))
    name = glob_pattern + "_" + normalize_name(cls.__name__)
    processed_files = get_processed_files(name)
    to_process = fnames.difference(processed_files)
    objects = []
    cache = get_cache("tesseract")
    if nrows is not None:
        if not to_process:
            # Nothing new: serve the tail of the cached frame directly.
            return load_df(name).iloc[-nrows:]
        else:
            to_process = list(to_process)[-nrows:]
    if to_process:
        for fname in to_process:
            if fname in cache:
                text = cache[fname]
            else:
                try:
                    text = pytesseract.image_to_string(
                        Image.open(just.make_path(fname)))
                except OSError as e:
                    # Unreadable/corrupt image: skip it, best-effort.
                    print("ERR", fname, e)
                    continue
                cache[fname] = text
            time = datetime_from_timestamp(os.path.getmtime(fname), "utc")
            data = {
                "text": text,
                "path": fname,
                "title": fname.split("/")[-1],
                "time": time
            }
            objects.append(data)
        data = pd.DataFrame(objects)
        if processed_files and nrows is None:
            data = pd.concat((data, load_df(name)))
        # Sort by the first timestamp-like column that is present.
        for x in ["time", "start", "end"]:
            if x in data:
                data = data.sort_values(x)
                break
        if nrows is None:
            # Persist only full (untruncated) runs.
            save_df(data, name)
            save_processed_files(fnames | processed_files, name)
    else:
        data = load_df(name)
    if nrows is not None:
        data = data.iloc[-nrows:]
    return data
def load_embeddings(path=None):
    """Load word2vec-format embeddings and return ``(model, embedding_size)``.

    Compressed paths ("gz"/"bz2") are assumed to hold the binary word2vec
    format; anything else is read as text.
    """
    path = just.make_path(path)
    is_binary = path.endswith(("gz", "bz2"))
    embeddings = Word2Vec.load_word2vec_format(path, binary=is_binary)
    esize = _get_embedding_size(embeddings)
    return embeddings, esize
def __init__(self, client=None, user_id=None, profile=None):
    """Store Fitbit API handles and derive the membership start date.

    ``member_since`` is parsed from the profile's "memberSince" field
    (YYYY-MM-DD) when a profile is supplied, and is None otherwise.
    """
    self.root = just.make_path("~/nostalgia_data/input/fitbit")
    self.client = client
    self.user_id = user_id
    self.profile = profile
    if profile is None:
        self.member_since = None
    else:
        self.member_since = datetime.strptime(
            self.profile["memberSince"], "%Y-%m-%d").date()
def read_array_of_dict_from_json(fname, key_name=None, nrows=None):
    """
    This is an iterative way to read a json file without having to construct Python elements for everything.
    It can be a lot faster.

    Example data:

    {"participants": {"name": "a", "name": "b", "messages": [{"sender": "a", "time": 123}, {"sender": "b", "time": 124}]}}

    Function call:

    read_array_of_dict_from_json(fname, "messages", nrows=1)

    Returns:

    pd.DataFrame([{"sender": "a", "time": 123}])
    """
    if fname.endswith(".jsonl"):
        if not key_name:
            return pd.read_json(fname, lines=True)
        else:
            return pd.DataFrame([x[key_name] for x in just.read(fname)])
    if nrows is None:
        # Without a row limit there is no need for streaming: load eagerly.
        if not key_name:
            return pd.read_json(fname, lines=fname.endswith(".jsonl"))
        else:
            return pd.DataFrame(just.read(fname)[key_name])
    # Streaming path: parse incrementally so huge files stay cheap.
    import ijson
    with open(just.make_path(fname)) as f:
        parser = ijson.parse(f)
        capture = False
        rows = []
        row = {}
        map_key = ""
        num = 0
        for prefix, event, value in parser:
            # BUG FIX: was `num > nrows`, which collected one extra row
            # (nrows=1 returned 2 rows, contradicting the docstring example).
            if num >= nrows:
                break
            if prefix == key_name and event == "start_array":
                capture = True
            if not capture:
                continue
            if event == "start_map":
                continue
            elif event == "map_key":
                map_key = value
            elif event == "end_map":
                rows.append(row)
                row = {}
                num += 1
            elif map_key:
                row[map_key] = value
        return pd.DataFrame(rows)
def record(data_name, data_path="~/tracktrack/"):
    """Capture frames and mouse positions into a per-session directory.

    Numbering continues from any "im*.png" files already present, so
    repeated sessions with the same name append rather than overwrite.
    """
    session_dir = just.make_path(data_path + data_name + "/")
    offset = len(just.glob(session_dir + "/im*.png"))
    for frame, frame_num, mouse_pos in yield_images():
        frame_path = session_dir + "/im_{}.png".format(frame_num + offset)
        cv2.imwrite(frame_path, frame)
        just.append(mouse_pos, session_dir + "/positions.jsonl")
class FitbitAuth(object):
    """Manage the Fitbit OAuth2 flow and cache the access token on disk."""

    # Resolved once at import time; holds the cached token JSON.
    ACCESS_TOKEN_FILE = just.make_path(
        '~/nostalgia_data/config/fitbit/.access_token')

    def __init__(self, client_id, client_secret):
        self.client_id = client_id
        self.client_secret = client_secret
        # Populated lazily by ensure_access_token().
        self.access_token = None

    def get_auth_code(self):
        """Open the Fitbit consent page and return the OAuth authorization code.

        Blocks until the local RedirectServer receives the browser redirect.
        """
        log.info('Getting new auth code')
        url = 'https://www.fitbit.com/oauth2/authorize?' + '&'.join(
            '{}={}'.format(k, v) for k, v in {
                'response_type': 'code',
                'client_id': self.client_id,
                'redirect_uri': RedirectServer.URL,
                # Scopes are joined with the URL-encoded space ("%20").
                'scope': '%20'.join((
                    'activity',
                    'heartrate',
                    'location',
                    'nutrition',
                    'profile',
                    'settings',
                    'sleep',
                    'social',
                    'weight',
                )),
                'expires_in': '31536000',
            }.items())
        redirect = RedirectServer()
        webbrowser.open_new(url)
        result = redirect.get_result()
        # Query params arrive as lists; 'code' carries a single value.
        return result['code'][0]

    def get_access_token(self):
        """Exchange a fresh auth code for an access token dict.

        Raises requests.HTTPError on a non-2xx response.
        """
        log.info('Getting new access token')
        auth_code = self.get_auth_code()
        # HTTP Basic credentials: base64("client_id:client_secret").
        auth_string = base64.b64encode(
            self.client_id.encode('ascii') + b':' +
            self.client_secret.encode('ascii')).decode('ascii')
        r = requests.post(
            'https://api.fitbit.com/oauth2/token',
            headers={'Authorization': 'Basic ' + auth_string},
            data={
                # NOTE(review): 'clientId' (camelCase) — verify against the
                # Fitbit token endpoint, which commonly uses 'client_id'.
                'clientId': self.client_id,
                'code': auth_code,
                'grant_type': 'authorization_code',
                'redirect_uri': RedirectServer.URL,
            },
            timeout=30,
        )
        r.raise_for_status()
        return json.loads(r.text)

    def ensure_access_token(self):
        """Ensure self.access_token holds a valid token.

        Order of preference: in-memory token, non-expired on-disk cache,
        then a full re-authorization; new tokens are written back to disk
        with a 'time' field used for the expiry check.
        """
        if self.access_token:
            return
        now = int(time.time())
        if just.exists(self.ACCESS_TOKEN_FILE):
            access_token = just.read(self.ACCESS_TOKEN_FILE, unknown_type="json")
            if now > access_token['time'] + access_token['expires_in']:
                log.info('Cached access token is expired')
                os.unlink(self.ACCESS_TOKEN_FILE)
            else:
                self.access_token = access_token
                return
        self.access_token = self.get_access_token()
        self.access_token['time'] = now
        just.write(self.access_token, self.ACCESS_TOKEN_FILE, unknown_type="json")