Пример #1
0
def reader(fname, no_exist, read_func_name, unknown_type, ignore_exceptions):
    """Read ``fname`` with the module registered for its extension.

    Args:
        fname: Path of the file to read; it is passed through ``make_path``.
        no_exist: When not ``None``, returned as-is if ``fname`` is not an
            existing file.
        read_func_name: Name of the attribute fetched from the selected
            module and called to do the actual reading.
        unknown_type: ``"RAISE"`` to raise on an extension missing from
            ``EXT_TO_MODULE``; otherwise a key of ``EXT_TO_MODULE`` used as
            the fallback module.
        ignore_exceptions: Exception class (or tuple of classes). When not
            ``None``, any of these raised while opening/reading yields
            ``None`` instead of propagating.

    Returns:
        Whatever the selected read function returns, ``no_exist`` for a
        missing file, or ``None`` when an ignored exception occurred.

    Raises:
        TypeError: Unknown extension while ``unknown_type == "RAISE"``.
    """
    fname = make_path(fname)
    if not os.path.isfile(fname) and no_exist is not None:
        return no_exist

    # Collect every compression opener whose suffix matches, peeling the
    # suffix (plus one separator character) off the name used for
    # extension detection.
    openers = []
    base_name = fname
    for suffix, opener in EXT_TO_COMPRESSION.items():
        if not fname.endswith(suffix):
            continue
        openers.append(opener)
        base_name = base_name[:-(len(suffix) + 1)]

    # Only a dot within the last six characters counts as an extension.
    ext = None
    if "." in base_name[-6:]:
        ext = base_name.split(".")[-1]

    if unknown_type == "RAISE" and ext not in EXT_TO_MODULE:
        raise TypeError("just does not yet cover '{}'".format(ext))

    module = EXT_TO_MODULE.get(ext, None) or EXT_TO_MODULE[unknown_type]
    read_fn = getattr(module, read_func_name)

    def _load():
        # Only the first matching opener is used; calling it yields a
        # file handle that is handed to the read function.
        if openers:
            with openers[0](fname, "rb") as handle:
                return read_fn(handle)
        return read_fn(fname)

    if ignore_exceptions is None:
        return _load()
    try:
        return _load()
    except ignore_exceptions:
        return None
Пример #2
0
def writer(obj, fname, mkdir_no_exist, skip_if_exist, write_func_name,
           unknown_type):
    """Write ``obj`` to ``fname`` with the module registered for its extension.

    Args:
        obj: Object handed to the selected write function.
        fname: Destination path; it is passed through ``make_path``.
        mkdir_no_exist: When truthy, ``mkdir`` is called on the parent
            directory unless that directory is ``"."``, ``".."`` or empty.
        skip_if_exist: When truthy and ``fname`` is already a file, nothing
            is written and ``False`` is returned.
        write_func_name: Name of the attribute fetched from the selected
            module and called to do the actual writing.
        unknown_type: ``"RAISE"`` to raise on an extension missing from
            ``EXT_TO_MODULE``; otherwise a key of ``EXT_TO_MODULE`` used as
            the fallback module.

    Returns:
        ``False`` when the write was skipped, otherwise whatever the
        selected write function returns.

    Raises:
        TypeError: Unknown extension while ``unknown_type == "RAISE"``.
    """
    fname = make_path(fname)
    if skip_if_exist and os.path.isfile(fname):  # pragma: no cover
        return False

    if mkdir_no_exist:
        parent = os.path.dirname(fname)
        if parent not in (".", "..", ""):
            mkdir(parent)

    # Collect every compression opener whose suffix matches, peeling the
    # suffix (plus one separator character) off the name used for
    # extension detection.
    openers = []
    base_name = fname
    for suffix, opener in EXT_TO_COMPRESSION.items():
        if not fname.endswith(suffix):
            continue
        openers.append(opener)
        base_name = base_name[:-(len(suffix) + 1)]

    # Only a dot within the last six characters counts as an extension.
    ext = None
    if "." in base_name[-6:]:
        ext = base_name.split(".")[-1]

    if unknown_type == "RAISE" and ext not in EXT_TO_MODULE:
        raise TypeError("just does not yet cover '{}'".format(ext))

    module = EXT_TO_MODULE.get(ext, None) or EXT_TO_MODULE[unknown_type]
    write_fn = getattr(module, write_func_name)

    if not openers:
        return write_fn(obj, fname)

    # Only the first matching opener is used; calling it yields a file
    # handle that is handed to the write function.
    with openers[0](fname, "wb") as handle:
        return write_fn(obj, handle)
Пример #3
0
    def __init__(self, hostname, username, password):
        """Store the connection credentials and the input data directory.

        The three arguments are first handed to
        ``self._validate_constructor_arguments``; nothing is assigned if
        that call raises.
        """
        self._validate_constructor_arguments(hostname, username, password)

        self.hostname, self.username, self.password = (
            hostname,
            username,
            password,
        )

        # Directory path used by this instance for its input data.
        self.root = just.make_path("~/nostalgia_data/input/imap")
Пример #4
0
def file_modified_since_last(fname, name):
    """Report whether ``fname`` changed relative to the mtime stored for ``name``.

    The stored value is read from
    ``~/nostalgia_data/seen/<slugified name>.json`` and defaults to ``0``
    when that file does not exist.

    Returns:
        The current modification time of ``fname`` when it differs from
        the stored value, otherwise ``None``.
    """
    seen_file = just.make_path(
        "~/nostalgia_data/seen/" + slugify(name) + ".json")
    previous_mtime = float(just.read(seen_file, no_exist=0))
    current_mtime = os.path.getmtime(fname)
    return None if previous_mtime == current_mtime else current_mtime
Пример #5
0
    def load_image_texts(cls, glob_pattern_s, nrows=None):
        """OCR the images matching one or more glob patterns into a DataFrame.

        Files already recorded as processed for this pattern/class
        combination are skipped; OCR results are additionally memoised in
        the "tesseract" cache keyed by file name.

        Args:
            glob_pattern_s: A single glob pattern, or a list of patterns
                whose matches are unioned.
            nrows: When given, at most ``nrows`` not-yet-processed files are
                OCR'd, only the last ``nrows`` rows are returned, and
                nothing is persisted.

        Returns:
            A DataFrame with ``text``, ``path``, ``title`` and ``time``
            columns for newly processed files, combined with previously
            saved rows when ``nrows`` is None. When there is nothing new to
            process, the previously saved frame is returned as-is.
        """
        import pytesseract
        from PIL import Image

        if isinstance(glob_pattern_s, list):
            fnames = set()
            for glob_pattern in glob_pattern_s:
                fnames.update(just.glob(glob_pattern))
            glob_pattern = "_".join(glob_pattern_s)
        else:
            # A single pattern: bind it before use (previously this branch
            # read ``glob_pattern`` without ever assigning it -> NameError).
            glob_pattern = glob_pattern_s
            fnames = set(just.glob(glob_pattern))
        name = glob_pattern + "_" + normalize_name(cls.__name__)
        processed_files = get_processed_files(name)
        to_process = fnames.difference(processed_files)
        objects = []

        cache = get_cache("tesseract")

        if nrows is not None:
            if not to_process:
                return load_df(name).iloc[-nrows:]
            else:
                to_process = list(to_process)[-nrows:]
        if to_process:
            for fname in to_process:
                if fname in cache:
                    text = cache[fname]
                else:
                    try:
                        # Context manager so the image file handle is
                        # released as soon as OCR is done.
                        with Image.open(just.make_path(fname)) as image:
                            text = pytesseract.image_to_string(image)
                    except OSError as e:
                        # Best effort: report unreadable images and move on.
                        print("ERR", fname, e)
                        continue
                    cache[fname] = text
                modified_at = datetime_from_timestamp(
                    os.path.getmtime(fname), "utc")
                objects.append({
                    "text": text,
                    "path": fname,
                    "title": fname.split("/")[-1],
                    "time": modified_at,
                })
            data = pd.DataFrame(objects)
            if processed_files and nrows is None:
                data = pd.concat((data, load_df(name)))
            # Sort by the first time-like column that is present.
            for x in ["time", "start", "end"]:
                if x in data:
                    data = data.sort_values(x)
                    break
            # Partial (nrows-limited) runs are never persisted.
            if nrows is None:
                save_df(data, name)
                save_processed_files(fnames | processed_files, name)
        else:
            data = load_df(name)
        if nrows is not None:
            data = data.iloc[-nrows:]
        return data
Пример #6
0
def load_embeddings(path=None):
    """Load word2vec-format embeddings from ``path``.

    The file is loaded in binary mode when the path ends with ``gz`` or
    ``bz2``, and in text mode otherwise.

    Returns:
        A ``(embeddings, embedding_size)`` tuple, where the size comes from
        ``_get_embedding_size``.
    """
    path = just.make_path(path)
    is_binary = path.endswith(("gz", "bz2"))
    embeddings = Word2Vec.load_word2vec_format(path, binary=is_binary)
    return embeddings, _get_embedding_size(embeddings)
Пример #7
0
 def __init__(self, client=None, user_id=None, profile=None):
     """Store the client, user id and profile, and derive ``member_since``.

     ``member_since`` is the date parsed from ``profile["memberSince"]``
     (format ``%Y-%m-%d``), or ``None`` when no profile is given.
     """
     self.root = just.make_path("~/nostalgia_data/input/fitbit")
     self.client = client
     self.user_id = user_id
     self.profile = profile
     self.member_since = (
         None
         if profile is None
         else datetime.strptime(self.profile["memberSince"], "%Y-%m-%d").date()
     )
Пример #8
0
def read_array_of_dict_from_json(fname, key_name=None, nrows=None):
    """
    Read an array of dicts from a JSON or JSON-lines file into a DataFrame.

    When ``nrows`` is given (and the file is not ``.jsonl``) the file is
    parsed iteratively with ijson, so Python objects are only built for the
    first ``nrows`` elements. It can be a lot faster on large files.

    Args:
        fname: Path of the file. A ``.jsonl`` suffix selects line-by-line
            reading; ``nrows`` is not applied to ``.jsonl`` files.
        key_name: Key holding the array. In the iterative path it is compared
            against the full ijson prefix, so an array nested in other
            objects needs its dotted path (e.g. ``"a.messages"``).
        nrows: Maximum number of array elements to read iteratively; ``None``
            loads the whole file.

    Returns:
        A ``pd.DataFrame`` with one row per dict. In the iterative path only
        scalar values are collected, keyed by the most recent map key;
        nested containers inside an element are not reconstructed.

    Example data:
    {"participants": [{"name": "a"}, {"name": "b"}], "messages": [{"sender": "a", "time": 123}, {"sender": "b", "time": 124}]}

    Function call:
    read_array_of_dict_from_json(fname, "messages", nrows=1)

    Returns:
    pd.DataFrame([{"sender": "a", "time": 123}])
    """
    if fname.endswith(".jsonl"):
        if not key_name:
            return pd.read_json(fname, lines=True)
        return pd.DataFrame([x[key_name] for x in just.read(fname)])

    if nrows is None:
        if not key_name:
            # ``.jsonl`` was handled above, so this is never line-delimited.
            return pd.read_json(fname)
        return pd.DataFrame(just.read(fname)[key_name])

    import ijson

    rows = []
    with open(just.make_path(fname)) as f:
        capture = False
        row = {}
        map_key = ""
        for prefix, event, value in ijson.parse(f):
            # Stop as soon as exactly ``nrows`` rows are collected
            # (the previous ``num > nrows`` check yielded one row too many).
            if len(rows) >= nrows:
                break
            if prefix == key_name:
                if event == "start_array":
                    capture = True
                    continue
                if capture and event == "end_array":
                    # The target array is exhausted; without this, events
                    # after the array (e.g. the enclosing object's end_map)
                    # would be turned into spurious rows.
                    break
            if not capture:
                continue
            if event == "start_map":
                continue
            elif event == "map_key":
                map_key = value
            elif event == "end_map":
                rows.append(row)
                row = {}
            elif map_key:
                row[map_key] = value
    return pd.DataFrame(rows)
Пример #9
0
def record(data_name, data_path="~/tracktrack/"):
    """Save each yielded frame as a PNG and append its mouse position.

    Frames are written to ``<data_path><data_name>/`` and numbered after
    the ``im*.png`` files already found there; each mouse position is
    appended to ``positions.jsonl`` in the same directory.
    """
    path = just.make_path(data_path + data_name + "/")
    # Continue numbering after the images already present.
    offset = len(just.glob(path + "/im*.png"))
    for image, it, mouse_pos in yield_images():
        frame_name = "/im_{}.png".format(it + offset)
        cv2.imwrite(path + frame_name, image)
        just.append(mouse_pos, path + "/positions.jsonl")
Пример #10
0
class FitbitAuth(object):
    """Obtain and cache a Fitbit OAuth2 access token.

    The token is requested through the authorization-code flow (a browser
    window plus a ``RedirectServer`` to receive the code) and cached on
    disk in ``ACCESS_TOKEN_FILE``.
    """

    # Path of the on-disk token cache.
    ACCESS_TOKEN_FILE = just.make_path(
        "~/nostalgia_data/config/fitbit/.access_token")

    def __init__(self, client_id, client_secret):
        """Remember the application credentials; no token is loaded yet."""
        self.client_id = client_id
        self.client_secret = client_secret
        self.access_token = None

    def get_auth_code(self):
        """Open the Fitbit authorization page and return the received code.

        Returns:
            The first ``code`` value from the redirect server's result.
        """
        log.info("Getting new auth code")
        scopes = (
            "activity",
            "heartrate",
            "location",
            "nutrition",
            "profile",
            "settings",
            "sleep",
            "social",
            "weight",
        )
        params = {
            "response_type": "code",
            "client_id": self.client_id,
            "redirect_uri": RedirectServer.URL,
            # Scopes are joined with an already percent-encoded space.
            "scope": "%20".join(scopes),
            "expires_in": "31536000",
        }
        query = "&".join(
            "{}={}".format(key, val) for key, val in params.items())
        url = "https://www.fitbit.com/oauth2/authorize?" + query

        # Start the redirect receiver before sending the user to the browser.
        redirect = RedirectServer()
        webbrowser.open_new(url)
        result = redirect.get_result()
        return result["code"][0]

    def get_access_token(self):
        """Exchange a fresh authorization code for an access token.

        Returns:
            The token endpoint's JSON response, decoded.

        Raises:
            Whatever ``raise_for_status`` raises on an error response.
        """
        log.info("Getting new access token")
        auth_code = self.get_auth_code()

        # HTTP Basic credentials: base64("<client_id>:<client_secret>").
        credentials = b":".join((
            self.client_id.encode("ascii"),
            self.client_secret.encode("ascii"),
        ))
        auth_string = base64.b64encode(credentials).decode("ascii")

        headers = {"Authorization": "Basic " + auth_string}
        payload = {
            "clientId": self.client_id,
            "code": auth_code,
            "grant_type": "authorization_code",
            "redirect_uri": RedirectServer.URL,
        }
        response = requests.post(
            "https://api.fitbit.com/oauth2/token",
            headers=headers,
            data=payload,
            timeout=30,
        )
        response.raise_for_status()
        return json.loads(response.text)

    def ensure_access_token(self):
        """Make sure ``self.access_token`` is populated.

        Order of preference: the token already held in memory, then an
        unexpired token from ``ACCESS_TOKEN_FILE``, then a newly requested
        token, which is stamped with the current time and written back to
        the cache file. An expired cache file is deleted.
        """
        if self.access_token:
            return

        now = int(time.time())
        if just.exists(self.ACCESS_TOKEN_FILE):
            cached = just.read(self.ACCESS_TOKEN_FILE, unknown_type="json")
            expires_at = cached["time"] + cached["expires_in"]
            if expires_at < now:
                log.info("Cached access token is expired")
                os.unlink(self.ACCESS_TOKEN_FILE)
            else:
                self.access_token = cached
                return

        self.access_token = self.get_access_token()
        # Record when the token was obtained so expiry can be checked later.
        self.access_token["time"] = now
        just.write(self.access_token,
                   self.ACCESS_TOKEN_FILE,
                   unknown_type="json")