Exemplo n.º 1
0
def MergeSeriesCollections(
    series: typing.Iterator[
      me_pb2.SeriesCollection]) -> me_pb2.SeriesCollection:
  """Merge the given series collections into a single SeriesCollection.

  Every Series from every input collection is grouped by its name. Each group
  is combined into one Series with ConcatenateSeries(), and the combined Series
  are returned sorted by name.

  Args:
    series: The SeriesCollection messages to merge.

  Returns:
    A SeriesCollection message.

  Raises:
    ValueError: Presumably raised by ConcatenateSeries() when a group of
      same-named Series cannot be combined -- TODO confirm against that
      helper; nothing in this function raises directly.
  """
  # Create a map from series name to a list of series protos. A plain loop is
  # used because the grouping is a side effect, not a value being built.
  names_to_series = collections.defaultdict(list)
  for proto in labtypes.flatten(list(f.series) for f in series):
    names_to_series[proto.name].append(proto)

  # Concatenate each list of series with the same name.
  concatenated_series = [
    ConcatenateSeries(group) for group in names_to_series.values()
  ]
  return me_pb2.SeriesCollection(
      series=sorted(concatenated_series, key=lambda s: s.name))
Exemplo n.º 2
0
def ProcessXmlFile(path: pathlib.Path) -> me_pb2.SeriesCollection:
    """Process a HealthKit XML data export.

    Calls pbutil.RunProcessMessageInPlace() with the xml_export_worker binary
    and a SeriesCollection whose source field is set to the file path.

    Args:
        path: Path of the XML file.

    Returns:
        A SeriesCollection message.

    Raises:
        FileNotFoundError: If the requested file is not found.
        importers.ImporterError: If the worker raises
            subprocess.CalledProcessError.
    """
    if not path.is_file():
        raise FileNotFoundError(str(path))

    try:
        worker_binary = bazelutil.DataPath(
            "phd/datasets/me_db/providers/health_kit/xml_export_worker")
        command = [str(worker_binary)]
        request = me_pb2.SeriesCollection(source=str(path))
        return pbutil.RunProcessMessageInPlace(command, request)
    except subprocess.CalledProcessError as e:
        raise importers.ImporterError("HealthKit", path, str(e)) from e
Exemplo n.º 3
0
def ProcessInbox(inbox: pathlib.Path) -> me_pb2.SeriesCollection:
    """Process a directory of YNAB data.

    Searches <inbox>/ynab (following symlinks) for files named Budget.yfull,
    processes each with ProcessBudgetJsonFile(), and merges the results.

    Args:
        inbox: The inbox path.

    Returns:
        A SeriesCollection message. An empty SeriesCollection is returned if
        the inbox has no "ynab" directory.
    """
    ynab_dir = inbox / "ynab"
    if not ynab_dir.is_dir():
        return me_pb2.SeriesCollection()

    find_output = subprocess.check_output(
        ["find", "-L", str(ynab_dir), "-name", "Budget.yfull"],
        universal_newlines=True,
    )

    # Discard blank lines *before* building Path objects. When find prints
    # nothing, "".split("\n") yields [""], and pathlib.Path("") is the always
    # truthy Path("."), which would otherwise be processed as a budget file
    # and fail with FileNotFoundError.
    # TODO(cec): There can be multiple directories for a single budget. Do we
    # need to de-duplicate them?
    files = [
        pathlib.Path(line)
        for line in find_output.rstrip().split("\n")
        if line
    ]

    series_collections = [ProcessBudgetJsonFile(file) for file in files]
    return importers.MergeSeriesCollections(series_collections)
Exemplo n.º 4
0
def ProcessCsvFile(path: pathlib.Path) -> me_pb2.SeriesCollection:
    """Process a LifeCycle CSV data export.

    Calls pbutil.RunProcessMessageInPlace() with the lc_export_csv_worker
    binary and a SeriesCollection whose source field is set to the file path.

    Args:
        path: Path of the CSV file.

    Returns:
        A SeriesCollection message.

    Raises:
        FileNotFoundError: If the requested file is not found.
        importers.ImporterError: If the worker raises
            subprocess.CalledProcessError.
    """
    if not path.is_file():
        raise FileNotFoundError(str(path))

    try:
        worker_binary = bazelutil.DataPath(
            "phd/datasets/me_db/providers/life_cycle/lc_export_csv_worker")
        command = [str(worker_binary)]
        request = me_pb2.SeriesCollection(source=str(path))
        return pbutil.RunProcessMessageInPlace(command, request)
    except subprocess.CalledProcessError as e:
        raise importers.ImporterError("LifeCycle", path, str(e)) from e
Exemplo n.º 5
0
def _ReadDatabaseToSeriesCollection(db) -> me_pb2.SeriesCollection:
    """Extract SeriesCollection from sqlite3 Timing.app database.

    One Series is created per distinct Task.title. Each AppActivity row
    belonging to that task becomes a Measurement whose group is the CamelCaps
    title of the activity's Application, or "default" when there is none.

    Args:
        db: The sqlite3 database.

    Returns:
        A SeriesCollection message.
    """
    cursor = db.cursor()

    # Construct a map from distinct Task.title columns to Series protos.
    cursor.execute('SELECT DISTINCT(title) FROM TASK')
    title_series_map = {row[0]: me_pb2.Series() for row in cursor.fetchall()}

    # Process data from each title separately.
    for title, series in title_series_map.items():
        start_time = time.time()

        # Set the Series message fields.
        series.family = 'ScreenTime'
        # The name of a series is a CamelCaps version of the Task.title. E.g. 'Web'.
        series.name = "".join(title.title().split())
        series.unit = 'milliseconds'

        # Run a query to aggregate columns data. The SQL engine can do all the heavy
        # lifting, with the only processing of data required being the conversion of
        # Application.title to CamelCaps.
        # The Application join must match on Application.id; comparing
        # AppActivity.applicationID with AppActivity.id never consults the
        # Application table and yields wrong or NULL groups.
        # NOTE(review): the * 1000.0 scaling assumes startDate/endDate are
        # stored in seconds -- confirm against the Timing.app schema.
        # TODO(cec): What time zone does Timing.app store results in?
        cursor.execute(
            """
SELECT
  CAST(ROUND(AppActivity.startDate * 1000.0) AS int) as date,
  CAST(ROUND((AppActivity.endDate - AppActivity.startDate) * 1000.0) AS int) as value,
  Application.title as `group`
FROM
  AppActivity
LEFT JOIN
  Application
  ON AppActivity.applicationID=Application.id
LEFT JOIN
  Task ON AppActivity.taskID=Task.id
WHERE
  Task.title=?
""", (title, ))
        # Create Measurement protos for each of the returned rows.
        series.measurement.extend([
            me_pb2.Measurement(
                ms_since_unix_epoch=date,
                value=value,
                group="".join(group.title().split()) if group else "default",
                source='Timing.app',
            ) for date, value, group in cursor
        ])
        logging.info('Processed %s %s:%s measurements in %.3f seconds',
                     humanize.intcomma(len(series.measurement)), series.family,
                     series.name,
                     time.time() - start_time)

    return me_pb2.SeriesCollection(series=title_series_map.values())
Exemplo n.º 6
0
Arquivo: ynab.py Projeto: SpringRi/phd
def ProcessBudgetJsonFile(path: pathlib.Path) -> me_pb2.SeriesCollection:
    """Process a YNAB budget JSON file.

    Calls pbutil.RunProcessMessageInPlace() with the json_budget_worker binary
    and a SeriesCollection whose source field is set to the file path.

    Args:
        path: Path of the budget file.

    Returns:
        A SeriesCollection message.

    Raises:
        FileNotFoundError: If the requested file is not found.
        importers.ImporterError: If the worker raises
            subprocess.CalledProcessError.
    """
    if not path.is_file():
        raise FileNotFoundError(str(path))
    try:
        return pbutil.RunProcessMessageInPlace([
            str(
                bazelutil.DataPath(
                    'phd/datasets/me_db/providers/ynab/json_budget_worker'))
        ], me_pb2.SeriesCollection(source=str(path)))
    except subprocess.CalledProcessError as e:
        # Label the failure as YNAB; this previously said 'LifeCycle', a
        # copy-paste slip that misattributed errors to another importer.
        raise importers.ImporterError('YNAB', path, str(e)) from e
Exemplo n.º 7
0
def ProcessInbox(inbox: pathlib.Path) -> me_pb2.SeriesCollection:
    """Process Timing.app data in an inbox.

    Args:
        inbox: The inbox path.

    Returns:
        A SeriesCollection message. It is empty when the inbox contains no
        timing/SQLite.db file.
    """
    database_path = inbox / "timing" / "SQLite.db"

    # Only hand the database on when it actually exists.
    if database_path.is_file():
        return ProcessDatabase(database_path)

    return me_pb2.SeriesCollection()
Exemplo n.º 8
0
def ProcessInbox(inbox: pathlib.Path) -> me_pb2.SeriesCollection:
    """Process Life Cycle data in an inbox.

    Extracts LC_export.csv from life_cycle/LC_export.zip into a temporary
    directory and passes the extracted file to ProcessCsvFile().

    Args:
        inbox: The inbox path.

    Returns:
        A SeriesCollection message. It is empty when the inbox contains no
        LC_export.zip file.
    """
    archive_path = inbox / "life_cycle" / "LC_export.zip"

    # Nothing to do if there is no LC_export.zip file.
    if not archive_path.is_file():
        return me_pb2.SeriesCollection()

    with tempfile.TemporaryDirectory(prefix="phd_") as workdir:
        extracted_csv = pathlib.Path(workdir) / "LC_export.csv"
        with zipfile.ZipFile(archive_path) as archive:
            with archive.open("LC_export.csv") as src, \
                    open(extracted_csv, "wb") as dst:
                dst.write(src.read())

        # Process while the temporary directory still exists.
        return ProcessCsvFile(extracted_csv)
Exemplo n.º 9
0
def ProcessInbox(inbox: pathlib.Path) -> me_pb2.SeriesCollection:
    """Process a directory of HealthKit data.

    Extracts apple_health_export/export.xml from health_kit/export.zip into a
    temporary directory and passes the extracted file to ProcessXmlFile().

    Args:
        inbox: The inbox path.

    Returns:
        A SeriesCollection message. It is empty when the inbox contains no
        HealthKit export.zip file.
    """
    archive_path = inbox / "health_kit" / "export.zip"

    # Nothing to do if there is no HealthKit export.zip file.
    if not archive_path.is_file():
        return me_pb2.SeriesCollection()

    app.Log(1, "Unpacking %s", archive_path)
    with tempfile.TemporaryDirectory(prefix="phd_") as workdir:
        extracted_xml = pathlib.Path(workdir) / "export.xml"
        with zipfile.ZipFile(archive_path) as archive:
            with archive.open("apple_health_export/export.xml") as src, \
                    open(extracted_xml, "wb") as dst:
                dst.write(src.read())

        # Process while the temporary directory still exists.
        return ProcessXmlFile(extracted_xml)
Exemplo n.º 10
0
def ProcessInbox(inbox: pathlib.Path) -> me_pb2.SeriesCollection:
    """Process a directory of HealthKit data.

    Extracts apple_health_export/export.xml from health_kit/export.zip into a
    temporary directory and passes the extracted file to ProcessXmlFile().

    Args:
        inbox: The inbox path.

    Returns:
        A SeriesCollection message. It is empty when the inbox contains no
        HealthKit export.zip file.
    """
    archive_path = inbox / 'health_kit' / 'export.zip'

    # Nothing to do if there is no HealthKit export.zip file.
    if not archive_path.is_file():
        return me_pb2.SeriesCollection()

    logging.info('Unpacking %s', archive_path)
    with tempfile.TemporaryDirectory(prefix='phd_') as workdir:
        extracted_xml = pathlib.Path(workdir) / 'export.xml'
        with zipfile.ZipFile(archive_path) as archive:
            with archive.open('apple_health_export/export.xml') as src, \
                    open(extracted_xml, 'wb') as dst:
                dst.write(src.read())

        # Process while the temporary directory still exists.
        return ProcessXmlFile(extracted_xml)