Exemplo n.º 1
0
def plannerd_thread(sm=None, pm=None):
    """Run the planning loop forever.

    Blocks until the "CarParams" param is available, builds the longitudinal
    and lateral planners, and then, every time a new 'modelV2' message has
    arrived, updates and publishes the lateral plan followed by the
    longitudinal plan.

    Args:
        sm: optional SubMaster to read from; a default one is created when None.
        pm: optional PubMaster to publish on; a default one is created when None.
    """
    config_realtime_process(5, Priority.CTRL_LOW)

    cloudlog.info("plannerd is waiting for CarParams")
    params = Params()
    car_params_bytes = params.get("CarParams", block=True)
    CP = car.CarParams.from_bytes(car_params_bytes)
    cloudlog.info("plannerd got CarParams: %s", CP.carName)

    use_lanelines = False
    wide_camera = params.get_bool('WideCameraOnly')
    cloudlog.event("e2e mode", on=use_lanelines)

    longitudinal_planner = Planner(CP)
    lateral_planner = LateralPlanner(use_lanelines=use_lanelines, wide_camera=wide_camera)

    if sm is None:
        subscribed = ['carState', 'controlsState', 'radarState', 'modelV2']
        sm = messaging.SubMaster(subscribed,
                                 poll=['radarState', 'modelV2'],
                                 ignore_avg_freq=['radarState'])

    if pm is None:
        pm = messaging.PubMaster(['longitudinalPlan', 'lateralPlan'])

    # Lateral is handled before longitudinal on every model frame.
    planners = (lateral_planner, longitudinal_planner)
    while True:
        sm.update()
        if not sm.updated['modelV2']:
            continue

        for planner in planners:
            planner.update(sm)
            planner.publish(sm, pm)
Exemplo n.º 2
0
def finalize_update(wait_helper: WaitTimeHelper) -> None:
    """Copy the merged OverlayFS view to FINALIZED, outside of OverlayFS.

    The consistent flag is cleared first and only set again at the end if
    ``wait_helper.shutdown`` is not set, so an interrupted run never leaves a
    copy that is marked consistent. The copy is made with shutil.copytree,
    after removing any previous FINALIZED directory.
    """
    cloudlog.info("creating finalized version of the overlay")
    set_consistent_flag(False)

    # Replace any previous finalized copy with a fresh one.
    if os.path.exists(FINALIZED):
        shutil.rmtree(FINALIZED)
    shutil.copytree(OVERLAY_MERGED, FINALIZED, symlinks=True)

    # Reset the copied working tree, then every submodule.
    reset_cmds = (
        ["git", "reset", "--hard"],
        ["git", "submodule", "foreach", "--recursive", "git", "reset"],
    )
    for cmd in reset_cmds:
        run(cmd, FINALIZED)

    cloudlog.info("Starting git gc")
    t = time.monotonic()
    try:
        run(["git", "gc"], FINALIZED)
    except subprocess.CalledProcessError:
        # A failed gc is logged but does not abort finalization.
        cloudlog.exception(f"Failed git gc, took {time.monotonic() - t:.3f} s")
    else:
        cloudlog.event("Done git gc", duration=time.monotonic() - t)

    if not wait_helper.shutdown:
        set_consistent_flag(True)
        cloudlog.info("done finalizing overlay")
    else:
        cloudlog.info("got interrupted finalizing overlay")
Exemplo n.º 3
0
def _do_upload(upload_item, callback=None):
    """PUT the file described by ``upload_item`` to ``upload_item.url``.

    If ``upload_item.path`` does not exist but the same path without the .bz2
    extension does, that file is read and bz2-compressed in memory before
    being sent.

    Args:
        upload_item: object providing ``path``, ``url`` and ``headers``.
        callback: optional callable; when truthy the payload is wrapped in a
            CallbackReader built with this callback and the payload size.

    Returns:
        The response returned by ``requests.put``.
    """
    path = upload_item.path
    compress = False

    # Fall back to the uncompressed file and compress it on the fly.
    if not os.path.exists(path):
        uncompressed_path = strip_bz2_extension(path)
        if os.path.exists(uncompressed_path):
            path = uncompressed_path
            compress = True

    with open(path, "rb") as f:
        if not compress:
            size = os.fstat(f.fileno()).st_size
            data = f
        else:
            cloudlog.event("athena.upload_handler.compress",
                           fn=path,
                           fn_orig=upload_item.path)
            compressed = bz2.compress(f.read())
            size = len(compressed)
            data = io.BytesIO(compressed)

        if callback:
            data = CallbackReader(data, callback, size)

        headers = {**upload_item.headers, 'Content-Length': str(size)}
        return requests.put(upload_item.url,
                            data=data,
                            headers=headers,
                            timeout=30)
Exemplo n.º 4
0
    def upload(self, name, key, fn, network_type, metered):
        """Upload one file and tag it as uploaded on success.

        Zero-size files, and files whose name is in ``self.immediate_priority``
        and that exceed UPLOAD_QLOG_QCAM_MAX_SIZE, are treated as successful
        without being sent. Otherwise the file goes through
        ``self.normal_upload`` and counts as successful when the response
        status is one of 200, 201, 401, 403 or 412.

        Returns:
            True when the file was handled successfully, False otherwise
            (including when its size cannot be read).
        """
        try:
            sz = os.path.getsize(fn)
        except OSError:
            cloudlog.exception("upload: getsize failed")
            return False

        # Fields shared by the start/success/failure log events.
        log_fields = dict(key=key,
                          fn=fn,
                          sz=sz,
                          network_type=network_type,
                          metered=metered)
        cloudlog.event("upload_start", **log_fields)

        if sz == 0:
            # tag files of 0 size as uploaded
            success = True
        elif name in self.immediate_priority and sz > UPLOAD_QLOG_QCAM_MAX_SIZE:
            cloudlog.event("uploader_too_large", key=key, fn=fn, sz=sz)
            success = True
        else:
            start_time = time.monotonic()
            stat = self.normal_upload(key, fn)
            accepted_codes = (200, 201, 401, 403, 412)
            success = stat is not None and stat.status_code in accepted_codes
            if success:
                self.last_filename = fn
                self.last_time = time.monotonic() - start_time
                self.last_speed = (sz / 1e6) / self.last_time
                if stat.status_code == 412:
                    event_name = "upload_ignored"
                else:
                    event_name = "upload_success"
                cloudlog.event(event_name, **log_fields)
            else:
                cloudlog.event("upload_failed",
                               stat=stat,
                               exc=self.last_exc,
                               **log_fields)

        if not success:
            return success

        # tag file as uploaded
        try:
            setxattr(fn, UPLOAD_ATTR_NAME, UPLOAD_ATTR_VALUE)
        except OSError:
            cloudlog.event("uploader_setxattr_failed",
                           exc=self.last_exc,
                           key=key,
                           fn=fn,
                           sz=sz)

        return success
Exemplo n.º 5
0
def main():
    """Keep a websocket connection to the Athena host open, reconnecting forever.

    Each iteration connects, clears ``cur_upload_items`` and hands the socket
    to ``handle_long_poll``. Connection-type failures and unexpected
    exceptions increase the retry counter passed to ``backoff``; a
    ``socket.timeout`` does not. KeyboardInterrupt and SystemExit end the loop.
    """
    try:
        set_core_affinity([0, 1, 2, 3])
    except Exception:
        cloudlog.exception("failed to set core affinity")

    params = Params()
    dongle_id = params.get("DongleId", encoding='utf-8')
    UploadQueueCache.initialize(upload_queue)

    ws_uri = ATHENA_HOST + "/ws/v2/" + dongle_id
    api = Api(dongle_id)

    conn_retries = 0
    while True:
        try:
            cloudlog.event("athenad.main.connecting_ws", ws_uri=ws_uri)
            jwt_cookie = "jwt=" + api.get_token()
            ws = create_connection(ws_uri,
                                   cookie=jwt_cookie,
                                   enable_multithread=True,
                                   timeout=30.0)
            cloudlog.event("athenad.main.connected_ws", ws_uri=ws_uri)

            # Connected: start the backoff over and forget in-flight uploads.
            conn_retries = 0
            cur_upload_items.clear()

            handle_long_poll(ws)
        except (KeyboardInterrupt, SystemExit):
            break
        except (ConnectionError, TimeoutError, WebSocketException):
            conn_retries += 1
            params.delete("LastAthenaPingTime")
        except socket.timeout:
            # Retry counter is left untouched for this case.
            params.delete("LastAthenaPingTime")
        except Exception:
            cloudlog.exception("athenad.main.exception")

            conn_retries += 1
            params.delete("LastAthenaPingTime")

        delay = backoff(conn_retries)
        time.sleep(delay)
Exemplo n.º 6
0
    def list_upload_files(self):
        """Yield ``(name, key, fn)`` for every file under ``self.root`` not yet uploaded.

        Directories are visited in the order given by ``listdir_by_creation``;
        a directory containing any ``.lock`` file is skipped entirely. Files
        within a directory are ordered by ``self.get_upload_sort``. While
        iterating, ``self.immediate_count`` and ``self.immediate_size`` are
        rebuilt from the pending files whose name is in
        ``self.immediate_priority``.
        """
        if not os.path.isdir(self.root):
            return

        self.immediate_size = 0
        self.immediate_count = 0

        for logname in listdir_by_creation(self.root):
            log_dir = os.path.join(self.root, logname)
            try:
                names = os.listdir(log_dir)
            except OSError:
                continue

            locked = any(entry.endswith(".lock") for entry in names)
            if locked:
                continue

            for name in sorted(names, key=self.get_upload_sort):
                key = os.path.join(logname, name)
                fn = os.path.join(log_dir, name)

                # skip files already uploaded
                try:
                    already_uploaded = getxattr(fn, UPLOAD_ATTR_NAME)
                except OSError:
                    cloudlog.event("uploader_getxattr_failed",
                                   exc=self.last_exc,
                                   key=key,
                                   fn=fn)
                    # deleter could have deleted the file; treat it as done
                    continue
                if already_uploaded:
                    continue

                if name in self.immediate_priority:
                    try:
                        self.immediate_count += 1
                        self.immediate_size += os.path.getsize(fn)
                    except OSError:
                        pass

                yield (name, key, fn)
Exemplo n.º 7
0
def main() -> NoReturn:
  """Collect metrics from the stats socket and periodically write them to STATS_DIR.

  Gauges keep only their latest value; samples are accumulated and summarised
  (count, min, max, mean and the 5th/50th/95th percentiles). A flush happens
  when STATS_FLUSH_TIME_S has elapsed or when deviceState.started changes.
  """
  dongle_id = Params().get("DongleId", encoding='utf-8')

  def get_influxdb_line(measurement: str, value: Union[float, Dict[str, float]],  timestamp: datetime, tags: dict) -> str:
    """Format one measurement as a line: name, tags, fields, dongle_id and a nanosecond timestamp."""
    tag_part = "".join(f",{k}={str(v)}" for k, v in tags.items())
    # A bare float is reported as a single field named 'value'.
    fields = {'value': value} if isinstance(value, float) else value
    field_part = "".join(f"{k}={v}," for k, v in fields.items())
    trailer = f"dongle_id=\"{dongle_id}\" {int(timestamp.timestamp() * 1e9)}\n"
    return f"{measurement}{tag_part} {field_part}{trailer}"

  # open statistics socket
  ctx = zmq.Context().instance()
  sock = ctx.socket(zmq.PULL)
  sock.bind(STATS_SOCKET)

  # initialize stats directory
  Path(STATS_DIR).mkdir(parents=True, exist_ok=True)

  # initialize tags
  tags = {
    'started': False,
    'version': get_short_version(),
    'branch': get_short_branch(),
    'dirty': is_dirty(),
    'origin': get_normalized_origin(),
    'deviceType': HARDWARE.get_device_type(),
  }

  # subscribe to deviceState for started state
  sm = SubMaster(['deviceState'])

  idx = 0
  last_flush_time = time.monotonic()
  gauges = {}
  samples: Dict[str, List[float]] = defaultdict(list)
  while True:
    started_prev = sm['deviceState'].started
    sm.update()

    # Drain every metric currently queued on the socket
    while True:
      try:
        metric = sock.recv_string(zmq.NOBLOCK)
      except zmq.error.Again:
        break

      try:
        # Expected shape: "<name>:<value>|<type>"
        pipe_parts = metric.split('|')
        metric_type = pipe_parts[1]
        name_and_value = pipe_parts[0].split(':')
        metric_name = name_and_value[0]
        metric_value = float(name_and_value[1])

        if metric_type == METRIC_TYPE.GAUGE:
          gauges[metric_name] = metric_value
        elif metric_type == METRIC_TYPE.SAMPLE:
          samples[metric_name].append(metric_value)
        else:
          cloudlog.event("unknown metric type", metric_type=metric_type)
      except Exception:
        cloudlog.event("malformed metric", metric=metric)

    # flush when started state changes or after FLUSH_TIME_S
    flush_due = time.monotonic() > last_flush_time + STATS_FLUSH_TIME_S
    if not (flush_due or sm['deviceState'].started != started_prev):
      continue

    current_time = datetime.utcnow().replace(tzinfo=timezone.utc)
    tags['started'] = sm['deviceState'].started

    lines = [get_influxdb_line(f"gauge.{key}", value, current_time, tags)
             for key, value in gauges.items()]

    for key, values in samples.items():
      values.sort()
      sample_count = len(values)

      stats = {
        'count': sample_count,
        'min': values[0],
        'max': values[-1],
        'mean': sum(values) / sample_count,
      }
      for percentile in [0.05, 0.5, 0.95]:
        rank = int(round(percentile * (sample_count - 1)))
        stats[f"p{int(percentile * 100)}"] = values[rank]

      lines.append(get_influxdb_line(f"sample.{key}", stats, current_time, tags))

    result = "".join(lines)

    # clear intermediate data
    gauges.clear()
    samples.clear()
    last_flush_time = time.monotonic()

    # check that we aren't filling up the drive
    if len(os.listdir(STATS_DIR)) >= STATS_DIR_FILE_LIMIT:
      cloudlog.error("stats dir full")
    elif result:
      stats_path = os.path.join(STATS_DIR, f"{current_time.timestamp():.0f}_{idx}")
      with atomic_write_in_dir(stats_path) as f:
        f.write(result)
      idx += 1
Exemplo n.º 8
0
    def update_events(self, CS):
        """Compute carEvents from carState.

        Clears ``self.events`` and repopulates it from the current car state
        ``CS``, the messages held in ``self.sm`` and counters kept on
        ``self``. Nothing is returned; the result is the content of
        ``self.events``. As side effects this may also update
        ``self.startup_event``, ``self.last_functional_fan_frame``,
        ``self.logged_comm_issue`` and ``self.cruise_mismatch_counter``.
        """

        self.events.clear()

        # Add startup event
        # It is consumed here, so it is only added on one call.
        if self.startup_event is not None:
            self.events.add(self.startup_event)
            self.startup_event = None

        # Don't add any more events if not initialized
        if not self.initialized:
            self.events.add(EventName.controlsInitializing)
            return

        # Disable on rising edge of accelerator or brake. Also disable on brake when speed > 0
        if (CS.gasPressed and not self.CS_prev.gasPressed and self.disengage_on_accelerator) or \
          (CS.brakePressed and (not self.CS_prev.brakePressed or not CS.standstill)):
            self.events.add(EventName.pedalPressed)

        # While gas is held, the event added depends on disengage_on_accelerator.
        if CS.gasPressed:
            self.events.add(
                EventName.pedalPressedPreEnable if self.
                disengage_on_accelerator else EventName.gasPressedOverride)

        if not self.CP.notCar:
            self.events.add_from_msg(self.sm['driverMonitoringState'].events)

        # Handle car events. Ignore when CAN is invalid
        if CS.canTimeout:
            self.events.add(EventName.canBusMissing)
        elif not CS.canValid:
            self.events.add(EventName.canError)
        else:
            self.events.add_from_msg(CS.events)

        # Create events for temperature, disk space, and memory
        if self.sm['deviceState'].thermalStatus >= ThermalStatus.red:
            self.events.add(EventName.overheat)
        if self.sm['deviceState'].freeSpacePercent < 7 and not SIMULATION:
            # under 7% of space free no enable allowed
            self.events.add(EventName.outOfSpace)
        # TODO: make tici threshold the same
        if self.sm['deviceState'].memoryUsagePercent > 90 and not SIMULATION:
            self.events.add(EventName.lowMemory)

        # TODO: enable this once loggerd CPU usage is more reasonable
        #cpus = list(self.sm['deviceState'].cpuUsagePercent)
        #if max(cpus, default=0) > 95 and not SIMULATION:
        #  self.events.add(EventName.highCpuUsage)

        # Alert if fan isn't spinning for 5 seconds
        # Only checked for pandaType == dos. The "functional" frame is refreshed
        # whenever the fan reports RPM or is not being asked for more than 50%.
        if self.sm['peripheralState'].pandaType == PandaType.dos:
            if self.sm['peripheralState'].fanSpeedRpm == 0 and self.sm[
                    'deviceState'].fanSpeedPercentDesired > 50:
                if (self.sm.frame -
                        self.last_functional_fan_frame) * DT_CTRL > 5.0:
                    self.events.add(EventName.fanMalfunction)
            else:
                self.last_functional_fan_frame = self.sm.frame

        # Handle calibration status
        cal_status = self.sm['liveCalibration'].calStatus
        if cal_status != Calibration.CALIBRATED:
            if cal_status == Calibration.UNCALIBRATED:
                self.events.add(EventName.calibrationIncomplete)
            else:
                self.events.add(EventName.calibrationInvalid)

        # Handle lane change
        if self.sm[
                'lateralPlan'].laneChangeState == LaneChangeState.preLaneChange:
            direction = self.sm['lateralPlan'].laneChangeDirection
            # A blindspot hit on the side being changed to blocks the lane change.
            if (CS.leftBlindspot and direction == LaneChangeDirection.left) or \
               (CS.rightBlindspot and direction == LaneChangeDirection.right):
                self.events.add(EventName.laneChangeBlocked)
            else:
                if direction == LaneChangeDirection.left:
                    self.events.add(EventName.preLaneChangeLeft)
                else:
                    self.events.add(EventName.preLaneChangeRight)
        elif self.sm['lateralPlan'].laneChangeState in (
                LaneChangeState.laneChangeStarting,
                LaneChangeState.laneChangeFinishing):
            self.events.add(EventName.laneChange)

        for i, pandaState in enumerate(self.sm['pandaStates']):
            # All pandas must match the list of safetyConfigs, and if outside this list, must be silent or noOutput
            if i < len(self.CP.safetyConfigs):
                safety_mismatch = pandaState.safetyModel != self.CP.safetyConfigs[i].safetyModel or \
                                  pandaState.safetyParam != self.CP.safetyConfigs[i].safetyParam or \
                                  pandaState.alternativeExperience != self.CP.alternativeExperience
            else:
                safety_mismatch = pandaState.safetyModel not in IGNORED_SAFETY_MODES

            # self.mismatch_counter is maintained outside this method.
            if safety_mismatch or self.mismatch_counter >= 200:
                self.events.add(EventName.controlsMismatch)

            if log.PandaState.FaultType.relayMalfunction in pandaState.faults:
                self.events.add(EventName.relayMalfunction)

        # Handle HW and system malfunctions
        # Order is very intentional here. Be careful when modifying this.
        # All events here should at least have NO_ENTRY and SOFT_DISABLE.
        # Snapshot of the event count, compared against below for the catch-all.
        num_events = len(self.events)

        # Names of processes that should be running but are not.
        not_running = {
            p.name
            for p in self.sm['managerState'].processes
            if not p.running and p.shouldBeRunning
        }
        if self.sm.rcv_frame['managerState'] and (not_running -
                                                  IGNORE_PROCESSES):
            self.events.add(EventName.processNotRunning)
        else:
            # Camera checks are skipped in simulation and while the loop is lagging.
            if not SIMULATION and not self.rk.lagging:
                if not self.sm.all_alive(self.camera_packets):
                    self.events.add(EventName.cameraMalfunction)
                elif not self.sm.all_freq_ok(self.camera_packets):
                    self.events.add(EventName.cameraFrameRate)
        if self.rk.lagging:
            self.events.add(EventName.controlsdLagging)
        if len(self.sm['radarState'].radarErrors):
            self.events.add(EventName.radarFault)
        if not self.sm.valid['pandaStates']:
            self.events.add(EventName.usbError)

        # generic catch-all. ideally, a more specific event should be added above instead
        # NOTE(review): despite its name, no_system_errors is True when events WERE
        # added since num_events was captured (`!=`). Confirm whether `==` was intended.
        no_system_errors = len(self.events) != num_events
        if (not self.sm.all_checks() or self.can_rcv_error
            ) and no_system_errors and CS.canValid and not CS.canTimeout:
            if not self.sm.all_alive():
                self.events.add(EventName.commIssue)
            elif not self.sm.all_freq_ok():
                self.events.add(EventName.commIssueAvgFreq)
            else:  # invalid or can_rcv_error.
                self.events.add(EventName.commIssue)

            logs = {
                'invalid':
                [s for s, valid in self.sm.valid.items() if not valid],
                'not_alive':
                [s for s, alive in self.sm.alive.items() if not alive],
                'not_freq_ok':
                [s for s, freq_ok in self.sm.freq_ok.items() if not freq_ok],
                'can_error':
                self.can_rcv_error,
            }
            # Only log when the set of failing services changed since the last log.
            if logs != self.logged_comm_issue:
                cloudlog.event("commIssue", error=True, **logs)
                self.logged_comm_issue = logs
        else:
            self.logged_comm_issue = None

        if not self.sm['liveParameters'].valid:
            self.events.add(EventName.vehicleModelInvalid)
        if not self.sm['lateralPlan'].mpcSolutionValid:
            self.events.add(EventName.plannerError)
        if not self.sm['liveLocationKalman'].sensorsOK and not NOSENSOR:
            if self.sm.frame > 5 / DT_CTRL:  # Give locationd some time to receive all the inputs
                self.events.add(EventName.sensorDataInvalid)
        if not self.sm['liveLocationKalman'].posenetOK:
            self.events.add(EventName.posenetInvalid)
        if not self.sm['liveLocationKalman'].deviceStable:
            self.events.add(EventName.deviceFalling)

        if not REPLAY:
            # Check for mismatch between openpilot and car's PCM
            cruise_mismatch = CS.cruiseState.enabled and (
                not self.enabled or not self.CP.pcmCruise)
            # Counter resets as soon as the mismatch clears; the event needs it to
            # persist for more than int(6. / DT_CTRL) consecutive calls.
            self.cruise_mismatch_counter = self.cruise_mismatch_counter + 1 if cruise_mismatch else 0
            if self.cruise_mismatch_counter > int(6. / DT_CTRL):
                self.events.add(EventName.cruiseMismatch)

        # Check for FCW
        stock_long_is_braking = self.enabled and not self.CP.openpilotLongitudinalControl and CS.aEgo < -1.25
        model_fcw = self.sm[
            'modelV2'].meta.hardBrakePredicted and not CS.brakePressed and not stock_long_is_braking
        planner_fcw = self.sm['longitudinalPlan'].fcw and self.enabled
        if planner_fcw or model_fcw:
            self.events.add(EventName.fcw)

        # Scan pending log messages for the listed error strings and map the
        # token following "CSID:" to an event via CSID_MAP.
        for m in messaging.drain_sock(self.log_sock, wait_for_one=False):
            try:
                msg = m.androidLog.message
                if any(err in msg
                       for err in ("ERROR_CRC", "ERROR_ECC",
                                   "ERROR_STREAM_UNDERFLOW", "APPLY FAILED")):
                    csid = msg.split("CSID:")[-1].split(" ")[0]
                    evt = CSID_MAP.get(csid, None)
                    if evt is not None:
                        self.events.add(evt)
            except UnicodeDecodeError:
                pass

        # TODO: fix simulator
        if not SIMULATION:
            if not NOSENSOR:
                if not self.sm['liveLocationKalman'].gpsOK and (
                        self.distance_traveled > 1000):
                    # Not show in first 1 km to allow for driving out of garage. This event shows after 5 minutes
                    self.events.add(EventName.noGps)

            if self.sm['modelV2'].frameDropPerc > 20:
                self.events.add(EventName.modeldLagging)
            if self.sm['liveLocationKalman'].excessiveResets:
                self.events.add(EventName.localizerMalfunction)

        # Only allow engagement with brake pressed when stopped behind another stopped car
        speeds = self.sm['longitudinalPlan'].speeds
        # Last planned speed; falls back to 100.0 when the plan has fewer than two points.
        if len(speeds) > 1:
            v_future = speeds[-1]
        else:
            v_future = 100.0
        if CS.brakePressed and v_future >= self.CP.vEgoStarting \
          and self.CP.openpilotLongitudinalControl and CS.vEgo < 0.3:
            self.events.add(EventName.noTarget)
Exemplo n.º 9
0
def main() -> None:
    """Run the update daemon loop until ``wait_helper.shutdown`` is set.

    Exits immediately when the "DisableUpdates" param is set. Otherwise takes
    an exclusive, non-blocking lock on LOCK_FILE, then repeatedly initialises
    the overlay, checks for and fetches updates, and records the outcome via
    ``set_params``. The overlay is dismounted when the loop ends.

    Raises:
        RuntimeError: if the overlay lock cannot be acquired.
    """
    params = Params()

    if params.get_bool("DisableUpdates"):
        cloudlog.warning("updates are disabled by the DisableUpdates param")
        exit(0)

    # The file object stays referenced for the whole function, keeping the lock held.
    ov_lock_fd = open(LOCK_FILE, 'w')
    try:
        fcntl.flock(ov_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except OSError as e:
        raise RuntimeError(
            "couldn't get overlay lock; is another instance running?") from e

    # Set low io priority
    proc = psutil.Process()
    if psutil.LINUX:
        proc.ionice(psutil.IOPRIO_CLASS_BE, value=7)

    # Check if we just performed an update
    old_openpilot = Path(os.path.join(STAGING_ROOT, "old_openpilot"))
    if old_openpilot.is_dir():
        cloudlog.event("update installed")

    if not params.get("InstallDate"):
        install_date = datetime.datetime.utcnow().isoformat()
        params.put("InstallDate", install_date.encode('utf8'))

    overlay_init = Path(os.path.join(BASEDIR, ".overlay_init"))
    overlay_init.unlink(missing_ok=True)

    update_failed_count = 0  # TODO: Load from param?
    wait_helper = WaitTimeHelper(proc)

    # Run the update loop
    while not wait_helper.shutdown:
        wait_helper.ready_event.clear()

        # Attempt an update; the attempt counts as failed until proven otherwise
        exception = None
        new_version = False
        update_failed_count += 1
        try:
            init_overlay()

            # TODO: still needed? skip this and just fetch?
            # Lightweight internet check
            internet_ok, update_available = check_for_update()
            if internet_ok:
                if not update_available:
                    update_failed_count = 0

                # Fetch update
                new_version = fetch_update(wait_helper)
                update_failed_count = 0
        except subprocess.CalledProcessError as e:
            cloudlog.event("update process failed",
                           cmd=e.cmd,
                           output=e.output,
                           returncode=e.returncode)
            exception = f"command failed: {e.cmd}\n{e.output}"
            overlay_init.unlink(missing_ok=True)
        except Exception as e:
            cloudlog.exception("uncaught updated exception, shouldn't happen")
            exception = str(e)
            overlay_init.unlink(missing_ok=True)

        if not wait_helper.shutdown:
            try:
                set_params(new_version, update_failed_count, exception)
            except Exception:
                cloudlog.exception(
                    "uncaught updated exception while setting params, shouldn't happen"
                )

        # infrequent attempts if we successfully updated recently
        if update_failed_count > 0:
            wait_s = 5 * 60
        else:
            wait_s = 90 * 60
        wait_helper.sleep(wait_s)

    dismount_overlay()
Exemplo n.º 10
0
def fingerprint(logcan, sendcan):
  """Identify the connected car from firmware versions and CAN traffic.

  Unless the FINGERPRINT or SKIP_FW_QUERY environment variables are set, the
  VIN and firmware versions are taken from the cached CarParams when that
  cache is usable (not the "mock" car, has firmware entries and a known VIN),
  and queried from the car otherwise. CAN messages are then read until a
  single candidate remains on bus 0 or 1, no candidates are left, or more
  than 200 frames have been read.

  Precedence of the result: a fixed FINGERPRINT wins over a single firmware
  candidate, which wins over the CAN fingerprint.

  Args:
    logcan: socket CAN messages are read from.
    sendcan: socket used by the VIN / firmware queries.

  Returns:
    Tuple ``(car_fingerprint, finger, vin, car_fw, source, exact_match)``.
    ``car_fingerprint`` is None when nothing matched; ``finger`` maps bus
    number to ``{address: message length}``.
  """
  fixed_fingerprint = os.environ.get('FINGERPRINT', "")
  skip_fw_query = os.environ.get('SKIP_FW_QUERY', False)
  ecu_responses = set()

  if not fixed_fingerprint and not skip_fw_query:
    # Vin query only reliably works through OBDII
    bus = 1

    cached_params = Params().get("CarParamsCache")
    if cached_params is not None:
      cached_params = car.CarParams.from_bytes(cached_params)
      if cached_params.carName == "mock":
        cached_params = None

    # Compare the VIN by value: an identity check (`is not`) against the
    # VIN_UNKNOWN constant does not reliably detect an unknown cached VIN.
    if cached_params is not None and len(cached_params.carFw) > 0 and cached_params.carVin != VIN_UNKNOWN:
      cloudlog.warning("Using cached CarParams")
      vin = cached_params.carVin
      car_fw = list(cached_params.carFw)
    else:
      cloudlog.warning("Getting VIN & FW versions")
      _, vin = get_vin(logcan, sendcan, bus)
      ecu_responses = get_present_ecus(logcan, sendcan)
      car_fw = get_fw_versions(logcan, sendcan)

    exact_fw_match, fw_candidates = match_fw_to_car(car_fw)
  else:
    vin = VIN_UNKNOWN
    exact_fw_match, fw_candidates, car_fw = True, set(), []

  if len(vin) != 17:
    cloudlog.event("Malformed VIN", vin=vin, error=True)
    vin = VIN_UNKNOWN
  cloudlog.warning("VIN %s", vin)
  Params().put("CarVin", vin)

  finger = gen_empty_fingerprint()
  candidate_cars = {i: all_legacy_fingerprint_cars() for i in [0, 1]}  # attempt fingerprint on both bus 0 and 1
  frame = 0
  frame_fingerprint = 25  # 0.25s
  car_fingerprint = None
  done = False

  # drain CAN socket so we always get the latest messages
  messaging.drain_sock_raw(logcan)

  while not done:
    a = get_one_can(logcan)

    for can in a.can:
      # The fingerprint dict is generated for all buses, this way the car interface
      # can use it to detect a (valid) multipanda setup and initialize accordingly
      if can.src < 128:
        if can.src not in finger:
          finger[can.src] = {}
        finger[can.src][can.address] = len(can.dat)

      for b in candidate_cars:
        # Ignore extended messages and VIN query response.
        if can.src == b and can.address < 0x800 and can.address not in (0x7df, 0x7e0, 0x7e8):
          candidate_cars[b] = eliminate_incompatible_cars(can, candidate_cars[b])

    # if we only have one car choice and the time since we got our first
    # message has elapsed, exit
    for b in candidate_cars:
      if len(candidate_cars[b]) == 1 and frame > frame_fingerprint:
        # fingerprint done
        car_fingerprint = candidate_cars[b][0]

    # bail if no cars left or we've been waiting for more than 2s
    failed = (all(len(cc) == 0 for cc in candidate_cars.values()) and frame > frame_fingerprint) or frame > 200
    succeeded = car_fingerprint is not None
    done = failed or succeeded

    frame += 1

  exact_match = True
  source = car.CarParams.FingerprintSource.can

  # If FW query returns exactly 1 candidate, use it
  if len(fw_candidates) == 1:
    car_fingerprint = list(fw_candidates)[0]
    source = car.CarParams.FingerprintSource.fw
    exact_match = exact_fw_match

  # A fingerprint forced through the environment overrides everything else
  if fixed_fingerprint:
    car_fingerprint = fixed_fingerprint
    source = car.CarParams.FingerprintSource.fixed

  cloudlog.event("fingerprinted", car_fingerprint=car_fingerprint, source=source, fuzzy=not exact_match,
                 fw_count=len(car_fw), ecu_responses=ecu_responses, error=True)
  return car_fingerprint, finger, vin, car_fw, source, exact_match
Exemplo n.º 11
0
def upload_handler(end_event: threading.Event) -> None:
    """Worker loop that takes items off ``upload_queue`` and uploads them.

    Runs until ``end_event`` is set. The item being processed is published in
    ``cur_upload_items`` under this thread's ident. Cancelled and expired
    items are dropped; items that may not use a metered connection are handed
    to ``retry_upload`` while the connection is metered. Responses with a
    status other than 200, 201, 401, 403 or 412, and request timeouts or
    connection errors, are also retried.
    """
    sm = messaging.SubMaster(['deviceState'])
    tid = threading.get_ident()

    def cb(sz, cur):
        # Abort transfer if connection changed to metered after starting upload
        sm.update(0)
        now_metered = sm['deviceState'].networkMetered
        if now_metered and (not cur_upload_items[tid].allow_cellular):
            raise AbortTransferException

        progress = cur / sz if sz else 1
        cur_upload_items[tid] = cur_upload_items[tid]._replace(
            progress=progress)

    while not end_event.is_set():
        cur_upload_items[tid] = None

        try:
            next_item = upload_queue.get(timeout=1)
            cur_upload_items[tid] = next_item._replace(current=True)

            if cur_upload_items[tid].id in cancelled_uploads:
                cancelled_uploads.remove(cur_upload_items[tid].id)
                continue

            # Remove item if too old
            now = datetime.now()
            created = datetime.fromtimestamp(
                cur_upload_items[tid].created_at / 1000)
            if (now - created).total_seconds() > MAX_AGE:
                cloudlog.event("athena.upload_handler.expired",
                               item=cur_upload_items[tid],
                               error=True)
                continue

            # Check if uploading over metered connection is allowed
            sm.update(0)
            metered = sm['deviceState'].networkMetered
            network_type = sm['deviceState'].networkType.raw
            if metered and (not cur_upload_items[tid].allow_cellular):
                retry_upload(tid, end_event, False)
                continue

            fn = cur_upload_items[tid].path
            try:
                sz = os.path.getsize(fn)
            except OSError:
                sz = -1

            # Fields shared by every log event emitted for this upload.
            log_fields = dict(fn=fn,
                              sz=sz,
                              network_type=network_type,
                              metered=metered)

            try:
                cloudlog.event("athena.upload_handler.upload_start",
                               **log_fields,
                               retry_count=cur_upload_items[tid].retry_count)
                response = _do_upload(cur_upload_items[tid], cb)

                if response.status_code in (200, 201, 401, 403, 412):
                    cloudlog.event("athena.upload_handler.success",
                                   **log_fields)
                else:
                    cloudlog.event("athena.upload_handler.retry",
                                   status_code=response.status_code,
                                   **log_fields)
                    retry_upload(tid, end_event)

                UploadQueueCache.cache(upload_queue)
            except (requests.exceptions.Timeout,
                    requests.exceptions.ConnectionError,
                    requests.exceptions.SSLError):
                cloudlog.event("athena.upload_handler.timeout", **log_fields)
                retry_upload(tid, end_event)
            except AbortTransferException:
                cloudlog.event("athena.upload_handler.abort", **log_fields)
                retry_upload(tid, end_event, False)

        except queue.Empty:
            # Nothing queued within the timeout; loop to re-check end_event.
            pass
        except Exception:
            cloudlog.exception("athena.upload_handler.exception")
Exemplo n.º 12
0
def main() -> NoReturn:
    """Prepare all connected pandas and run boardd, over and over.

    Each pass clears the "PandaSignatures" param, recovers any panda listed by
    PandaDFU, passes every listed serial to flash_panda(), records a lost
    heartbeat in "PandaHeartbeatLost", resets each panda while first_run is
    still set, stores the joined signatures and closes the pandas. boardd is
    then launched with the sorted serials as arguments, and the loop starts
    over once it returns.

    A USB "no device" or "pipe" error raised during setup is logged and the
    pass is retried from the top. subprocess.run() is called with check=True,
    so a non-zero boardd exit status raises CalledProcessError out of here.
    """
    params = Params()
    first_run = True

    while True:
        try:
            params.delete("PandaSignatures")

            # Recover every panda that is currently in DFU mode
            for dfu_serial in PandaDFU.list():
                cloudlog.info(
                    f"Panda in DFU mode found, flashing recovery {dfu_serial}")
                PandaDFU(dfu_serial).recover()
            time.sleep(1)

            panda_serials = Panda.list()
            if len(panda_serials) == 0:
                # Nothing listed: on the first run reset the internal panda,
                # then start the pass over
                if first_run:
                    cloudlog.info("Resetting internal panda")
                    HARDWARE.reset_internal_panda()
                    time.sleep(2)  # wait to come back up
                continue

            cloudlog.info(
                f"{len(panda_serials)} panda(s) found, connecting - {panda_serials}"
            )

            # One flash_panda() call per listed serial, in listing order
            pandas: List[Panda] = [
                flash_panda(serial) for serial in panda_serials
            ]

            # Flag a lost heartbeat, and reset each panda on the first run
            for panda in pandas:
                health = panda.health()
                if health["heartbeat_lost"]:
                    params.put_bool("PandaHeartbeatLost", True)
                    cloudlog.event("heartbeat lost",
                                   deviceState=health,
                                   serial=panda.get_usb_serial())

                if first_run:
                    cloudlog.info(f"Resetting panda {panda.get_usb_serial()}")
                    panda.reset()

            # Sort so that boardd always receives the serials in the same order
            pandas = sorted(pandas, key=cmp_to_key(panda_sort_cmp))
            panda_serials = [panda.get_usb_serial()
                             for panda in pandas]  # type: ignore

            # Store the signatures of all pandas as one comma-separated value
            signatures = b','.join(panda.get_signature() for panda in pandas)
            params.put("PandaSignatures", signatures)

            # Done with the handles
            for panda in pandas:
                panda.close()
        except (usb1.USBErrorNoDevice, usb1.USBErrorPipe):
            # a panda went away in the middle of setup; start the pass over
            cloudlog.exception("Panda USB exception while setting up")
            continue

        first_run = False

        # run boardd with all connected serials as arguments
        os.environ['MANAGER_DAEMON'] = 'boardd'
        boardd_dir = os.path.join(BASEDIR, "selfdrive/boardd")
        os.chdir(boardd_dir)
        subprocess.run(["./boardd", *panda_serials], check=True)
Exemplo n.º 13
0
def thermald_thread(end_event, hw_queue):
    """Build and publish one deviceState message per loop until end_event is set.

    Every iteration updates the SubMaster (passing PANDA_STATES_TIMEOUT), reads
    the thermal sensors via read_thermal(), picks up the newest HardwareState
    from hw_queue if one is queued, updates the thermal status band, evaluates
    the onroad/startup conditions, writes the "IsOnroad" / "IsOffroad" /
    "IsEngaged" params on changes, runs the power monitor (which may lead to
    "DoShutdown" being set), sends "deviceState" and emits statlog gauges.

    Args:
        end_event: object exposing is_set(); the loop exits once it returns True.
        hw_queue: queue of HardwareState snapshots, read with get_nowait().
    """
    pm = messaging.PubMaster(['deviceState'])
    sm = messaging.SubMaster([
        "peripheralState", "gpsLocationExternal", "controlsState",
        "pandaStates"
    ],
                             poll=["pandaStates"])

    # iteration counter: triggers the first-iteration params write and the
    # periodic STATUS_PACKET below
    count = 0

    # all of these must be True to go (and to stay) onroad
    onroad_conditions: Dict[str, bool] = {
        "ignition": False,
    }
    # these are only required while not started yet (see should_start below)
    startup_conditions: Dict[str, bool] = {}
    startup_conditions_prev: Dict[str, bool] = {}

    # sec_since_boot() timestamps of going offroad / onroad; each is reset to
    # None while the device is in the other state
    off_ts = None
    started_ts = None
    # becomes True the first time started_ts is set
    started_seen = False
    thermal_status = ThermalStatus.green

    # placeholder values used until a first snapshot arrives on hw_queue
    last_hw_state = HardwareState(
        network_type=NetworkType.none,
        network_metered=False,
        network_strength=NetworkStrength.unknown,
        network_info=None,
        nvme_temps=[],
        modem_temps=[],
    )

    current_filter = FirstOrderFilter(0., CURRENT_TAU, DT_TRML)
    temp_filter = FirstOrderFilter(0., TEMP_TAU, DT_TRML)
    should_start_prev = False
    in_car = False
    engaged_prev = False

    params = Params()
    power_monitor = PowerMonitoring()

    HARDWARE.initialize_hardware()
    thermal_config = HARDWARE.get_thermal_config()

    # created lazily below, and only when TICI is set
    fan_controller = None

    while not end_event.is_set():
        sm.update(PANDA_STATES_TIMEOUT)

        pandaStates = sm['pandaStates']
        peripheralState = sm['peripheralState']

        msg = read_thermal(thermal_config)

        if sm.updated['pandaStates'] and len(pandaStates) > 0:

            # Set ignition based on any panda connected
            onroad_conditions["ignition"] = any(
                ps.ignitionLine or ps.ignitionCan for ps in pandaStates
                if ps.pandaType != log.PandaState.PandaType.unknown)

            # harness status is taken from the first panda state only
            pandaState = pandaStates[0]

            in_car = pandaState.harnessStatus != log.PandaState.HarnessStatus.notConnected

            # Setup fan handler on first connect to panda
            if fan_controller is None and peripheralState.pandaType != log.PandaState.PandaType.unknown:
                if TICI:
                    fan_controller = TiciFanController()

        elif (sec_since_boot() -
              sm.rcv_time['pandaStates']) > DISCONNECT_TIMEOUT:
            # no pandaStates received for longer than DISCONNECT_TIMEOUT:
            # treat ignition as off
            if onroad_conditions["ignition"]:
                onroad_conditions["ignition"] = False
                cloudlog.error("panda timed out onroad")

        # take a newer hardware snapshot if one is queued, else keep the last
        try:
            last_hw_state = hw_queue.get_nowait()
        except queue.Empty:
            pass

        msg.deviceState.freeSpacePercent = get_available_percent(default=100.0)
        msg.deviceState.memoryUsagePercent = int(
            round(psutil.virtual_memory().percent))
        msg.deviceState.cpuUsagePercent = [
            int(round(n)) for n in psutil.cpu_percent(percpu=True)
        ]
        msg.deviceState.gpuUsagePercent = int(
            round(HARDWARE.get_gpu_usage_percent()))

        msg.deviceState.networkType = last_hw_state.network_type
        msg.deviceState.networkMetered = last_hw_state.network_metered
        msg.deviceState.networkStrength = last_hw_state.network_strength
        if last_hw_state.network_info is not None:
            msg.deviceState.networkInfo = last_hw_state.network_info

        msg.deviceState.nvmeTempC = last_hw_state.nvme_temps
        msg.deviceState.modemTempC = last_hw_state.modem_temps

        msg.deviceState.screenBrightnessPercent = HARDWARE.get_screen_brightness(
        )
        msg.deviceState.usbOnline = HARDWARE.get_usb_present()
        current_filter.update(msg.deviceState.batteryCurrent / 1e6)

        # filtered maximum over the CPU, memory and GPU temperatures
        max_comp_temp = temp_filter.update(
            max(max(msg.deviceState.cpuTempC), msg.deviceState.memoryTempC,
                max(msg.deviceState.gpuTempC)))

        if fan_controller is not None:
            msg.deviceState.fanSpeedPercentDesired = fan_controller.update(
                max_comp_temp, onroad_conditions["ignition"])

        # True when not started and either never started, no offroad timestamp
        # yet, or offroad for more than 5 minutes
        is_offroad_for_5_min = (started_ts is None) and (
            (not started_seen) or (off_ts is None) or
            (sec_since_boot() - off_ts > 60 * 5))
        if is_offroad_for_5_min and max_comp_temp > OFFROAD_DANGER_TEMP:
            # If device is offroad we want to cool down before going onroad
            # since going onroad increases load and can make temps go over 107
            thermal_status = ThermalStatus.danger
        else:
            # step one band down or up when the filtered temperature leaves the
            # current band; relies on the key order of THERMAL_BANDS
            current_band = THERMAL_BANDS[thermal_status]
            band_idx = list(THERMAL_BANDS.keys()).index(thermal_status)
            if current_band.min_temp is not None and max_comp_temp < current_band.min_temp:
                thermal_status = list(THERMAL_BANDS.keys())[band_idx - 1]
            elif current_band.max_temp is not None and max_comp_temp > current_band.max_temp:
                thermal_status = list(THERMAL_BANDS.keys())[band_idx + 1]

        # **** starting logic ****

        # Ensure date/time are valid
        now = datetime.datetime.utcnow()
        startup_conditions["time_valid"] = (now.year > 2020) or (
            now.year == 2020 and now.month >= 10)
        set_offroad_alert_if_changed("Offroad_InvalidTime",
                                     (not startup_conditions["time_valid"]))

        startup_conditions["up_to_date"] = params.get(
            "Offroad_ConnectivityNeeded") is None or params.get_bool(
                "DisableUpdates") or params.get_bool("SnoozeUpdate")
        startup_conditions["not_uninstalling"] = not params.get_bool(
            "DoUninstall")
        startup_conditions["accepted_terms"] = params.get(
            "HasAcceptedTerms") == terms_version

        # with 2% left, we killall, otherwise the phone will take a long time to boot
        startup_conditions["free_space"] = msg.deviceState.freeSpacePercent > 2
        startup_conditions["completed_training"] = params.get("CompletedTrainingVersion") == training_version or \
                                                   params.get_bool("Passive")
        startup_conditions["not_driver_view"] = not params.get_bool(
            "IsDriverViewEnabled")
        startup_conditions["not_taking_snapshot"] = not params.get_bool(
            "IsTakingSnapshot")
        # if any CPU gets above 107 or the battery gets above 63, kill all processes
        # controls will warn with CPU above 95 or battery above 60
        onroad_conditions[
            "device_temp_good"] = thermal_status < ThermalStatus.danger
        set_offroad_alert_if_changed(
            "Offroad_TemperatureTooHigh",
            (not onroad_conditions["device_temp_good"]))

        # TODO: this should move to TICI.initialize_hardware, but we currently can't import params there
        if TICI:
            # the storage checks are skipped when this marker file exists
            if not os.path.isfile("/persist/comma/living-in-the-moment"):
                if not Path("/data/media").is_mount():
                    set_offroad_alert_if_changed("Offroad_StorageMissing",
                                                 True)
                else:
                    # check for bad NVMe
                    try:
                        with open("/sys/block/nvme0n1/device/model") as f:
                            model = f.read().strip()
                        # only raised while the Offroad_BadNvme param is unset
                        if not model.startswith(
                                "Samsung SSD 980") and params.get(
                                    "Offroad_BadNvme") is None:
                            set_offroad_alert_if_changed(
                                "Offroad_BadNvme", True)
                            cloudlog.event("Unsupported NVMe",
                                           model=model,
                                           error=True)
                    except Exception:
                        # best effort: any failure in this check is ignored
                        pass

        # Handle offroad/onroad transition
        # startup_conditions only gate the transition to started; once started,
        # only onroad_conditions are evaluated
        should_start = all(onroad_conditions.values())
        if started_ts is None:
            should_start = should_start and all(startup_conditions.values())

        # on a transition, and on the very first iteration, write the state to
        # params and clear the engaged flag
        if should_start != should_start_prev or (count == 0):
            params.put_bool("IsOnroad", should_start)
            params.put_bool("IsOffroad", not should_start)

            params.put_bool("IsEngaged", False)
            engaged_prev = False
            HARDWARE.set_power_save(not should_start)

        if sm.updated['controlsState']:
            engaged = sm['controlsState'].enabled
            if engaged != engaged_prev:
                params.put_bool("IsEngaged", engaged)
                engaged_prev = engaged

            # written on every controlsState update, not only on changes;
            # failures to write to the kernel log are ignored
            try:
                with open('/dev/kmsg', 'w') as kmsg:
                    kmsg.write(f"<3>[thermald] engaged: {engaged}\n")
            except Exception:
                pass

        if should_start:
            off_ts = None
            if started_ts is None:
                started_ts = sec_since_boot()
                started_seen = True
        else:
            # ignition is on but we are not starting: log whenever the set of
            # startup conditions differs from the previous iteration
            if onroad_conditions["ignition"] and (startup_conditions !=
                                                  startup_conditions_prev):
                cloudlog.event("Startup blocked",
                               startup_conditions=startup_conditions,
                               onroad_conditions=onroad_conditions)

            started_ts = None
            if off_ts is None:
                off_ts = sec_since_boot()

        # Offroad power monitoring
        power_monitor.calculate(peripheralState, onroad_conditions["ignition"])
        msg.deviceState.offroadPowerUsageUwh = power_monitor.get_power_used()
        msg.deviceState.carBatteryCapacityUwh = max(
            0, power_monitor.get_car_battery_capacity())
        current_power_draw = HARDWARE.get_current_power_draw()
        statlog.sample("power_draw", current_power_draw)
        msg.deviceState.powerDrawW = current_power_draw

        som_power_draw = HARDWARE.get_som_power_draw()
        statlog.sample("som_power_draw", som_power_draw)
        msg.deviceState.somPowerDrawW = som_power_draw

        # Check if we need to disable charging (handled by boardd)
        msg.deviceState.chargingDisabled = power_monitor.should_disable_charging(
            onroad_conditions["ignition"], in_car, off_ts)

        # Check if we need to shut down
        if power_monitor.should_shutdown(peripheralState,
                                         onroad_conditions["ignition"], in_car,
                                         off_ts, started_seen):
            cloudlog.warning(f"shutting device down, offroad since {off_ts}")
            params.put_bool("DoShutdown", True)

        msg.deviceState.chargingError = current_filter.x > 0. and msg.deviceState.batteryPercent < 90  # if current is positive, then battery is being discharged
        msg.deviceState.started = started_ts is not None
        # started_ts scaled by 1e9; 0 when not started
        msg.deviceState.startedMonoTime = int(1e9 * (started_ts or 0))

        last_ping = params.get("LastAthenaPingTime")
        if last_ping is not None:
            msg.deviceState.lastAthenaPingTime = int(last_ping)

        msg.deviceState.thermalStatus = thermal_status
        pm.send("deviceState", msg)

        # remember this iteration's state for the change detection above
        should_start_prev = should_start
        startup_conditions_prev = startup_conditions.copy()

        # Log to statsd
        statlog.gauge("free_space_percent", msg.deviceState.freeSpacePercent)
        statlog.gauge("gpu_usage_percent", msg.deviceState.gpuUsagePercent)
        statlog.gauge("memory_usage_percent",
                      msg.deviceState.memoryUsagePercent)
        for i, usage in enumerate(msg.deviceState.cpuUsagePercent):
            statlog.gauge(f"cpu{i}_usage_percent", usage)
        for i, temp in enumerate(msg.deviceState.cpuTempC):
            statlog.gauge(f"cpu{i}_temperature", temp)
        for i, temp in enumerate(msg.deviceState.gpuTempC):
            statlog.gauge(f"gpu{i}_temperature", temp)
        statlog.gauge("memory_temperature", msg.deviceState.memoryTempC)
        statlog.gauge("ambient_temperature", msg.deviceState.ambientTempC)
        for i, temp in enumerate(msg.deviceState.pmicTempC):
            statlog.gauge(f"pmic{i}_temperature", temp)
        for i, temp in enumerate(last_hw_state.nvme_temps):
            statlog.gauge(f"nvme_temperature{i}", temp)
        for i, temp in enumerate(last_hw_state.modem_temps):
            statlog.gauge(f"modem_temperature{i}", temp)
        statlog.gauge("fan_speed_percent_desired",
                      msg.deviceState.fanSpeedPercentDesired)
        statlog.gauge("screen_brightness_percent",
                      msg.deviceState.screenBrightnessPercent)

        # report to server once every 10 minutes
        # (count == 0 matches too, so a packet is also sent on the first iteration)
        if (count % int(600. / DT_TRML)) == 0:
            cloudlog.event(
                "STATUS_PACKET",
                count=count,
                pandaStates=[
                    strip_deprecated_keys(p.to_dict()) for p in pandaStates
                ],
                peripheralState=strip_deprecated_keys(
                    peripheralState.to_dict()),
                location=(strip_deprecated_keys(
                    sm["gpsLocationExternal"].to_dict())
                          if sm.alive["gpsLocationExternal"] else None),
                deviceState=strip_deprecated_keys(msg.to_dict()))

        count += 1
Exemplo n.º 14
0
def hw_state_thread(end_event, hw_queue):
    """Collect non-critical hardware state and hand it over through hw_queue.

    Loops until end_event is set, sleeping DT_TRML between iterations. Every
    10 seconds' worth of iterations it queries HARDWARE for network, modem and
    NVMe state, offers the resulting HardwareState to hw_queue without
    blocking (a full queue drops the snapshot), and performs the modem
    housekeeping below. Any exception raised during a refresh is logged and
    the loop carries on.
    """
    tick = 0
    consecutive_registered = 0
    last_state = None

    modem_version = None
    modem_nv = None
    modem_configured = False

    while not end_event.is_set():
        # the HARDWARE queries are expensive, so only refresh every 10s
        if (tick % int(10. / DT_TRML)) == 0:
            try:
                network_type = HARDWARE.get_network_type()
                modem_temps = HARDWARE.get_modem_temperatures()
                # an empty reading falls back to the previous snapshot's values
                if len(modem_temps) == 0 and last_state is not None:
                    modem_temps = last_state.modem_temps

                # Fetch the modem version until both values are known, and
                # log them once they are
                version_known = (modem_version is not None) and (
                    modem_nv is not None)
                if AGNOS and not version_known:
                    modem_version = HARDWARE.get_modem_version()  # pylint: disable=assignment-from-none
                    modem_nv = HARDWARE.get_modem_nv()  # pylint: disable=assignment-from-none

                    if (modem_version is not None) and (modem_nv is not None):
                        cloudlog.event("modem version",
                                       version=modem_version,
                                       nv=modem_nv)

                metered = HARDWARE.get_network_metered(network_type)
                strength = HARDWARE.get_network_strength(network_type)
                net_info = HARDWARE.get_network_info()
                nvme_temps = HARDWARE.get_nvme_temperatures()
                hw_state = HardwareState(
                    network_type=network_type,
                    network_metered=metered,
                    network_strength=strength,
                    network_info=net_info,
                    nvme_temps=nvme_temps,
                    modem_temps=modem_temps,
                )

                # never block on the consumer: drop the snapshot if the queue is full
                try:
                    hw_queue.put_nowait(hw_state)
                except queue.Full:
                    pass

                # Count consecutive refreshes with the modem in "REGISTERED"
                is_registered = (
                    AGNOS and (hw_state.network_info is not None) and
                    (hw_state.network_info.get('state', None) == "REGISTERED"))
                if is_registered:
                    consecutive_registered += 1
                else:
                    consecutive_registered = 0

                # After more than 10 such refreshes, bring the lte connection up
                if consecutive_registered > 10:
                    cloudlog.warning(
                        f"Modem stuck in registered state {hw_state.network_info}. nmcli conn up lte"
                    )
                    os.system("nmcli conn up lte")
                    consecutive_registered = 0

                # TODO: remove this once the config is in AGNOS
                if not modem_configured:
                    sim_id = HARDWARE.get_sim_info().get('sim_id', '')
                    if len(sim_id) > 0:
                        cloudlog.warning("configuring modem")
                        HARDWARE.configure_modem()
                        modem_configured = True

                last_state = hw_state
            except Exception:
                cloudlog.exception("Error getting hardware state")

        tick += 1
        time.sleep(DT_TRML)