From 659670b4947337256cb7d50fb174fcae982892a6 Mon Sep 17 00:00:00 2001 From: Mitchell R Date: Thu, 21 May 2026 10:47:45 +0200 Subject: [PATCH] feat(os-ota): kiosk-side RAUC bundle consumer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3 of the OS OTA pipeline. New module kiosk/src/os_update.rs polls /api/kiosk/os/check with the kiosk's compatibility string and current OS version (read from /etc/betterframe/os-compatibility + /etc/betterframe/os-version, both written by the image build), downloads the bundle, sha256-verifies the transport, and hands off to `rauc install`. RAUC takes it from there: CMS signature verify against /etc/rauc/keyring.pem, copy into inactive A/B slot, arm tryboot via the custom bootloader backend, return. We then post /api/kiosk/os/applied and `systemctl reboot` into the new slot. Wired into the existing 60s heartbeat loop in ui.rs, gated by BF_ENABLE_OS_OTA=1 (default OFF so dev kiosks on non-A/B images don't keep trying + failing). Runs BEFORE the kiosk-binary check on each tick so an OS bundle that ships an updated kiosk binary doesn't race the firmware path. On clean-boot heartbeat success we now also call `rauc status mark-good` so the boot-attempts counter resets — three bad boots in a row will auto-roll back without us needing a separate rollback path. What's NOT in this commit: - A/B partition layout in the pi-gen image (task #6, blocks actual deployment — bundles can be served + accepted but `rauc install` will refuse without two valid slots). - Admin UI for managing releases + rollouts (task #4). 
---
 kiosk/Cargo.toml       |   1 +
 kiosk/src/main.rs      |   1 +
 kiosk/src/os_update.rs | 233 +++++++++++++++++++++++++++++++++++++++++
 kiosk/src/ui.rs        |  68 +++++++++++-
 4 files changed, 301 insertions(+), 2 deletions(-)
 create mode 100644 kiosk/src/os_update.rs

diff --git a/kiosk/Cargo.toml b/kiosk/Cargo.toml
index 61cb10d..39389de 100644
--- a/kiosk/Cargo.toml
+++ b/kiosk/Cargo.toml
@@ -40,6 +40,7 @@ gpiod = "0.3"
 sha2 = "0.10"
 ed25519-dalek = { version = "2", features = ["pem"] }
 base64 = "0.22"
+urlencoding = "2"
 
 # Local HTTP server on kiosk (LAN GET-only layout switch + admin proxy)
 axum = "0.7"
diff --git a/kiosk/src/main.rs b/kiosk/src/main.rs
index 97bba57..c133a2c 100644
--- a/kiosk/src/main.rs
+++ b/kiosk/src/main.rs
@@ -4,6 +4,7 @@ mod firmware;
 mod gpio;
 mod hwmon;
 mod local_server;
+mod os_update;
 mod pipeline;
 mod server;
 mod ui;
diff --git a/kiosk/src/os_update.rs b/kiosk/src/os_update.rs
new file mode 100644
index 0000000..bd8f7af
--- /dev/null
+++ b/kiosk/src/os_update.rs
@@ -0,0 +1,233 @@
+//! Kiosk-side full-OS OTA via RAUC.
+//!
+//! Mirrors `firmware.rs` (which handles the kiosk binary) but for the
+//! complete OS image. Server endpoints:
+//!
+//!   GET  /api/kiosk/os/check?compatibility=<compat>&current=<version>
+//!     → { up_to_date: true } | { up_to_date: false, update: {...} }
+//!   GET  /api/kiosk/os/download/:release_id
+//!     → octet-stream .raucb bytes
+//!   POST /api/kiosk/os/applied { version, error? }
+//!
+//! Signature verification is RAUC's job — bundles are signed with the
+//! X.509 cert pair generated by scripts/gen-rauc-signing-keys.sh, and
+//! the corresponding CA cert is baked into the image at
+//! /etc/rauc/keyring.pem. We only sha256-check the download here to
+//! catch transport corruption before handing off to `rauc install`.
+//!
+//! Slot switching, atomic copy, and rollback are RAUC's job too —
+//! we just shell out to `rauc install`, post the outcome, and tell
+//! systemd to reboot. The custom bootloader backend
+//! (deploy/rauc/betterframe-rauc-boot.sh) flips Pi 5 tryboot on the
+//! next boot.
+//!
+//! Gated by env `BF_ENABLE_OS_OTA=1`. Default OFF so dev kiosks running
+//! a non-A/B layout don't try (and fail) to RAUC-install bundles.
+//!
+//! Compatibility: read from `/etc/betterframe/os-compatibility` (written
+//! at image build time). Falls back to env `BF_RAUC_COMPATIBILITY`, then
+//! a hardcoded default matching deploy/rauc/system.conf.
+
+use std::fs;
+use std::path::PathBuf;
+use std::process::Command;
+use std::time::Duration;
+
+use serde::Deserialize;
+use sha2::{Digest, Sha256};
+use tracing::{info, warn};
+
+pub const DEFAULT_COMPATIBILITY: &str = "betterframe-rpi5-aarch64";
+
+fn compatibility() -> String {
+    if let Ok(s) = fs::read_to_string("/etc/betterframe/os-compatibility") {
+        let trimmed = s.trim();
+        if !trimmed.is_empty() {
+            return trimmed.to_string();
+        }
+    }
+    std::env::var("BF_RAUC_COMPATIBILITY").unwrap_or_else(|_| DEFAULT_COMPATIBILITY.to_string())
+}
+
+fn current_os_version() -> String {
+    if let Ok(s) = fs::read_to_string("/etc/betterframe/os-version") {
+        let trimmed = s.trim();
+        if !trimmed.is_empty() {
+            return trimmed.to_string();
+        }
+    }
+    String::new()
+}
+
+#[derive(Debug, Deserialize)]
+pub struct CheckResponse {
+    pub up_to_date: bool,
+    pub update: Option<UpdateInfo>,
+}
+
+#[derive(Debug, Clone, Deserialize)]
+pub struct UpdateInfo {
+    pub release_id: String,
+    pub version: String,
+    #[allow(dead_code)]
+    pub channel: String,
+    #[allow(dead_code)]
+    pub compatibility: String,
+    pub sha256: String,
+    pub size_bytes: u64,
+    #[allow(dead_code)]
+    pub bundle_format: Option<String>, // NOTE(review): inner type assumed — confirm
+    pub download_url: String,
+}
+
+/// Hit `/api/kiosk/os/check`. Returns `Some(UpdateInfo)` when an upgrade is
+/// available. `None` on up-to-date, network failure, or parse error.
+pub fn check(server: &str, key: &str) -> Option<UpdateInfo> {
+    let compat = compatibility();
+    let cur = current_os_version();
+    let url = format!(
+        "{server}/api/kiosk/os/check?compatibility={compat}&current={cur}",
+        compat = urlencoding::encode(&compat),
+        cur = urlencoding::encode(&cur),
+    );
+    let client = reqwest::blocking::Client::new();
+    let resp = match client
+        .get(&url)
+        .header("Authorization", format!("Bearer {key}"))
+        .timeout(Duration::from_secs(10))
+        .send()
+    {
+        Ok(r) => r,
+        Err(err) => {
+            warn!("os-update check: request failed: {err}");
+            return None;
+        }
+    };
+    if !resp.status().is_success() {
+        warn!("os-update check: HTTP {}", resp.status());
+        return None;
+    }
+    match resp.json::<CheckResponse>() {
+        Ok(c) if !c.up_to_date => c.update,
+        Ok(_) => None,
+        Err(err) => {
+            warn!("os-update check: parse failed: {err}");
+            None
+        }
+    }
+}
+
+/// Download → sha256 verify → `rauc install` → post outcome → reboot.
+///
+/// On success: reboots the system (does not return). On failure: returns Err
+/// so the caller logs it; `rauc` failures are also posted to /api/kiosk/os/applied.
+pub fn apply(server: &str, key: &str, info: &UpdateInfo) -> Result<(), String> {
+    info!(
+        "os-update: applying {} ({} bytes, release {})",
+        info.version, info.size_bytes, info.release_id
+    );
+
+    // 1. Download
+    let url = format!("{}{}", server, info.download_url);
+    let client = reqwest::blocking::Client::new();
+    let resp = client
+        .get(&url)
+        .header("Authorization", format!("Bearer {key}"))
+        .timeout(Duration::from_secs(600)) // OS bundles run hundreds of MB
+        .send()
+        .map_err(|e| format!("download request: {e}"))?;
+    if !resp.status().is_success() {
+        return Err(format!("download HTTP {}", resp.status()));
+    }
+    let bytes = resp.bytes().map_err(|e| format!("download body: {e}"))?;
+    if bytes.len() as u64 != info.size_bytes {
+        return Err(format!("size mismatch: expected {}, got {}", info.size_bytes, bytes.len()));
+    }
+
+    // 2. sha256 (catch transport corruption; RAUC will re-verify the CMS
+    // signature separately when it opens the bundle). Hex case is ignored.
+    let mut hasher = Sha256::new();
+    hasher.update(&bytes);
+    let digest = hasher.finalize();
+    let got_sha = hex_lower(&digest);
+    if !got_sha.eq_ignore_ascii_case(&info.sha256) {
+        return Err(format!(
+            "sha256 mismatch: expected {}, got {}",
+            info.sha256, got_sha
+        ));
+    }
+
+    // 3. Stage on disk for `rauc install` (it expects a file path, not a fd).
+    // /var/tmp survives /tmp's potential tmpfs size cap; bundles can be big.
+    let staging_dir = PathBuf::from("/var/tmp/betterframe");
+    fs::create_dir_all(&staging_dir).map_err(|e| format!("mkdir staging: {e}"))?;
+    let bundle_path = staging_dir.join(format!("os-{}.raucb", info.release_id));
+    fs::write(&bundle_path, &bytes).map_err(|e| format!("write bundle: {e}"))?;
+
+    // 4. Hand off to rauc. `rauc install` blocks until the bundle is fully
+    // copied into the inactive slot and bootloader is flipped. Exit code 0
+    // = success; else stay on the current slot. The staged file is always removed.
+    let spawned = Command::new("rauc")
+        .args(["install", bundle_path.to_str().unwrap_or("")])
+        .status();
+    let _ = fs::remove_file(&bundle_path);
+    let status = spawned.map_err(|e| {
+        let _ = report_applied(server, key, &info.version, Some(&format!("rauc spawn: {e}")));
+        format!("rauc spawn: {e}")
+    })?;
+    if !status.success() {
+        let msg = format!("rauc install exit {status:?}");
+        let _ = report_applied(server, key, &info.version, Some(&msg));
+        return Err(msg);
+    }
+
+    // 5. Report success BEFORE reboot. After this we lose the server
+    // connection mid-call; that's fine, server sets last_attempt_at from
+    // the next heartbeat anyway, but recording success now means the
+    // admin UI shows progress immediately.
+    let _ = report_applied(server, key, &info.version, None);
+
+    info!("os-update: rauc install OK → rebooting into the new slot");
+    // RAUC's custom bootloader backend has already armed tryboot for the
+    // freshly-written slot. Reboot picks it up. On failure to reach the
+    // new slot, tryboot rolls back automatically on the next power cycle.
+    match Command::new("systemctl").arg("reboot").status() {
+        Ok(_) => {
+            // systemctl reboot returns before the reboot completes; sleep
+            // briefly so we don't race main() into a re-entry.
+            std::thread::sleep(Duration::from_secs(30));
+            std::process::exit(0);
+        }
+        Err(e) => Err(format!("systemctl reboot: {e}")),
+    }
+}
+
+/// POST the install outcome for `version` to /api/kiosk/os/applied, attaching
+/// `error` when present. Best-effort: transport failures and non-2xx replies
+/// come back as Err, which callers are free to ignore.
+fn report_applied(server: &str, key: &str, version: &str, error: Option<&str>) -> Result<(), String> {
+    let payload = if let Some(err) = error {
+        serde_json::json!({ "version": version, "error": err })
+    } else {
+        serde_json::json!({ "version": version })
+    };
+    reqwest::blocking::Client::new()
+        .post(format!("{server}/api/kiosk/os/applied"))
+        .header("Authorization", format!("Bearer {key}"))
+        .json(&payload)
+        .timeout(Duration::from_secs(5))
+        .send()
+        .and_then(|resp| resp.error_for_status().map(|_| ()))
+        .map_err(|e| format!("report applied: {e}"))
+}
+
+/// Lower-case hex encoding of `bytes` (two characters per byte).
+fn hex_lower(bytes: &[u8]) -> String {
+    const HEX: &[u8; 16] = b"0123456789abcdef";
+    let mut s = String::with_capacity(bytes.len() * 2);
+    for b in bytes {
+        s.push(HEX[(b >> 4) as usize] as char);
+        s.push(HEX[(b & 0x0f) as usize] as char);
+    }
+    s
+}
diff --git a/kiosk/src/ui.rs b/kiosk/src/ui.rs
index ffa4dfc..4f0929f 100644
--- a/kiosk/src/ui.rs
+++ b/kiosk/src/ui.rs
@@ -267,18 +267,26 @@ fn activate(app: &Application) {
         });
 
         // Heartbeat loop — reports display geometry + hwmon, also checks for
-        // firmware updates so kiosks pick up new builds without admin push.
+        // firmware + OS bundle updates so kiosks pick up new builds without
+        // admin push.
         let mut first_iter = true;
         loop {
             let heartbeat_ok = send_heartbeat_now(&server, &key);
             if first_iter && heartbeat_ok {
                 // Successfully heart-beat at least once → consider this boot a
                 // healthy one. Clears the rollback-pending marker so the next
-                // start doesn't try to roll back a healthy install.
+                // start doesn't try to roll back a healthy install, AND tells
+                // RAUC the current slot is good so its boot-attempts counter
+                // resets (otherwise three bad boots auto-roll back).
                 firmware::mark_firmware_applied();
                 mark_kiosk_healthy();
+                mark_rauc_slot_good();
                 first_iter = false;
             }
+            // OS bundle first — if it succeeds it reboots and we never reach
+            // the firmware check below this iteration. Order matters: an OS
+            // bundle update can ship an app-binary change anyway.
+            maybe_apply_os_update(&server, &key);
             maybe_apply_firmware_update(&server, &key);
             std::thread::sleep(std::time::Duration::from_secs(60));
         }
@@ -442,6 +450,62 @@ fn mark_kiosk_healthy() {
     }
 }
 
+/// Mark the currently booted RAUC slot as good by running
+/// `rauc status mark-good`, so a clean boot is not counted towards RAUC's
+/// boot-attempts rollback. Output is discarded and the result is ignored:
+/// a missing `rauc` binary (dev / non-A/B kiosks) or a non-zero exit is
+/// deliberately a no-op. Slot detection is left to RAUC and our custom
+/// bootloader backend.
+fn mark_rauc_slot_good() {
+    use std::process::{Command, Stdio};
+    let mut rauc = Command::new("rauc");
+    rauc.args(["status", "mark-good"])
+        .stdout(Stdio::null())
+        .stderr(Stdio::null());
+    let _ = rauc.status();
+}
+
+/// Ask the server whether a full-OS RAUC bundle is available for this
+/// kiosk. On hit, download + sha256 + `rauc install` + reboot. On miss or
+/// error: log + keep running. Gated by BF_ENABLE_OS_OTA=1 (default OFF
+/// for dev kiosks running a non-A/B image).
+fn maybe_apply_os_update(server_url: &str, kiosk_key: &str) {
+    // Opt-in only: anything other than BF_ENABLE_OS_OTA=1 (including unset)
+    // skips the OS update path entirely.
+    let enabled = matches!(std::env::var("BF_ENABLE_OS_OTA").as_deref(), Ok("1"));
+    if !enabled {
+        return;
+    }
+
+    // No update offered (check() returned None): nothing more to do this tick.
+    let update = match os_update::check(server_url, kiosk_key) {
+        Some(found) => found,
+        None => return,
+    };
+    info!("os-update: bundle {} available", update.version);
+    let announcement = serde_json::json!({
+        "target_version": &update.version,
+        "channel": &update.channel,
+        "release_id": &update.release_id,
+        "size_bytes": update.size_bytes,
+    });
+    server::report_kiosk_log(server_url, kiosk_key, "info", "os update available", announcement);
+
+    // apply() reboots the system on success, so only the failure case is
+    // handled here.
+    let Err(reason) = os_update::apply(server_url, kiosk_key, &update) else {
+        return;
+    };
+    warn!("os-update: apply failed: {reason}");
+    // Surface the failure to the server-side kiosk log as well as the local log.
+    let failure = serde_json::json!({
+        "target_version": &update.version,
+        "release_id": &update.release_id,
+        "error": &reason,
+    });
+    server::report_kiosk_log(server_url, kiosk_key, "error", "os update failed", failure);
+}
+
 /// Ask the server whether an update is available. On hit, download + verify
 /// + swap + report + exit (systemd brings up the new binary). On miss or
 /// error: log + keep running. Designed to be safe to call from any thread.