mirror of
https://github.com/BetterCorp/BetterFrame.git
synced 2026-05-26 20:16:35 +00:00
feat(os-ota): kiosk-side RAUC bundle consumer
Phase 3 of the OS OTA pipeline. New module kiosk/src/os_update.rs polls /api/kiosk/os/check with the kiosk's compatibility string and current OS version (read from /etc/betterframe/os-compatibility + /etc/betterframe/os-version, both written by the image build), downloads the bundle, sha256-verifies the transport, and hands off to `rauc install`. RAUC takes it from there: CMS signature verify against /etc/rauc/keyring.pem, copy into inactive A/B slot, arm tryboot via the custom bootloader backend, return. We then post /api/kiosk/os/applied and `systemctl reboot` into the new slot. Wired into the existing 60s heartbeat loop in ui.rs, gated by BF_ENABLE_OS_OTA=1 (default OFF so dev kiosks on non-A/B images don't keep trying + failing). Runs BEFORE the kiosk-binary check on each tick so an OS bundle that ships an updated kiosk binary doesn't race the firmware path. On clean-boot heartbeat success we now also call `rauc status mark-good` so the boot-attempts counter resets — three bad boots in a row will auto-roll back without us needing a separate rollback path. What's NOT in this commit: - A/B partition layout in the pi-gen image (task #6, blocks actual deployment — bundles can be served + accepted but `rauc install` will refuse without two valid slots). - Admin UI for managing releases + rollouts (task #4).
This commit is contained in:
parent
084c119c44
commit
659670b494
4 changed files with 301 additions and 2 deletions
|
|
@ -40,6 +40,7 @@ gpiod = "0.3"
|
|||
sha2 = "0.10"
|
||||
ed25519-dalek = { version = "2", features = ["pem"] }
|
||||
base64 = "0.22"
|
||||
urlencoding = "2"
|
||||
|
||||
# Local HTTP server on kiosk (LAN GET-only layout switch + admin proxy)
|
||||
axum = "0.7"
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ mod firmware;
|
|||
mod gpio;
|
||||
mod hwmon;
|
||||
mod local_server;
|
||||
mod os_update;
|
||||
mod pipeline;
|
||||
mod server;
|
||||
mod ui;
|
||||
|
|
|
|||
233
kiosk/src/os_update.rs
Normal file
233
kiosk/src/os_update.rs
Normal file
|
|
@ -0,0 +1,233 @@
|
|||
//! Kiosk-side full-OS OTA via RAUC.
|
||||
//!
|
||||
//! Mirrors `firmware.rs` (which handles the kiosk binary) but for the
|
||||
//! complete OS image. Server endpoints:
|
||||
//!
|
||||
//! GET /api/kiosk/os/check?compatibility=<X>&current=<ver>
|
||||
//! → { up_to_date: true } | { up_to_date: false, update: {...} }
|
||||
//! GET /api/kiosk/os/download/:release_id
|
||||
//! → octet-stream .raucb bytes
|
||||
//! POST /api/kiosk/os/applied { version, error? }
|
||||
//!
|
||||
//! Signature verification is RAUC's job — bundles are signed with the
|
||||
//! X.509 cert pair generated by scripts/gen-rauc-signing-keys.sh, and
|
||||
//! the corresponding CA cert is baked into the image at
|
||||
//! /etc/rauc/keyring.pem. We only sha256-check the download here to
|
||||
//! catch transport corruption before handing off to `rauc install`.
|
||||
//!
|
||||
//! Slot switching, atomic copy, and rollback are RAUC's job too —
|
||||
//! we just shell out to `rauc install`, post the outcome, and tell
|
||||
//! systemd to reboot. The custom bootloader backend
|
||||
//! (deploy/rauc/betterframe-rauc-boot.sh) flips Pi 5 tryboot on the
|
||||
//! next boot.
|
||||
//!
|
||||
//! Gated by env `BF_ENABLE_OS_OTA=1`. Default OFF so dev kiosks running
|
||||
//! a non-A/B layout don't try (and fail) to RAUC-install bundles.
|
||||
//!
|
||||
//! Compatibility: read from `/etc/betterframe/os-compatibility` (written
|
||||
//! at image build time). Falls back to env `BF_RAUC_COMPATIBILITY`, then
|
||||
//! a hardcoded default matching deploy/rauc/system.conf.
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
|
||||
use serde::Deserialize;
|
||||
use sha2::{Digest, Sha256};
|
||||
use tracing::{info, warn};
|
||||
|
||||
/// Compatibility string used when neither the image nor the environment
/// supplies one.
pub const DEFAULT_COMPATIBILITY: &str = "betterframe-rpi5-aarch64";

/// Resolve the compatibility string sent to `/api/kiosk/os/check`.
///
/// Sources are tried in order, and a source that is missing, unreadable, or
/// blank after trimming is skipped:
///
/// 1. the file `/etc/betterframe/os-compatibility`,
/// 2. the environment variable `BF_RAUC_COMPATIBILITY`,
/// 3. [`DEFAULT_COMPATIBILITY`].
///
/// The returned value is always trimmed and never empty.
fn compatibility() -> String {
    /// Trim `raw`; yield `None` when nothing but whitespace remains.
    fn non_blank(raw: String) -> Option<String> {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            None
        } else {
            Some(trimmed.to_string())
        }
    }

    fs::read_to_string("/etc/betterframe/os-compatibility")
        .ok()
        .and_then(non_blank)
        // A set-but-blank env var must not win over the default: an empty
        // compatibility string would never match any release on the server.
        .or_else(|| {
            std::env::var("BF_RAUC_COMPATIBILITY")
                .ok()
                .and_then(non_blank)
        })
        .unwrap_or_else(|| DEFAULT_COMPATIBILITY.to_string())
}
|
||||
|
||||
/// Read the running OS version from `/etc/betterframe/os-version`.
///
/// Surrounding whitespace is stripped. If the file cannot be read, or holds
/// only whitespace, the result is the empty string.
fn current_os_version() -> String {
    match fs::read_to_string("/etc/betterframe/os-version") {
        Ok(contents) => contents.trim().to_owned(),
        Err(_) => String::new(),
    }
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub struct CheckResponse {
|
||||
pub up_to_date: bool,
|
||||
pub update: Option<UpdateInfo>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct UpdateInfo {
|
||||
pub release_id: String,
|
||||
pub version: String,
|
||||
#[allow(dead_code)]
|
||||
pub channel: String,
|
||||
#[allow(dead_code)]
|
||||
pub compatibility: String,
|
||||
pub sha256: String,
|
||||
pub size_bytes: u64,
|
||||
#[allow(dead_code)]
|
||||
pub bundle_format: Option<String>,
|
||||
pub download_url: String,
|
||||
}
|
||||
|
||||
/// Hit `/api/kiosk/os/check`. Returns `Some(UpdateInfo)` when an upgrade is
|
||||
/// available. `None` on up-to-date, network failure, or parse error.
|
||||
pub fn check(server: &str, key: &str) -> Option<UpdateInfo> {
|
||||
let compat = compatibility();
|
||||
let cur = current_os_version();
|
||||
let url = format!(
|
||||
"{server}/api/kiosk/os/check?compatibility={compat}¤t={cur}",
|
||||
compat = urlencoding::encode(&compat),
|
||||
cur = urlencoding::encode(&cur),
|
||||
);
|
||||
let client = reqwest::blocking::Client::new();
|
||||
let resp = match client
|
||||
.get(&url)
|
||||
.header("Authorization", format!("Bearer {key}"))
|
||||
.timeout(Duration::from_secs(10))
|
||||
.send()
|
||||
{
|
||||
Ok(r) => r,
|
||||
Err(err) => {
|
||||
warn!("os-update check: request failed: {err}");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
if !resp.status().is_success() {
|
||||
warn!("os-update check: HTTP {}", resp.status());
|
||||
return None;
|
||||
}
|
||||
match resp.json::<CheckResponse>() {
|
||||
Ok(c) if !c.up_to_date => c.update,
|
||||
Ok(_) => None,
|
||||
Err(err) => {
|
||||
warn!("os-update check: parse failed: {err}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Download → sha256 verify → `rauc install` → post outcome → reboot.
|
||||
///
|
||||
/// On success: reboots the system (does not return). On failure: posts the
|
||||
/// error to /api/kiosk/os/applied and returns Err so the caller logs it.
|
||||
pub fn apply(server: &str, key: &str, info: &UpdateInfo) -> Result<(), String> {
|
||||
info!(
|
||||
"os-update: applying {} ({} bytes, release {})",
|
||||
info.version, info.size_bytes, info.release_id
|
||||
);
|
||||
|
||||
// 1. Download
|
||||
let url = format!("{}{}", server, info.download_url);
|
||||
let client = reqwest::blocking::Client::new();
|
||||
let resp = client
|
||||
.get(&url)
|
||||
.header("Authorization", format!("Bearer {key}"))
|
||||
.timeout(Duration::from_secs(600)) // OS bundles run hundreds of MB
|
||||
.send()
|
||||
.map_err(|e| format!("download request: {e}"))?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(format!("download HTTP {}", resp.status()));
|
||||
}
|
||||
let bytes = resp.bytes().map_err(|e| format!("download body: {e}"))?;
|
||||
if bytes.len() as u64 != info.size_bytes {
|
||||
return Err(format!(
|
||||
"size mismatch: expected {}, got {}",
|
||||
info.size_bytes,
|
||||
bytes.len()
|
||||
));
|
||||
}
|
||||
|
||||
// 2. sha256 (catch transport corruption; RAUC will re-verify the CMS
|
||||
// signature separately when it opens the bundle).
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(&bytes);
|
||||
let digest = hasher.finalize();
|
||||
let got_sha = hex_lower(&digest);
|
||||
if got_sha != info.sha256 {
|
||||
return Err(format!(
|
||||
"sha256 mismatch: expected {}, got {}",
|
||||
info.sha256, got_sha
|
||||
));
|
||||
}
|
||||
|
||||
// 3. Stage on disk for `rauc install` (it expects a file path, not a fd).
|
||||
// /var/tmp survives /tmp's potential tmpfs size cap; bundles can be big.
|
||||
let staging_dir = PathBuf::from("/var/tmp/betterframe");
|
||||
fs::create_dir_all(&staging_dir).map_err(|e| format!("mkdir staging: {e}"))?;
|
||||
let bundle_path = staging_dir.join(format!("os-{}.raucb", info.release_id));
|
||||
fs::write(&bundle_path, &bytes).map_err(|e| format!("write bundle: {e}"))?;
|
||||
|
||||
// 4. Hand off to rauc. `rauc install` blocks until the bundle is fully
|
||||
// copied into the inactive slot and bootloader is flipped. Exit code 0
|
||||
// = success; anything else = leave current slot booted, no reboot.
|
||||
let status = Command::new("rauc")
|
||||
.args(["install", bundle_path.to_str().unwrap_or("")])
|
||||
.status()
|
||||
.map_err(|e| {
|
||||
let _ = report_applied(server, key, &info.version, Some(&format!("rauc spawn: {e}")));
|
||||
format!("rauc spawn: {e}")
|
||||
})?;
|
||||
let _ = fs::remove_file(&bundle_path);
|
||||
if !status.success() {
|
||||
let msg = format!("rauc install exit {status:?}");
|
||||
let _ = report_applied(server, key, &info.version, Some(&msg));
|
||||
return Err(msg);
|
||||
}
|
||||
|
||||
// 5. Report success BEFORE reboot. After this we lose the server
|
||||
// connection mid-call; that's fine, server sets last_attempt_at from
|
||||
// the next heartbeat anyway, but recording success now means the
|
||||
// admin UI shows progress immediately.
|
||||
let _ = report_applied(server, key, &info.version, None);
|
||||
|
||||
info!("os-update: rauc install OK → rebooting into the new slot");
|
||||
// RAUC's custom bootloader backend has already armed tryboot for the
|
||||
// freshly-written slot. Reboot picks it up. On failure to reach the
|
||||
// new slot, tryboot rolls back automatically on the next power cycle.
|
||||
match Command::new("systemctl").arg("reboot").status() {
|
||||
Ok(_) => {
|
||||
// systemctl reboot returns before the reboot completes; sleep
|
||||
// briefly so we don't race main() into a re-entry.
|
||||
std::thread::sleep(Duration::from_secs(30));
|
||||
std::process::exit(0);
|
||||
}
|
||||
Err(e) => Err(format!("systemctl reboot: {e}")),
|
||||
}
|
||||
}
|
||||
|
||||
fn report_applied(server: &str, key: &str, version: &str, error: Option<&str>) -> Result<(), String> {
|
||||
let payload = if let Some(err) = error {
|
||||
serde_json::json!({ "version": version, "error": err })
|
||||
} else {
|
||||
serde_json::json!({ "version": version })
|
||||
};
|
||||
reqwest::blocking::Client::new()
|
||||
.post(format!("{server}/api/kiosk/os/applied"))
|
||||
.header("Authorization", format!("Bearer {key}"))
|
||||
.json(&payload)
|
||||
.timeout(Duration::from_secs(5))
|
||||
.send()
|
||||
.map(|_| ())
|
||||
.map_err(|e| format!("report applied: {e}"))
|
||||
}
|
||||
|
||||
/// Encode `bytes` as lowercase hexadecimal, two characters per byte
/// (high nibble first). An empty slice yields an empty string.
fn hex_lower(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    bytes
        .iter()
        // Split every byte into its high and low nibble, in that order.
        .flat_map(|&byte| [byte >> 4, byte & 0x0f])
        .map(|nibble| char::from(DIGITS[usize::from(nibble)]))
        .collect()
}
|
||||
|
|
@ -267,18 +267,26 @@ fn activate(app: &Application) {
|
|||
});
|
||||
|
||||
// Heartbeat loop — reports display geometry + hwmon, also checks for
|
||||
// firmware updates so kiosks pick up new builds without admin push.
|
||||
// firmware + OS bundle updates so kiosks pick up new builds without
|
||||
// admin push.
|
||||
let mut first_iter = true;
|
||||
loop {
|
||||
let heartbeat_ok = send_heartbeat_now(&server, &key);
|
||||
if first_iter && heartbeat_ok {
|
||||
// Successfully heart-beat at least once → consider this boot a
|
||||
// healthy one. Clears the rollback-pending marker so the next
|
||||
// start doesn't try to roll back a healthy install.
|
||||
// start doesn't try to roll back a healthy install, AND tells
|
||||
// RAUC the current slot is good so its boot-attempts counter
|
||||
// resets (otherwise three bad boots auto-roll back).
|
||||
firmware::mark_firmware_applied();
|
||||
mark_kiosk_healthy();
|
||||
mark_rauc_slot_good();
|
||||
first_iter = false;
|
||||
}
|
||||
// OS bundle first — if it succeeds it reboots and we never reach
|
||||
// the firmware check below this iteration. Order matters: an OS
|
||||
// bundle update can ship an app-binary change anyway.
|
||||
maybe_apply_os_update(&server, &key);
|
||||
maybe_apply_firmware_update(&server, &key);
|
||||
std::thread::sleep(std::time::Duration::from_secs(60));
|
||||
}
|
||||
|
|
@ -442,6 +450,62 @@ fn mark_kiosk_healthy() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Run `rauc status mark-good` so RAUC records the currently booted slot as
/// healthy and its boot-attempts counter doesn't fire a rollback after a
/// clean boot.
///
/// Best-effort: stdout and stderr are discarded and the outcome — including
/// a failure to spawn `rauc` at all — is ignored, which makes this a no-op
/// on dev / non-A/B kiosks where RAUC isn't installed.
fn mark_rauc_slot_good() {
    use std::process::{Command, Stdio};

    let mut mark_good = Command::new("rauc");
    mark_good
        .arg("status")
        .arg("mark-good")
        .stdout(Stdio::null())
        .stderr(Stdio::null());
    // Deliberately ignored: there is nothing useful to do on failure.
    let _ = mark_good.status();
}
|
||||
|
||||
/// Ask the server whether a full-OS RAUC bundle is available for this
|
||||
/// kiosk. On hit, download + sha256 + `rauc install` + reboot. On miss or
|
||||
/// error: log + keep running. Gated by BF_ENABLE_OS_OTA=1 (default OFF
|
||||
/// for dev kiosks running a non-A/B image).
|
||||
fn maybe_apply_os_update(server_url: &str, kiosk_key: &str) {
|
||||
if std::env::var("BF_ENABLE_OS_OTA").as_deref() != Ok("1") {
|
||||
return;
|
||||
}
|
||||
let Some(info) = os_update::check(server_url, kiosk_key) else {
|
||||
return;
|
||||
};
|
||||
info!("os-update: bundle {} available", info.version);
|
||||
server::report_kiosk_log(
|
||||
server_url,
|
||||
kiosk_key,
|
||||
"info",
|
||||
"os update available",
|
||||
serde_json::json!({
|
||||
"target_version": &info.version,
|
||||
"channel": &info.channel,
|
||||
"release_id": &info.release_id,
|
||||
"size_bytes": info.size_bytes,
|
||||
}),
|
||||
);
|
||||
if let Err(err) = os_update::apply(server_url, kiosk_key, &info) {
|
||||
warn!("os-update: apply failed: {err}");
|
||||
server::report_kiosk_log(
|
||||
server_url,
|
||||
kiosk_key,
|
||||
"error",
|
||||
"os update failed",
|
||||
serde_json::json!({
|
||||
"target_version": &info.version,
|
||||
"release_id": &info.release_id,
|
||||
"error": &err,
|
||||
}),
|
||||
);
|
||||
}
|
||||
// Success path doesn't return — apply() reboots the system.
|
||||
}
|
||||
|
||||
/// Ask the server whether an update is available. On hit, download + verify
|
||||
/// + swap + report + exit (systemd brings up the new binary). On miss or
|
||||
/// error: log + keep running. Designed to be safe to call from any thread.
|
||||
|
|
|
|||
Loading…
Reference in a new issue