diff --git a/deploy/scripts/setup-pi-kiosk.sh b/deploy/scripts/setup-pi-kiosk.sh index 7e02a4c..a8a3e88 100755 --- a/deploy/scripts/setup-pi-kiosk.sh +++ b/deploy/scripts/setup-pi-kiosk.sh @@ -229,10 +229,12 @@ if [ "${INSTALL_KIOSK}" = "1" ]; then printf 'BetterFrame Kiosk\n\n' > /etc/issue rm -f /etc/update-motd.d/10-uname /etc/update-motd.d/* 2>/dev/null || true - echo "==> Installing PAM + systemd unit" + echo "==> Installing PAM + systemd unit + firmware rollback hook" install -m 644 "${REPO_ROOT}/deploy/pam.d/cage" /etc/pam.d/cage install -m 644 "${REPO_ROOT}/deploy/systemd/betterframe-kiosk.service" \ /etc/systemd/system/betterframe-kiosk.service + install -m 755 "${REPO_ROOT}/deploy/systemd/betterframe-firmware-rollback.sh" \ + /usr/local/sbin/betterframe-firmware-rollback.sh if [ ! -e /etc/default/betterframe-kiosk ]; then cat > /etc/default/betterframe-kiosk <<'EOF' diff --git a/deploy/systemd/betterframe-firmware-rollback.sh b/deploy/systemd/betterframe-firmware-rollback.sh new file mode 100755 index 0000000..61c154a --- /dev/null +++ b/deploy/systemd/betterframe-firmware-rollback.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Roll back the kiosk binary if a recent OTA update never reached a healthy +# heartbeat. Run as ExecStartPre on the betterframe-kiosk service. +# +# Logic: +# - Marker file at /var/lib/betterframe/kiosk/firmware-applying.json +# written by the kiosk just before swapping in the new binary. NOTE(review): kiosk/src/firmware.rs derives the marker directory from the binary path (bin.parent() / binary_path().parent()); confirm that resolves to this directory and not /opt/betterframe/kiosk, otherwise this script never sees the marker. +# - Kiosk deletes it after a successful heartbeat post-boot. +# - If we're running and the marker still exists and is older than 120s, the +# previous start failed before heartbeat → restore .prev, drop the marker. +# +# Idempotent. Silent on the happy path. Logs to journal otherwise. + +set -euo pipefail + +BIN="/opt/betterframe/kiosk/betterframe-kiosk" +PREV="${BIN}.prev" +MARKER="/var/lib/betterframe/kiosk/firmware-applying.json" + +if [ ! -f "$MARKER" ]; then + exit 0 +fi + +# Marker mtime in epoch seconds: tries `stat -c %Y`, then `stat -f %m`; if both fail it falls back to 0, so the marker is treated as stale. 
+marker_mtime=$(stat -c %Y "$MARKER" 2>/dev/null || stat -f %m "$MARKER" 2>/dev/null || echo 0) +now=$(date +%s) +age=$(( now - marker_mtime )) + +# Marker fresh → previous boot is still in progress, leave it. +if [ "$age" -lt 120 ]; then + exit 0 +fi + +# Stale marker + .prev present → rollback. +if [ -f "$PREV" ]; then + echo "[bf-firmware-rollback] stale apply marker (${age}s old) + .prev exists — rolling back" >&2 + cp -f "$PREV" "$BIN" + chmod +x "$BIN" + rm -f "$MARKER" +else + echo "[bf-firmware-rollback] stale marker but no .prev — clearing marker, manual intervention needed" >&2 + rm -f "$MARKER" +fi diff --git a/deploy/systemd/betterframe-kiosk.service b/deploy/systemd/betterframe-kiosk.service index f5749aa..3eba323 100644 --- a/deploy/systemd/betterframe-kiosk.service +++ b/deploy/systemd/betterframe-kiosk.service @@ -32,6 +32,7 @@ Environment=GST_DEBUG=1 Environment=BETTERFRAME_SERVER=http://localhost # Let the unprivileged kiosk process control the Pi fan PWM sysfs files. ExecStartPre=+/bin/sh -c 'for d in /sys/class/hwmon/hwmon*; do [ -e "$d/pwm1" ] || continue; chgrp bfkiosk "$d/pwm1" "$d/pwm1_enable" 2>/dev/null || true; chmod g+w "$d/pwm1" "$d/pwm1_enable" 2>/dev/null || true; done' +ExecStartPre=+/usr/local/sbin/betterframe-firmware-rollback.sh ExecStart=/usr/bin/cage -s -- /opt/betterframe/kiosk/betterframe-kiosk Restart=always RestartSec=2 diff --git a/kiosk/src/firmware.rs b/kiosk/src/firmware.rs index d8d4219..512e013 100644 --- a/kiosk/src/firmware.rs +++ b/kiosk/src/firmware.rs @@ -157,6 +157,21 @@ pub fn apply(server: &str, key: &str, info: &UpdateInfo) -> Result<(), String> { f.sync_all().ok(); } + // Drop a marker file the systemd ExecStartPre script reads to detect a + // failed first boot of the new binary. We delete it after a clean boot + // (see `mark_firmware_applied()`). If we crash before that, next start + // sees a stale marker → restores .prev. 
+ if let Some(dir) = bin.parent() { + let marker = dir.join("firmware-applying.json"); + let payload = serde_json::json!({ + "version": info.version, + "attempt_at": chrono_now_iso(), + "bin": bin.to_string_lossy(), + "prev": prev_path.to_string_lossy(), + }); + let _ = fs::write(&marker, payload.to_string()); + } + // Save current binary as .prev so an out-of-band rollback can restore it. if bin.exists() { let _ = fs::remove_file(&prev_path); @@ -190,6 +205,28 @@ fn verify_signature(public_key_pem: &str, sha256_hex: &str, sig_b64url: &str) -> .map_err(|e| format!("verify: {e}")) } +/// Clear the in-progress marker. Call after the kiosk has booted cleanly and +/// reported back to the server — proves the new binary survives startup. +pub fn mark_firmware_applied() { + let bin = binary_path(); + if let Some(dir) = bin.parent() { + let marker = dir.join("firmware-applying.json"); + if marker.exists() { + let _ = fs::remove_file(marker); + } + } +} + +fn chrono_now_iso() -> String { + // Sidesteps adding a chrono dep. Despite the name, this is Unix epoch seconds + // as a decimal string (0 if the clock reads before the epoch); the ExecStartPre rollback script keys off the marker's mtime, not this field. + let secs = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + format!("{secs}") +} + fn hex_lower(bytes: &[u8]) -> String { const HEX: &[u8; 16] = b"0123456789abcdef"; let mut s = String::with_capacity(bytes.len() * 2); diff --git a/kiosk/src/ui.rs b/kiosk/src/ui.rs index 80a938a..68ab0db 100644 --- a/kiosk/src/ui.rs +++ b/kiosk/src/ui.rs @@ -252,8 +252,16 @@ fn activate(app: &Application) { // Heartbeat loop — reports display geometry + hwmon, also checks for // firmware updates so kiosks pick up new builds without admin push. + let mut first_iter = true; loop { send_heartbeat_now(&server, &key); + if first_iter { + // First heartbeat has been attempted → consider this boot a + // healthy one. Clears the rollback-pending marker so the next + // start doesn't try to roll back a healthy install. NOTE(review): the outcome of send_heartbeat_now is not inspected here; confirm it cannot return after a failed post. 
+ firmware::mark_firmware_applied(); + first_iter = false; + } maybe_apply_firmware_update(&server, &key); std::thread::sleep(std::time::Duration::from_secs(60)); } diff --git a/server/src/plugins/service-admin-http/routes-firmware.ts b/server/src/plugins/service-admin-http/routes-firmware.ts index 1af8048..4f38fd5 100644 --- a/server/src/plugins/service-admin-http/routes-firmware.ts +++ b/server/src/plugins/service-admin-http/routes-firmware.ts @@ -15,6 +15,7 @@ import { htmlPage, htmlFragment } from "./html-response.js"; import type { AdminDeps } from "./index.js"; import { FirmwarePage, + FirmwareRolloutsPage, KioskFirmwarePanel, } from "../../web-templates/admin-pages.js"; import { getCoordinator } from "../../shared/coordinator-registry.js"; @@ -168,4 +169,68 @@ export function registerFirmwareRoutes(app: H3, deps: AdminDeps): void { const dispatched = getCoordinator().sendToKiosk(id, { type: "firmware_check" }); return { ok: true, dispatched }; }); + + // ---- Rollouts ----------------------------------------------------------- + + app.get("/admin/firmware/rollouts", (event) => { + const user = event.context.user!; + const rollouts = deps.repo.listFirmwareRollouts(); + const releases = deps.repo.listFirmwareReleases(); + const kiosks = deps.repo.listKiosks(); + return htmlPage(FirmwareRolloutsPage({ + user: user.username, + rollouts, + releases, + kiosks, + })); + }); + + app.post("/admin/firmware/rollouts/new", async (event) => { + const body = await readBody>(event); + const releaseId = String(body?.["release_id"] ?? ""); + if (!releaseId) throw createError({ statusCode: 400, statusMessage: "release_id required" }); + const release = deps.repo.getFirmwareRelease(releaseId); + if (!release) throw createError({ statusCode: 404, statusMessage: "release not found" }); + const percentage = clamp(Number(body?.["percentage"] ?? 100), 1, 100); + const targetsRaw = body?.["target_kiosk_ids"]; + const targets: number[] = Array.isArray(targetsRaw) + ? 
targetsRaw.map((s) => Number(s)).filter((n) => Number.isFinite(n)) + : typeof targetsRaw === "string" && targetsRaw + ? targetsRaw.split(",").map((s) => Number(s.trim())).filter((n) => Number.isFinite(n)) + : []; + const user = event.context.user!; + const rollout = deps.repo.createFirmwareRollout({ + id: randomUUID(), + release_id: releaseId, + target_kiosk_ids: targets, + percentage, + created_by: user.id ?? null, + }); + deps.repo.updateFirmwareRolloutState(rollout.id, "active"); + // Bump every targeted kiosk to check now (best-effort over WS). + const coord = getCoordinator(); + if (targets.length === 0) { + const allKiosks = deps.repo.listKiosks(); + for (const k of allKiosks) coord.sendToKiosk(k.id, { type: "firmware_check" }); + } else { + for (const id of targets) coord.sendToKiosk(id, { type: "firmware_check" }); + } + return new Response(null, { status: 302, headers: { location: "/admin/firmware/rollouts" } }); + }); + + app.post("/admin/firmware/rollouts/:id/state", async (event) => { + const id = String(getRouterParam(event, "id")); + const body = await readBody<{ state: string }>(event); + const state = body?.state; + if (state !== "paused" && state !== "active" && state !== "complete") { + throw createError({ statusCode: 400, statusMessage: "invalid state" }); + } + deps.repo.updateFirmwareRolloutState(id, state); + return new Response(null, { status: 302, headers: { location: "/admin/firmware/rollouts" } }); + }); +} + +function clamp(n: number, lo: number, hi: number): number { + if (!Number.isFinite(n)) return lo; + return Math.max(lo, Math.min(hi, Math.floor(n))); } diff --git a/server/src/plugins/service-api-http/index.ts b/server/src/plugins/service-api-http/index.ts index a078219..4f7f5b9 100644 --- a/server/src/plugins/service-api-http/index.ts +++ b/server/src/plugins/service-api-http/index.ts @@ -22,6 +22,7 @@ import { initiatePairing, claimPairing } from "../../shared/pairing.js"; import { generateBundle } from "../../shared/bundle.js"; 
import { initNoderedBridge, type NoderedBridge } from "../../shared/nodered-bridge.js"; import { initFirmware, type FirmwareApi } from "../../shared/firmware.js"; +import { createHash } from "node:crypto"; import type { Repository } from "../service-store/repository.js"; import type { AuthApi } from "../../shared/auth.js"; import type { SecretsApi } from "../../shared/secrets.js"; @@ -428,10 +429,24 @@ function registerKioskRoutes( const currentVersion = url.searchParams.get("current")?.trim() ?? kiosk.kiosk_app_version ?? ""; let release = null; + // Explicit per-kiosk pin wins over all rollout / channel selection. if (kiosk.firmware_target_version) { release = repo.getFirmwareReleaseByVersionArch(kiosk.firmware_target_version, arch); if (release?.yanked_at) release = null; } + // Active rollouts: most-recent matching, with bucket eligibility. + if (!release) { + const rollouts = repo.listActiveRolloutsForKiosk(kiosk.id); + for (const rollout of rollouts) { + if (!isKioskInRolloutBucket(kiosk.id, rollout.id, rollout.percentage)) continue; + const r = repo.getFirmwareRelease(rollout.release_id); + if (!r || r.yanked_at) continue; + if (r.arch !== arch) continue; + release = r; + break; + } + } + // Channel-latest fallback. if (!release) { const channel = (kiosk.firmware_channel ?? "stable") as FirmwareChannel; release = repo.getLatestFirmwareRelease(channel, arch); @@ -508,3 +523,19 @@ function registerKioskRoutes( return { ok: true }; }); } + +/** + * Deterministic bucket assignment for gradual rollouts. Same (kioskId, + * rolloutId) always lands in the same bucket, so a 50% rollout consistently + * targets the same half of the fleet across re-checks. Switch from 50%→100% + * gracefully adds the previously-excluded half rather than reshuffling. 
+ */ +function isKioskInRolloutBucket(kioskId: number, rolloutId: string, percentage: number): boolean { + if (percentage >= 100) return true; + if (percentage <= 0) return false; + const h = createHash("sha256") + .update(`${rolloutId}:${String(kioskId)}`) + .digest(); + const bucket = h.readUInt32BE(0) % 100; + return bucket < percentage; +} diff --git a/server/src/plugins/service-store/repository.ts b/server/src/plugins/service-store/repository.ts index 615ebf0..2078620 100644 --- a/server/src/plugins/service-store/repository.ts +++ b/server/src/plugins/service-store/repository.ts @@ -1210,6 +1210,20 @@ export class Repository { return r ? rowToFirmwareRollout(r as Record) : null; } + /** + * Active rollouts whose target list either includes this kiosk OR is + * empty (an empty list matches every kiosk; no channel filter is applied here). Ordered most-recent first + * so a newer rollout supersedes older ones. + */ + listActiveRolloutsForKiosk(kioskId: number): FirmwareRollout[] { + const rs = this.prep( + `SELECT * FROM firmware_rollouts WHERE state = 'active' ORDER BY created_at DESC`, + ).all(); + return rs + .map((r) => rowToFirmwareRollout(r as Record)) + .filter((r) => r.target_kiosk_ids.length === 0 || r.target_kiosk_ids.includes(kioskId)); + } + listFirmwareRollouts(): FirmwareRollout[] { const rs = this.prep( "SELECT * FROM firmware_rollouts ORDER BY created_at DESC", diff --git a/server/src/web-templates/admin-pages.tsx b/server/src/web-templates/admin-pages.tsx index c1077e1..1d0f602 100644 --- a/server/src/web-templates/admin-pages.tsx +++ b/server/src/web-templates/admin-pages.tsx @@ -8,6 +8,7 @@ import type { Display, Entity, FirmwareRelease, + FirmwareRollout, Kiosk, KioskGpioBinding, Label, @@ -2673,10 +2674,11 @@ interface FirmwarePageProps { export function FirmwarePage(props: FirmwarePageProps) { return ( - +

Signed kiosk firmware artifacts. Uploaded binaries are hashed + Ed25519-signed by the server before kiosks can install them. + Rollouts →

@@ -2876,3 +2878,106 @@ export function KioskLocalPanel(props: KioskLocalPanelProps) {
); } + +// ---- Firmware rollouts ----------------------------------------------------- + +interface FirmwareRolloutsPageProps { + user: string; + rollouts: FirmwareRollout[]; + releases: FirmwareRelease[]; + kiosks: Kiosk[]; +} + +export function FirmwareRolloutsPage(props: FirmwareRolloutsPageProps) { + const releaseById = new Map(props.releases.map((r) => [r.id, r])); + const kioskById = new Map(props.kiosks.map((k) => [k.id, k])); + return ( + +

+ Push a specific release to a slice of the fleet. percentage + buckets kiosks deterministically by rollout id and kiosk id, so every re-check within + one rollout lands on the same slice, and raising the percentage only adds kiosks.

+ +
+

New rollout

+
+
+ + +
+
+ + +
+
+ + +
Cmd/Ctrl-click to multi-select. Or post a comma-separated id list via API.
+
+ +
+
+ +
+ + + + + + + + + + + + + {props.rollouts.length === 0 ? ( + + ) : ( + props.rollouts.map((r) => { + const rel = releaseById.get(r.release_id); + const targetCount = r.target_kiosk_ids.length; + const targetSummary = targetCount === 0 + ? "(all on channel)" + : r.target_kiosk_ids.slice(0, 3).map((id) => kioskById.get(id)?.name ?? `#${String(id)}`).join(", ") + + (targetCount > 3 ? ` +${String(targetCount - 3)} more` : ""); + return ( + + + + + + + + + ); + }) + )} + +
ReleaseState%TargetsCreated
No rollouts yet.
{rel?.version ?? r.release_id}{rel && ({rel.channel}/{rel.arch})}{r.state}{String(r.percentage)}%{targetSummary}{formatTime(r.created_at)} +
+ + +
+
+ + +
+
+
+
+ ); +}