From e55c15ccb8128f97ba757fd80746a5e1af699531 Mon Sep 17 00:00:00 2001
From: fkwp
Date: Mon, 18 May 2026 18:37:27 +0200
Subject: [PATCH] strip URL fields from $set / $set_once

---
 src/analytics/PosthogAnalytics.test.ts | 59 +++++++++++++++++++++
 src/analytics/PosthogAnalytics.ts      | 71 +++++++++++++++++---------
 2 files changed, 106 insertions(+), 24 deletions(-)

diff --git a/src/analytics/PosthogAnalytics.test.ts b/src/analytics/PosthogAnalytics.test.ts
index 8196fa4dd..b5052840c 100644
--- a/src/analytics/PosthogAnalytics.test.ts
+++ b/src/analytics/PosthogAnalytics.test.ts
@@ -138,6 +138,65 @@ describe("PosthogAnalytics", () => {
     it("passes null events through unchanged", () => {
       expect(applyPrivacyFilters(null, Anonymity.Pseudonymous)).toBeNull();
     });
+
+    it("strips URL fields nested inside $set_once", () => {
+      const secretUrl =
+        "https://call.example.com/room/#/?password=hunter2&roomId=abc";
+      const out = applyPrivacyFilters(
+        makeEvent({
+          $current_url: "https://call.example.com/x",
+          $set_once: {
+            $current_url: secretUrl,
+            $initial_current_url: secretUrl,
+            $session_entry_url: secretUrl,
+            $initial_person_info: { r: "x", u: secretUrl },
+          },
+        }),
+        Anonymity.Pseudonymous,
+      );
+
+      const setOnce = out?.properties["$set_once"] as Record<string, unknown>;
+      expect(setOnce["$current_url"]).not.toContain("password");
+      expect(setOnce["$initial_current_url"]).not.toContain("password");
+      expect(setOnce).not.toHaveProperty("$session_entry_url");
+      expect(setOnce).not.toHaveProperty("$initial_person_info");
+    });
+
+    it("strips URL fields nested inside $set", () => {
+      const secretUrl =
+        "https://call.example.com/room/#/?password=hunter2&roomId=abc";
+      const out = applyPrivacyFilters(
+        makeEvent({
+          $current_url: "https://call.example.com/x",
+          $set: {
+            $current_url: secretUrl,
+            $session_entry_url: secretUrl,
+          },
+        }),
+        Anonymity.Pseudonymous,
+      );
+
+      const set = out?.properties["$set"] as Record<string, unknown>;
+      expect(set["$current_url"]).not.toContain("password");
+      expect(set).not.toHaveProperty("$session_entry_url");
+    });
+
+    it("nulls referrer fields inside $set_once when anonymous", () => {
+      const out = applyPrivacyFilters(
+        makeEvent({
+          $current_url: "https://x/y",
+          $set_once: {
+            $initial_referrer: "https://leaky",
+            $initial_referring_domain: "leaky",
+          },
+        }),
+        Anonymity.Anonymous,
+      );
+
+      const setOnce = out?.properties["$set_once"] as Record<string, unknown>;
+      expect(setOnce["$initial_referrer"]).toBeNull();
+      expect(setOnce["$initial_referring_domain"]).toBeNull();
+    });
   });
 
   // Verifies that applyPrivacyFilters is actually wired into posthog.init via
diff --git a/src/analytics/PosthogAnalytics.ts b/src/analytics/PosthogAnalytics.ts
index 08b275053..ea234a0a5 100644
--- a/src/analytics/PosthogAnalytics.ts
+++ b/src/analytics/PosthogAnalytics.ts
@@ -66,9 +66,47 @@ export enum RegistrationType {
   Registered,
 }
 
+// Sanitize URL / referrer / device fields on a single posthog properties bag.
+// Applied to event.properties and to the person-profile bags ($set / $set_once),
+// since posthog mirrors the same URL fields into those.
+function stripPrivacyFields(
+  obj: Properties | undefined,
+  anonymity: Anonymity,
+): void {
+  if (!obj) return;
+
+  if (anonymity === Anonymity.Anonymous) {
+    // drop referrer information for anonymous users
+    obj["$referrer"] = null;
+    obj["$referring_domain"] = null;
+    obj["$initial_referrer"] = null;
+    obj["$initial_referring_domain"] = null;
+
+    // drop device ID, which is a UUID persisted in local storage
+    obj["$device_id"] = null;
+  }
+
+  // the url leaks a lot of private data like the call name or the user
+  // (room password / room ID can land in the hash/query). Strip down to
+  // scheme + host so we still get host-level insights (develop / main / sfu).
+  for (const key of ["$current_url", "$initial_current_url"]) {
+    if (typeof obj[key] === "string") {
+      obj[key] = (obj[key] as string).split("/").slice(0, 3).join("");
+    }
+  }
+
+  // $session_entry_url carries the full untrimmed URL; $initial_person_info
+  // bundles initial referrer + URL into a nested object that bypasses the
+  // per-key strips above. Drop both.
+  delete obj["$session_entry_url"];
+  delete obj["$initial_person_info"];
+}
+
 /**
  * Strip PII from posthog's built-in properties (URL, referrer fields,
- * device ID, $initial_person_info) before events leave the client.
+ * device ID, $initial_person_info, $session_entry_url) before events leave
+ * the client. Also applied to the person-profile bags ($set / $set_once),
+ * which mirror the same URL fields.
  * See src/utils/event-utils.ts in posthog-js (getEventProperties, getPersonInfo)
  * for the list of properties posthog sets automatically.
  */
@@ -77,30 +115,15 @@ export function applyPrivacyFilters(
   anonymity: Anonymity,
 ): CaptureResult | null {
   if (event === null) return null;
-  const properties = event.properties;
 
-  if (anonymity === Anonymity.Anonymous) {
-    // drop referrer information for anonymous users
-    properties["$referrer"] = null;
-    properties["$referring_domain"] = null;
-    properties["$initial_referrer"] = null;
-    properties["$initial_referring_domain"] = null;
-
-    // drop device ID, which is a UUID persisted in local storage
-    properties["$device_id"] = null;
-  }
-
-  // the url leaks a lot of private data like the call name or the user.
-  // Its stripped down to the bare minimum to only give insights about the host (develop, main or sfu)
-  if (typeof properties["$current_url"] === "string") {
-    properties["$current_url"] = properties["$current_url"]
-      .split("/")
-      .slice(0, 3)
-      .join("");
-  }
-
-  // drop $initial_person_info for increased privacy.
-  delete properties["$initial_person_info"];
+  stripPrivacyFields(event.properties, anonymity);
+  // posthog can stash person-profile updates either at the top level
+  // of CaptureResult or nested inside properties depending on the pipeline
+  // stage; clean both spots so nothing slips through.
+  stripPrivacyFields(event.$set, anonymity);
+  stripPrivacyFields(event.$set_once, anonymity);
+  stripPrivacyFields(event.properties["$set"], anonymity);
+  stripPrivacyFields(event.properties["$set_once"], anonymity);
 
   return event;
 }