strip URL fields from $set / $set_once

This commit is contained in:
fkwp
2026-05-18 18:37:27 +02:00
parent a08a561c4c
commit e55c15ccb8
2 changed files with 106 additions and 24 deletions

View File

@@ -138,6 +138,65 @@ describe("PosthogAnalytics", () => {
// A null event is handed back as null rather than being processed.
it("passes null events through unchanged", () => {
  const filtered = applyPrivacyFilters(null, Anonymity.Pseudonymous);
  expect(filtered).toBeNull();
});
// The $set_once bag nested in the event properties carries its own copies of
// the URL fields; a password in the hash/query must not survive in any of them.
it("strips URL fields nested inside $set_once", () => {
  const leakyUrl =
    "https://call.example.com/room/#/?password=hunter2&roomId=abc";
  const event = makeEvent({
    $current_url: "https://call.example.com/x",
    $set_once: {
      $current_url: leakyUrl,
      $initial_current_url: leakyUrl,
      $session_entry_url: leakyUrl,
      $initial_person_info: { r: "x", u: leakyUrl },
    },
  });

  const filtered = applyPrivacyFilters(event, Anonymity.Pseudonymous);
  const personBag = filtered?.properties["$set_once"] as Record<
    string,
    unknown
  >;

  // Fields that are kept must be trimmed so the password is gone.
  for (const trimmedKey of ["$current_url", "$initial_current_url"]) {
    expect(personBag[trimmedKey]).not.toContain("password");
  }
  // Fields that are dropped must be absent altogether.
  for (const removedKey of ["$session_entry_url", "$initial_person_info"]) {
    expect(personBag).not.toHaveProperty(removedKey);
  }
});
// Same check as for $set_once, but for the $set bag nested in the properties.
it("strips URL fields nested inside $set", () => {
  const leakyUrl =
    "https://call.example.com/room/#/?password=hunter2&roomId=abc";
  const event = makeEvent({
    $current_url: "https://call.example.com/x",
    $set: {
      $current_url: leakyUrl,
      $session_entry_url: leakyUrl,
    },
  });

  const filtered = applyPrivacyFilters(event, Anonymity.Pseudonymous);
  const personBag = filtered?.properties["$set"] as Record<string, unknown>;

  // The kept URL is trimmed; the session entry URL is removed entirely.
  expect(personBag["$current_url"]).not.toContain("password");
  expect(personBag).not.toHaveProperty("$session_entry_url");
});
// With Anonymity.Anonymous the referrer fields inside $set_once must come
// back as null.
it("nulls referrer fields inside $set_once when anonymous", () => {
  const event = makeEvent({
    $current_url: "https://x/y",
    $set_once: {
      $initial_referrer: "https://leaky",
      $initial_referring_domain: "leaky",
    },
  });

  const filtered = applyPrivacyFilters(event, Anonymity.Anonymous);
  const personBag = filtered?.properties["$set_once"] as Record<
    string,
    unknown
  >;

  for (const referrerKey of [
    "$initial_referrer",
    "$initial_referring_domain",
  ]) {
    expect(personBag[referrerKey]).toBeNull();
  }
});
});
// Verifies that applyPrivacyFilters is actually wired into posthog.init via

View File

@@ -66,9 +66,47 @@ export enum RegistrationType {
Registered,
}
/**
 * Sanitize URL / referrer / device fields on a single posthog properties bag,
 * mutating the bag in place.
 *
 * Applied to event.properties and to the person-profile bags
 * ($set / $set_once), since posthog mirrors the same URL fields into those.
 *
 * @param obj - The properties bag to scrub. A missing or non-object value is
 *   ignored, so optional bags can be passed straight through.
 * @param anonymity - When `Anonymity.Anonymous`, the referrer fields and the
 *   device ID are additionally overwritten with `null`.
 */
function stripPrivacyFields(
  obj: Properties | undefined,
  anonymity: Anonymity,
): void {
  // Bags read out of an untyped properties map may be absent or not an
  // object at all; assigning properties on a primitive would throw in strict
  // mode, so bail out early.
  if (!obj || typeof obj !== "object") return;

  if (anonymity === Anonymity.Anonymous) {
    // drop referrer information for anonymous users
    obj["$referrer"] = null;
    obj["$referring_domain"] = null;
    obj["$initial_referrer"] = null;
    obj["$initial_referring_domain"] = null;
    // drop device ID, which is a UUID persisted in local storage
    obj["$device_id"] = null;
  }

  // the url leaks a lot of private data like the call name or the user
  // (room password / room ID can land in the hash/query). Strip down to
  // scheme + host so we still get host-level insights (develop / main / sfu).
  for (const key of ["$current_url", "$initial_current_url"]) {
    const value: unknown = obj[key];
    if (typeof value !== "string") continue;

    // Cut the query/fragment off first. Splitting on "/" alone keeps them
    // when they follow the host directly ("https://host?password=..." has no
    // third slash), which would let the secret through.
    const queryOrHashAt = value.search(/[?#]/);
    const withoutQuery =
      queryOrHashAt === -1 ? value : value.slice(0, queryOrHashAt);

    // Keep only the segments up to and including the host. They are joined
    // without a separator, e.g. "https://host/room" becomes "https:host"
    // (existing output format, kept unchanged).
    obj[key] = withoutQuery.split("/").slice(0, 3).join("");
  }

  // $session_entry_url carries the full untrimmed URL; $initial_person_info
  // bundles initial referrer + URL into a nested object that bypasses the
  // per-key strips above. Drop both.
  delete obj["$session_entry_url"];
  delete obj["$initial_person_info"];
}
/**
* Strip PII from posthog's built-in properties (URL, referrer fields,
* device ID, $initial_person_info, $session_entry_url) before events leave
* the client. Also applied to the person-profile bags ($set / $set_once),
* which mirror the same URL fields.
* See src/utils/event-utils.ts in posthog-js (getEventProperties, getPersonInfo)
* for the list of properties posthog sets automatically.
*/
@@ -77,30 +115,15 @@ export function applyPrivacyFilters(
anonymity: Anonymity,
): CaptureResult | null {
if (event === null) return null;
const properties = event.properties;
if (anonymity === Anonymity.Anonymous) {
// drop referrer information for anonymous users
properties["$referrer"] = null;
properties["$referring_domain"] = null;
properties["$initial_referrer"] = null;
properties["$initial_referring_domain"] = null;
// drop device ID, which is a UUID persisted in local storage
properties["$device_id"] = null;
}
// the url leaks a lot of private data like the call name or the user.
// It's stripped down to the bare minimum to only give insights about the host (develop, main or sfu)
if (typeof properties["$current_url"] === "string") {
properties["$current_url"] = properties["$current_url"]
.split("/")
.slice(0, 3)
.join("");
}
// drop $initial_person_info for increased privacy.
delete properties["$initial_person_info"];
stripPrivacyFields(event.properties, anonymity);
// posthog can stash person-profile updates either at the top level
// of CaptureResult or nested inside properties depending on the pipeline
// stage; clean both spots so nothing slips through.
stripPrivacyFields(event.$set, anonymity);
stripPrivacyFields(event.$set_once, anonymity);
stripPrivacyFields(event.properties["$set"], anonymity);
stripPrivacyFields(event.properties["$set_once"], anonymity);
return event;
}