Posthog: drop $initial_person_info from outgoing events (#3968)

* Posthog: drop $initial_person_info from outgoing events

* Posthog: migrate from sanitize_properties to before_send

* strip URL fields from $set / $set_once

* enable mask_personal_data_properties

* review

* update tests to check that fields are removed with `delete` (no longer set to `null`)
rename: `applyPrivacyFilters`->`santizeSensitiveData`

---------

Co-authored-by: Timo K <toger5@hotmail.de>
This commit is contained in:
fkwp
2026-05-19 18:06:59 +02:00
committed by GitHub
parent 0e3aaaafe8
commit f0db134b6e
4 changed files with 512 additions and 44 deletions

View File

@@ -14,8 +14,13 @@ import {
beforeAll,
afterAll,
} from "vitest";
import posthog, { type CaptureResult } from "posthog-js";
import { PosthogAnalytics } from "./PosthogAnalytics";
import {
Anonymity,
santizeSensitiveData,
PosthogAnalytics,
} from "./PosthogAnalytics";
import { mockConfig } from "../utils/test";
describe("PosthogAnalytics", () => {
@@ -88,4 +93,154 @@ describe("PosthogAnalytics", () => {
expect(PosthogAnalytics.instance.isEnabled()).toBe(true);
});
});
// Unit tests for santizeSensitiveData, the privacy filter for posthog events.
// The suite is named after the function under test.
describe("santizeSensitiveData", () => {
  // Wraps a properties bag in the minimal event shape these tests need.
  const makeEvent = (properties: Record<string, unknown>): CaptureResult =>
    ({ event: "anyEvent", properties }) as unknown as CaptureResult;

  it("drops $initial_person_info regardless of anonymity", () => {
    // Run against both anonymity levels so the test matches its title.
    for (const anonymity of [Anonymity.Anonymous, Anonymity.Pseudonymous]) {
      const out = santizeSensitiveData(
        makeEvent({
          $current_url: "https://call.example.com/some/private/path",
          $initial_person_info: {
            r: "https://example.com/referrer",
            u: "https://call.example.com/some/private/path",
          },
        }),
        anonymity,
      );
      expect(out?.properties).not.toHaveProperty("$initial_person_info");
    }
  });

  it("strips hash from $current_url", () => {
    const out = santizeSensitiveData(
      makeEvent({ $current_url: "https://call.example.com/#/x/y/z" }),
      Anonymity.Pseudonymous,
    );
    expect(out?.properties["$current_url"]).not.toContain("/x/y/z");
  });

  it("drops referrer and device fields when anonymous", () => {
    const out = santizeSensitiveData(
      makeEvent({
        $current_url: "https://x/y",
        $referrer: "https://leaky",
        $initial_referrer: "https://leaky-too",
        $device_id: "uuid",
      }),
      Anonymity.Anonymous,
    );
    // The keys must be removed entirely, not merely set to undefined/null.
    expect(out?.properties).not.toHaveProperty("$referrer");
    expect(out?.properties).not.toHaveProperty("$initial_referrer");
    expect(out?.properties).not.toHaveProperty("$device_id");
  });

  it("passes null events through unchanged", () => {
    expect(santizeSensitiveData(null, Anonymity.Pseudonymous)).toBeNull();
  });

  it("strips URL fields nested inside $set_once", () => {
    const secretUrl =
      "https://call.example.com/room/#/?password=hunter2&roomId=abc";
    const out = santizeSensitiveData(
      makeEvent({
        $current_url: "https://call.example.com/x",
        $set_once: {
          $current_url: secretUrl,
          $initial_current_url: secretUrl,
          $session_entry_url: secretUrl,
          $initial_person_info: { r: "x", u: secretUrl },
        },
      }),
      Anonymity.Pseudonymous,
    );
    const setOnce = out?.properties["$set_once"] as Record<string, unknown>;
    expect(setOnce["$current_url"]).not.toContain("password");
    expect(setOnce["$initial_current_url"]).not.toContain("password");
    expect(setOnce).not.toHaveProperty("$session_entry_url");
    expect(setOnce).not.toHaveProperty("$initial_person_info");
  });

  it("strips URL fields nested inside $set", () => {
    const secretUrl =
      "https://call.example.com/room/#/?password=hunter2&roomId=abc";
    const out = santizeSensitiveData(
      makeEvent({
        $current_url: "https://call.example.com/x",
        $set: {
          $current_url: secretUrl,
          $session_entry_url: secretUrl,
        },
      }),
      Anonymity.Pseudonymous,
    );
    const set = out?.properties["$set"] as Record<string, unknown>;
    expect(set["$current_url"]).not.toContain("password");
    expect(set).not.toHaveProperty("$session_entry_url");
  });

  it("drops referrer fields inside $set_once when anonymous", () => {
    const out = santizeSensitiveData(
      makeEvent({
        $current_url: "https://x/y",
        $set_once: {
          $initial_referrer: "https://leaky",
          $initial_referring_domain: "leaky",
        },
      }),
      Anonymity.Anonymous,
    );
    const setOnce = out?.properties["$set_once"] as Record<string, unknown>;
    expect(setOnce).not.toHaveProperty("$initial_referrer");
    expect(setOnce).not.toHaveProperty("$initial_referring_domain");
  });
});
// Wiring check: posthog.init must receive a before_send hook that routes
// events through santizeSensitiveData. This guards against a typo in the
// option name or a posthog-js upgrade that renames/removes the hook. The
// filter's own behaviour is covered by the tests that call
// santizeSensitiveData directly.
describe("posthog.init wiring", () => {
  beforeAll(() => {
    vi.stubEnv("VITE_PACKAGE", "full");
  });

  afterAll(() => {
    vi.unstubAllEnvs();
  });

  beforeEach(() => {
    const posthogConfig = {
      api_host: "https://api.example.com.localhost",
      api_key: "api_key",
    };
    mockConfig({ posthog: posthogConfig });
    PosthogAnalytics.resetInstance();
  });

  it("passes events through the privacy filter via before_send", () => {
    // Install the spy before the first `instance` access, which triggers init.
    const spy = vi.spyOn(posthog, "init");
    expect(PosthogAnalytics.instance.isEnabled()).toBe(true);

    // Second positional argument of the first init call is the options bag.
    const [, initOptions] = spy.mock.calls[0];
    const hook = initOptions?.before_send;
    expect(hook).toBeInstanceOf(Function);

    const rawEvent = {
      event: "anyEvent",
      properties: {
        $current_url: "https://call.example.com/x/y",
        $initial_person_info: { r: "x" },
      },
    } as unknown as CaptureResult;

    const runHook = hook as (e: CaptureResult) => CaptureResult | null;
    const filtered = runHook(rawEvent);
    expect(filtered?.properties).not.toHaveProperty("$initial_person_info");
  });
});
});

View File

@@ -7,6 +7,7 @@ Please see LICENSE in the repository root for full details.
import posthog, {
type CaptureOptions,
type CaptureResult,
type PostHog,
type Properties,
} from "posthog-js";
@@ -65,6 +66,73 @@ export enum RegistrationType {
Registered,
}
// Sanitize URL / referrer / device fields on a single posthog properties bag.
// Applied to event.properties and to the person-profile bags ($set / $set_once),
// since posthog mirrors the same URL fields into those.
//
// The bag is mutated in place; sensitive keys are removed with `delete` so they
// are absent from the payload rather than present with a null value.
//
// @param obj - properties bag to clean; `undefined`/`null` is a no-op.
// @param anonymity - when `Anonymity.Anonymous`, referrer fields and the
//   device ID are removed in addition to the URL handling below.
function stripSensitiveFields(
  obj: Properties | undefined,
  anonymity: Anonymity,
): void {
  if (!obj) return;

  if (anonymity === Anonymity.Anonymous) {
    // drop referrer information for anonymous users
    delete obj["$referrer"];
    delete obj["$referring_domain"];
    delete obj["$initial_referrer"];
    delete obj["$initial_referring_domain"];
    // drop device ID, which is a UUID persisted in local storage
    delete obj["$device_id"];
  }

  // the url leaks a lot of private data like the call name or the user
  // (room password / room ID can land in the path/hash/query). Strip down to
  // scheme + hostname so we still get host-level insights (develop / main / sfu).
  for (const key of ["$current_url", "$initial_current_url"]) {
    const value: unknown = obj[key];
    if (typeof value !== "string") continue;
    try {
      const url = new URL(value);
      // Path, query, hash, port and credentials are all intentionally dropped.
      obj[key] = url.protocol + "//" + url.hostname;
    } catch {
      // Unparsable value: we cannot prove it is safe, so remove it entirely
      // (consistent with every other field this function strips).
      delete obj[key];
    }
  }

  // $session_entry_url carries the full untrimmed URL; $initial_person_info
  // bundles initial referrer + URL into a nested object that bypasses the
  // per-key strips above. Drop both.
  delete obj["$session_entry_url"];
  delete obj["$initial_person_info"];
}
/**
 * Privacy filter for a captured posthog event: removes or trims posthog's
 * built-in properties that can carry PII (URL and referrer fields, device
 * ID, $initial_person_info, $session_entry_url) before the event is sent.
 *
 * The event is cleaned in place and the same object is returned.
 * See src/utils/event-utils.ts in posthog-js (getEventProperties, getPersonInfo)
 * for the list of properties posthog sets automatically.
 *
 * @param event - the captured event, or `null` (passed through unchanged).
 * @param anonymity - anonymity level forwarded to every per-bag cleanup.
 * @returns the same (mutated) event, or `null` when `event` is `null`.
 */
export function santizeSensitiveData(
  event: CaptureResult | null,
  anonymity: Anonymity,
): CaptureResult | null {
  if (event === null) {
    return null;
  }

  // Top-level bags: the event's own properties plus person-profile updates
  // stored directly on the CaptureResult.
  for (const bag of [event.properties, event.$set, event.$set_once]) {
    stripSensitiveFields(bag, anonymity);
  }

  // posthog can also nest person-profile updates inside `properties`
  // depending on the pipeline stage, so clean those copies as well.
  for (const nestedKey of ["$set", "$set_once"]) {
    stripSensitiveFields(event.properties[nestedKey], anonymity);
  }

  return event;
}
interface PlatformProperties {
appVersion: string;
matrixBackend: "embedded" | "jssdk";
@@ -129,13 +197,16 @@ export class PosthogAnalytics {
}
if (apiKey && apiHost) {
const beforeSend = (event: CaptureResult | null): CaptureResult | null =>
santizeSensitiveData(event, this.anonymity);
this.posthog.init(apiKey, {
api_host: apiHost,
autocapture: false,
mask_all_text: true,
mask_all_element_attributes: true,
mask_personal_data_properties: true,
capture_pageview: false,
sanitize_properties: this.sanitizeProperties,
before_send: beforeSend,
respect_dnt: true,
advanced_disable_decide: true,
});
@@ -148,34 +219,6 @@ export class PosthogAnalytics {
}
}
/**
 * Callback from posthog to sanitize properties before sending them to the server.
 * Here we sanitize posthog's built in properties which leak PII e.g. url reporting.
 * See utils.js _.info.properties in posthog-js.
 *
 * @param properties - the event's properties bag; mutated in place.
 * @param _eventName - unused.
 * @returns the same `properties` object.
 */
private sanitizeProperties = (
  properties: Properties,
  _eventName: string,
): Properties => {
  if (this.anonymity === Anonymity.Anonymous) {
    // drop referrer information for anonymous users
    properties["$referrer"] = null;
    properties["$referring_domain"] = null;
    properties["$initial_referrer"] = null;
    properties["$initial_referring_domain"] = null;

    // drop device ID, which is a UUID persisted in local storage
    properties["$device_id"] = null;
  }

  // the url leaks a lot of private data like the call name or the user.
  // It is stripped down to the bare minimum to only give insights about the
  // host (develop, main or sfu).
  // Guard on the type: an event without a string $current_url must not make
  // this callback throw.
  const currentUrl: unknown = properties["$current_url"];
  if (typeof currentUrl === "string") {
    // NOTE(review): joining with "" yields e.g. "https:call.example.com"
    // (no slashes). Kept as-is so already-reported values stay comparable.
    properties["$current_url"] = currentUrl.split("/").slice(0, 3).join("");
  }

  return properties;
};
private registerSuperProperties(properties: Properties): void {
if (this.enabled) {
this.posthog.register(properties);