Posthog: drop $initial_person_info from outgoing events (#3968)

* Posthog: drop $initial_person_info from outgoing events

* Posthog: migrate from sanitize_properties to before_send

* strip URL fields from $set / $set_once

* enable mask_personal_data_properties

* review

* update tests to check for `delete` (no longer `= null`)
rename: `applyPrivacyFilters`->`santizeSensitiveData`

---------

Co-authored-by: Timo K <toger5@hotmail.de>
This commit is contained in:
fkwp
2026-05-19 18:06:59 +02:00
committed by GitHub
parent 0e3aaaafe8
commit f0db134b6e
4 changed files with 512 additions and 44 deletions

View File

@@ -7,6 +7,7 @@ Please see LICENSE in the repository root for full details.
import posthog, {
type CaptureOptions,
type CaptureResult,
type PostHog,
type Properties,
} from "posthog-js";
@@ -65,6 +66,73 @@ export enum RegistrationType {
Registered,
}
/**
 * Sanitize URL / referrer / device fields on a single posthog properties bag.
 * The bag is modified in place.
 *
 * Applied to event.properties and to the person-profile bags ($set / $set_once),
 * since posthog mirrors the same URL fields into those.
 *
 * @param obj - The properties bag to clean. Nothing happens when it is missing.
 * @param anonymity - Current anonymity level. Referrer and device-ID fields are
 *   only dropped for `Anonymity.Anonymous`; the URL handling applies always.
 */
function stripSensitiveFields(
  obj: Properties | undefined,
  anonymity: Anonymity,
): void {
  if (!obj) return;

  if (anonymity === Anonymity.Anonymous) {
    // drop referrer information for anonymous users
    delete obj["$referrer"];
    delete obj["$referring_domain"];
    delete obj["$initial_referrer"];
    delete obj["$initial_referring_domain"];
    // drop device ID, which is a UUID persisted in local storage
    delete obj["$device_id"];
  }

  // The url leaks a lot of private data like the call name or the user
  // (room password / room ID can land in the path, hash or query). Strip down
  // to scheme + host so we still get host-level insights (develop / main / sfu).
  // The pathname is deliberately NOT kept: it can carry the call name.
  for (const key of ["$current_url", "$initial_current_url"]) {
    const value: unknown = obj[key];
    if (typeof value !== "string") continue;
    try {
      const url = new URL(value);
      // `hostname` excludes the port; only scheme and host name are reported.
      obj[key] = `${url.protocol}//${url.hostname}`;
    } catch {
      // Not parseable as an absolute URL: blank it rather than send it raw.
      obj[key] = null;
    }
  }

  // $session_entry_url carries the full untrimmed URL; $initial_person_info
  // bundles initial referrer + URL into a nested object that bypasses the
  // per-key strips above. Drop both.
  delete obj["$session_entry_url"];
  delete obj["$initial_person_info"];
}
/**
 * `before_send`-style filter that removes PII from posthog's built-in
 * properties (URL, referrer fields, device ID, $initial_person_info,
 * $session_entry_url) before an event leaves the client.
 *
 * The same cleaning is applied to the person-profile bags ($set / $set_once),
 * which mirror the same URL fields. The event is modified in place.
 *
 * See src/utils/event-utils.ts in posthog-js (getEventProperties, getPersonInfo)
 * for the list of properties posthog sets automatically.
 *
 * @param event - The event about to be sent, or null.
 * @param anonymity - Anonymity level forwarded to the per-bag cleaner.
 * @returns The same event object after cleaning, or null when given null.
 */
export function santizeSensitiveData(
  event: CaptureResult | null,
  anonymity: Anonymity,
): CaptureResult | null {
  if (event !== null) {
    const scrub = (bag: Properties | undefined): void =>
      stripSensitiveFields(bag, anonymity);

    scrub(event.properties);

    // Person-profile updates can sit either at the top level of the
    // CaptureResult or nested inside properties, depending on the pipeline
    // stage; clean both spots so nothing slips through.
    scrub(event.$set);
    scrub(event.$set_once);
    scrub(event.properties["$set"]);
    scrub(event.properties["$set_once"]);
  }
  return event;
}
interface PlatformProperties {
appVersion: string;
matrixBackend: "embedded" | "jssdk";
@@ -129,13 +197,16 @@ export class PosthogAnalytics {
}
if (apiKey && apiHost) {
const beforeSend = (event: CaptureResult | null): CaptureResult | null =>
santizeSensitiveData(event, this.anonymity);
this.posthog.init(apiKey, {
api_host: apiHost,
autocapture: false,
mask_all_text: true,
mask_all_element_attributes: true,
mask_personal_data_properties: true,
capture_pageview: false,
sanitize_properties: this.sanitizeProperties,
before_send: beforeSend,
respect_dnt: true,
advanced_disable_decide: true,
});
@@ -148,34 +219,6 @@ export class PosthogAnalytics {
}
}
/**
 * Callback from posthog to sanitize properties before sending them to the server.
 * Here we sanitize posthog's built in properties which leak PII e.g. url reporting.
 * See utils.js _.info.properties in posthog-js.
 *
 * @param properties - The event properties; modified in place.
 * @param _eventName - Name of the event (unused).
 * @returns The same `properties` object after sanitizing.
 */
private sanitizeProperties = (
  properties: Properties,
  _eventName: string,
): Properties => {
  if (this.anonymity === Anonymity.Anonymous) {
    // drop referrer information for anonymous users
    properties["$referrer"] = null;
    properties["$referring_domain"] = null;
    properties["$initial_referrer"] = null;
    properties["$initial_referring_domain"] = null;

    // drop device ID, which is a UUID persisted in local storage
    properties["$device_id"] = null;
  }

  // The url leaks a lot of private data like the call name or the user.
  // It is stripped down to the bare minimum to only give insights about the
  // host (develop, main or sfu). Only the first three "/"-separated segments
  // are kept and joined without a separator, so "https://host/path" becomes
  // "https:host".
  // Guarded: an event without a string $current_url must not throw here.
  const currentUrl: unknown = properties["$current_url"];
  if (typeof currentUrl === "string") {
    properties["$current_url"] = currentUrl.split("/").slice(0, 3).join("");
  }

  return properties;
};
private registerSuperProperties(properties: Properties): void {
if (this.enabled) {
this.posthog.register(properties);