From 1de8d93b4bdb5090f4112aec0af49736b4e00820 Mon Sep 17 00:00:00 2001 From: Valere Date: Wed, 25 Feb 2026 15:47:25 +0100 Subject: [PATCH] feat: video auto fit based on video stream size --- src/state/MediaViewModel.test.ts | 15 -- src/state/MediaViewModel.ts | 63 ++++++-- src/tile/GridTile.tsx | 41 ++--- src/tile/SpotlightTile.tsx | 22 ++- src/utils/videoFit.test.ts | 251 +++++++++++++++++++++++++++++++ src/utils/videoFit.ts | 94 ++++++++++++ 6 files changed, 441 insertions(+), 45 deletions(-) create mode 100644 src/utils/videoFit.test.ts create mode 100644 src/utils/videoFit.ts diff --git a/src/state/MediaViewModel.test.ts b/src/state/MediaViewModel.test.ts index 92868216..a7bbb571 100644 --- a/src/state/MediaViewModel.test.ts +++ b/src/state/MediaViewModel.test.ts @@ -92,21 +92,6 @@ test("control a participant's volume", () => { }); }); -test("toggle fit/contain for a participant's video", () => { - const vm = createRemoteMedia(rtcMembership, {}, mockRemoteParticipant({})); - withTestScheduler(({ expectObservable, schedule }) => { - schedule("-ab|", { - a: () => vm.toggleFitContain(), - b: () => vm.toggleFitContain(), - }); - expectObservable(vm.cropVideo$).toBe("abc", { - a: true, - b: false, - c: true, - }); - }); -}); - test("local media remembers whether it should always be shown", () => { const vm1 = createLocalMedia( rtcMembership, diff --git a/src/state/MediaViewModel.ts b/src/state/MediaViewModel.ts index 3da69c46..57b0428a 100644 --- a/src/state/MediaViewModel.ts +++ b/src/state/MediaViewModel.ts @@ -43,6 +43,8 @@ import { switchMap, throttleTime, distinctUntilChanged, + concat, + take, } from "rxjs"; import { alwaysShowSelf } from "../settings/settings"; @@ -55,6 +57,7 @@ import { platform } from "../Platform"; import { type MediaDevices } from "./MediaDevices"; import { type Behavior } from "./Behavior"; import { type ObservableScope } from "./ObservableScope"; +import { videoFit$, videoSizeFromParticipant$ } from "../utils/videoFit.ts"; export 
function observeTrackReference$( participant: Participant, @@ -67,6 +70,10 @@ export function observeTrackReference$( ); } +/** + * Helper function to observe the RTC stats for a given participant and track source. + * It polls the stats immediately, then three more times at 300 ms intervals, then every second, and emits the latest stats object. + */ export function observeRtpStreamStats$( participant: Participant, source: Track.Source, @@ -76,7 +83,9 @@ export function observeRtpStreamStats$( > { return combineLatest([ observeTrackReference$(participant, source), - interval(1000).pipe(startWith(0)), + // This is also used for detecting video orientation, + // and we want that to be more responsive than the connection stats, so we poll more frequently at the start. + concat(interval(300).pipe(take(3)), interval(1000)).pipe(startWith(0)), ]).pipe( switchMap(async ([trackReference]) => { const track = trackReference?.publication?.track; @@ -90,7 +99,6 @@ export function observeRtpStreamStats$( if (!report) { return undefined; } - for (const v of report.values()) { if (v.type === type) { return v; @@ -103,6 +111,13 @@ export function observeRtpStreamStats$( ); } +/** + * Helper function to observe the inbound RTP stats for a given participant and track source. + * To be used for remote participants' audio and video tracks. + * It polls the stats every second and emits the latest stats object. + * @param participant - The LiveKit participant whose track stats we want to observe. + * @param source - The source of the track (e.g. Track.Source.Camera or Track.Source.Microphone). + */ export function observeInboundRtpStreamStats$( participant: Participant, source: Track.Source, @@ -112,6 +127,13 @@ export function observeInboundRtpStreamStats$( ); } +/** + * Helper function to observe the outbound RTP stats for a given participant and track source. + * To be used for the local participant's audio and video tracks. + * It polls the stats every second and emits the latest stats object. 
+ * @param participant - The LiveKit participant whose track stats we want to observe. + * @param source - The source of the track (e.g. Track.Source.Camera or Track.Source.Microphone). + */ export function observeOutboundRtpStreamStats$( participant: Participant, source: Track.Source, @@ -263,7 +285,6 @@ abstract class BaseMediaViewModel { protected readonly participant$: Observable< LocalParticipant | RemoteParticipant | null >, - encryptionSystem: EncryptionSystem, audioSource: AudioSource, videoSource: VideoSource, @@ -397,13 +418,12 @@ abstract class BaseUserMediaViewModel extends BaseMediaViewModel { return this._videoEnabled$; } - private readonly _cropVideo$ = new BehaviorSubject(true); /** - * Whether the tile video should be contained inside the tile or be cropped to fit. + * Whether the tile video should be contained inside the tile (video-fit contain) or be cropped to fit (video-fit cover). */ - public readonly cropVideo$: Behavior = this._cropVideo$; + public readonly videoFit$: Behavior<"cover" | "contain">; - public constructor( + protected constructor( scope: ObservableScope, id: string, userId: string, @@ -443,10 +463,12 @@ abstract class BaseUserMediaViewModel extends BaseMediaViewModel { this._videoEnabled$ = this.scope.behavior( media$.pipe(map((m) => m?.cameraTrack?.isMuted === false)), ); - } - public toggleFitContain(): void { - this._cropVideo$.next(!this._cropVideo$.value); + this.videoFit$ = videoFit$( + this.scope, + videoSizeFromParticipant$(participant$), + this.actualSize$, + ); } public get local(): boolean { @@ -456,9 +478,28 @@ abstract class BaseUserMediaViewModel extends BaseMediaViewModel { public abstract get audioStreamStats$(): Observable< RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats | undefined >; + public abstract get videoStreamStats$(): Observable< RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats | undefined >; + + private readonly _actualSize$ = new BehaviorSubject< + { width: number; height: number } | 
undefined + >(undefined); + public readonly actualSize$ = this._actualSize$.asObservable(); + + /** + * Set the actual dimensions of the html element. + * This can be used to determine the best video fit (fit to frame / keep ratio). + * @param width - The actual width of the html element displaying the video. + * @param height - The actual height of the html element displaying the video. + */ + public setActualDimensions(width: number, height: number): void { + this._actualSize$.next({ + width, + height, + }); + } } /** @@ -616,6 +657,7 @@ export class RemoteUserMediaViewModel extends BaseUserMediaViewModel { // This private field is used to override the value from the superclass private __speaking$: Behavior; + public get speaking$(): Behavior { return this.__speaking$; } @@ -661,6 +703,7 @@ export class RemoteUserMediaViewModel extends BaseUserMediaViewModel { // This private field is used to override the value from the superclass private __videoEnabled$: Behavior; + public get videoEnabled$(): Behavior { return this.__videoEnabled$; } diff --git a/src/tile/GridTile.tsx b/src/tile/GridTile.tsx index 92262f05..ad158db1 100644 --- a/src/tile/GridTile.tsx +++ b/src/tile/GridTile.tsx @@ -11,6 +11,7 @@ import { type ReactNode, type Ref, useCallback, + useEffect, useRef, useState, } from "react"; @@ -26,7 +27,6 @@ import { VolumeOffIcon, VisibilityOnIcon, UserProfileIcon, - ExpandIcon, VolumeOffSolidIcon, SwitchCameraSolidIcon, } from "@vector-im/compound-design-tokens/assets/web/icons"; @@ -37,6 +37,7 @@ import { Menu, } from "@vector-im/compound-web"; import { useObservableEagerState } from "observable-hooks"; +import useMeasure from "react-use-measure"; import styles from "./GridTile.module.css"; import { @@ -105,18 +106,26 @@ const UserMediaTile: FC = ({ const audioEnabled = useBehavior(vm.audioEnabled$); const videoEnabled = useBehavior(vm.videoEnabled$); const speaking = useBehavior(vm.speaking$); - const cropVideo = useBehavior(vm.cropVideo$); - const 
onSelectFitContain = useCallback( - (e: Event) => { - e.preventDefault(); - vm.toggleFitContain(); - }, - [vm], - ); + const videoFit = useBehavior(vm.videoFit$); + const rtcBackendIdentity = vm.rtcBackendIdentity; const handRaised = useBehavior(vm.handRaised$); const reaction = useBehavior(vm.reaction$); + // We need to keep track of the tile size. + // We use this to get the tile ratio, and compare it to the video ratio to decide + // whether to fit the video to frame or keep the ratio. + const [measureRef, bounds] = useMeasure(); + // There is already a ref being passed in, so we need to merge it with the measureRef. + const tileRef = useMergedRefs(ref, measureRef); + + // Whenever bounds change, inform the viewModel + useEffect(() => { + if (bounds.width > 0 && bounds.height > 0) { + vm.setActualDimensions(bounds.width, bounds.height); + } + }, [bounds.width, bounds.height, vm]); + const AudioIcon = locallyMuted ? VolumeOffSolidIcon : audioEnabled @@ -132,12 +141,10 @@ const UserMediaTile: FC = ({ const menu = ( <> {menuStart} - + {/* + No additional menu item (used to be the manual fit to frame). + Placeholder for future menu items that should be placed here. + */} {menuEnd} ); @@ -150,13 +157,13 @@ const UserMediaTile: FC = ({ const tile = ( = ({ vm, ...props }) => { - const cropVideo = useBehavior(vm.cropVideo$); + const videoFit = useBehavior(vm.videoFit$); const baseProps: SpotlightUserMediaItemBaseProps & RefAttributes = { - videoFit: cropVideo ? "cover" : "contain", + videoFit, ...props, }; @@ -147,7 +148,22 @@ const SpotlightItem: FC = ({ "aria-hidden": ariaHidden, }) => { const ourRef = useRef(null); - const ref = useMergedRefs(ourRef, theirRef); + + // We need to keep track of the tile size. + // We use this to get the tile ratio, and compare it to the video ratio to decide + // whether to fit the video to frame or keep the ratio. 
+ const [measureRef, bounds] = useMeasure(); + + // Whenever bounds change, inform the viewModel + useEffect(() => { + if (bounds.width > 0 && bounds.height > 0) { + if (!(vm instanceof ScreenShareViewModel)) { + vm.setActualDimensions(bounds.width, bounds.height); + } + } + }, [bounds.width, bounds.height, vm]); + + const ref = useMergedRefs(ourRef, theirRef, measureRef); const focusUrl = useBehavior(vm.focusUrl$); const displayName = useBehavior(vm.displayName$); const mxcAvatarUrl = useBehavior(vm.mxcAvatarUrl$); diff --git a/src/utils/videoFit.test.ts b/src/utils/videoFit.test.ts new file mode 100644 index 00000000..9390e8d4 --- /dev/null +++ b/src/utils/videoFit.test.ts @@ -0,0 +1,251 @@ +/* +Copyright 2026 Element Creations Ltd. + +SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial +Please see LICENSE in the repository root for full details. +*/ + +import { describe, expect, test, vi } from "vitest"; +import { + LocalTrack, + type LocalTrackPublication, + type RemoteTrackPublication, + Track, +} from "livekit-client"; + +import { ObservableScope } from "../state/ObservableScope"; +import { videoFit$, videoSizeFromParticipant$ } from "./videoFit"; +import { constant } from "../state/Behavior"; +import { + flushPromises, + mockLocalParticipant, + mockRemoteParticipant, +} from "./test"; + +describe("videoFit$ defaults", () => { + test.each([ + { + videoSize: { width: 1920, height: 1080 }, + tileSize: undefined, + }, + { + videoSize: { width: 1080, height: 1920 }, + tileSize: undefined, + }, + { + videoSize: undefined, + tileSize: { width: 1920, height: 1080 }, + }, + { + videoSize: undefined, + tileSize: { width: 1080, height: 1920 }, + }, + ])( + "videoFit$ returns `cover` when videoSize is $videoSize and tileSize is $tileSize", + ({ videoSize, tileSize }) => { + const scope = new ObservableScope(); + const videoSize$ = constant(videoSize); + const tileSize$ = constant(tileSize); + + const fit = videoFit$(scope, videoSize$, tileSize$); + 
expect(fit.value).toBe("cover"); + }, + ); +}); + +const VIDEO_480_L = { width: 640, height: 480 }; +const VIDEO_720_L = { width: 1280, height: 720 }; +const VIDEO_1080_L = { width: 1920, height: 1080 }; + +// Some sizes from real world testing, which don't match the standard video sizes exactly +const TILE_SIZE_1_L = { width: 180, height: 135 }; +const TILE_SIZE_3_P = { width: 379, height: 542 }; +const TILE_SIZE_4_L = { width: 957, height: 542 }; +// This is the size of an iPhone Xr in portrait mode +const TILE_SIZE_5_P = { width: 414, height: 896 }; + +export function invertSize(size: { width: number; height: number }): { + width: number; + height: number; +} { + return { + width: size.height, + height: size.width, + }; +} + +test.each([ + { + videoSize: VIDEO_480_L, + tileSize: TILE_SIZE_1_L, + expected: "cover", + }, + { + videoSize: invertSize(VIDEO_480_L), + tileSize: TILE_SIZE_1_L, + expected: "contain", + }, + { + videoSize: VIDEO_720_L, + tileSize: TILE_SIZE_4_L, + expected: "cover", + }, + { + videoSize: invertSize(VIDEO_720_L), + tileSize: TILE_SIZE_4_L, + expected: "contain", + }, + { + videoSize: invertSize(VIDEO_1080_L), + tileSize: TILE_SIZE_3_P, + expected: "cover", + }, + { + videoSize: VIDEO_1080_L, + tileSize: TILE_SIZE_5_P, + expected: "contain", + }, + { + videoSize: invertSize(VIDEO_1080_L), + tileSize: TILE_SIZE_5_P, + expected: "cover", + }, + { + // square video + videoSize: { width: 400, height: 400 }, + tileSize: VIDEO_480_L, + expected: "contain", + }, +])( + "videoFit$ returns $expected when videoSize is $videoSize and tileSize is $tileSize", + ({ videoSize, tileSize, expected }) => { + const scope = new ObservableScope(); + const videoSize$ = constant(videoSize); + const tileSize$ = constant(tileSize); + + const fit = videoFit$(scope, videoSize$, tileSize$); + expect(fit.value).toBe(expected); + }, +); + +describe("extracting video size from participant stats", () => { + function createMockRtpStats( + isInbound: boolean, + props: 
Partial = {}, + ): RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats { + const baseStats = { + id: "mock-stats-id", + timestamp: Date.now(), + type: isInbound ? "inbound-rtp" : "outbound-rtp", + kind: "video", + ...props, + }; + + return baseStats as RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats; + } + + test("get stats for local user", async () => { + const localParticipant = mockLocalParticipant({ + identity: "@local:example.org:AAAAAA", + }); + + const mockReport: RTCStatsReport = new Map([ + [ + "OT01V639885149", + createMockRtpStats(false, { + frameWidth: 1280, + frameHeight: 720, + }), + ], + ]); + + const track = { + source: Track.Source.Camera, + getRTCStatsReport: vi + .fn() + .mockImplementation(async () => Promise.resolve(mockReport)), + } as Partial as LocalTrack; + + // Set up the prototype chain (there is an instanceof check in getRTCStatsReport) + Object.setPrototypeOf(track, LocalTrack.prototype); + + localParticipant.getTrackPublication = vi + .fn() + .mockImplementation((source: Track.Source) => { + if (source === Track.Source.Camera) { + return { + track, + } as unknown as LocalTrackPublication; + } else { + return undefined; + } + }); + + const videoDimensions$ = videoSizeFromParticipant$( + constant(localParticipant), + ); + + const publishedDimensions: { width: number; height: number }[] = []; + videoDimensions$.subscribe((dimensions) => { + if (dimensions) publishedDimensions.push(dimensions); + }); + + await flushPromises(); + + const dimension = publishedDimensions.pop(); + expect(dimension).toEqual({ width: 1280, height: 720 }); + }); + + test("get stats for remote user", async () => { + // vi.useFakeTimers() + const remoteParticipant = mockRemoteParticipant({ + identity: "@bob:example.org:AAAAAA", + }); + + const mockReport: RTCStatsReport = new Map([ + [ + "OT01V639885149", + createMockRtpStats(true, { + frameWidth: 480, + frameHeight: 640, + }), + ], + ]); + + const track = { + source: Track.Source.Camera, + getRTCStatsReport: 
vi + .fn() + .mockImplementation(async () => Promise.resolve(mockReport)), + } as Partial as LocalTrack; + + // Set up the prototype chain (there is an instanceof check in getRTCStatsReport) + Object.setPrototypeOf(track, LocalTrack.prototype); + + remoteParticipant.getTrackPublication = vi + .fn() + .mockImplementation((source: Track.Source) => { + if (source === Track.Source.Camera) { + return { + track, + } as unknown as RemoteTrackPublication; + } else { + return undefined; + } + }); + + const videoDimensions$ = videoSizeFromParticipant$( + constant(remoteParticipant), + ); + + const publishedDimensions: { width: number; height: number }[] = []; + videoDimensions$.subscribe((dimensions) => { + if (dimensions) publishedDimensions.push(dimensions); + }); + + await flushPromises(); + + const dimension = publishedDimensions.pop(); + expect(dimension).toEqual({ width: 480, height: 640 }); + }); +}); diff --git a/src/utils/videoFit.ts b/src/utils/videoFit.ts new file mode 100644 index 00000000..fdd91be7 --- /dev/null +++ b/src/utils/videoFit.ts @@ -0,0 +1,94 @@ +/* +Copyright 2026 Element Creations Ltd. + +SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial +Please see LICENSE in the repository root for full details. 
+*/ + +import { combineLatest, map, type Observable, of, switchMap } from "rxjs"; +import { + type LocalParticipant, + type RemoteParticipant, + Track, +} from "livekit-client"; + +import { type ObservableScope } from "../state/ObservableScope.ts"; +import { type Behavior } from "../state/Behavior.ts"; +import { + observeInboundRtpStreamStats$, + observeOutboundRtpStreamStats$, +} from "../state/MediaViewModel.ts"; + +type Size = { + width: number; + height: number; +}; + +export function videoFit$( + scope: ObservableScope, + videoSize$: Observable, + tileSize$: Observable, +): Behavior<"cover" | "contain"> { + const fit$ = combineLatest([videoSize$, tileSize$]).pipe( + map(([videoSize, tileSize]) => { + if (!videoSize || !tileSize) { + // If we don't have the sizes, default to cover to avoid black bars. + // This is a reasonable default as it will ensure the video fills the tile, even if it means cropping. + return "cover"; + } + const videoAspectRatio = videoSize.width / videoSize.height; + const tileAspectRatio = tileSize.width / tileSize.height; + + // If video is landscape (ratio > 1) and tile is not (ratio <= 1, i.e. portrait or square) or vice versa, + // we want to use "contain" (fit) mode to avoid excessive cropping + const videoIsLandscape = videoAspectRatio > 1; + const tileIsLandscape = tileAspectRatio > 1; + + // If the orientations are the same, use the cover mode (Preserves the aspect ratio, and the image fills the container.) + // If they're not the same orientation, use the contain mode (Preserves the aspect ratio, but the image is letterboxed, i.e. shown with black bars, to fit within the container.) + return videoIsLandscape === tileIsLandscape ? "cover" : "contain"; + }), + ); + + return scope.behavior(fit$, "cover"); +} + +/** + * Helper function to get the video size from a participant. + * It observes the participant's video track stats and extracts the frame width and height. 
+ * @param participant$ - an Observable of a LocalParticipant or RemoteParticipant, or null if no participant is selected. + * @returns an Observable of the video size (width and height) or undefined if the size cannot be determined. + */ +export function videoSizeFromParticipant$( + participant$: Observable, +): Observable<{ width: number; height: number } | undefined> { + return participant$ + .pipe( + // If we have a participant, observe their video track stats. If not, return undefined. + switchMap((p) => { + if (!p) return of(undefined); + if (p.isLocal) { + return observeOutboundRtpStreamStats$(p, Track.Source.Camera); + } else { + return observeInboundRtpStreamStats$(p, Track.Source.Camera); + } + }), + ) + .pipe( + // Extract the frame width and height from the stats. If we don't have valid stats, return undefined. + map((stats) => { + if (!stats) return undefined; + if ( + // For video tracks, frameWidth and frameHeight should be numbers. If they're not, we can't determine the size. + typeof stats.frameWidth !== "number" || + typeof stats.frameHeight !== "number" + ) { + return undefined; + } + return { + width: stats.frameWidth, + height: stats.frameHeight, + }; + }), + ); +}