feat: video auto fit based on video stream size

This commit is contained in:
Valere
2026-02-25 15:47:25 +01:00
parent 4a0e89730d
commit 1de8d93b4b
6 changed files with 441 additions and 45 deletions

View File

@@ -92,21 +92,6 @@ test("control a participant's volume", () => {
});
});
// Two consecutive toggles should flip the crop flag away from its initial
// value (`true`) and then back again.
test("toggle fit/contain for a participant's video", () => {
  const remoteVm = createRemoteMedia(
    rtcMembership,
    {},
    mockRemoteParticipant({}),
  );
  const toggle = (): void => remoteVm.toggleFitContain();

  withTestScheduler(({ expectObservable, schedule }) => {
    // Frame 0: initial value, frame 1: first toggle, frame 2: second toggle.
    schedule("-ab|", { a: toggle, b: toggle });
    expectObservable(remoteVm.cropVideo$).toBe("abc", {
      a: true,
      b: false,
      c: true,
    });
  });
});
test("local media remembers whether it should always be shown", () => {
const vm1 = createLocalMedia(
rtcMembership,

View File

@@ -43,6 +43,8 @@ import {
switchMap,
throttleTime,
distinctUntilChanged,
concat,
take,
} from "rxjs";
import { alwaysShowSelf } from "../settings/settings";
@@ -55,6 +57,7 @@ import { platform } from "../Platform";
import { type MediaDevices } from "./MediaDevices";
import { type Behavior } from "./Behavior";
import { type ObservableScope } from "./ObservableScope";
import { videoFit$, videoSizeFromParticipant$ } from "../utils/videoFit.ts";
export function observeTrackReference$(
participant: Participant,
@@ -67,6 +70,10 @@ export function observeTrackReference$(
);
}
/**
* Helper function to observe the RTC stats for a given participant and track source.
* It polls immediately, then a few times in quick succession (every 300ms), then once per second, and emits the latest stats object.
*/
export function observeRtpStreamStats$(
participant: Participant,
source: Track.Source,
@@ -76,7 +83,9 @@ export function observeRtpStreamStats$(
> {
return combineLatest([
observeTrackReference$(participant, source),
interval(1000).pipe(startWith(0)),
// This is used also for detecting video orientation,
// and we want that to be more responsive than the connection stats, so we poll more frequently at the start.
concat(interval(300).pipe(take(3)), interval(1000)).pipe(startWith(0)),
]).pipe(
switchMap(async ([trackReference]) => {
const track = trackReference?.publication?.track;
@@ -90,7 +99,6 @@ export function observeRtpStreamStats$(
if (!report) {
return undefined;
}
for (const v of report.values()) {
if (v.type === type) {
return v;
@@ -103,6 +111,13 @@ export function observeRtpStreamStats$(
);
}
/**
* Helper function to observe the inbound RTP stats for a given participant and track source.
* To be used for remote participants' audio and video tracks.
* It polls the stats every second and emits the latest stats object.
* @param participant - The LiveKit participant whose track stats we want to observe.
* @param source - The source of the track (e.g. Track.Source.Camera or Track.Source.Microphone).
*/
export function observeInboundRtpStreamStats$(
participant: Participant,
source: Track.Source,
@@ -112,6 +127,13 @@ export function observeInboundRtpStreamStats$(
);
}
/**
* Helper function to observe the outbound RTP stats for a given participant and track source.
* To be used for the local participant's audio and video tracks.
* It polls the stats every second and emits the latest stats object.
* @param participant - The LiveKit participant whose track stats we want to observe.
* @param source - The source of the track (e.g. Track.Source.Camera or Track.Source.Microphone).
*/
export function observeOutboundRtpStreamStats$(
participant: Participant,
source: Track.Source,
@@ -263,7 +285,6 @@ abstract class BaseMediaViewModel {
protected readonly participant$: Observable<
LocalParticipant | RemoteParticipant | null
>,
encryptionSystem: EncryptionSystem,
audioSource: AudioSource,
videoSource: VideoSource,
@@ -397,13 +418,12 @@ abstract class BaseUserMediaViewModel extends BaseMediaViewModel {
return this._videoEnabled$;
}
private readonly _cropVideo$ = new BehaviorSubject(true);
/**
* Whether the tile video should be contained inside the tile or be cropped to fit.
* Whether the tile video should be contained inside the tile (video-fit contain) or be cropped to fit (video-fit cover).
*/
public readonly cropVideo$: Behavior<boolean> = this._cropVideo$;
public readonly videoFit$: Behavior<"cover" | "contain">;
public constructor(
protected constructor(
scope: ObservableScope,
id: string,
userId: string,
@@ -443,10 +463,12 @@ abstract class BaseUserMediaViewModel extends BaseMediaViewModel {
this._videoEnabled$ = this.scope.behavior(
media$.pipe(map((m) => m?.cameraTrack?.isMuted === false)),
);
}
public toggleFitContain(): void {
this._cropVideo$.next(!this._cropVideo$.value);
this.videoFit$ = videoFit$(
this.scope,
videoSizeFromParticipant$(participant$),
this.actualSize$,
);
}
public get local(): boolean {
@@ -456,9 +478,28 @@ abstract class BaseUserMediaViewModel extends BaseMediaViewModel {
public abstract get audioStreamStats$(): Observable<
RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats | undefined
>;
public abstract get videoStreamStats$(): Observable<
RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats | undefined
>;
private readonly _actualSize$ = new BehaviorSubject<
{ width: number; height: number } | undefined
>(undefined);
public readonly actualSize$ = this._actualSize$.asObservable();
/**
 * Records the rendered size of the html element that displays this video.
 * The value is pushed into `_actualSize$`, so that it can be compared with the
 * video size to pick the best video fit (fit to frame / keep ratio).
 * @param width - The actual width of the html element displaying the video.
 * @param height - The actual height of the html element displaying the video.
 */
public setActualDimensions(width: number, height: number): void {
  const size = { width, height };
  this._actualSize$.next(size);
}
}
/**
@@ -616,6 +657,7 @@ export class RemoteUserMediaViewModel extends BaseUserMediaViewModel {
// This private field is used to override the value from the superclass
private __speaking$: Behavior<boolean>;
/**
 * Returns the behavior stored in the private `__speaking$` field.
 */
public get speaking$(): Behavior<boolean> {
return this.__speaking$;
}
@@ -661,6 +703,7 @@ export class RemoteUserMediaViewModel extends BaseUserMediaViewModel {
// This private field is used to override the value from the superclass
private __videoEnabled$: Behavior<boolean>;
/**
 * Returns the behavior stored in the private `__videoEnabled$` field.
 */
public get videoEnabled$(): Behavior<boolean> {
return this.__videoEnabled$;
}

View File

@@ -11,6 +11,7 @@ import {
type ReactNode,
type Ref,
useCallback,
useEffect,
useRef,
useState,
} from "react";
@@ -26,7 +27,6 @@ import {
VolumeOffIcon,
VisibilityOnIcon,
UserProfileIcon,
ExpandIcon,
VolumeOffSolidIcon,
SwitchCameraSolidIcon,
} from "@vector-im/compound-design-tokens/assets/web/icons";
@@ -37,6 +37,7 @@ import {
Menu,
} from "@vector-im/compound-web";
import { useObservableEagerState } from "observable-hooks";
import useMeasure from "react-use-measure";
import styles from "./GridTile.module.css";
import {
@@ -105,18 +106,26 @@ const UserMediaTile: FC<UserMediaTileProps> = ({
const audioEnabled = useBehavior(vm.audioEnabled$);
const videoEnabled = useBehavior(vm.videoEnabled$);
const speaking = useBehavior(vm.speaking$);
const cropVideo = useBehavior(vm.cropVideo$);
const onSelectFitContain = useCallback(
(e: Event) => {
e.preventDefault();
vm.toggleFitContain();
},
[vm],
);
const videoFit = useBehavior(vm.videoFit$);
const rtcBackendIdentity = vm.rtcBackendIdentity;
const handRaised = useBehavior(vm.handRaised$);
const reaction = useBehavior(vm.reaction$);
// We need to keep track of the tile size.
// We use this to get the tile ratio, and compare it to the video ratio to decide
// whether to fit the video to frame or keep the ratio.
const [measureRef, bounds] = useMeasure();
// There is already a ref being passed in, so we need to merge it with the measureRef.
const tileRef = useMergedRefs(ref, measureRef);
// Whenever bounds change, inform the viewModel
useEffect(() => {
if (bounds.width > 0 && bounds.height > 0) {
vm.setActualDimensions(bounds.width, bounds.height);
}
}, [bounds.width, bounds.height, vm]);
const AudioIcon = locallyMuted
? VolumeOffSolidIcon
: audioEnabled
@@ -132,12 +141,10 @@ const UserMediaTile: FC<UserMediaTileProps> = ({
const menu = (
<>
{menuStart}
<ToggleMenuItem
Icon={ExpandIcon}
label={t("video_tile.change_fit_contain")}
checked={cropVideo}
onSelect={onSelectFitContain}
/>
{/*
No additional menu item here (this used to be the manual fit-to-frame toggle).
Placeholder for future menu items that should be placed here.
*/}
{menuEnd}
</>
);
@@ -150,13 +157,13 @@ const UserMediaTile: FC<UserMediaTileProps> = ({
const tile = (
<MediaView
ref={ref}
ref={tileRef}
video={video}
userId={vm.userId}
unencryptedWarning={unencryptedWarning}
encryptionStatus={encryptionStatus}
videoEnabled={videoEnabled}
videoFit={cropVideo ? "cover" : "contain"}
videoFit={videoFit}
className={classNames(className, styles.tile, {
[styles.speaking]: showSpeaking,
[styles.handRaised]: !showSpeaking && handRaised,

View File

@@ -27,6 +27,7 @@ import { useObservableRef } from "observable-hooks";
import { useTranslation } from "react-i18next";
import classNames from "classnames";
import { type TrackReferenceOrPlaceholder } from "@livekit/components-core";
import useMeasure from "react-use-measure";
import FullScreenMaximiseIcon from "../icons/FullScreenMaximise.svg?react";
import FullScreenMinimiseIcon from "../icons/FullScreenMinimise.svg?react";
@@ -105,11 +106,11 @@ const SpotlightUserMediaItem: FC<SpotlightUserMediaItemProps> = ({
vm,
...props
}) => {
const cropVideo = useBehavior(vm.cropVideo$);
const videoFit = useBehavior(vm.videoFit$);
const baseProps: SpotlightUserMediaItemBaseProps &
RefAttributes<HTMLDivElement> = {
videoFit: cropVideo ? "cover" : "contain",
videoFit,
...props,
};
@@ -147,7 +148,22 @@ const SpotlightItem: FC<SpotlightItemProps> = ({
"aria-hidden": ariaHidden,
}) => {
const ourRef = useRef<HTMLDivElement | null>(null);
const ref = useMergedRefs(ourRef, theirRef);
// We need to keep track of the tile size.
// We use this to get the tile ratio, and compare it to the video ratio to decide
// whether to fit the video to frame or keep the ratio.
const [measureRef, bounds] = useMeasure();
// Whenever bounds change, inform the viewModel
useEffect(() => {
if (bounds.width > 0 && bounds.height > 0) {
if (!(vm instanceof ScreenShareViewModel)) {
vm.setActualDimensions(bounds.width, bounds.height);
}
}
}, [bounds.width, bounds.height, vm]);
const ref = useMergedRefs(ourRef, theirRef, measureRef);
const focusUrl = useBehavior(vm.focusUrl$);
const displayName = useBehavior(vm.displayName$);
const mxcAvatarUrl = useBehavior(vm.mxcAvatarUrl$);

251
src/utils/videoFit.test.ts Normal file
View File

@@ -0,0 +1,251 @@
/*
Copyright 2026 Element Creations Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { describe, expect, test, vi } from "vitest";
import {
LocalTrack,
type LocalTrackPublication,
type RemoteTrackPublication,
Track,
} from "livekit-client";
import { ObservableScope } from "../state/ObservableScope";
import { videoFit$, videoSizeFromParticipant$ } from "./videoFit";
import { constant } from "../state/Behavior";
import {
flushPromises,
mockLocalParticipant,
mockRemoteParticipant,
} from "./test";
// While either the video size or the tile size is still unknown, the fit must
// fall back to "cover", whatever the orientation of the size that is known.
describe("videoFit$ defaults", () => {
  test.each([
    { videoSize: { width: 1920, height: 1080 }, tileSize: undefined },
    { videoSize: { width: 1080, height: 1920 }, tileSize: undefined },
    { videoSize: undefined, tileSize: { width: 1920, height: 1080 } },
    { videoSize: undefined, tileSize: { width: 1080, height: 1920 } },
  ])(
    "videoFit$ returns `cover` when videoSize is $videoSize and tileSize is $tileSize",
    ({ videoSize, tileSize }) => {
      const fit$ = videoFit$(
        new ObservableScope(),
        constant(videoSize),
        constant(tileSize),
      );

      expect(fit$.value).toBe("cover");
    },
  );
});
// Video stream sizes used as fixtures. The `_L` suffix marks a landscape size
// (wider than tall); use `invertSize` to get the portrait counterpart.
const VIDEO_480_L = { width: 640, height: 480 };
const VIDEO_720_L = { width: 1280, height: 720 };
const VIDEO_1080_L = { width: 1920, height: 1080 };
// Some sizes from real world testing, which don't match the standard video sizes exactly
// (`_L` = landscape, wider than tall; `_P` = portrait, taller than wide).
const TILE_SIZE_1_L = { width: 180, height: 135 };
const TILE_SIZE_3_P = { width: 379, height: 542 };
const TILE_SIZE_4_L = { width: 957, height: 542 };
// This is the size of an iPhone Xr in portrait mode
const TILE_SIZE_5_P = { width: 414, height: 896 };
/**
 * Swaps the width and height of a size, turning a landscape size into the
 * matching portrait size and vice versa. The input object is left untouched.
 * @param size - The size to rotate.
 * @returns A new size whose width is the input height and whose height is the input width.
 */
export function invertSize(size: { width: number; height: number }): {
  width: number;
  height: number;
} {
  const { width: originalWidth, height: originalHeight } = size;
  return { width: originalHeight, height: originalWidth };
}
// Table of (video size, tile size) pairs and the fit expected for each:
// "cover" when both have the same orientation, "contain" otherwise.
test.each([
  { videoSize: VIDEO_480_L, tileSize: TILE_SIZE_1_L, expected: "cover" },
  {
    videoSize: invertSize(VIDEO_480_L),
    tileSize: TILE_SIZE_1_L,
    expected: "contain",
  },
  { videoSize: VIDEO_720_L, tileSize: TILE_SIZE_4_L, expected: "cover" },
  {
    videoSize: invertSize(VIDEO_720_L),
    tileSize: TILE_SIZE_4_L,
    expected: "contain",
  },
  {
    videoSize: invertSize(VIDEO_1080_L),
    tileSize: TILE_SIZE_3_P,
    expected: "cover",
  },
  { videoSize: VIDEO_1080_L, tileSize: TILE_SIZE_5_P, expected: "contain" },
  {
    videoSize: invertSize(VIDEO_1080_L),
    tileSize: TILE_SIZE_5_P,
    expected: "cover",
  },
  {
    // square video
    videoSize: { width: 400, height: 400 },
    tileSize: VIDEO_480_L,
    expected: "contain",
  },
])(
  "videoFit$ returns $expected when videoSize is $videoSize and tileSize is $tileSize",
  ({ videoSize, tileSize, expected }) => {
    const fit$ = videoFit$(
      new ObservableScope(),
      constant(videoSize),
      constant(tileSize),
    );

    expect(fit$.value).toBe(expected);
  },
);
describe("extracting video size from participant stats", () => {
  type VideoSize = { width: number; height: number };

  /**
   * Builds a minimal video RTP stats entry.
   * @param isInbound - `true` for an "inbound-rtp" entry, `false` for an "outbound-rtp" one.
   * @param props - Extra stats fields (e.g. frameWidth / frameHeight) merged over the defaults.
   */
  function createMockRtpStats(
    isInbound: boolean,
    props: Partial<RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats> = {},
  ): RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats {
    const baseStats = {
      id: "mock-stats-id",
      timestamp: Date.now(),
      type: isInbound ? "inbound-rtp" : "outbound-rtp",
      kind: "video",
      ...props,
    };
    return baseStats as RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats;
  }

  /** Wraps a single stats entry into a stats report. */
  function createMockReport(
    stats: RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats,
  ): RTCStatsReport {
    return new Map([["OT01V639885149", stats]]);
  }

  /** Creates a fake camera track whose stats report is always `report`. */
  function createCameraTrack(report: RTCStatsReport): LocalTrack {
    const track = {
      source: Track.Source.Camera,
      getRTCStatsReport: vi.fn().mockImplementation(async () => report),
    } as Partial<LocalTrack> as LocalTrack;
    // Set up the prototype chain (there is an instanceof check in getRTCStatsReport)
    // NOTE(review): the remote test also relies on LocalTrack.prototype, as it did
    // before this refactoring - confirm whether a RemoteTrack would be more representative.
    Object.setPrototypeOf(track, LocalTrack.prototype);
    return track;
  }

  /**
   * Returns a `getTrackPublication` implementation that only knows about the camera
   * source and hands out a publication carrying `track` for it.
   */
  function cameraOnly<P>(
    track: LocalTrack,
  ): (source: Track.Source) => P | undefined {
    return (source) =>
      source === Track.Source.Camera
        ? ({ track } as unknown as P)
        : undefined;
  }

  /**
   * Subscribes to the video size of the given participant, waits for pending
   * promises to settle and returns the last defined size that was emitted.
   * The subscription is always released, so that the stats polling behind it
   * does not keep running after the test has finished.
   */
  async function lastEmittedSize(
    participant$: Parameters<typeof videoSizeFromParticipant$>[0],
  ): Promise<VideoSize | undefined> {
    const publishedDimensions: VideoSize[] = [];
    const subscription = videoSizeFromParticipant$(participant$).subscribe(
      (dimensions) => {
        if (dimensions) publishedDimensions.push(dimensions);
      },
    );
    try {
      await flushPromises();
    } finally {
      subscription.unsubscribe();
    }
    return publishedDimensions.pop();
  }

  test("get stats for local user", async () => {
    const localParticipant = mockLocalParticipant({
      identity: "@local:example.org:AAAAAA",
    });
    const track = createCameraTrack(
      createMockReport(
        createMockRtpStats(false, {
          frameWidth: 1280,
          frameHeight: 720,
        }),
      ),
    );
    localParticipant.getTrackPublication = vi
      .fn()
      .mockImplementation(cameraOnly<LocalTrackPublication>(track));

    const dimension = await lastEmittedSize(constant(localParticipant));

    expect(dimension).toEqual({ width: 1280, height: 720 });
  });

  test("get stats for remote user", async () => {
    const remoteParticipant = mockRemoteParticipant({
      identity: "@bob:example.org:AAAAAA",
    });
    const track = createCameraTrack(
      createMockReport(
        createMockRtpStats(true, {
          frameWidth: 480,
          frameHeight: 640,
        }),
      ),
    );
    remoteParticipant.getTrackPublication = vi
      .fn()
      .mockImplementation(cameraOnly<RemoteTrackPublication>(track));

    const dimension = await lastEmittedSize(constant(remoteParticipant));

    expect(dimension).toEqual({ width: 480, height: 640 });
  });
});

94
src/utils/videoFit.ts Normal file
View File

@@ -0,0 +1,94 @@
/*
Copyright 2026 Element Creations Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import { combineLatest, map, type Observable, of, switchMap } from "rxjs";
import {
type LocalParticipant,
type RemoteParticipant,
Track,
} from "livekit-client";
import { type ObservableScope } from "../state/ObservableScope.ts";
import { type Behavior } from "../state/Behavior.ts";
import {
observeInboundRtpStreamStats$,
observeOutboundRtpStreamStats$,
} from "../state/MediaViewModel.ts";
type Size = {
width: number;
height: number;
};
/**
 * Tells whether a size can be used to compute an aspect ratio,
 * i.e. it is present and both dimensions are finite and strictly positive.
 */
function hasUsableDimensions(size: Size | undefined): size is Size {
  if (!size) return false;
  return (
    Number.isFinite(size.width) &&
    Number.isFinite(size.height) &&
    size.width > 0 &&
    size.height > 0
  );
}

/**
 * Picks the fit mode for one (video size, tile size) pair.
 * Same orientation gives "cover"; different orientations give "contain".
 */
function pickVideoFit(
  videoSize: Size | undefined,
  tileSize: Size | undefined,
): "cover" | "contain" {
  if (!hasUsableDimensions(videoSize) || !hasUsableDimensions(tileSize)) {
    // If we don't have (valid) sizes, default to cover to avoid black bars.
    // A zero or non-finite dimension would otherwise give a NaN / Infinity
    // aspect ratio and an arbitrary orientation.
    return "cover";
  }
  // A ratio of exactly 1 (square) is deliberately not counted as landscape.
  const videoIsLandscape = videoSize.width / videoSize.height > 1;
  const tileIsLandscape = tileSize.width / tileSize.height > 1;
  // Same orientation: cover (keeps the aspect ratio, the image fills the container).
  // Different orientation: contain (keeps the aspect ratio, the image is letterboxed
  // with black bars) to avoid excessive cropping.
  return videoIsLandscape === tileIsLandscape ? "cover" : "contain";
}

/**
 * Computes how a video should be fitted into its tile, from the size of the
 * video stream and the size of the tile displaying it.
 * @param scope - The scope used to turn the computed observable into a Behavior.
 * @param videoSize$ - The size of the video stream, or undefined when unknown.
 * @param tileSize$ - The size of the tile showing the video, or undefined when unknown.
 * @returns A Behavior, initially "cover", that is "cover" when the video and the tile
 * have the same orientation (or when a size is missing or unusable) and "contain" otherwise.
 */
export function videoFit$(
  scope: ObservableScope,
  videoSize$: Observable<Size | undefined>,
  tileSize$: Observable<Size | undefined>,
): Behavior<"cover" | "contain"> {
  const fit$ = combineLatest([videoSize$, tileSize$]).pipe(
    map(([videoSize, tileSize]) => pickVideoFit(videoSize, tileSize)),
  );
  return scope.behavior(fit$, "cover");
}
/**
 * Observes the size of the camera video of a participant.
 * The size is read from the `frameWidth` / `frameHeight` fields of the participant's
 * camera RTP stream stats: outbound stats for a local participant, inbound stats otherwise.
 * @param participant$ - an Observable of a LocalParticipant or RemoteParticipant, or null if no participant is selected.
 * @returns an Observable of the video size (width and height), or undefined when there is
 * no participant, no stats, or the stats carry no numeric frame dimensions.
 */
export function videoSizeFromParticipant$(
  participant$: Observable<LocalParticipant | RemoteParticipant | null>,
): Observable<{ width: number; height: number } | undefined> {
  return participant$.pipe(
    // Follow the camera stats of the current participant, if there is one.
    switchMap((participant) => {
      if (!participant) return of(undefined);
      return participant.isLocal
        ? observeOutboundRtpStreamStats$(participant, Track.Source.Camera)
        : observeInboundRtpStreamStats$(participant, Track.Source.Camera);
    }),
    // Keep only the frame dimensions; anything that is not a pair of numbers is "unknown".
    map((stats) => {
      const width = stats?.frameWidth;
      const height = stats?.frameHeight;
      const hasDimensions =
        typeof width === "number" && typeof height === "number";
      return hasDimensions ? { width, height } : undefined;
    }),
  );
}