feat: video auto fit based on video stream size

2026-04-25 09:35:33 +00:00 · 2026-02-25 15:47:25 +01:00
parent 4a0e89730d
commit 1de8d93b4b
6 changed files with 441 additions and 45 deletions
--- a/src/state/MediaViewModel.test.ts
+++ b/src/state/MediaViewModel.test.ts
@@ -92,21 +92,6 @@ test("control a participant's volume", () => {
  });
 });

-test("toggle fit/contain for a participant's video", () => {
-  const vm = createRemoteMedia(rtcMembership, {}, mockRemoteParticipant({}));
-  withTestScheduler(({ expectObservable, schedule }) => {
-    schedule("-ab|", {
-      a: () => vm.toggleFitContain(),
-      b: () => vm.toggleFitContain(),
-    });
-    expectObservable(vm.cropVideo$).toBe("abc", {
-      a: true,
-      b: false,
-      c: true,
-    });
-  });
-});
-
 test("local media remembers whether it should always be shown", () => {
  const vm1 = createLocalMedia(
    rtcMembership,
--- a/src/state/MediaViewModel.ts
+++ b/src/state/MediaViewModel.ts
@@ -43,6 +43,8 @@ import {
  switchMap,
  throttleTime,
  distinctUntilChanged,
+  concat,
+  take,
 } from "rxjs";

 import { alwaysShowSelf } from "../settings/settings";
@@ -55,6 +57,7 @@ import { platform } from "../Platform";
 import { type MediaDevices } from "./MediaDevices";
 import { type Behavior } from "./Behavior";
 import { type ObservableScope } from "./ObservableScope";
+import { videoFit$, videoSizeFromParticipant$ } from "../utils/videoFit.ts";

 export function observeTrackReference$(
  participant: Participant,
@@ -67,6 +70,10 @@ export function observeTrackReference$(
  );
 }

+/**
+ * Helper function to observe the RTC stats for a given participant and track source.
+ * It polls the stats immediately, then three more times at 300 ms intervals, then every second, and emits the latest stats object.
+ */
 export function observeRtpStreamStats$(
  participant: Participant,
  source: Track.Source,
@@ -76,7 +83,9 @@ export function observeRtpStreamStats$(
 > {
  return combineLatest([
    observeTrackReference$(participant, source),
-    interval(1000).pipe(startWith(0)),
+    // This is also used for detecting video orientation, and we want that to be
+    // more responsive than the connection stats, so we poll more frequently at the start.
+    concat(interval(300).pipe(take(3)), interval(1000)).pipe(startWith(0)),
  ]).pipe(
    switchMap(async ([trackReference]) => {
      const track = trackReference?.publication?.track;
@@ -90,7 +99,6 @@ export function observeRtpStreamStats$(
      if (!report) {
        return undefined;
      }
-
      for (const v of report.values()) {
        if (v.type === type) {
          return v;
@@ -103,6 +111,13 @@ export function observeRtpStreamStats$(
  );
 }

+/**
+ * Helper function to observe the inbound RTP stats for a given participant and track source.
+ * To be used for remote participants' audio and video tracks.
+ * It polls the stats every second and emits the latest stats object.
+ * @param participant - The LiveKit participant whose track stats we want to observe.
+ * @param source - The source of the track (e.g. Track.Source.Camera or Track.Source.Microphone).
+ */
 export function observeInboundRtpStreamStats$(
  participant: Participant,
  source: Track.Source,
@@ -112,6 +127,13 @@ export function observeInboundRtpStreamStats$(
  );
 }

+/**
+ * Helper function to observe the outbound RTP stats for a given participant and track source.
+ * To be used for the local participant's audio and video tracks.
+ * It polls the stats every second and emits the latest stats object.
+ * @param participant - The LiveKit participant whose track stats we want to observe.
+ * @param source - The source of the track (e.g. Track.Source.Camera or Track.Source.Microphone).
+ */
 export function observeOutboundRtpStreamStats$(
  participant: Participant,
  source: Track.Source,
@@ -263,7 +285,6 @@ abstract class BaseMediaViewModel {
    protected readonly participant$: Observable<
      LocalParticipant | RemoteParticipant | null
    >,
-
    encryptionSystem: EncryptionSystem,
    audioSource: AudioSource,
    videoSource: VideoSource,
@@ -397,13 +418,12 @@ abstract class BaseUserMediaViewModel extends BaseMediaViewModel {
    return this._videoEnabled$;
  }

-  private readonly _cropVideo$ = new BehaviorSubject(true);
  /**
-   * Whether the tile video should be contained inside the tile or be cropped to fit.
+   * Whether the tile video should be contained inside the tile (video-fit contain) or be cropped to fit (video-fit cover).
   */
-  public readonly cropVideo$: Behavior<boolean> = this._cropVideo$;
+  public readonly videoFit$: Behavior<"cover" | "contain">;

-  public constructor(
+  protected constructor(
    scope: ObservableScope,
    id: string,
    userId: string,
@@ -443,10 +463,12 @@ abstract class BaseUserMediaViewModel extends BaseMediaViewModel {
    this._videoEnabled$ = this.scope.behavior(
      media$.pipe(map((m) => m?.cameraTrack?.isMuted === false)),
    );
-  }

-  public toggleFitContain(): void {
-    this._cropVideo$.next(!this._cropVideo$.value);
+    this.videoFit$ = videoFit$(
+      this.scope,
+      videoSizeFromParticipant$(participant$),
+      this.actualSize$,
+    );
  }

  public get local(): boolean {
@@ -456,9 +478,28 @@ abstract class BaseUserMediaViewModel extends BaseMediaViewModel {
  public abstract get audioStreamStats$(): Observable<
    RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats | undefined
  >;
+
  public abstract get videoStreamStats$(): Observable<
    RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats | undefined
  >;
+
+  private readonly _actualSize$ = new BehaviorSubject<
+    { width: number; height: number } | undefined
+  >(undefined);
+  public readonly actualSize$ = this._actualSize$.asObservable();
+
+  /**
+   * Record the rendered size of the html element that displays this media.
+   * The new size is pushed to `actualSize$`; it can be used to determine the
+   * best video fit (fit to frame / keep ratio).
+   * @param width - The actual width of the html element displaying the video.
+   * @param height - The actual height of the html element displaying the video.
+   */
+  public setActualDimensions(width: number, height: number): void {
+    // A fresh object is emitted on every call, even if the values are unchanged.
+    const size = { width, height };
+    this._actualSize$.next(size);
+  }
 }

 /**
@@ -616,6 +657,7 @@ export class RemoteUserMediaViewModel extends BaseUserMediaViewModel {

  // This private field is used to override the value from the superclass
  private __speaking$: Behavior<boolean>;
+
  public get speaking$(): Behavior<boolean> {
    return this.__speaking$;
  }
@@ -661,6 +703,7 @@ export class RemoteUserMediaViewModel extends BaseUserMediaViewModel {

  // This private field is used to override the value from the superclass
  private __videoEnabled$: Behavior<boolean>;
+
  public get videoEnabled$(): Behavior<boolean> {
    return this.__videoEnabled$;
  }
--- a/src/tile/GridTile.tsx
+++ b/src/tile/GridTile.tsx
@@ -11,6 +11,7 @@ import {
  type ReactNode,
  type Ref,
  useCallback,
+  useEffect,
  useRef,
  useState,
 } from "react";
@@ -26,7 +27,6 @@ import {
  VolumeOffIcon,
  VisibilityOnIcon,
  UserProfileIcon,
-  ExpandIcon,
  VolumeOffSolidIcon,
  SwitchCameraSolidIcon,
 } from "@vector-im/compound-design-tokens/assets/web/icons";
@@ -37,6 +37,7 @@ import {
  Menu,
 } from "@vector-im/compound-web";
 import { useObservableEagerState } from "observable-hooks";
+import useMeasure from "react-use-measure";

 import styles from "./GridTile.module.css";
 import {
@@ -105,18 +106,26 @@ const UserMediaTile: FC<UserMediaTileProps> = ({
  const audioEnabled = useBehavior(vm.audioEnabled$);
  const videoEnabled = useBehavior(vm.videoEnabled$);
  const speaking = useBehavior(vm.speaking$);
-  const cropVideo = useBehavior(vm.cropVideo$);
-  const onSelectFitContain = useCallback(
-    (e: Event) => {
-      e.preventDefault();
-      vm.toggleFitContain();
-    },
-    [vm],
-  );
+  const videoFit = useBehavior(vm.videoFit$);
+
  const rtcBackendIdentity = vm.rtcBackendIdentity;
  const handRaised = useBehavior(vm.handRaised$);
  const reaction = useBehavior(vm.reaction$);

+  // We need to keep track of the tile size.
+  // We use this to get the tile ratio, and compare it to the video ratio to decide
+  // whether to fit the video to frame or keep the ratio.
+  const [measureRef, bounds] = useMeasure();
+  // There is already a ref being passed in, so we need to merge it with the measureRef.
+  const tileRef = useMergedRefs(ref, measureRef);
+
+  // Whenever bounds change, inform the viewModel
+  useEffect(() => {
+    if (bounds.width > 0 && bounds.height > 0) {
+      vm.setActualDimensions(bounds.width, bounds.height);
+    }
+  }, [bounds.width, bounds.height, vm]);
+
  const AudioIcon = locallyMuted
    ? VolumeOffSolidIcon
    : audioEnabled
@@ -132,12 +141,10 @@ const UserMediaTile: FC<UserMediaTileProps> = ({
  const menu = (
    <>
      {menuStart}
-      <ToggleMenuItem
-        Icon={ExpandIcon}
-        label={t("video_tile.change_fit_contain")}
-        checked={cropVideo}
-        onSelect={onSelectFitContain}
-      />
+      {/*
+       No additional menu item (there used to be a manual fit-to-frame toggle here).
+       Placeholder for future menu items that should be placed here.
+       */}
      {menuEnd}
    </>
  );
@@ -150,13 +157,13 @@ const UserMediaTile: FC<UserMediaTileProps> = ({

  const tile = (
    <MediaView
-      ref={ref}
+      ref={tileRef}
      video={video}
      userId={vm.userId}
      unencryptedWarning={unencryptedWarning}
      encryptionStatus={encryptionStatus}
      videoEnabled={videoEnabled}
-      videoFit={cropVideo ? "cover" : "contain"}
+      videoFit={videoFit}
      className={classNames(className, styles.tile, {
        [styles.speaking]: showSpeaking,
        [styles.handRaised]: !showSpeaking && handRaised,
--- a/src/tile/SpotlightTile.tsx
+++ b/src/tile/SpotlightTile.tsx
@@ -27,6 +27,7 @@ import { useObservableRef } from "observable-hooks";
 import { useTranslation } from "react-i18next";
 import classNames from "classnames";
 import { type TrackReferenceOrPlaceholder } from "@livekit/components-core";
+import useMeasure from "react-use-measure";

 import FullScreenMaximiseIcon from "../icons/FullScreenMaximise.svg?react";
 import FullScreenMinimiseIcon from "../icons/FullScreenMinimise.svg?react";
@@ -105,11 +106,11 @@ const SpotlightUserMediaItem: FC<SpotlightUserMediaItemProps> = ({
  vm,
  ...props
 }) => {
-  const cropVideo = useBehavior(vm.cropVideo$);
+  const videoFit = useBehavior(vm.videoFit$);

  const baseProps: SpotlightUserMediaItemBaseProps &
    RefAttributes<HTMLDivElement> = {
-    videoFit: cropVideo ? "cover" : "contain",
+    videoFit,
    ...props,
  };

@@ -147,7 +148,22 @@ const SpotlightItem: FC<SpotlightItemProps> = ({
  "aria-hidden": ariaHidden,
 }) => {
  const ourRef = useRef<HTMLDivElement | null>(null);
-  const ref = useMergedRefs(ourRef, theirRef);
+
+  // We need to keep track of the tile size.
+  // We use this to get the tile ratio, and compare it to the video ratio to decide
+  // whether to fit the video to frame or keep the ratio.
+  const [measureRef, bounds] = useMeasure();
+
+  // Whenever bounds change, inform the viewModel
+  useEffect(() => {
+    if (bounds.width > 0 && bounds.height > 0) {
+      if (!(vm instanceof ScreenShareViewModel)) {
+        vm.setActualDimensions(bounds.width, bounds.height);
+      }
+    }
+  }, [bounds.width, bounds.height, vm]);
+
+  const ref = useMergedRefs(ourRef, theirRef, measureRef);
  const focusUrl = useBehavior(vm.focusUrl$);
  const displayName = useBehavior(vm.displayName$);
  const mxcAvatarUrl = useBehavior(vm.mxcAvatarUrl$);
--- a/src/utils/videoFit.test.ts
+++ b/src/utils/videoFit.test.ts
@@ -0,0 +1,251 @@
+/*
+Copyright 2026 Element Creations Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/
+
+import { describe, expect, test, vi } from "vitest";
+import {
+  LocalTrack,
+  type LocalTrackPublication,
+  type RemoteTrackPublication,
+  Track,
+} from "livekit-client";
+
+import { ObservableScope } from "../state/ObservableScope";
+import { videoFit$, videoSizeFromParticipant$ } from "./videoFit";
+import { constant } from "../state/Behavior";
+import {
+  flushPromises,
+  mockLocalParticipant,
+  mockRemoteParticipant,
+} from "./test";
+
+describe("videoFit$ defaults", () => {
+  // videoFit$ derives the fit from two inputs: the size of the video and the
+  // size of the tile that displays it. While either of them is still undefined
+  // there is nothing to compare, and the result is expected to be "cover".
+  //
+  // Every case below leaves exactly one of the two sizes undefined.
+  const casesWithUnknownSize = [
+    // Tile size unknown: landscape video, then portrait video.
+    { videoSize: { width: 1920, height: 1080 }, tileSize: undefined },
+    { videoSize: { width: 1080, height: 1920 }, tileSize: undefined },
+
+    // Video size unknown: landscape tile, then portrait tile.
+    { videoSize: undefined, tileSize: { width: 1920, height: 1080 } },
+    { videoSize: undefined, tileSize: { width: 1080, height: 1920 } },
+  ];
+
+  test.each(casesWithUnknownSize)(
+    "videoFit$ returns `cover` when videoSize is $videoSize and tileSize is $tileSize",
+    ({ videoSize, tileSize }) => {
+      // Both inputs are constants, so the result can be read synchronously.
+      const fit = videoFit$(
+        new ObservableScope(),
+        constant(videoSize),
+        constant(tileSize),
+      );
+
+      expect(fit.value).toBe("cover");
+    },
+  );
+});
+
+const VIDEO_480_L = { width: 640, height: 480 };
+const VIDEO_720_L = { width: 1280, height: 720 };
+const VIDEO_1080_L = { width: 1920, height: 1080 };
+
+// Tile sizes from real world testing, which don't match the standard video sizes exactly (suffix _L = landscape, _P = portrait)
+const TILE_SIZE_1_L = { width: 180, height: 135 };
+const TILE_SIZE_3_P = { width: 379, height: 542 };
+const TILE_SIZE_4_L = { width: 957, height: 542 };
+// This is the size of an iPhone Xr in portrait mode
+const TILE_SIZE_5_P = { width: 414, height: 896 };
+
+/** Returns a new size with width and height swapped; the input is not modified. */
+export function invertSize(size: { width: number; height: number }): {
+  width: number;
+  height: number;
+} {
+  // Swapping the axes turns a landscape size into a portrait one, and vice versa.
+  const { width, height } = size;
+  return { width: height, height: width };
+}
+
+// Cases where both sizes are known. The expected fit depends only on whether
+// the video and the tile have the same orientation: "cover" if they do,
+// "contain" if they don't.
+test.each([
+  // landscape video in a landscape tile
+  { videoSize: VIDEO_480_L, tileSize: TILE_SIZE_1_L, expected: "cover" },
+  // portrait video in a landscape tile
+  {
+    videoSize: invertSize(VIDEO_480_L),
+    tileSize: TILE_SIZE_1_L,
+    expected: "contain",
+  },
+  // landscape video in a landscape tile
+  { videoSize: VIDEO_720_L, tileSize: TILE_SIZE_4_L, expected: "cover" },
+  // portrait video in a landscape tile
+  {
+    videoSize: invertSize(VIDEO_720_L),
+    tileSize: TILE_SIZE_4_L,
+    expected: "contain",
+  },
+  // portrait video in a portrait tile
+  {
+    videoSize: invertSize(VIDEO_1080_L),
+    tileSize: TILE_SIZE_3_P,
+    expected: "cover",
+  },
+  // landscape video in a portrait tile
+  { videoSize: VIDEO_1080_L, tileSize: TILE_SIZE_5_P, expected: "contain" },
+  // portrait video in a portrait tile
+  {
+    videoSize: invertSize(VIDEO_1080_L),
+    tileSize: TILE_SIZE_5_P,
+    expected: "cover",
+  },
+  // square video in a landscape tile: a ratio of exactly 1 is not landscape
+  {
+    videoSize: { width: 400, height: 400 },
+    tileSize: VIDEO_480_L,
+    expected: "contain",
+  },
+])(
+  "videoFit$ returns $expected when videoSize is $videoSize and tileSize is $tileSize",
+  ({ videoSize, tileSize, expected }) => {
+    // Both inputs are constants, so the result can be read synchronously.
+    const fit = videoFit$(
+      new ObservableScope(),
+      constant(videoSize),
+      constant(tileSize),
+    );
+
+    expect(fit.value).toBe(expected);
+  },
+);
+
+describe("extracting video size from participant stats", () => {
+  /** Builds a minimal video RTP stats entry; `props` override the defaults. */
+  function createMockRtpStats(
+    isInbound: boolean,
+    props: Partial<RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats> = {},
+  ): RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats {
+    const baseStats = {
+      id: "mock-stats-id",
+      timestamp: Date.now(),
+      type: isInbound ? "inbound-rtp" : "outbound-rtp",
+      kind: "video",
+      ...props,
+    };
+
+    return baseStats as RTCInboundRtpStreamStats | RTCOutboundRtpStreamStats;
+  }
+
+  test("get stats for local user", async () => {
+    const localParticipant = mockLocalParticipant({
+      identity: "@local:example.org:AAAAAA",
+    });
+
+    const mockReport: RTCStatsReport = new Map([
+      [
+        "OT01V639885149",
+        createMockRtpStats(false, {
+          frameWidth: 1280,
+          frameHeight: 720,
+        }),
+      ],
+    ]);
+
+    const track = {
+      source: Track.Source.Camera,
+      getRTCStatsReport: vi.fn().mockImplementation(async () => mockReport),
+    } as Partial<LocalTrack> as LocalTrack;
+
+    // Set up the prototype chain (there is an instanceof check in getRTCStatsReport)
+    Object.setPrototypeOf(track, LocalTrack.prototype);
+
+    localParticipant.getTrackPublication = vi
+      .fn()
+      .mockImplementation((source: Track.Source) => {
+        if (source === Track.Source.Camera) {
+          return {
+            track,
+          } as unknown as LocalTrackPublication;
+        } else {
+          return undefined;
+        }
+      });
+
+    const videoDimensions$ = videoSizeFromParticipant$(
+      constant(localParticipant),
+    );
+
+    const publishedDimensions: { width: number; height: number }[] = [];
+    const subscription = videoDimensions$.subscribe((dimensions) => {
+      if (dimensions) publishedDimensions.push(dimensions);
+    });
+
+    await flushPromises();
+    // Stop the stats polling so that its timers don't outlive the test.
+    subscription.unsubscribe();
+
+    const dimension = publishedDimensions.pop();
+    expect(dimension).toEqual({ width: 1280, height: 720 });
+  });
+
+  test("get stats for remote user", async () => {
+    const remoteParticipant = mockRemoteParticipant({
+      identity: "@bob:example.org:AAAAAA",
+    });
+
+    const mockReport: RTCStatsReport = new Map([
+      [
+        "OT01V639885149",
+        createMockRtpStats(true, {
+          frameWidth: 480,
+          frameHeight: 640,
+        }),
+      ],
+    ]);
+
+    const track = {
+      source: Track.Source.Camera,
+      getRTCStatsReport: vi.fn().mockImplementation(async () => mockReport),
+    } as Partial<LocalTrack> as LocalTrack;
+
+    // Set up the prototype chain (there is an instanceof check in getRTCStatsReport)
+    Object.setPrototypeOf(track, LocalTrack.prototype);
+
+    remoteParticipant.getTrackPublication = vi
+      .fn()
+      .mockImplementation((source: Track.Source) => {
+        if (source === Track.Source.Camera) {
+          return {
+            track,
+          } as unknown as RemoteTrackPublication;
+        } else {
+          return undefined;
+        }
+      });
+
+    const videoDimensions$ = videoSizeFromParticipant$(
+      constant(remoteParticipant),
+    );
+
+    const publishedDimensions: { width: number; height: number }[] = [];
+    const subscription = videoDimensions$.subscribe((dimensions) => {
+      if (dimensions) publishedDimensions.push(dimensions);
+    });
+
+    await flushPromises();
+    // Stop the stats polling so that its timers don't outlive the test.
+    subscription.unsubscribe();
+
+    const dimension = publishedDimensions.pop();
+    expect(dimension).toEqual({ width: 480, height: 640 });
+  });
+});
--- a/src/utils/videoFit.ts
+++ b/src/utils/videoFit.ts
@@ -0,0 +1,94 @@
+/*
+Copyright 2026 Element Creations Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/
+
+import { combineLatest, map, type Observable, of, switchMap } from "rxjs";
+import {
+  type LocalParticipant,
+  type RemoteParticipant,
+  Track,
+} from "livekit-client";
+
+import { type ObservableScope } from "../state/ObservableScope.ts";
+import { type Behavior } from "../state/Behavior.ts";
+import {
+  observeInboundRtpStreamStats$,
+  observeOutboundRtpStreamStats$,
+} from "../state/MediaViewModel.ts";
+
+type Size = {
+  width: number;
+  height: number;
+};
+
+/**
+ * Chooses how a video should be fitted into its tile, based on their sizes.
+ * The result is "cover" while either size is unknown, to avoid black bars.
+ * @param scope - The scope used to create the returned behavior.
+ * @param videoSize$ - The size of the video, or undefined if not known.
+ * @param tileSize$ - The size of the tile, or undefined if not known.
+ * @returns "cover" if video and tile share an orientation, "contain" if not.
+ */
+export function videoFit$(
+  scope: ObservableScope,
+  videoSize$: Observable<Size | undefined>,
+  tileSize$: Observable<Size | undefined>,
+): Behavior<"cover" | "contain"> {
+  // A ratio strictly above 1 is landscape; a square counts as portrait.
+  const isLandscape = ({ width, height }: Size): boolean => width / height > 1;
+
+  const fit$ = combineLatest([videoSize$, tileSize$]).pipe(
+    map(([video, tile]): "cover" | "contain" => {
+      // Unknown size: crop, so that the video at least fills the tile.
+      if (!video || !tile) return "cover";
+      // Mismatched orientations would crop excessively, so letterbox instead.
+      if (isLandscape(video) !== isLandscape(tile)) return "contain";
+      return "cover";
+    }),
+  );
+
+  return scope.behavior(fit$, "cover");
+}
+
+/**
+ * Derives the size of a participant's camera video from their RTP stream stats.
+ *
+ * The stats observed are those of the camera track (Track.Source.Camera):
+ * outbound stats for a local participant, inbound stats for anyone else.
+ * Each stats emission is reduced to its frameWidth and frameHeight.
+ *
+ * @param participant$ - an Observable of a LocalParticipant or RemoteParticipant, or null if no participant is selected.
+ * @returns an Observable of the video size (width and height) or undefined if the size cannot be determined.
+ */
+export function videoSizeFromParticipant$(
+  participant$: Observable<LocalParticipant | RemoteParticipant | null>,
+): Observable<{ width: number; height: number } | undefined> {
+  return participant$.pipe(
+    // Switch to the stats of whichever participant is current, dropping the
+    // stats stream of the previous one. Without a participant there is
+    // nothing to observe, so emit undefined.
+    switchMap((participant) => {
+      if (!participant) {
+        return of(undefined);
+      }
+      // Outbound stats for the local participant, inbound stats otherwise.
+      return participant.isLocal
+        ? observeOutboundRtpStreamStats$(participant, Track.Source.Camera)
+        : observeInboundRtpStreamStats$(participant, Track.Source.Camera);
+    }),
+    // Reduce the stats to the frame size. Missing stats, or stats without a
+    // numeric frameWidth and frameHeight, mean the size cannot be determined.
+    map((stats) => {
+      const width = stats?.frameWidth;
+      const height = stats?.frameHeight;
+      if (typeof width !== "number" || typeof height !== "number") {
+        return undefined;
+      }
+      // The typeof checks above also narrow both values to number.
+      return { width, height };
+    }),
+  );
+}