From Javier Taibo, "I have found that since version 1.1, FFMPEG changed the way audio streams are retrieved, from packed to planar format. SDL expects packed audio, which is what the osgmovie example uses. To make the audio work when the OSGffmpeg plug-in is compiled against recent FFMPEG versions, FFmpegDecoderAudio must check for planar formats and, in those cases, request the samples as packed. This way everything works as before. It can be checked with the osgmovie example application.

$ osgmovie --audio movie.avi.ffmpeg
FFmpegImageStream::open audio failed, audio stream will be disabled: unknown audio format

With the attached FFmpegDecoderAudio.cpp, audio plays correctly. I am also attaching a modified version of FindFFmpeg.cmake that allows FFMPEG_DIR to be set to an ffmpeg build compiled in the source directory structure. It should not break anything as it only adds some additional search paths. "

git-svn-id: http://svn.openscenegraph.org/osg/OpenSceneGraph/trunk@14654 16af8721-9629-0410-8352-f15c8da7e697
2015-01-07 14:14:55 +00:00
parent bf794165d5
commit d409ffcb91
9 changed files with 270 additions and 121 deletions
--- a/src/osgPlugins/ffmpeg/FFmpegDecoderAudio.cpp
+++ b/src/osgPlugins/ffmpeg/FFmpegDecoderAudio.cpp
@@ -13,6 +13,10 @@
 #define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000
 #endif

+#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55,28,1)
+#define av_frame_alloc  avcodec_alloc_frame
+#define av_frame_free  avcodec_free_frame
+#endif

 #if LIBAVCODEC_VERSION_MAJOR < 56
   #define AV_CODEC_ID_NONE CODEC_ID_NONE
@@ -22,65 +26,85 @@ namespace osgFFmpeg {

 static int decode_audio(AVCodecContext *avctx, int16_t *samples,
                         int *frame_size_ptr,
-                         const uint8_t *buf, int buf_size)
+                         const uint8_t *buf, int buf_size,
+                         SwrContext *swr_context,
+                         int out_sample_rate,
+                         int out_nb_channels,
+                         AVSampleFormat out_sample_format)
 {
-#if LIBAVCODEC_VERSION_MAJOR >= 56
-
-    AVFrame *frame = av_frame_alloc();
-
-    if (!frame) return AVERROR(ENOMEM);
+#if LIBAVCODEC_VERSION_MAJOR >= 53 || (LIBAVCODEC_VERSION_MAJOR==52 && LIBAVCODEC_VERSION_MINOR>=32)

    AVPacket avpkt;
    av_init_packet(&avpkt);
    avpkt.data = const_cast<uint8_t *>(buf);
    avpkt.size = buf_size;

-    int got_frame = 0;
-    int result = avcodec_decode_audio4(avctx, frame, &got_frame, &avpkt);
+    AVFrame *frame = av_frame_alloc();
+    int ret, got_frame = 0;

-    if (result>=0 && got_frame)
-    {
+    if (!frame)
+        return AVERROR(ENOMEM);
+
+    ret = avcodec_decode_audio4(avctx, frame, &got_frame, &avpkt);
+
+    if (ret >= 0 && got_frame) {
        int ch, plane_size;
        int planar = av_sample_fmt_is_planar(avctx->sample_fmt);
-        int data_size = av_samples_get_buffer_size(&plane_size, avctx->channels, frame->nb_samples, avctx->sample_fmt, 1);
-        if (*frame_size_ptr < data_size)
-        {
+
+        int out_samples;
+        // if sample rate changes, number of samples is different
+        if ( out_sample_rate !=  avctx->sample_rate ) {
+//            out_samples = av_rescale_rnd(swr_get_delay(swr_context, avctx->sample_rate) +
+//                                 frame->nb_samples, out_sample_rate, avctx->sample_rate, AV_ROUND_UP);
+            out_samples = av_rescale_rnd(frame->nb_samples, out_sample_rate, avctx->sample_rate, AV_ROUND_UP);
+        }
+        else {
+            out_samples = frame->nb_samples;
+        }
+
+        int output_data_size = av_samples_get_buffer_size(&plane_size, out_nb_channels,
+                                                    out_samples,
+                                                    out_sample_format, 1);
+
+        if (*frame_size_ptr < output_data_size) {
            av_log(avctx, AV_LOG_ERROR, "output buffer size is too small for "
-            "the current frame (%d < %d)\n", *frame_size_ptr, data_size);
+                                        "the current frame (%d < %d)\n", *frame_size_ptr, output_data_size);
            av_frame_free(&frame);
            return AVERROR(EINVAL);
        }
-        memcpy(samples, frame->extended_data[0], plane_size);
-        if (planar && avctx->channels > 1)
-        {
-            uint8_t *out = ((uint8_t *)samples) + plane_size;
-            for (ch = 1; ch < avctx->channels; ch++)
-            {
-                memcpy(out, frame->extended_data[ch], plane_size);
-                out += plane_size;
+
+        // if resampling is needed, call swr_convert
+        if ( swr_context != NULL ) {
+
+            out_samples = swr_convert(swr_context, (uint8_t **)&samples, out_samples,
+                        (const uint8_t **)frame->extended_data, frame->nb_samples);
+
+            // recompute output_data_size following swr_convert result (number of samples actually converted)
+            output_data_size = av_samples_get_buffer_size(&plane_size, out_nb_channels,
+                                                    out_samples,
+                                                    out_sample_format, 1);
+        }
+        else {
+
+            memcpy(samples, frame->extended_data[0], plane_size);
+
+            if (planar && avctx->channels > 1) {
+                uint8_t *out = ((uint8_t *)samples) + plane_size;
+                for (ch = 1; ch < avctx->channels; ch++) {
+                    memcpy(out, frame->extended_data[ch], plane_size);
+                    out += plane_size;
+                }
            }
        }
-        *frame_size_ptr = data_size;
-    }
-    else
-    {
+
+        *frame_size_ptr = output_data_size;
+
+    } else {
        *frame_size_ptr = 0;
    }
-
    av_frame_free(&frame);
+    return ret;

-    return result;
-
-#elif LIBAVCODEC_VERSION_MAJOR >= 53 || (LIBAVCODEC_VERSION_MAJOR==52 && LIBAVCODEC_VERSION_MINOR>=32)
-
-    // following code segment copied from ffmpeg's avcodec_decode_audio2()
-    // implementation to avoid warnings about deprecated function usage.
-    AVPacket avpkt;
-    av_init_packet(&avpkt);
-    avpkt.data = const_cast<uint8_t *>(buf);
-    avpkt.size = buf_size;
-
-    return avcodec_decode_audio3(avctx, samples, frame_size_ptr, &avpkt);
 #else
    // fallback for older versions of ffmpeg that don't have avcodec_decode_audio3.
    return avcodec_decode_audio2(avctx, samples, frame_size_ptr, buf, buf_size);
@@ -100,9 +124,9 @@ FFmpegDecoderAudio::FFmpegDecoderAudio(PacketQueue & packets, FFmpegClocks & clo
    m_audio_buf_index(0),
    m_end_of_stream(false),
    m_paused(true),
-    m_exit(false)
+    m_exit(false),
+    m_swr_context(NULL)
 {
-
 }


@@ -114,7 +138,7 @@ FFmpegDecoderAudio::~FFmpegDecoderAudio()



-void FFmpegDecoderAudio::open(AVStream * const stream)
+void FFmpegDecoderAudio::open(AVStream * const stream, FFmpegParameters* parameters)
 {
    try
    {
@@ -125,51 +149,60 @@ void FFmpegDecoderAudio::open(AVStream * const stream)
        m_stream = stream;
        m_context = stream->codec;

-        m_frequency = m_context->sample_rate;
-        m_nb_channels = m_context->channels;
+        m_in_sample_rate = m_context->sample_rate;
+        m_in_nb_channels = m_context->channels;
+        m_in_sample_format = m_context->sample_fmt;

-        OSG_INFO<<"FFmpegDecoderAudio::open(..), m_nb_channels="<<m_nb_channels<<", m_context->sample_fmt="<<m_context->sample_fmt<<std::endl;
+        AVDictionaryEntry *opt_out_sample_rate = av_dict_get( *parameters->getOptions(), "out_sample_rate", NULL, 0 );
+        if ( opt_out_sample_rate )
+            m_out_sample_rate = atoi(opt_out_sample_rate->value);
+        else
+            m_out_sample_rate = m_in_sample_rate;

-        switch (m_context->sample_fmt)
+        AVDictionaryEntry *opt_out_sample_format = av_dict_get( *parameters->getOptions(), "out_sample_format", NULL, 0 );
+        if ( opt_out_sample_format )
+            m_out_sample_format = (AVSampleFormat) atoi(opt_out_sample_format->value);
+        else
+            // always packed, planar formats are evil!
+            m_out_sample_format = av_get_packed_sample_fmt( m_in_sample_format );
+
+        AVDictionaryEntry *opt_out_nb_channels = av_dict_get( *parameters->getOptions(), "out_nb_channels", NULL, 0 );
+        if ( opt_out_nb_channels )
+            m_out_nb_channels = atoi(opt_out_nb_channels->value);
+        else
+            m_out_nb_channels = m_in_nb_channels;
+
+        if ( m_in_sample_rate != m_out_sample_rate
+            || m_in_nb_channels != m_out_nb_channels
+            || m_in_sample_format != m_out_sample_format )
        {
-        case AV_SAMPLE_FMT_NONE:
-            throw std::runtime_error("invalid audio format AV_SAMPLE_FMT_NONE");
-        case AV_SAMPLE_FMT_U8:
-            m_sample_format = osg::AudioStream::SAMPLE_FORMAT_U8;
-            break;
-        case AV_SAMPLE_FMT_S16:
-            m_sample_format = osg::AudioStream::SAMPLE_FORMAT_S16;
-            break;
-        case AV_SAMPLE_FMT_S32:
-            m_sample_format = osg::AudioStream::SAMPLE_FORMAT_S32;
-            break;
-        case AV_SAMPLE_FMT_FLT:
-            m_sample_format = osg::AudioStream::SAMPLE_FORMAT_F32;
-            break;
-        case AV_SAMPLE_FMT_DBL:
-            throw std::runtime_error("unhandled audio format AV_SAMPLE_FMT_DBL");
+#if 0
+printf("### CONVERTING from sample format %s TO %s\n\t\tFROM %d TO %d channels\n\t\tFROM %d Hz to %d Hz\n",
+            av_get_sample_fmt_name(m_in_sample_format),
+            av_get_sample_fmt_name(m_out_sample_format),
+            m_in_nb_channels,
+            m_out_nb_channels,
+            m_in_sample_rate,
+            m_out_sample_rate);
+#endif
+            m_swr_context = swr_alloc_set_opts(NULL,
+                    av_get_default_channel_layout(m_out_nb_channels),
+                    m_out_sample_format,
+                    m_out_sample_rate,
+                    av_get_default_channel_layout(m_in_nb_channels),
+                    m_in_sample_format,
+                    m_in_sample_rate,
+                    0, NULL );

-        case AV_SAMPLE_FMT_U8P:
-            m_sample_format = osg::AudioStream::SAMPLE_FORMAT_U8;
-            m_context->request_sample_fmt = av_get_packed_sample_fmt( m_context->sample_fmt );
-            break;
-        case AV_SAMPLE_FMT_S16P:
-            m_sample_format = osg::AudioStream::SAMPLE_FORMAT_S16;
-            m_context->request_sample_fmt = av_get_packed_sample_fmt( m_context->sample_fmt );
-            break;
-        case AV_SAMPLE_FMT_S32P:
-            m_sample_format = osg::AudioStream::SAMPLE_FORMAT_S32;
-            m_context->request_sample_fmt = av_get_packed_sample_fmt( m_context->sample_fmt );
-            break;
-        case AV_SAMPLE_FMT_FLTP:
-            m_sample_format = osg::AudioStream::SAMPLE_FORMAT_F32;
-            m_context->request_sample_fmt = av_get_packed_sample_fmt( m_context->sample_fmt );
-            break;
-        case AV_SAMPLE_FMT_DBLP:
-            throw std::runtime_error("unhandled audio format AV_SAMPLE_FMT_DBLP");
+            int err = swr_init(m_swr_context);

-        default:
-            throw std::runtime_error("unknown audio format");
+            if ( err ) {
+                char error_string[512];
+                av_strerror(err, error_string, 512);
+                OSG_WARN << "FFmpegDecoderAudio - WARNING: Error initializing resampling context : " << error_string << std::endl;
+                swr_free(&m_swr_context);
+                throw std::runtime_error("swr_init() failed");;
+            }
        }

        // Check stream sanity
@@ -189,6 +222,10 @@ void FFmpegDecoderAudio::open(AVStream * const stream)
        // Open codec
        if (avcodec_open2(m_context, p_codec, NULL) < 0)
            throw std::runtime_error("avcodec_open() failed");
+
+        m_context->get_buffer = avcodec_default_get_buffer;
+        m_context->release_buffer = avcodec_default_release_buffer;
+
    }

    catch (...)
@@ -219,6 +256,7 @@ void FFmpegDecoderAudio::close(bool waitForThreadToExit)
        if (waitForThreadToExit)
            join();
    }
+    swr_free(&m_swr_context);
 }

 void FFmpegDecoderAudio::setVolume(float volume)
@@ -300,7 +338,7 @@ void FFmpegDecoderAudio::fillBuffer(void * const buffer, size_t size)

        m_audio_buf_index += fill_size;

-        adjustBufferEndTps(fill_size);
+        adjustBufferEndPts(fill_size);
    }
 }

@@ -357,38 +395,37 @@ void FFmpegDecoderAudio::decodeLoop()
 }


-
-void FFmpegDecoderAudio::adjustBufferEndTps(const size_t buffer_size)
+void FFmpegDecoderAudio::adjustBufferEndPts(const size_t buffer_size)
 {
-    int sample_size = nbChannels() * frequency();
+    int bytes_per_second = nbChannels() * frequency();

    switch (sampleFormat())
    {
    case osg::AudioStream::SAMPLE_FORMAT_U8:
-        sample_size *= 1;
+        bytes_per_second *= 1;
        break;

    case osg::AudioStream::SAMPLE_FORMAT_S16:
-        sample_size *= 2;
+        bytes_per_second *= 2;
        break;

    case osg::AudioStream::SAMPLE_FORMAT_S24:
-        sample_size *= 3;
+        bytes_per_second *= 3;
        break;

    case osg::AudioStream::SAMPLE_FORMAT_S32:
-        sample_size *= 4;
+        bytes_per_second *= 4;
        break;

    case osg::AudioStream::SAMPLE_FORMAT_F32:
-        sample_size *= 4;
+        bytes_per_second *= 4;
        break;

    default:
        throw std::runtime_error("unsupported audio sample format");
    }

-    m_clocks.audioAdjustBufferEndPts(double(buffer_size) / double(sample_size));
+    m_clocks.audioAdjustBufferEndPts(double(buffer_size) / double(bytes_per_second));
 }


@@ -403,7 +440,7 @@ size_t FFmpegDecoderAudio::decodeFrame(void * const buffer, const size_t size)
        {
            int data_size = size;

-            const int bytes_decoded = decode_audio(m_context, reinterpret_cast<int16_t*>(buffer), &data_size, m_packet_data, m_bytes_remaining);
+            const int bytes_decoded = decode_audio(m_context, reinterpret_cast<int16_t*>(buffer), &data_size, m_packet_data, m_bytes_remaining, m_swr_context, m_out_sample_rate, m_out_nb_channels, m_out_sample_format);

            if (bytes_decoded < 0)
            {
@@ -464,5 +501,33 @@ size_t FFmpegDecoderAudio::decodeFrame(void * const buffer, const size_t size)
 }


+/**
+ *
+ */
+osg::AudioStream::SampleFormat FFmpegDecoderAudio::sampleFormat() const
+{
+    switch (m_out_sample_format)
+    {
+    case AV_SAMPLE_FMT_NONE:
+        throw std::runtime_error("invalid audio format AV_SAMPLE_FMT_NONE");
+    case AV_SAMPLE_FMT_U8:
+        return osg::AudioStream::SAMPLE_FORMAT_U8;
+        break;
+    case AV_SAMPLE_FMT_S16:
+        return osg::AudioStream::SAMPLE_FORMAT_S16;
+        break;
+    case AV_SAMPLE_FMT_S32:
+        return osg::AudioStream::SAMPLE_FORMAT_S32;
+        break;
+    case AV_SAMPLE_FMT_FLT:
+        return osg::AudioStream::SAMPLE_FORMAT_F32;
+        break;
+    case AV_SAMPLE_FMT_DBL:
+        throw std::runtime_error("unhandled audio format AV_SAMPLE_FMT_DBL");
+
+    default:
+        throw std::runtime_error("unknown audio format");
+    }
+}

 } // namespace osgFFmpeg