Add function for getting loudnorm stats

Getting these stats is otherwise impossible.
PyAV-Org · Nov 21, 2024 · ad99283 · ad99283
1 parent 74f6aec
commit ad99283
Show file tree

Hide file tree

Showing 7 changed files with 331 additions and 1 deletion.
diff --git a/av/filter/loudnorm.pxd b/av/filter/loudnorm.pxd
@@ -0,0 +1,4 @@
+from av.audio.stream cimport AudioStream
+
+
+# C-level declaration of stats(); the implementation lives in loudnorm.pyx.
+cpdef bytes stats(str loudnorm_args, AudioStream stream)
diff --git a/av/filter/loudnorm.pyi b/av/filter/loudnorm.pyi
@@ -0,0 +1,3 @@
+from av.audio.stream import AudioStream
+
+# Type stub: takes a loudnorm option string and an audio stream, returns bytes.
+def stats(loudnorm_args: str, stream: AudioStream) -> bytes: ...
diff --git a/av/filter/loudnorm.pyx b/av/filter/loudnorm.pyx
@@ -0,0 +1,63 @@
+# av/filter/loudnorm.pyx
+
+cimport libav as lib
+from cpython.bytes cimport PyBytes_FromString
+from libc.stdlib cimport free
+
+from av.audio.codeccontext cimport AudioCodecContext
+from av.audio.stream cimport AudioStream
+from av.container.core cimport Container
+from av.stream cimport Stream
+
+
+# Opaque struct declarations: this module only passes pointers to these
+# types around, so no fields are declared.
+cdef extern from "libavcodec/avcodec.h":
+    ctypedef struct AVCodecContext:
+        pass
+
+cdef extern from "libavformat/avformat.h":
+    ctypedef struct AVFormatContext:
+        pass
+
+# C helper from loudnorm_impl.h. It is declared nogil so stats() can call it
+# with the GIL released. stats() treats a NULL return as failure and releases
+# a non-NULL return with free().
+cdef extern from "loudnorm_impl.h":
+    char* loudnorm_get_stats(
+        AVFormatContext* fmt_ctx,
+        int audio_stream_index,
+        const char* loudnorm_args
+    ) nogil
+
+
+cpdef bytes stats(str loudnorm_args, AudioStream stream):
+    """
+    Get loudnorm statistics for an audio stream.
+
+    The stream's container is handed over to the C helper, which reads it to
+    the end and closes it; the container must not be used afterwards.
+
+    Args:
+        loudnorm_args (str): Arguments for the loudnorm filter (e.g. "i=-24.0:lra=7.0:tp=-2.0").
+            ``print_format=json`` is appended when it is not already present.
+        stream (AudioStream): Input audio stream to analyze
+
+    Returns:
+        bytes: JSON string containing the loudnorm statistics
+
+    Raises:
+        RuntimeError: If the container has already been closed or consumed,
+            or if no statistics could be obtained.
+    """
+
+    # Avoid producing an option string with a leading ':' for empty input.
+    if not loudnorm_args:
+        loudnorm_args = "print_format=json"
+    elif "print_format=json" not in loudnorm_args:
+        loudnorm_args = loudnorm_args + ":print_format=json"
+
+    cdef Container container = stream.container
+    cdef AVFormatContext* format_ptr = container.ptr
+    cdef int stream_index = stream.index
+    cdef bytes py_args = loudnorm_args.encode("utf-8")
+    cdef const char* c_args = py_args
+    cdef char* result
+
+    # A NULL pointer means there is nothing left to read (for example after a
+    # previous stats() call); the C helper must never see it.
+    if format_ptr == NULL:
+        raise RuntimeError("Failed to get loudnorm stats: container is closed")
+
+    container.ptr = NULL  # The C helper closes the input; prevent double-free
+
+    with nogil:
+        result = loudnorm_get_stats(format_ptr, stream_index, c_args)
+
+    if result == NULL:
+        raise RuntimeError("Failed to get loudnorm stats")
+
+    try:
+        # Explicit bytes copy of the NUL-terminated C string.
+        return PyBytes_FromString(result)
+    finally:
+        free(result)  # Free the C string even if the copy fails
diff --git a/av/filter/loudnorm_impl.c b/av/filter/loudnorm_impl.c
@@ -0,0 +1,211 @@
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libavfilter/avfilter.h>
+#include <libavfilter/buffersink.h>
+#include <libavfilter/buffersrc.h>
+#include <libavutil/opt.h>
+#include <string.h>
+
+#ifdef _WIN32
+    #include <windows.h>
+#else
+    #include <pthread.h>
+#endif
+
+// Synchronization objects guarding json_buffer and json_captured.
+#ifdef _WIN32
+    static CRITICAL_SECTION json_mutex;
+    static CONDITION_VARIABLE json_cond;
+    // Non-zero once loudnorm_get_stats() has initialized the two objects above.
+    static int mutex_initialized = 0;
+#else
+    static pthread_mutex_t json_mutex = PTHREAD_MUTEX_INITIALIZER;
+    static pthread_cond_t json_cond = PTHREAD_COND_INITIALIZER;
+#endif
+
+// Log text captured by logging_callback(), starting at its first '{'.
+static char json_buffer[2048] = {0};
+// Non-zero once logging_callback() has stored text in json_buffer.
+static int json_captured = 0;
+
+// Log callback used while loudnorm statistics are being collected.
+//
+// Formats the message and, when it contains the loudnorm JSON report
+// (a '{' followed by the "input_i" key), copies the text from the '{' onward
+// into json_buffer, sets json_captured and signals json_cond. Every other
+// message is dropped, so an unrelated log line that merely contains a brace
+// cannot overwrite the captured report.
+static void logging_callback(void *ptr, int level, const char *fmt, va_list vl) {
+    char line[2048];
+    const char *json_start;
+
+    (void)ptr;
+    (void)level;
+
+    // On a formatting error the contents of line are unspecified; ignore it.
+    if (vsnprintf(line, sizeof(line), fmt, vl) < 0) {
+        return;
+    }
+
+    json_start = strchr(line, '{');
+    if (!json_start || !strstr(json_start, "\"input_i\"")) {
+        return;
+    }
+
+    #ifdef _WIN32
+    EnterCriticalSection(&json_mutex);
+    #else
+    pthread_mutex_lock(&json_mutex);
+    #endif
+
+    strncpy(json_buffer, json_start, sizeof(json_buffer) - 1);
+    json_buffer[sizeof(json_buffer) - 1] = '\0';  // strncpy may not terminate
+    json_captured = 1;
+
+    #ifdef _WIN32
+    WakeConditionVariable(&json_cond);
+    LeaveCriticalSection(&json_mutex);
+    #else
+    pthread_cond_signal(&json_cond);
+    pthread_mutex_unlock(&json_mutex);
+    #endif
+}
+
+// Decode one audio stream through a loudnorm filter and return its JSON report.
+//
+// Parameters:
+//   fmt_ctx            - opened input. This function takes ownership and
+//                        closes it with avformat_close_input() before
+//                        returning.
+//   audio_stream_index - index into fmt_ctx->streams of the stream to analyze.
+//   loudnorm_args      - option string handed to the loudnorm filter.
+//
+// Returns a heap-allocated, NUL-terminated copy of the captured JSON (release
+// it with free()), or NULL when setup, decoding or filtering failed or no
+// report was captured.
+//
+// Not reentrant: the report is collected through the file-level json_buffer /
+// json_captured and the process-wide libav log callback, which is reset to
+// av_log_default_callback on return.
+char* loudnorm_get_stats(
+    AVFormatContext* fmt_ctx,
+    int audio_stream_index,
+    const char* loudnorm_args
+) {
+    char* result = NULL;
+    AVFilterGraph *filter_graph = NULL;
+    AVFilterContext *src_ctx = NULL, *sink_ctx = NULL, *loudnorm_ctx = NULL;
+    const AVFilter *abuffer = NULL, *abuffersink = NULL, *loudnorm = NULL;
+    const AVCodec *codec = NULL;
+    AVCodecContext *codec_ctx = NULL;
+    AVCodecParameters *codecpar = NULL;
+    AVStream *stream = NULL;
+    AVPacket *packet = NULL;
+    AVFrame *frame = NULL;
+    AVFrame *filt_frame = NULL;
+    const char *sample_fmt_name = NULL;
+    char ch_layout_str[64];
+    char args[512];
+    int prev_log_level;
+    int ret;
+
+    if (!fmt_ctx) {
+        return NULL;
+    }
+
+    #ifdef _WIN32
+    // Initialize synchronization objects if needed
+    if (!mutex_initialized) {
+        InitializeCriticalSection(&json_mutex);
+        InitializeConditionVariable(&json_cond);
+        mutex_initialized = 1;
+    }
+    EnterCriticalSection(&json_mutex);
+    #else
+    pthread_mutex_lock(&json_mutex);
+    #endif
+
+    json_captured = 0;    // Reset the captured flag
+    memset(json_buffer, 0, sizeof(json_buffer));  // Clear the buffer
+
+    #ifdef _WIN32
+    LeaveCriticalSection(&json_mutex);
+    #else
+    pthread_mutex_unlock(&json_mutex);
+    #endif
+
+    prev_log_level = av_log_get_level();
+    av_log_set_callback(logging_callback);
+
+    if (audio_stream_index < 0 ||
+        (unsigned int)audio_stream_index >= fmt_ctx->nb_streams) {
+        goto end;
+    }
+    stream = fmt_ctx->streams[audio_stream_index];
+    codecpar = stream->codecpar;
+
+    // Set up the decoder; every step can fail and must be checked.
+    codec = avcodec_find_decoder(codecpar->codec_id);
+    if (!codec) goto end;
+    codec_ctx = avcodec_alloc_context3(codec);
+    if (!codec_ctx) goto end;
+    if (avcodec_parameters_to_context(codec_ctx, codecpar) < 0) goto end;
+    if (avcodec_open2(codec_ctx, codec, NULL) < 0) goto end;
+
+    // A NULL name must not reach the "%s" conversion below.
+    sample_fmt_name = av_get_sample_fmt_name(codec_ctx->sample_fmt);
+    if (!sample_fmt_name) goto end;
+    if (av_channel_layout_describe(&codecpar->ch_layout, ch_layout_str,
+                                   sizeof(ch_layout_str)) < 0) {
+        goto end;
+    }
+
+    // Build the graph: abuffer -> loudnorm -> abuffersink.
+    abuffer = avfilter_get_by_name("abuffer");
+    abuffersink = avfilter_get_by_name("abuffersink");
+    loudnorm = avfilter_get_by_name("loudnorm");
+    if (!abuffer || !abuffersink || !loudnorm) goto end;
+
+    filter_graph = avfilter_graph_alloc();
+    if (!filter_graph) goto end;
+
+    snprintf(args, sizeof(args),
+        "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=%s",
+        stream->time_base.num,
+        stream->time_base.den,
+        codecpar->sample_rate,
+        sample_fmt_name,
+        ch_layout_str);
+
+    if (avfilter_graph_create_filter(&src_ctx, abuffer,
+            "src", args, NULL, filter_graph) < 0) goto end;
+    if (avfilter_graph_create_filter(&sink_ctx, abuffersink,
+            "sink", NULL, NULL, filter_graph) < 0) goto end;
+    if (avfilter_graph_create_filter(&loudnorm_ctx, loudnorm,
+            "loudnorm", loudnorm_args, NULL, filter_graph) < 0) goto end;
+
+    if (avfilter_link(src_ctx, 0, loudnorm_ctx, 0) < 0) goto end;
+    if (avfilter_link(loudnorm_ctx, 0, sink_ctx, 0) < 0) goto end;
+    if (avfilter_graph_config(filter_graph, NULL) < 0) goto end;
+
+    packet = av_packet_alloc();
+    frame = av_frame_alloc();
+    filt_frame = av_frame_alloc();
+    if (!packet || !frame || !filt_frame) goto end;
+
+    while ((ret = av_read_frame(fmt_ctx, packet)) >= 0) {
+        if (packet->stream_index != audio_stream_index) {
+            av_packet_unref(packet);
+            continue;
+        }
+
+        // A packet the decoder rejects is skipped rather than treated as fatal.
+        ret = avcodec_send_packet(codec_ctx, packet);
+        if (ret < 0) {
+            av_packet_unref(packet);
+            continue;
+        }
+
+        while (ret >= 0) {
+            ret = avcodec_receive_frame(codec_ctx, frame);
+            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
+            if (ret < 0) goto end;
+
+            ret = av_buffersrc_add_frame_flags(src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
+            if (ret < 0) goto end;
+
+            // Drain and discard filtered audio; only the statistics matter.
+            while (1) {
+                ret = av_buffersink_get_frame(sink_ctx, filt_frame);
+                if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
+                if (ret < 0) goto end;
+                av_frame_unref(filt_frame);
+            }
+        }
+        av_packet_unref(packet);
+    }
+
+    // Flush decoder
+    avcodec_send_packet(codec_ctx, NULL);
+    while (avcodec_receive_frame(codec_ctx, frame) >= 0) {
+        av_buffersrc_add_frame(src_ctx, frame);
+    }
+
+    // Flush filter
+    av_buffersrc_add_frame(src_ctx, NULL);
+    while (av_buffersink_get_frame(sink_ctx, filt_frame) >= 0) {
+        av_frame_unref(filt_frame);
+    }
+
+    // Make sure the report is enabled and in JSON form. The previous log
+    // level is restored during cleanup. The filter is deliberately not
+    // re-initialized here: it is already initialized, and the report is
+    // emitted when the filter is torn down by avfilter_graph_free() below.
+    av_log_set_level(AV_LOG_INFO);
+    av_opt_set(loudnorm_ctx, "print_format", "json", AV_OPT_SEARCH_CHILDREN);
+
+end:
+    avcodec_free_context(&codec_ctx);
+    avfilter_graph_free(&filter_graph);
+    avformat_close_input(&fmt_ctx);
+    av_frame_free(&filt_frame);
+    av_frame_free(&frame);
+    av_packet_free(&packet);
+    av_log_set_level(prev_log_level);
+
+    // The log callback runs synchronously on the logging thread, so once the
+    // graph has been freed the report has either been captured or will never
+    // arrive; there is nothing to wait for.
+    #ifdef _WIN32
+    EnterCriticalSection(&json_mutex);
+    if (json_captured) {
+        result = _strdup(json_buffer);  // Use _strdup on Windows
+    }
+    LeaveCriticalSection(&json_mutex);
+    #else
+    pthread_mutex_lock(&json_mutex);
+    if (json_captured) {
+        result = strdup(json_buffer);
+    }
+    pthread_mutex_unlock(&json_mutex);
+    #endif
+
+    av_log_set_callback(av_log_default_callback);
+    return result;
+}
diff --git a/av/filter/loudnorm_impl.h b/av/filter/loudnorm_impl.h
@@ -0,0 +1,12 @@
+#ifndef AV_FILTER_LOUDNORM_H
+#define AV_FILTER_LOUDNORM_H
+
+#include <libavcodec/avcodec.h>
+// AVFormatContext is declared by libavformat; include it here so this header
+// compiles regardless of what the including file pulled in before it.
+#include <libavformat/avformat.h>
+
+// Runs the given audio stream of fmt_ctx through a loudnorm filter configured
+// with loudnorm_args and returns the filter's JSON report as a heap-allocated
+// string (release with free()), or NULL if no report was obtained.
+// fmt_ctx is closed by this function.
+char* loudnorm_get_stats(
+    AVFormatContext* fmt_ctx,
+    int audio_stream_index,
+    const char* loudnorm_args
+);
+
+#endif // AV_FILTER_LOUDNORM_H
diff --git a/setup.py b/setup.py
@@ -153,10 +153,36 @@ def parse_cflags(raw_flags):
         "library_dirs": [],
     }
 
+loudnorm_extension = Extension(
+    "av.filter.loudnorm",
+    sources=[
+        "av/filter/loudnorm.pyx",
+        "av/filter/loudnorm_impl.c",
+    ],
+    include_dirs=["av/filter"] + extension_extra["include_dirs"],
+    libraries=extension_extra["libraries"],
+    library_dirs=extension_extra["library_dirs"],
+)
+
+# Add the cythonized loudnorm extension to ext_modules
+ext_modules = cythonize(
+    loudnorm_extension,
+    compiler_directives={
+        "c_string_type": "str",
+        "c_string_encoding": "ascii",
+        "embedsignature": True,
+        "language_level": 3,
+    },
+    build_dir="src",
+    include_path=["include"],
+)
+
 # Construct the modules that we find in the "av" directory.
-ext_modules = []
 for dirname, dirnames, filenames in os.walk("av"):
     for filename in filenames:
+        if filename == "loudnorm.pyx":
+            continue
+
         # We are looking for Cython sources.
         if filename.startswith(".") or os.path.splitext(filename)[1] != ".pyx":
             continue

diff --git a/tests/test_streams.py b/tests/test_streams.py
@@ -26,6 +26,17 @@ def test_stream_tuples(self) -> None:
             audio_streams = tuple([s for s in container.streams if s.type == "audio"])
             assert audio_streams == container.streams.audio
 
+    def test_loudnorm(self) -> None:
+        container = av.open(
+            fate_suite("amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv")
+        )
+        audio = container.streams.audio[0]
+        stats = av.filter.loudnorm.stats("i=-24.0:lra=7.0:tp=-2.0", audio)
+
+        assert isinstance(stats, bytes) and len(stats) > 30
+        assert b"inf" not in stats
+        assert b'"input_i"' in stats
+
     def test_selection(self) -> None:
         container = av.open(
             fate_suite("amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv")