Skip to content

Commit

Permalink
Add function for getting loudnorm stats
Browse files Browse the repository at this point in the history
Getting these stats is otherwise impossible.
  • Loading branch information
WyattBlue authored Nov 21, 2024
1 parent 74f6aec commit ad99283
Show file tree
Hide file tree
Showing 7 changed files with 331 additions and 1 deletion.
4 changes: 4 additions & 0 deletions av/filter/loudnorm.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from av.audio.stream cimport AudioStream


# Declaration of the loudnorm statistics entry point (defined in loudnorm.pyx).
# Takes the loudnorm filter argument string and the audio stream to analyze,
# and returns the reported statistics as bytes.
cpdef bytes stats(str loudnorm_args, AudioStream stream)
3 changes: 3 additions & 0 deletions av/filter/loudnorm.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from av.audio.stream import AudioStream

# Type stub: runs the loudnorm filter over `stream` with the given filter
# arguments and returns the statistics it reports as bytes.
def stats(loudnorm_args: str, stream: AudioStream) -> bytes: ...
63 changes: 63 additions & 0 deletions av/filter/loudnorm.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# av/filter/loudnorm.pyx

cimport libav as lib
from cpython.bytes cimport PyBytes_FromString
from libc.stdlib cimport free

from av.audio.codeccontext cimport AudioCodecContext
from av.audio.stream cimport AudioStream
from av.container.core cimport Container
from av.stream cimport Stream


# Opaque libav handle types. Only pointers to these structs are used in this
# module, so no fields are declared.
cdef extern from "libavcodec/avcodec.h":
ctypedef struct AVCodecContext:
pass

cdef extern from "libavformat/avformat.h":
ctypedef struct AVFormatContext:
pass

# Native helper implemented in loudnorm_impl.c. Declared `nogil` so it can be
# called with the GIL released. It returns a C string that the caller releases
# with free(), or NULL when no statistics were obtained.
cdef extern from "loudnorm_impl.h":
char* loudnorm_get_stats(
AVFormatContext* fmt_ctx,
int audio_stream_index,
const char* loudnorm_args
) nogil


cpdef bytes stats(str loudnorm_args, AudioStream stream):
    """
    Get loudnorm statistics for an audio stream.

    Args:
        loudnorm_args (str): Arguments for the loudnorm filter
            (e.g. "i=-24.0:lra=7.0:tp=-2.0"). ``print_format=json`` is added
            automatically when it is not already present.
        stream (AudioStream): Input audio stream to analyze

    Returns:
        bytes: JSON string containing the loudnorm statistics

    Raises:
        RuntimeError: If the native helper returns no statistics.

    Note:
        The container that owns ``stream`` is consumed by this call: its
        format context is handed to the native helper, which closes it, and
        ``container.ptr`` is reset to NULL so it is not closed a second time.
        The container must not be used for further I/O afterwards.
    """

    # Avoid producing a leading ':' (":print_format=json") for an empty
    # argument string; otherwise append the option only when it is missing.
    if loudnorm_args == "":
        loudnorm_args = "print_format=json"
    elif "print_format=json" not in loudnorm_args:
        loudnorm_args = loudnorm_args + ":print_format=json"

    cdef Container container = stream.container
    cdef AVFormatContext* format_ptr = container.ptr

    container.ptr = NULL  # Prevent double-free: the native helper closes it

    cdef int stream_index = stream.index
    # Keep the encoded bytes object alive for as long as c_args is in use.
    cdef bytes py_args = loudnorm_args.encode("utf-8")
    cdef const char* c_args = py_args
    cdef char* result

    with nogil:
        result = loudnorm_get_stats(format_ptr, stream_index, c_args)

    if result == NULL:
        raise RuntimeError("Failed to get loudnorm stats")

    try:
        py_result = result[:]  # Make a copy of the string
    finally:
        free(result)  # Free the C string even if the copy above fails

    return py_result
211 changes: 211 additions & 0 deletions av/filter/loudnorm_impl.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#else
#include <pthread.h>
#endif

/*
 * Synchronization objects guarding json_buffer/json_captured.
 * On Windows they have no static initializer, so mutex_initialized records
 * whether they have been set up yet.
 */
#ifdef _WIN32
static CRITICAL_SECTION json_mutex;
static CONDITION_VARIABLE json_cond;
static int mutex_initialized = 0;
#else
static pthread_mutex_t json_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t json_cond = PTHREAD_COND_INITIALIZER;
#endif

/* Most recent log text starting at a '{', as stored by the logging callback. */
static char json_buffer[2048] = {0};
/* Set to 1 by the logging callback once json_buffer has been filled. */
static int json_captured = 0;

/*
 * Custom logging callback.
 *
 * Formats the log message and, if it contains a '{', stores everything from
 * that brace onwards in json_buffer, raises json_captured and signals
 * json_cond. Messages without a '{' are dropped. The `ptr` and `level`
 * arguments are not used.
 */
static void logging_callback(void *ptr, int level, const char *fmt, va_list vl) {
    char message[2048];
    const char *brace;

    vsnprintf(message, sizeof(message), fmt, vl);

    brace = strchr(message, '{');
    if (brace == NULL) {
        return;
    }

#ifdef _WIN32
    EnterCriticalSection(&json_mutex);
#else
    pthread_mutex_lock(&json_mutex);
#endif

    /* The last byte of json_buffer is never written, so it stays NUL. */
    strncpy(json_buffer, brace, sizeof(json_buffer) - 1);
    json_captured = 1;

#ifdef _WIN32
    WakeConditionVariable(&json_cond);
    LeaveCriticalSection(&json_mutex);
#else
    pthread_cond_signal(&json_cond);
    pthread_mutex_unlock(&json_mutex);
#endif
}

char* loudnorm_get_stats(
AVFormatContext* fmt_ctx,
int audio_stream_index,
const char* loudnorm_args
) {
char* result = NULL;
json_captured = 0; // Reset the captured flag
memset(json_buffer, 0, sizeof(json_buffer)); // Clear the buffer

#ifdef _WIN32
// Initialize synchronization objects if needed
if (!mutex_initialized) {
InitializeCriticalSection(&json_mutex);
InitializeConditionVariable(&json_cond);
mutex_initialized = 1;
}
#endif

av_log_set_callback(logging_callback);

AVFilterGraph *filter_graph = NULL;
AVFilterContext *src_ctx = NULL, *sink_ctx = NULL, *loudnorm_ctx = NULL;

AVCodec *codec = NULL;
AVCodecContext *codec_ctx = NULL;
int ret;

AVCodecParameters *codecpar = fmt_ctx->streams[audio_stream_index]->codecpar;
codec = (AVCodec *)avcodec_find_decoder(codecpar->codec_id);
codec_ctx = avcodec_alloc_context3(codec);
avcodec_parameters_to_context(codec_ctx, codecpar);
avcodec_open2(codec_ctx, codec, NULL);

char ch_layout_str[64];
av_channel_layout_describe(&codecpar->ch_layout, ch_layout_str, sizeof(ch_layout_str));

filter_graph = avfilter_graph_alloc();

char args[512];
snprintf(args, sizeof(args),
"time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=%s",
fmt_ctx->streams[audio_stream_index]->time_base.num,
fmt_ctx->streams[audio_stream_index]->time_base.den,
codecpar->sample_rate,
av_get_sample_fmt_name(codec_ctx->sample_fmt),
ch_layout_str);

avfilter_graph_create_filter(&src_ctx, avfilter_get_by_name("abuffer"),
"src", args, NULL, filter_graph);
avfilter_graph_create_filter(&sink_ctx, avfilter_get_by_name("abuffersink"),
"sink", NULL, NULL, filter_graph);
avfilter_graph_create_filter(&loudnorm_ctx, avfilter_get_by_name("loudnorm"),
"loudnorm", loudnorm_args, NULL, filter_graph);

avfilter_link(src_ctx, 0, loudnorm_ctx, 0);
avfilter_link(loudnorm_ctx, 0, sink_ctx, 0);
avfilter_graph_config(filter_graph, NULL);

AVPacket *packet = av_packet_alloc();
AVFrame *frame = av_frame_alloc();
AVFrame *filt_frame = av_frame_alloc();

while ((ret = av_read_frame(fmt_ctx, packet)) >= 0) {
if (packet->stream_index != audio_stream_index) {
av_packet_unref(packet);
continue;
}

ret = avcodec_send_packet(codec_ctx, packet);
if (ret < 0) {
av_packet_unref(packet);
continue;
}

while (ret >= 0) {
ret = avcodec_receive_frame(codec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
if (ret < 0) goto end;

ret = av_buffersrc_add_frame_flags(src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
if (ret < 0) goto end;

while (1) {
ret = av_buffersink_get_frame(sink_ctx, filt_frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
if (ret < 0) goto end;
av_frame_unref(filt_frame);
}
}
av_packet_unref(packet);
}

// Flush decoder
avcodec_send_packet(codec_ctx, NULL);
while (avcodec_receive_frame(codec_ctx, frame) >= 0) {
av_buffersrc_add_frame(src_ctx, frame);
}

// Flush filter
av_buffersrc_add_frame(src_ctx, NULL);
while (av_buffersink_get_frame(sink_ctx, filt_frame) >= 0) {
av_frame_unref(filt_frame);
}

// Force stats print
if (loudnorm_ctx) {
av_log_set_level(AV_LOG_INFO);
av_opt_set(loudnorm_ctx, "print_format", "json", AV_OPT_SEARCH_CHILDREN);
av_opt_set(loudnorm_ctx, "measured_i", NULL, AV_OPT_SEARCH_CHILDREN);
av_opt_set(loudnorm_ctx, "measured_lra", NULL, AV_OPT_SEARCH_CHILDREN);
av_opt_set(loudnorm_ctx, "measured_tp", NULL, AV_OPT_SEARCH_CHILDREN);
av_opt_set(loudnorm_ctx, "measured_thresh", NULL, AV_OPT_SEARCH_CHILDREN);
avfilter_init_str(loudnorm_ctx, NULL);
}

avfilter_graph_request_oldest(filter_graph);

end:
avcodec_free_context(&codec_ctx);
avfilter_graph_free(&filter_graph);
avformat_close_input(&fmt_ctx);
av_frame_free(&filt_frame);
av_frame_free(&frame);
av_packet_free(&packet);

#ifdef _WIN32
EnterCriticalSection(&json_mutex);
while (!json_captured) {
if (!SleepConditionVariableCS(&json_cond, &json_mutex, 5000)) { // 5 second timeout
fprintf(stderr, "Timeout waiting for JSON data\n");
break;
}
}
if (json_captured) {
result = _strdup(json_buffer); // Use _strdup on Windows
}
LeaveCriticalSection(&json_mutex);
#else
struct timespec timeout;
clock_gettime(CLOCK_REALTIME, &timeout);
timeout.tv_sec += 5; // 5 second timeout

pthread_mutex_lock(&json_mutex);
while (json_captured == 0) {
int ret = pthread_cond_timedwait(&json_cond, &json_mutex, &timeout);
if (ret == ETIMEDOUT) {
fprintf(stderr, "Timeout waiting for JSON data\n");
break;
}
}
if (json_captured) {
result = strdup(json_buffer);
}
pthread_mutex_unlock(&json_mutex);
#endif

av_log_set_callback(av_log_default_callback);
return result;
}
12 changes: 12 additions & 0 deletions av/filter/loudnorm_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef AV_FILTER_LOUDNORM_H
#define AV_FILTER_LOUDNORM_H

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>  /* AVFormatContext */

/**
 * Run the loudnorm filter over one audio stream and return the text it logs.
 *
 * @param fmt_ctx            Opened input; it is closed by this function.
 * @param audio_stream_index Index of the audio stream to analyze.
 * @param loudnorm_args      Option string for the loudnorm filter.
 * @return Heap-allocated string to be released with free(), or NULL when no
 *         output was captured.
 */
char* loudnorm_get_stats(
    AVFormatContext* fmt_ctx,
    int audio_stream_index,
    const char* loudnorm_args
);

#endif // AV_FILTER_LOUDNORM_H
28 changes: 27 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,36 @@ def parse_cflags(raw_flags):
"library_dirs": [],
}

# The loudnorm module is declared explicitly because it compiles a
# hand-written C source (loudnorm_impl.c) together with the Cython module.
loudnorm_extension = Extension(
    "av.filter.loudnorm",
    sources=[
        "av/filter/loudnorm.pyx",
        "av/filter/loudnorm_impl.c",
    ],
    include_dirs=["av/filter"] + extension_extra["include_dirs"],
    libraries=extension_extra["libraries"],
    library_dirs=extension_extra["library_dirs"],
)

# Seed ext_modules with the cythonized loudnorm extension. The list must not
# be reset to [] after this point, or the loudnorm extension would be dropped
# from the build.
# NOTE(review): the directory walk below is expected to add to this list —
# confirm it extends rather than reassigns ext_modules.
ext_modules = cythonize(
    loudnorm_extension,
    compiler_directives={
        "c_string_type": "str",
        "c_string_encoding": "ascii",
        "embedsignature": True,
        "language_level": 3,
    },
    build_dir="src",
    include_path=["include"],
)

# Construct the modules that we find in the "av" directory.
for dirname, dirnames, filenames in os.walk("av"):
for filename in filenames:
if filename == "loudnorm.pyx":
continue

# We are looking for Cython sources.
if filename.startswith(".") or os.path.splitext(filename)[1] != ".pyx":
continue
Expand Down
11 changes: 11 additions & 0 deletions tests/test_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ def test_stream_tuples(self) -> None:
audio_streams = tuple([s for s in container.streams if s.type == "audio"])
assert audio_streams == container.streams.audio

def test_loudnorm(self) -> None:
    # Analyze the first audio stream of the AMV sample and sanity-check
    # the statistics returned by the loudnorm helper.
    sample_path = fate_suite(
        "amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv"
    )
    container = av.open(sample_path)
    audio_stream = container.streams.audio[0]

    stats = av.filter.loudnorm.stats("i=-24.0:lra=7.0:tp=-2.0", audio_stream)

    # A non-trivial bytes payload is expected.
    assert isinstance(stats, bytes)
    assert len(stats) > 30
    # The payload must not contain "inf" and must include the "input_i" key.
    assert b"inf" not in stats
    assert b'"input_i"' in stats

def test_selection(self) -> None:
container = av.open(
fate_suite("amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv")
Expand Down

0 comments on commit ad99283

Please sign in to comment.