/*	Copyright (C) 2024-2024 Martin Guy <martinwguy@gmail.com>
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation, either version 3 of the License, or
 *	(at your option) any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/* libav.c: Interface between spettro and libav */

#include "spettro.h"

#if HAVE_LIBAV

#include "libav.h" /* includes a_file.h which includes av{format,codec}.h */
#include <libavutil/frame.h>	/* for av_frame_{alloc,free}() */

/*
 * Data that is part of the call-to-call state machine
 *
 * We keep:
 * - Maybe an encoded packet that we read from the stream but couldn't give
 *   to the decoder because its input buffer was full
 * - A frame of decoded data, remembering what sample offset in the file its
 *   data[0][0] corresponds to
 */

/* Have we read a packet from the file but not sent it to the decoder yet? */
static bool packet_is_pending = FALSE;
static AVPacket packet;	    /* If so, this is the packet in question */

static AVFrame *frame;	    /* A frame of decoded data */

/* Keep these from call to call so that the usual case of decoding audio
 * sequentially is more efficient, avoiding having to seek all the time.
 */
static AVFormatContext *fmt_ctx = NULL;
static AVCodecContext *dec_ctx = NULL;
static int audio_stream_index = -1;
#define audio_stream (fmt_ctx->streams[audio_stream_index])
#define stream_start_samples round((double)audio_stream->start_time \
    * audio_stream->time_base.num / audio_stream->time_base.den \
    * dec_ctx->sample_rate)
/* AVCodecContext->time_base "is the fundamental unit of time (in seconds)
 * in terms of which frame timestamps are represented." (AVFormatContext docs)
 * whereas
 * AVStream->time_base "is the fundamental unit of time (in seconds)
 * in terms of which frame timestamps are represented." (AVStream docs)
 */
#define frame_start_samples(frame) round(\
    ((double)frame->pts * audio_stream->time_base.num \
			/ audio_stream->time_base.den \
     - (double)audio_stream->start_time * audio_stream->time_base.num \
					/ audio_stream->time_base.den) \
    * dec_ctx->sample_rate)

bool
libav_open(audio_file_t *af, char *filename)
{
#if LIBAVCODEC_VERSION_MAJOR >= 59
    const
#endif
    AVCodec *dec;

    if (avformat_open_input(&fmt_ctx, filename, NULL, NULL) < 0) {
	/* Most likely because the file doesn't exist.
	 * Let the caller complain to avoid multiple error messages */
        return FALSE;
    }

    if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
        fprintf(stdout, "libav cannot find stream information in %s\n", filename);
        avformat_close_input(&fmt_ctx);
        return FALSE;
    }

    /* select the audio stream */
    audio_stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
    if (audio_stream_index < 0) {
	/* It's probably a file with an image but no audio */
        avformat_close_input(&fmt_ctx);
        return FALSE;
    }

    /* create decoding context */
    dec_ctx = avcodec_alloc_context3(dec);
    if (!dec_ctx) {
	fprintf(stdout, "libav cannot allocate its codec context\n");
        avformat_close_input(&fmt_ctx);
        return FALSE;
    }
    avcodec_parameters_to_context(dec_ctx, fmt_ctx->streams[audio_stream_index]->codecpar);

    /* We ask for 16-bit samples, assuming !NO_CACHE.
     * This is not guaranteed to work (and for MP3 and Ogg it returns floats).
     */
    dec_ctx->request_sample_fmt = AV_SAMPLE_FMT_S16;

    /* Enable multi-threading for speed improvement */
    if (dec_ctx->codec->capabilities | AV_CODEC_CAP_FRAME_THREADS)
       dec_ctx->thread_type = FF_THREAD_FRAME;
    else if (dec_ctx->codec->capabilities | AV_CODEC_CAP_SLICE_THREADS)
       dec_ctx->thread_type = FF_THREAD_SLICE;
    else
       dec_ctx->thread_count = 1; /* don't use multithreading */

    /* init the audio decoder */
    if (avcodec_open2(dec_ctx, dec, NULL) < 0) {
        fprintf(stdout, "Cannot open audio decoder\n");
        avformat_close_input(&fmt_ctx);
        return FALSE;
    }

    /* Allocate a frame structure for libav_read_file()'s repeated use */
    frame = av_frame_alloc();
    if (!frame) {
	fprintf(stdout, "Could not allocate a libav frame structure.\n");
        avcodec_free_context(&(dec_ctx));
        avformat_close_input(&(fmt_ctx));
	return FALSE;
    }

    if (dec_ctx->delay != 0) {
	fprintf(stdout, "Warning: This format has a delay of %d samples before it outputs valid audio.\n", dec_ctx->delay);
    }

    af->filename = filename;
    af->sample_rate = dec_ctx->sample_rate;
#if LIBAVCODEC_VERSION_MAJOR >= 59
    af->channels = dec_ctx->ch_layout.nb_channels;
#else
    af->channels = dec_ctx->channels;
#endif

    /* Calculate the audio file's length.
     * We would use fmt_ctx->streams[audio_stream_index]->duration but this
     * gives huge negative values for audio+video files, resulting in 0.7s
     * while the overall file duration seems to be more reliable.
     */
    af->frames = round((double)fmt_ctx->duration * af->sample_rate / AV_TIME_BASE);

    if (audio_stream->start_time == AV_NOPTS_VALUE)
	audio_stream->start_time = 0LL;
    if (audio_stream->start_time != 0) {
	fprintf(stdout, "Warning: Audio file's start time is %lld (%g seconds)\n",
			(long long int)audio_stream->start_time,
			(double)audio_stream->start_time * audio_stream->time_base.num / audio_stream->time_base.den);
    }

    return TRUE;
}

/* Read encoded packets from the audio file and feed as many as possible to
 * the decoder. This strategy is faster than packet-by-packet according to
 * https://stackoverflow.com/questions/55186822  In practice, it takes two
 * packets, decodes them, takes another two packets and so on and doesn't
 * seem to run any faster than packet-by-packet decoding.
 */
static bool
feed_decoder()
{
    int ret;

    if (packet_is_pending) {
	if ((ret = avcodec_send_packet(dec_ctx, &packet)) < 0) {
	    fprintf(stdout, "Failed to feed the pending packet to the decoder: %s\n",
			    av_err2str(ret));
	    return FALSE;
	}
	packet_is_pending = FALSE;
    }

    /* Force-feed the codec with as many input packets as it will take. */
    while ((ret = av_read_frame(fmt_ctx, &packet)) >= 0) {
	int ret2;

	/* Discard packets that are not part of the audio stream */
	if (packet.stream_index != audio_stream_index) {
	    av_packet_unref(&packet);
	    continue;	/* Fetch next packet */
	}
	if ((ret2 = avcodec_send_packet(dec_ctx, &packet)) < 0) {
	    if (ret2 == AVERROR(EAGAIN)) {
		/* Decoder's input buffer is full. Remember to give it
		 * this packet again when we next start feeding it. */
		packet_is_pending = TRUE;
		break;	/* Stop reading and sending packets */
	    }
	    fprintf(stdout, "Error sending a packet to the libav decoder: %s\n", av_err2str(ret));
	    return FALSE;
	}
    }
    if (ret == AVERROR_EOF) {
	/* Make the decoder return AVERROR_EOF when it has drained */
	avcodec_send_packet(dec_ctx, NULL);
    }

    /* Fetch the first packet of decoded data */
    if ((ret = avcodec_receive_frame(dec_ctx, frame)) < 0) {
	fprintf(stdout, "feed_decoder failed to read a decoded frame: %s\n",
		av_err2str(ret));
	return FALSE;
    }

    return TRUE;
}

/*
 * Read samples from the audio file and convert them to the desired format
 * into the buffer "write_to".
 *
 * Returns the number of frames written, or a negative value on errors.
 *
 * During playing, this gets called for just a few hundred samples at a time,
 * and seeking/reading some/flushing on every call is both expensive and prone
 * to bubbly discontinuities for compressed audio files, as we haven't figured
 * out how to do sample-accurate seeking/reading with libav yet.
 */
int
libav_read_frames(audio_file_t *af,
		  void *write_to,
		  frames_t start,
		  frames_t frames_to_read,
		  af_format_t format)
{
    int frames_read = 0;    /* How many frames have we written to write_to? */
    int ret;

    if (frame->pts == AV_NOPTS_VALUE) frame->pts = 0;

    /* Sanity checks */
    if (format != af_signed) {
	fprintf(stdout, "Can't read floats with libav yet.\n");
	return -1;
    }

    /* Do we need to seek in the stream? */
    /* If start == frame->pts + frame->nb_samples, it means we have exactly
     * exhausted this frame and will need to read the next one
     */
    if (start < frame_start_samples(frame) ||
	start > frame_start_samples(frame) + frame->nb_samples) {
	AVRational time_base = audio_stream->time_base;

	/* Where we need to start decoding from to get valid output. */
	/* Empirically, dec_ctx->delay is always 0. */
	int64_t timestamp = round((double)start
				    * time_base.den / time_base.num
				    / af->sample_rate - dec_ctx->delay)
			    + stream_start_samples;

	/* Prepare for seek by flushing all remaining data from the
	 * decoder's output buffer and resetting the decoder.
	 */
	while (avcodec_receive_frame(dec_ctx, frame) >= 0)
	    ;
	if (avcodec_send_packet(dec_ctx, NULL) < 0) {
	    fprintf(stdout, "Failed to send NULL packet to flush decoder.\n");
	}
	avcodec_flush_buffers(dec_ctx);

	if ((ret = avformat_seek_file(fmt_ctx, audio_stream_index,
			       INT64_MIN, timestamp, timestamp,
			       AVSEEK_FLAG_ANY)) < 0) {
	    fprintf(stdout, "Failed to seek to time %g in audio file: %s\n",
		    (double)start / af->sample_rate, av_err2str(ret));
	    return -1;
	}

	packet_is_pending = FALSE;
	feed_decoder();
    }

    /* Read decoded data until we have read all requested samples */
    while (frames_read < frames_to_read) {
	int i;	/* Index into data items in decoded frame */
	int ch;	/* Channel loop variable */
	int data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);

	/* If we've exhausted this packet, get more data. */
	if (start + frames_read >= frame_start_samples(frame) + frame->nb_samples) {
	    if ((ret = avcodec_receive_frame(dec_ctx, frame)) < 0) {
		switch (ret) {
		case AVERROR(EAGAIN):
		    feed_decoder();
		    break;
		case AVERROR_EOF:
		    fprintf(stdout, "receive_frame got EOF\n");
		    return frames_read;
		default:
		    fprintf(stdout, "Error receiving data from decoder: %s\n",
				    av_err2str(ret));
		    return frames_read;
		}
	    }
	}

	for (i = start - frame_start_samples(frame) + frames_read;
	     i < frame->nb_samples &&
	     i < start - frame_start_samples(frame) + frames_to_read;
	     i++) {
	    for (ch=0; ch < af->channels; ch++) {
		switch (frame->format) {
		case AV_SAMPLE_FMT_S16:
		    if (frame->data[ch] == NULL) /* Dunno why this happens */
			*(short *)write_to = 0;
		    else
			*(short *)write_to = *(short *)(frame->data[ch] + data_size * i);
		    break;
		case AV_SAMPLE_FMT_FLTP:
		    if (frame->data[ch] == NULL)
			*(short *)write_to = 0;
		    else
			*(short *)write_to = round(*(float *)(frame->data[ch] + data_size * i) * 32767.0);
		    break;
		default:
		    fprintf(stdout, "Codec returns unhandled data format %s\n",
			    av_get_sample_fmt_name(frame->format));
		    return -1;
		}
		write_to += sizeof(short);
	    }
	    frames_read++;
	}
    }

    return frames_read;
}

void
libav_close(audio_file_t *af)
{
    av_frame_free(&frame);
    avcodec_free_context(&(dec_ctx));
    avformat_close_input(&(fmt_ctx));
}

#endif /* HAVE_LIBAV */
