FFmpeg 解码音视频实例及碰到的问题记录（一）

原创已于 2023-01-28 17:08:46 修改 · 1.5k 阅读

1 ·

本内容遵循CC 4.0 BY-SA版权协议

标签

#ffmpeg #video decoding

于 2018-09-17 11:51:33 首次发布

视频处理专栏收录该内容

19 篇文章

订阅专栏

本文介绍了在使用FFmpeg3.4.1进行音视频解码时的步骤，包括av_register_all()、avformat_open_input()、avformat_find_stream_info()等关键函数的使用，并详细讲解了从读取文件到解码帧的数据流程。在实践中遇到的问题，如AVFrame中data与linesize的关系，也在文中提及。

最近项目开发中需要使用FFmpeg进行音视频的解码，在使用过程中遇到了一些问题对其进行记录。

FFmpeg版本：FFmpeg3.4.1 下载地址如下：ffmpeg3.4.1Win32开发包-编解码工具类资源-CSDN下载

1、视频解码：

av_register_all(); 遍历注册所有的组件，包括各种编解码器、解复用器等等；
AVFormatContext *pFormatCtx = avformat_alloc_context(); 主要存储视音频封装格式中包含的信息，此变量非常重要，几乎贯穿了解码过程的始终，此处主要为pFormatCtx变量分配内存；
avformat_open_input(&pFormatCtx, pInput_test, NULL, NULL); 打开输入视频文件，读取文件头信息，将文件头信息读取到pFormatCtx 结构体内，为后续解码做准备；
avformat_find_stream_info(pFormatCtx, NULL); 获取文件流信息，主要根据pFormatCtx结构体内的已有信息对pFormatCtx结构体内流字段赋值流信息，即，可看作进一步为pFormatCtx结构体进行赋值；
AVCodecParameters *pCodecCtx = pFormatCtx->streams[v_stream_idx]->codecpar; 提取pFormatCtx结构体内的对应流编码器信息
AVCodec *pCodec = avcodec_find_decoder(pCodecCtx->codec_id); 根据编解码的编码id号查找对应的解码器信息
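AVCodecContext *pEnc = avcodec_alloc_context3(pCodec); 根据pCodec分配并初始化AVCodecContext，此时只是分配，还没有打开；注意在打开解码器之前，还需要调用 avcodec_parameters_to_context(pEnc, pCodecCtx) 把流的编码参数（宽高、extradata等）拷贝到pEnc中，否则部分解码器无法正常打开或解码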
avcodec_open2(pEnc, pCodec, NULL); 打开解码器
av_read_frame(pFormatCtx, packet); 从pFormatCtx结构体内流字段读取视频文件压缩流到packet变量内
avcodec_send_packet(pEnc, packet); 将packet内文件压缩流导入到对应解码器内
avcodec_receive_frame(pEnc, pFrame); 从解码器pEnc内解码压缩流到pFrame变量内（存储解压缩后的音视频数据）
pFrame结构体内的 uint8_t *data[AV_NUM_DATA_POINTERS];成员内存储了音视频流解码后的数据，可对其进行相应操作（可保存可进行图像处理等等）。

以下是示例代码（代码从项目中抠取出来，大致流程没有问题，但尚未单独验证，可能会有一些小问题，现先记录，后续再进行验证）：

#include <windows.h>
#include <stdint.h>
#include <iostream>

extern "C"
{
#include "libavcodec/avcodec.h"   
#include "libavformat/avformat.h"  
#include "libavutil/channel_layout.h"  
#include "libavutil/common.h"  
#include "libavutil/imgutils.h"  
#include "libswscale/swscale.h"   
#include "libavutil/imgutils.h"      
#include "libavutil/opt.h"         
#include "libavutil/mathematics.h"      
#include "libavutil/samplefmt.h"   
};

#define ES_STREAM_VIDEO 1
#define ES_STREAM_AUDIO 2

#pragma comment(lib, "FFmpeg/lib/avcodec.lib")  
#pragma comment(lib, "FFmpeg/lib/avformat.lib")  
#pragma comment(lib, "FFmpeg/lib/avdevice.lib")  
#pragma comment(lib, "FFmpeg/lib/avfilter.lib")  
#pragma comment(lib, "FFmpeg/lib/avutil.lib")   
#pragma comment(lib, "FFmpeg/lib/swresample.lib")  
#pragma comment(lib, "FFmpeg/lib/swscale.lib")  

// Project-side container describing one media unit: either a compressed
// ES packet or a decoded frame (see m_packet_type), together with the
// buffer that backs it and the timing/format metadata that travels with it.
//
// NOTE(review): TL_NUM_DATA_POINTERS is not defined anywhere in the visible
// part of this file — presumably it comes from a project header. main()
// passes m_data / m_linesize to av_image_fill_arrays(), so confirm that the
// constant is large enough for that call.
typedef struct AVMediaPacket
{
	BYTE* m_data[TL_NUM_DATA_POINTERS];   // per-plane data pointers
	int m_linesize[TL_NUM_DATA_POINTERS]; // per-plane size of one row
	BYTE* m_pBuf;      // buffer allocated by MallocMediaFrameBuf, freed by ReleaseMediaPacket
	int m_buf_size;    // length passed to MallocMediaFrameBuf
	int m_max_size;
	int m_cur_size;// used by ES packets
	int m_packet_type;// 0: ES packet, 1: frame

	//enum AVPixelFormat for video frames
	//enum AVSampleFormat for audio
	int m_pixel_format;
	int m_channel_count;
	LONGLONG m_channel_layout;
	int m_nb_samples;  // number of audio samples per channel
	int m_sample_rate; // audio sample rate

	int m_width;
	int m_height;
	/**
	* The content of the picture is interlaced.
	* - encoding: Set by user.
	* - decoding: Set by libavcodec. (default 0)
	*/
	int m_interlaced_frame;// 0: progressive frame, 1: interlaced frame
	/**
	* If the content is interlaced, is top field displayed first.
	* - encoding: Set by user.
	* - decoding: Set by libavcodec.
	*/
	int m_top_field_first;// 0: bottom (even) field first, 1: top (odd) field first
	int m_pict_type;

	int m_es_stream_type;// whether this is a video or an audio unit (ES_STREAM_VIDEO / ES_STREAM_AUDIO)

	LONGLONG m_pos;
	LONGLONG m_origin_size;// byte count of the original ES packet
	LONGLONG m_dts;// used by ES packets
	LONGLONG m_pts;
	LONGLONG m_origin_ts;
	LONGLONG m_sys_pts;// pts converted to system time (milliseconds)
	LONGLONG m_duration;// frame duration
	LONGLONG m_sys_duration;// duration converted to system time (milliseconds)

	int m_align;// data alignment

	int m_scale_mod;// scaling mode applied to the video frame (the frame was produced by this transform)

	int m_stream_index;// same as AVPacket's stream_index
	int m_flags;// same as AVPacket's flags
}AVMediaPacket;


/********** 查询FFmpeg支持的编解码器 ************/
void CheckEncoderDecoder()
{
	char *info = (char *)malloc(40000);
	memset(info, 0, 40000);

	AVCodec *c_temp = av_codec_next(NULL);

	while (c_temp != NULL)
	{
		if (c_temp->decode != NULL)
		{
			strcat(info, "[Decode]");
		}
		else
		{
			strcat(info, "[Encode]");
		}
		switch (c_temp->type)
		{
		case AVMEDIA_TYPE_VIDEO:
			strcat(info, "[Video]");
			break;
		case AVMEDIA_TYPE_AUDIO:
			strcat(info, "[Audeo]");
			break;
		default:
			strcat(info, "[Other]");
			break;
		}
		sprintf(info, "%s %10s\n", info, c_temp->name);
		c_temp = c_temp->next;
	}
	puts(info);
	free(info);
}

/*
 * Resets every field of *pFrame to its "unset" value.
 *
 * pFrame        - packet to initialise; it is dereferenced unconditionally.
 * nPacketType   - stored in m_packet_type (0: ES packet, 1: frame).
 * nEsStreamType - stored in m_es_stream_type (video or audio unit).
 *
 * Pointers become NULL, the buffer counters become 0, m_pixel_format
 * becomes AV_PIX_FMT_NONE and every other descriptive field becomes -1.
 * No memory is allocated or released here.
 */
void InitMediaPacket(AVMediaPacket* pFrame,int nPacketType, int nEsStreamType)
{
	// Caller-supplied classification of the unit.
	pFrame->m_packet_type = nPacketType;
	pFrame->m_es_stream_type = nEsStreamType;

	// No backing buffer yet, so no plane pointers and no row sizes either.
	pFrame->m_pBuf = NULL;
	pFrame->m_buf_size = pFrame->m_max_size = pFrame->m_cur_size = 0;

	int plane = TL_NUM_DATA_POINTERS;
	while (plane-- > 0)
	{
		pFrame->m_data[plane] = NULL;
		pFrame->m_linesize[plane] = 0;
	}

	// Format description: enum AVPixelFormat for video, enum AVSampleFormat for audio.
	pFrame->m_pixel_format = AV_PIX_FMT_NONE;

	// Audio properties.
	pFrame->m_channel_count = pFrame->m_nb_samples = pFrame->m_sample_rate = -1;
	pFrame->m_channel_layout = -1;

	// Video properties (size, interlacing, field order, picture type).
	pFrame->m_width = pFrame->m_height = -1;
	pFrame->m_interlaced_frame = pFrame->m_top_field_first = -1;
	pFrame->m_pict_type = -1;

	// Position, original size and timestamps / durations.
	pFrame->m_pos = pFrame->m_origin_size = -1;
	pFrame->m_dts = pFrame->m_pts = pFrame->m_origin_ts = pFrame->m_sys_pts = -1;
	pFrame->m_duration = pFrame->m_sys_duration = -1;

	// Alignment, scaling mode and the fields mirrored from AVPacket.
	pFrame->m_align = pFrame->m_scale_mod = -1;
	pFrame->m_stream_index = pFrame->m_flags = -1;
}

/*
 * Allocates an nBufLen-byte buffer for *pFrame and stores it in m_pBuf.
 *
 * pFrame  - packet that receives the buffer; NULL is rejected.
 * nBufLen - requested size in bytes; values <= 0 are rejected.
 *
 * Returns TRUE when the buffer was allocated, FALSE otherwise.
 *
 * On a rejected argument *pFrame is left untouched. Otherwise m_data[] and
 * m_linesize[] are cleared, and m_buf_size is nBufLen on success or 0 when
 * the allocation fails, so the recorded size never describes a buffer that
 * does not exist.
 *
 * Whatever m_pBuf held before the call is overwritten, not freed; call
 * ReleaseMediaPacket() first when re-using a packet.
 */
BOOL MallocMediaFrameBuf(AVMediaPacket* pFrame, int nBufLen)
{
	if (pFrame == NULL || nBufLen <= 0)
		return FALSE;

	for (int i = 0; i < TL_NUM_DATA_POINTERS; i++)
	{
		pFrame->m_data[i] = NULL;   // plane pointers
		pFrame->m_linesize[i] = 0;  // size of one row per plane
	}

	pFrame->m_pBuf = (BYTE*)malloc((size_t)nBufLen * sizeof(BYTE));
	if (pFrame->m_pBuf == NULL)
	{
		pFrame->m_buf_size = 0;
		return FALSE;
	}

	pFrame->m_buf_size = nBufLen;
	return TRUE;
}

/*
 * Frees the buffer held in pFrame->m_pBuf and clears everything that
 * described it.
 *
 * pFrame - packet to release; NULL is accepted and ignored.
 *
 * Besides m_pBuf, the plane pointers, row sizes and m_buf_size are reset:
 * main() derives m_data[] from m_pBuf via av_image_fill_arrays(), so leaving
 * them set would keep pointers into freed memory. Calling this twice on the
 * same packet is harmless because m_pBuf is NULL after the first call.
 */
void ReleaseMediaPacket(AVMediaPacket* pFrame)
{
	if (pFrame == NULL)
		return;

	free(pFrame->m_pBuf);  // free(NULL) is a no-op, no guard needed
	pFrame->m_pBuf = NULL;
	pFrame->m_buf_size = 0;

	// Element count taken from the array itself so it always matches the struct.
	const size_t nPlanes = sizeof(pFrame->m_data) / sizeof(pFrame->m_data[0]);
	for (size_t i = 0; i < nPlanes; i++)
	{
		pFrame->m_data[i] = NULL;
		pFrame->m_linesize[i] = 0;
	}
}



int main()
{
	const char *pInput_test = "E:\\C++\\VideoExaminationSystem\\TEST.gxf";
    FILE *fout = fopen("E://test.yuv", "wb+");
    AVMediaPacket src_frame;
    InitMediaPacket(&src_frame, 1, ES_STREAM_VIDEO);

	//1.注册所有组件
	av_register_all();

	CheckEncoderDecoder();  // 查询此FFmpeg版本支持的编解码器

	//封装格式上下文，统领全局的结构体，保存了视频文件封装格式的相关信息
	AVFormatContext *pFormatCtx = avformat_alloc_context();  //主要存储视音频封装格式中包含的信息

	//2.打开输入视频文件，读取文件头信息
	if (avformat_open_input(&pFormatCtx, pInput_test, NULL, NULL) != 0)
	{
		printf("%s", "无法打开输入视频文件");
		return -1;
	}

	//3.获取视频文件信息
	if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
	{
		printf("%s", "无法获取视频文件信息");
		return -1;
	}

	//获取视频流的索引位置
	//遍历所有类型的流（音频流、视频流、字幕流），找到视频流
	int v_stream_idx = -1;  // 视频流
	
	//number of streams
	for (int i = 0; i < pFormatCtx->nb_streams; i++)
	{
		//流的类型
		if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
		{
			v_stream_idx = i;  // 记录视频流索引
            break;
		}
	}

	if (v_stream_idx == -1)
	{
		printf("%s", "找不到视频流\n");
		return -1;
	}

	//只有知道视频的编码方式，才能够根据编码方式去找到解码器
	//获取视频流中的编解码上下文
	AVCodecParameters *pCodecCtx = NULL;
	AVCodec *pCodec = NULL;
	AVCodecContext *pEnc = NULL;
	AVPacket *packet = NULL;
	AVFrame *pFrame = NULL;
	int src_buf_len = -1;

	pCodecCtx = pFormatCtx->streams[v_stream_idx]->codecpar;
	//4.根据编解码上下文中的编码id查找对应的解码
	pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
	if (pCodec == NULL)
	{
		printf("%s", "找不到视频解码器\n");
		return -1;
	}
	pEnc = avcodec_alloc_context3(pCodec);  // 初始化AVCodecContext,只是分配，还没打开

	//5.打开解码器
	if (avcodec_open2(pEnc, pCodec, NULL)<0)
	{
		printf("%s", "视频解码器无法打开\n");
		return -1;
	}

	//输出视频信息
	printf("视频的文件格式：%s\n", pFormatCtx->iformat->name);
	printf("视频时长：%d\n", (pFormatCtx->duration) / 1000000);
	printf("视频的宽高：%d,%d\n", pCodecCtx->width, pCodecCtx->height);
	printf("视频解码器的名称：%s\n", pCodec->name);
	std::cout << std::endl;

    src_frame.m_pixel_format = pCodecCtx->format;
	src_frame.m_width = pCodecCtx->width;
	src_frame.m_height = pCodecCtx->height;

	src_buf_len = av_image_get_buffer_size((AVPixelFormat)src_frame.m_pixel_format, src_frame.m_width, src_frame.m_height, 1);

    if (!MallocMediaFrameBuf(&src_frame, src_buf_len))
	{
		return NULL;
	}

	// 根据视频格式填充结构体相关字段
	av_image_fill_arrays(src_frame.m_data, src_frame.m_linesize, src_frame.m_pBuf, (AVPixelFormat)src_frame.m_pixel_format, src_frame.m_width, src_frame.m_height, 1);
		
		
	//准备读取
	//缓冲区，开辟空间,AVPacket用于存储一帧一帧的压缩数据（H264）
	packet = (AVPacket*)av_malloc(sizeof(AVPacket));

	//内存分配, AVFrame用于存储解码后的像素数据(YUV)
	pFrame = av_frame_alloc();

	int ret = -1;
	int video_frame_count = 0;

	//6.一帧一帧的读取压缩数据
	while (av_read_frame(pFormatCtx, packet) >= 0)
	{
		//只要视频压缩数据（根据流的索引位置判断）
		if (packet->stream_index == v_stream_idx  && (VIDEO_DETECT_ONLY == nDetectFlag || AV_DETECT == nDetectFlag))
		{
			//7.解码一帧视频压缩数据，得到视频像素数据
			ret = avcodec_send_packet(pEnc, packet);
			if (ret < 0)
			{
				printf("%s", "解码错误");
				return -1;
			}

			ret = avcodec_receive_frame(pEnc, pFrame);

			if (!ret)  // 0表示解码成功
			{
				for (int j = 0, cont = 0; j < TL_NUM_DATA_POINTERS && cont < src_buf_len; j++ )
				{
					int size = pFrame->linesize[j];

					if (size <= 0)
						break;
					int radio = pFrame->linesize[0] / size;
					for (int i = 0; i < pFrame->height / radio; i++)
					{
						memcpy(src_frame.m_pBuf + cont, pFrame->data[j] + i * size, pFrame->width / radio);
						cont += pFrame->width / radio;
					}

                    fwrite(src_frame->m_data[j], pFrame->width*pFrame->height/(radio*radio), 1, fout);
				}                

				video_frame_count++;
				printf("Decode Video Frame Number : %d\n", video_frame_count);
			}
		}

		//释放资源
		av_packet_unref(packet);
	}

	// Free the YUV frame
	av_frame_free(&pFrame);

	// Close the codecs
	avcodec_close(pEnc);

	// Close the video file
	avformat_close_input(&pFormatCtx);

	if(fout)
	{
		fclose(fout);
		fout = NULL;
	}

    ReleaseMediaPacket(&src_frame);

	std::cout << "I: Finish Video Scale." << std::endl;

	system("pause");
	return 0;
}

在调试过程中遇到了以下问题：

具体见上一篇博客：《FFmpeg中AVFrame中data与linesize的联系》FFmpeg中AVFrame中data与linesize的联系_lifei092的博客-CSDN博客_ffmpeg linesize