[alsa-devel] [PATCH 04/35] axfer: add support for a container of Microsoft/IBM RIFF/Wave format

Takashi Sakamoto o-takashi at sakamocchi.jp
Tue Nov 13 07:41:16 CET 2018


This commit adds support for data of Microsoft/IBM RIFF/Wave format. In
this data format, the values of fields can be encoded in either big or
little byte order, but the same byte order is used throughout a file. The
magic bytes at the beginning of the data indicate which byte order is used
for the file.

Signed-off-by: Takashi Sakamoto <o-takashi at sakamocchi.jp>
---
 axfer/Makefile.am           |   3 +-
 axfer/container-riff-wave.c | 575 ++++++++++++++++++++++++++++++++++++
 axfer/container.c           |   9 +-
 axfer/container.h           |   4 +
 4 files changed, 586 insertions(+), 5 deletions(-)
 create mode 100644 axfer/container-riff-wave.c

diff --git a/axfer/Makefile.am b/axfer/Makefile.am
index 3913d0c..092c966 100644
--- a/axfer/Makefile.am
+++ b/axfer/Makefile.am
@@ -24,4 +24,5 @@ axfer_SOURCES = \
 	main.c \
 	subcmd-list.c \
 	container.h \
-	container.c
+	container.c \
+	container-riff-wave.c
diff --git a/axfer/container-riff-wave.c b/axfer/container-riff-wave.c
new file mode 100644
index 00000000..db82493
--- /dev/null
+++ b/axfer/container-riff-wave.c
@@ -0,0 +1,575 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// container-riff-wave.c - a parser/builder for a container of RIFF/Wave File.
+//
+// Copyright (c) 2018 Takashi Sakamoto <o-takashi at sakamocchi.jp>
+//
+// Licensed under the terms of the GNU General Public License, version 2.
+
+#include "container.h"
+#include "misc.h"
+
+// Not portable to all of UNIX platforms.
+#include <endian.h>
+
+// References:
+// - 'Resource Interchange File Format (RIFF)' at msdn.microsoft.com
+// - 'Multiple channel audio data and WAVE files' at msdn.microsoft.com
+// - RFC 2361 'WAVE and AVI Codec Registries' at ietf.org
+// - 'mmreg.h' in Wine project
+// - 'mmreg.h' in ReactOS project
+
+#define RIFF_MAGIC		"RIF"	// A common part.
+
+#define RIFF_CHUNK_ID_LE	"RIFF"
+#define RIFF_CHUNK_ID_BE	"RIFX"
+#define RIFF_FORM_WAVE		"WAVE"
+#define FMT_SUBCHUNK_ID		"fmt "
+#define DATA_SUBCHUNK_ID	"data"
+
+// Format tags carried in the 'format' field of the 'fmt ' subchunk.
+// See 'WAVE and AVI Codec Registries (Historic Registry)' in 'iana.org'.
+// https://www.iana.org/assignments/wave-avi-codec-registry/
+enum wave_format {
+	WAVE_FORMAT_PCM			= 0x0001,
+	WAVE_FORMAT_ADPCM		= 0x0002,
+	WAVE_FORMAT_IEEE_FLOAT		= 0x0003,
+	WAVE_FORMAT_ALAW		= 0x0006,
+	WAVE_FORMAT_MULAW		= 0x0007,
+	WAVE_FORMAT_G723_ADPCM		= 0x0014,
+	// The others are not supported.
+};
+
+// One entry of the table which associates a WAVE format tag with an ALSA
+// sample format.
+struct format_map {
+	// Format tag as found in the 'fmt ' subchunk.
+	enum wave_format wformat;
+	// ALSA sample format paired with the tag.
+	snd_pcm_format_t format;
+};
+
+// Supported pairs of WAVE format tag and ALSA sample format.
+//
+// The parser scans this table from the top and takes the first entry which
+// matches, and it checks byte order of the entry only for big-endian files.
+// Therefore the order of entries matters: each little-endian entry is placed
+// before its big-endian counterpart. The builder looks an entry up by the
+// ALSA sample format.
+static const struct format_map format_maps[] = {
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_U8},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S16_LE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S16_BE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S24_LE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S24_BE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S32_LE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S32_BE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S24_3LE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S24_3BE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S20_3LE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S20_3BE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S18_3LE},
+	{WAVE_FORMAT_PCM,	SND_PCM_FORMAT_S18_3BE},
+	{WAVE_FORMAT_IEEE_FLOAT, SND_PCM_FORMAT_FLOAT_LE},
+	{WAVE_FORMAT_IEEE_FLOAT, SND_PCM_FORMAT_FLOAT_BE},
+	{WAVE_FORMAT_IEEE_FLOAT, SND_PCM_FORMAT_FLOAT64_LE},
+	{WAVE_FORMAT_IEEE_FLOAT, SND_PCM_FORMAT_FLOAT64_BE},
+	{WAVE_FORMAT_ALAW,	SND_PCM_FORMAT_A_LAW},
+	{WAVE_FORMAT_MULAW,	SND_PCM_FORMAT_MU_LAW},
+	// The sample formats below are not currently supported, due to the
+	// width of their samples.
+	//  - WAVE_FORMAT_ADPCM
+	//  - WAVE_FORMAT_G723_ADPCM
+};
+
+// Header of the top-level RIFF chunk as laid out in the file.
+struct riff_chunk {
+	// Chunk identifier; 'RIFF' or 'RIFX', which also selects byte order.
+	uint8_t id[4];
+	// Size field, encoded in the byte order selected by the identifier.
+	uint32_t size;
+
+	// Start of the chunk data which follows the header.
+	uint8_t data[0];
+};
+
+// Head of the data of the RIFF chunk.
+struct riff_chunk_data {
+	// Form type; this file accepts and emits 'WAVE' only.
+	uint8_t id[4];
+
+	// Start of the sequence of subchunks.
+	uint8_t subchunks[0];
+};
+
+// Generic header shared by every subchunk in the chunk data.
+struct riff_subchunk {
+	// Subchunk identifier, e.g. 'fmt ' or 'data'.
+	uint8_t id[4];
+	// Size of the subchunk data, in the byte order of the file.
+	uint32_t size;
+
+	// Start of the subchunk data.
+	uint8_t data[0];
+};
+
+// Layout of the 'fmt ' subchunk, including its generic header. All of the
+// multi-byte fields are encoded in the byte order of the file.
+struct wave_fmt_subchunk {
+	uint8_t id[4];
+	uint32_t size;
+
+	// Format tag; one of enum wave_format.
+	uint16_t format;
+	// The number of samples in one frame.
+	uint16_t samples_per_frame;
+	// The number of frames per second.
+	uint32_t frames_per_second;
+	// The parser requires this to equal
+	// bytes_per_frame * frames_per_second.
+	uint32_t average_bytes_per_second;
+	uint16_t bytes_per_frame;
+	// The parser compares this against the width of the ALSA sample format.
+	uint16_t bits_per_sample;
+	// Optional extension; neither parsed nor generated by this file.
+	uint8_t extension[0];
+};
+
+// Layout of the header of the 'data' subchunk.
+struct wave_data_subchunk {
+	uint8_t id[4];
+	// Size of the frame data in bytes, in the byte order of the file.
+	uint32_t size;
+
+	// Start of the frame data.
+	uint8_t frames[0];
+};
+
+// Private data of the parser; filled while the headers of the file are
+// parsed. All values are kept in host byte order.
+struct parser_state {
+	// True when the file starts with 'RIFX' (big-endian fields).
+	bool be;
+	// Values decoded from the 'fmt ' subchunk.
+	enum wave_format format;
+	unsigned int samples_per_frame;
+	unsigned int frames_per_second;
+	unsigned int average_bytes_per_second;
+	unsigned int bytes_per_frame;
+	// NOTE(review): never assigned by the parser code in this file.
+	unsigned int bytes_per_sample;
+	// Decoded from the 'bits_per_sample' field of the 'fmt ' subchunk.
+	unsigned int avail_bits_in_sample;
+	// Size of the 'data' subchunk in bytes.
+	unsigned int byte_count;
+};
+
+// Decide the byte order of the file from the chunk identifier and decode the
+// size of the chunk.
+//
+// state:	receives the detected byte order in its 'be' member.
+// chunk:	the RIFF chunk header as read from the file.
+// byte_count:	receives the decoded size field of the chunk.
+//
+// Returns 0 on success, or -EINVAL when the identifier is neither 'RIFX' nor
+// 'RIFF'; in the latter case neither state nor *byte_count is modified.
+static int parse_riff_chunk_header(struct parser_state *state,
+				   struct riff_chunk *chunk,
+				   uint64_t *byte_count)
+{
+	const bool is_be =
+		memcmp(chunk->id, RIFF_CHUNK_ID_BE, sizeof(chunk->id)) == 0;
+	const bool is_le = !is_be &&
+		memcmp(chunk->id, RIFF_CHUNK_ID_LE, sizeof(chunk->id)) == 0;
+
+	if (!is_be && !is_le)
+		return -EINVAL;
+
+	state->be = is_be;
+	*byte_count = is_be ? be32toh(chunk->size) : le32toh(chunk->size);
+
+	return 0;
+}
+
+// Read and validate the header of the RIFF chunk and the form type.
+//
+// The leading bytes of the header are copied from cntr->magic because they
+// were already consumed to detect the type of container; the rest is read
+// from the container. On success the size recorded in the chunk header is
+// stored to *byte_count.
+//
+// Returns 0 on success and also when EOF is detected (cntr->eof), -EINVAL
+// when the chunk identifier or the form type is unexpected, or the negative
+// value returned by container_recursive_read().
+static int parse_riff_chunk(struct container_context *cntr,
+			    uint64_t *byte_count)
+{
+	union {
+		struct riff_chunk chunk;
+		struct riff_chunk_data chunk_data;
+	} buf = {0};
+	struct parser_state *state = cntr->private_data;
+	char *rest_of_header = (char *)&buf.chunk + sizeof(cntr->magic);
+	int err;
+
+	// Chunk header. The magic bytes were already read to detect the
+	// container type.
+	memcpy(buf.chunk.id, cntr->magic, sizeof(cntr->magic));
+	err = container_recursive_read(cntr, rest_of_header,
+				       sizeof(buf.chunk) - sizeof(cntr->magic));
+	if (err < 0)
+		return err;
+	if (cntr->eof)
+		return 0;
+
+	err = parse_riff_chunk_header(state, &buf.chunk, byte_count);
+	if (err < 0)
+		return err;
+
+	// Chunk data header which carries the form type.
+	err = container_recursive_read(cntr, &buf, sizeof(buf.chunk_data));
+	if (err < 0)
+		return err;
+	if (cntr->eof)
+		return 0;
+
+	if (memcmp(buf.chunk_data.id, RIFF_FORM_WAVE,
+		   sizeof(buf.chunk_data.id)) != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+// Decode the fields of the 'fmt ' subchunk into the parser state, according
+// to the byte order detected before (state->be).
+//
+// Returns 0 on success, or -EINVAL when the average number of bytes per
+// second is not equal to bytes_per_frame * frames_per_second.
+static int parse_wave_fmt_subchunk(struct parser_state *state,
+				   struct wave_fmt_subchunk *subchunk)
+{
+	const bool be = state->be;
+	unsigned int expected_bytes_per_second;
+
+	state->format = be ? be16toh(subchunk->format) :
+			     le16toh(subchunk->format);
+	state->samples_per_frame = be ? be16toh(subchunk->samples_per_frame) :
+					le16toh(subchunk->samples_per_frame);
+	state->frames_per_second = be ? be32toh(subchunk->frames_per_second) :
+					le32toh(subchunk->frames_per_second);
+	state->average_bytes_per_second =
+			be ? be32toh(subchunk->average_bytes_per_second) :
+			     le32toh(subchunk->average_bytes_per_second);
+	state->bytes_per_frame = be ? be16toh(subchunk->bytes_per_frame) :
+				      le16toh(subchunk->bytes_per_frame);
+	state->avail_bits_in_sample = be ? be16toh(subchunk->bits_per_sample) :
+					   le16toh(subchunk->bits_per_sample);
+
+	// The redundant fields should be consistent with each other.
+	expected_bytes_per_second =
+			state->bytes_per_frame * state->frames_per_second;
+	if (state->average_bytes_per_second != expected_bytes_per_second)
+		return -EINVAL;
+
+	return 0;
+}
+
+// Decode the size field of the 'data' subchunk into state->byte_count,
+// according to the byte order detected before (state->be).
+//
+// Always returns 0.
+static int parse_wave_data_subchunk(struct parser_state *state,
+				    struct wave_data_subchunk *subchunk)
+{
+	state->byte_count = state->be ? be32toh(subchunk->size) :
+					le32toh(subchunk->size);
+
+	return 0;
+}
+
+// Walk through subchunks in the chunk data until the 'data' subchunk.
+//
+// The content of the 'fmt ' subchunk is decoded into the parser state,
+// unknown subchunks are skipped, and the loop finishes as soon as the header
+// of the 'data' subchunk is parsed, so that the next read from the container
+// starts at frame data.
+//
+// Returns 0 on success and also when EOF is detected (cntr->eof), -EINVAL
+// when a known subchunk is shorter than its mandatory part or has
+// inconsistent content, or the negative value returned by
+// container_recursive_read().
+static int parse_wave_subchunk(struct container_context *cntr)
+{
+	union {
+		struct riff_subchunk subchunk;
+		struct wave_fmt_subchunk fmt_subchunk;
+		struct wave_data_subchunk data_subchunk;
+	} buf = {0};
+	enum {
+		SUBCHUNK_TYPE_UNKNOWN = -1,
+		SUBCHUNK_TYPE_FMT,
+		SUBCHUNK_TYPE_DATA,
+	} subchunk_type;
+	struct parser_state *state = cntr->private_data;
+	unsigned int required_size;
+	unsigned int subchunk_data_size;
+	bool padded;
+	int err;
+
+	while (1) {
+		err = container_recursive_read(cntr, &buf,
+					       sizeof(buf.subchunk));
+		if (err < 0)
+			return err;
+		if (cntr->eof)
+			return 0;
+
+		// Calculate the size of subchunk data.
+		if (state->be)
+			subchunk_data_size = be32toh(buf.subchunk.size);
+		else
+			subchunk_data_size = le32toh(buf.subchunk.size);
+
+		// In RIFF, chunk data of odd size is followed by one pad byte
+		// which is not counted in the size field. Remember it here
+		// since the size is decremented below.
+		padded = (subchunk_data_size & 1) != 0;
+
+		// Detect type of subchunk.
+		if (!memcmp(buf.subchunk.id, FMT_SUBCHUNK_ID,
+			    sizeof(buf.subchunk.id))) {
+			subchunk_type = SUBCHUNK_TYPE_FMT;
+		} else if (!memcmp(buf.subchunk.id, DATA_SUBCHUNK_ID,
+				   sizeof(buf.subchunk.id))) {
+			subchunk_type = SUBCHUNK_TYPE_DATA;
+		} else {
+			subchunk_type = SUBCHUNK_TYPE_UNKNOWN;
+		}
+
+		if (subchunk_type != SUBCHUNK_TYPE_UNKNOWN) {
+			// The mandatory part of subchunk data is what the
+			// structure has after the generic subchunk header.
+			if (subchunk_type == SUBCHUNK_TYPE_FMT) {
+				required_size =
+					sizeof(struct wave_fmt_subchunk) -
+					sizeof(struct riff_subchunk);
+			} else {
+				required_size =
+					sizeof(struct wave_data_subchunk) -
+					sizeof(struct riff_subchunk);
+			}
+
+			if (subchunk_data_size < required_size)
+				return -EINVAL;
+
+			err = container_recursive_read(cntr, &buf.subchunk.data,
+						       required_size);
+			if (err < 0)
+				return err;
+			if (cntr->eof)
+				return 0;
+			subchunk_data_size -= required_size;
+
+			// Parse data of this subchunk.
+			if (subchunk_type == SUBCHUNK_TYPE_FMT) {
+				err = parse_wave_fmt_subchunk(state,
+							&buf.fmt_subchunk);
+			} else {
+				err = parse_wave_data_subchunk(state,
+							 &buf.data_subchunk);
+			}
+			if (err < 0)
+				return err;
+
+			// Found frame data. Leave it unread for the caller.
+			if (subchunk_type == SUBCHUNK_TYPE_DATA)
+				break;
+		}
+
+		// Go to next subchunk; the rest of this subchunk is discarded
+		// through the buffer piece by piece.
+		while (subchunk_data_size > 0) {
+			unsigned int consume;
+
+			if (subchunk_data_size > sizeof(buf))
+				consume = sizeof(buf);
+			else
+				consume = subchunk_data_size;
+
+			err = container_recursive_read(cntr, &buf, consume);
+			if (err < 0)
+				return err;
+			if (cntr->eof)
+				return 0;
+			subchunk_data_size -= consume;
+		}
+
+		// Discard the pad byte so that the next header is aligned.
+		if (padded) {
+			err = container_recursive_read(cntr, &buf, 1);
+			if (err < 0)
+				return err;
+			if (cntr->eof)
+				return 0;
+		}
+	}
+
+	return 0;
+}
+
+// Parse the headers of a RIFF/Wave file: the RIFF chunk header with its form
+// type first, then the subchunks up to the beginning of frame data.
+//
+// Returns 0 on success, or the negative value from either of the steps.
+static int parse_riff_wave_format(struct container_context *cntr)
+{
+	// The size of the RIFF chunk is decoded but not used here.
+	uint64_t byte_count;
+	int err = parse_riff_chunk(cntr, &byte_count);
+
+	if (err >= 0)
+		err = parse_wave_subchunk(cntr);
+
+	return err < 0 ? err : 0;
+}
+
+// Parse the headers of the file and report the parameters of its frame data.
+//
+// format:		receives the ALSA sample format.
+// samples_per_frame:	receives the number of samples in one frame.
+// frames_per_second:	receives the sampling rate.
+// byte_count:		receives the size of the 'data' subchunk in bytes.
+//
+// Returns 0 on success, -EINVAL when the headers are invalid, incomplete or
+// describe a sample format which is not in format_maps, or the negative
+// value from parsing the headers.
+static int wave_parser_pre_process(struct container_context *cntr,
+				   snd_pcm_format_t *format,
+				   unsigned int *samples_per_frame,
+				   unsigned int *frames_per_second,
+				   uint64_t *byte_count)
+{
+	struct parser_state *state = cntr->private_data;
+	uint64_t phys_width;
+	const struct format_map *map;
+	unsigned int i;
+	int err;
+
+	err = parse_riff_wave_format(cntr);
+	if (err < 0)
+		return err;
+
+	// These values come from the file and are used as divisors below. They
+	// are still zero when the file has no 'fmt ' subchunk before EOF.
+	if (state->samples_per_frame == 0 || state->frames_per_second == 0)
+		return -EINVAL;
+
+	// The physical width of a sample in bits. Calculate in 64 bit so that
+	// the multiplication never wraps around.
+	phys_width = (uint64_t)8 * state->average_bytes_per_second /
+		     state->samples_per_frame / state->frames_per_second;
+
+	for (i = 0; i < ARRAY_SIZE(format_maps); ++i) {
+		int width;
+
+		map = &format_maps[i];
+		if (state->format != map->wformat)
+			continue;
+		if (state->avail_bits_in_sample !=
+					snd_pcm_format_width(map->format))
+			continue;
+
+		width = snd_pcm_format_physical_width(map->format);
+		if (width < 0 || phys_width != (uint64_t)width)
+			continue;
+
+		// For a big-endian file, only big-endian formats are valid.
+		// For a little-endian file, the first match is taken.
+		if (state->be && snd_pcm_format_big_endian(map->format) != 1)
+			continue;
+
+		break;
+	}
+	if (i == ARRAY_SIZE(format_maps))
+		return -EINVAL;
+
+	// Set parameters.
+	*format = format_maps[i].format;
+	*samples_per_frame = state->samples_per_frame;
+	*frames_per_second = state->frames_per_second;
+	*byte_count = state->byte_count;
+
+	return 0;
+}
+
+// Private data of the builder; filled from the parameters given to
+// wave_builder_pre_process() and used to generate the headers.
+struct builder_state {
+	// True when the sample format is big-endian; 'RIFX' is written then.
+	bool be;
+	// Format tag written to the 'fmt ' subchunk.
+	enum wave_format format;
+	// Width of the sample format in bits.
+	unsigned int avail_bits_in_sample;
+	// Physical width of the sample format in bytes.
+	unsigned int bytes_per_sample;
+	unsigned int samples_per_frame;
+	unsigned int frames_per_second;
+};
+
+// Fill the header of the RIFF chunk.
+//
+// chunk:	the header to fill.
+// byte_count:	added to the sizes of the chunk data header, the 'fmt '
+//		subchunk and the 'data' subchunk header to form the size
+//		field of the chunk.
+// be:		when true, 'RIFX' and big-endian encoding are used; otherwise
+//		'RIFF' and little-endian encoding.
+//
+// The size field has 32 bits; a larger sum is truncated by the conversion.
+static void build_riff_chunk_header(struct riff_chunk *chunk,
+				    uint64_t byte_count, bool be)
+{
+	const char *const id = be ? RIFF_CHUNK_ID_BE : RIFF_CHUNK_ID_LE;
+	uint64_t data_size = byte_count;
+
+	data_size += sizeof(struct riff_chunk_data);
+	data_size += sizeof(struct wave_fmt_subchunk);
+	data_size += sizeof(struct wave_data_subchunk);
+
+	memcpy(chunk->id, id, sizeof(chunk->id));
+	chunk->size = be ? htobe32(data_size) : htole32(data_size);
+}
+
+// Fill the generic header of a subchunk.
+//
+// subchunk:	the header to fill.
+// form:	the identifier of the subchunk; the leading sizeof(subchunk->id)
+//		bytes are copied.
+// size:	the size of the subchunk data; stored as a 32 bit value.
+// be:		selects big-endian (true) or little-endian (false) encoding.
+static void build_subchunk_header(struct riff_subchunk *subchunk,
+				  const char *const form, uint64_t size,
+				  bool be)
+{
+	memcpy(subchunk->id, form, sizeof(subchunk->id));
+	subchunk->size = be ? htobe32(size) : htole32(size);
+}
+
+// Fill the whole 'fmt ' subchunk, without any extension, from the state of
+// the builder. Every field is encoded in the byte order selected by
+// state->be.
+static void build_wave_format_subchunk(struct wave_fmt_subchunk *subchunk,
+				       struct builder_state *state)
+{
+	const bool be = state->be;
+	const unsigned int frame_bytes =
+			state->bytes_per_sample * state->samples_per_frame;
+	const unsigned int bytes_per_second =
+			frame_bytes * state->frames_per_second;
+	// No extensions; the subchunk data is just the fixed fields.
+	const uint64_t size = sizeof(struct wave_fmt_subchunk) -
+			      sizeof(struct riff_subchunk);
+
+	build_subchunk_header((struct riff_subchunk *)subchunk, FMT_SUBCHUNK_ID,
+			      size, be);
+
+	subchunk->format = be ? htobe16(state->format) :
+				htole16(state->format);
+	subchunk->samples_per_frame = be ? htobe16(state->samples_per_frame) :
+					   htole16(state->samples_per_frame);
+	subchunk->frames_per_second = be ? htobe32(state->frames_per_second) :
+					   htole32(state->frames_per_second);
+	subchunk->average_bytes_per_second = be ? htobe32(bytes_per_second) :
+						  htole32(bytes_per_second);
+	subchunk->bytes_per_frame = be ? htobe16(frame_bytes) :
+					 htole16(frame_bytes);
+	subchunk->bits_per_sample = be ? htobe16(state->avail_bits_in_sample) :
+					 htole16(state->avail_bits_in_sample);
+}
+
+// Fill the header of the 'data' subchunk; byte_count is recorded as the size
+// of the frame data, encoded in the byte order selected by 'be'.
+static void build_wave_data_subchunk(struct wave_data_subchunk *subchunk,
+				     uint64_t byte_count, bool be)
+{
+	struct riff_subchunk *header = (struct riff_subchunk *)subchunk;
+
+	build_subchunk_header(header, DATA_SUBCHUNK_ID, byte_count, be);
+}
+
+// Write all of the headers which precede frame data: the RIFF chunk header,
+// the form type, the 'fmt ' subchunk and the header of the 'data' subchunk.
+//
+// byte_count:	the number of bytes of frame data to record in the headers.
+//
+// Returns the negative value from container_recursive_write() on failure,
+// otherwise the value returned by the last write.
+static int write_riff_chunk_for_wave(struct container_context *cntr,
+				     uint64_t byte_count)
+{
+	struct builder_state *state = cntr->private_data;
+	union {
+		struct riff_chunk chunk;
+		struct riff_chunk_data chunk_data;
+		struct wave_fmt_subchunk fmt_subchunk;
+		struct wave_data_subchunk data_subchunk;
+	} buf = {0};
+	int err;
+
+	// The size fields have 32 bits. Clamp the amount of frame data so that
+	// neither of them is truncated silently.
+	// NOTE(review): assumes cntr->max_size holds the maximum size of frame
+	// data, as container_builder_riff_wave.max_size suggests; confirm in
+	// container.c.
+	if (byte_count > cntr->max_size)
+		byte_count = cntr->max_size;
+
+	// Chunk header. build_riff_chunk_header() itself adds the sizes of the
+	// headers inside the chunk, thus only the size of frame data is passed.
+	build_riff_chunk_header(&buf.chunk, byte_count, state->be);
+	err = container_recursive_write(cntr, &buf, sizeof(buf.chunk));
+	if (err < 0)
+		return err;
+
+	// Chunk data header.
+	memcpy(buf.chunk_data.id, RIFF_FORM_WAVE, sizeof(buf.chunk_data.id));
+	err = container_recursive_write(cntr, &buf, sizeof(buf.chunk_data));
+	if (err < 0)
+		return err;
+
+	// A subchunk in the chunk data for WAVE format.
+	build_wave_format_subchunk(&buf.fmt_subchunk, state);
+	err = container_recursive_write(cntr, &buf, sizeof(buf.fmt_subchunk));
+	if (err < 0)
+		return err;
+
+	// A subchunk in the chunk data for WAVE data.
+	build_wave_data_subchunk(&buf.data_subchunk, byte_count, state->be);
+	return container_recursive_write(cntr, &buf, sizeof(buf.data_subchunk));
+}
+
+// Validate the given parameters, keep them in the state of the builder and
+// write the headers of the file.
+//
+// format:		the ALSA sample format; should be found in format_maps.
+// samples_per_frame:	the number of samples in one frame.
+// frames_per_second:	the sampling rate.
+// byte_count:		the number of bytes of frame data to record in the
+//			headers.
+//
+// Returns -EINVAL when the sample format is not supported, otherwise the
+// value returned by write_riff_chunk_for_wave().
+static int wave_builder_pre_process(struct container_context *cntr,
+				    snd_pcm_format_t *format,
+				    unsigned int *samples_per_frame,
+				    unsigned int *frames_per_second,
+				    uint64_t *byte_count)
+{
+	struct builder_state *state = cntr->private_data;
+	const struct format_map *entry = NULL;
+	int idx;
+
+	// Validate parameters.
+	for (idx = 0; idx < ARRAY_SIZE(format_maps); ++idx) {
+		if (format_maps[idx].format == *format) {
+			entry = &format_maps[idx];
+			break;
+		}
+	}
+	if (entry == NULL)
+		return -EINVAL;
+
+	state->format = entry->wformat;
+	state->avail_bits_in_sample = snd_pcm_format_width(*format);
+	state->bytes_per_sample = snd_pcm_format_physical_width(*format) / 8;
+	state->samples_per_frame = *samples_per_frame;
+	state->frames_per_second = *frames_per_second;
+
+	// Byte order of the file follows the one of the sample format.
+	state->be = (snd_pcm_format_big_endian(*format) == 1);
+
+	return write_riff_chunk_for_wave(cntr, *byte_count);
+}
+
+// Rewrite the headers at the beginning of the file so that they record the
+// number of bytes which were handled actually.
+//
+// Returns the negative value from container_seek_offset() on failure,
+// otherwise the value returned by write_riff_chunk_for_wave().
+static int wave_builder_post_process(struct container_context *cntr,
+				     uint64_t handled_byte_count)
+{
+	int err = container_seek_offset(cntr, 0);
+
+	if (err >= 0)
+		err = write_riff_chunk_for_wave(cntr, handled_byte_count);
+
+	return err;
+}
+
+// Description of the parser for RIFF/Wave format, referred to from
+// container.c.
+const struct container_parser container_parser_riff_wave = {
+	.format = CONTAINER_FORMAT_RIFF_WAVE,
+	// The part common to 'RIFF' and 'RIFX'.
+	.magic =  RIFF_MAGIC,
+	// The size field of the RIFF chunk has 32 bits and it counts the
+	// headers below as well.
+	.max_size = UINT32_MAX -
+		    sizeof(struct riff_chunk_data) -
+		    sizeof(struct wave_fmt_subchunk) -
+		    sizeof(struct wave_data_subchunk),
+	.ops = {
+		.pre_process	= wave_parser_pre_process,
+	},
+	.private_size = sizeof(struct parser_state),
+};
+
+// Description of the builder for RIFF/Wave format, referred to from
+// container.c.
+const struct container_builder container_builder_riff_wave = {
+	.format = CONTAINER_FORMAT_RIFF_WAVE,
+	// The size field of the RIFF chunk has 32 bits and it counts the
+	// headers below as well.
+	.max_size = UINT32_MAX -
+		    sizeof(struct riff_chunk_data) -
+		    sizeof(struct wave_fmt_subchunk) -
+		    sizeof(struct wave_data_subchunk),
+	.ops = {
+		// Writes the headers before any frame data is handled.
+		.pre_process	= wave_builder_pre_process,
+		// Rewrites the headers with the count of handled bytes.
+		.post_process	= wave_builder_post_process,
+	},
+	.private_size = sizeof(struct builder_state),
+};
diff --git a/axfer/container.c b/axfer/container.c
index 77bbd6c..04042da 100644
--- a/axfer/container.c
+++ b/axfer/container.c
@@ -20,13 +20,14 @@ static const char *const cntr_type_labels[] = {
 };
 
 static const char *const cntr_format_labels[] = {
-	[CONTAINER_FORMAT_COUNT] = "",
+	[CONTAINER_FORMAT_RIFF_WAVE] = "riff/wave",
 };
 
 static const char *const suffixes[] = {
-	[CONTAINER_FORMAT_COUNT] = "",
+	[CONTAINER_FORMAT_RIFF_WAVE] = ".wav",
 };
 
+
 const char *const container_suffix_from_format(enum container_format format)
 {
 	return suffixes[format];
@@ -140,7 +141,7 @@ int container_parser_init(struct container_context *cntr,
 			  const char *const path, unsigned int verbose)
 {
 	const struct container_parser *parsers[] = {
-		NULL,
+		[CONTAINER_FORMAT_RIFF_WAVE] = &container_parser_riff_wave,
 	};
 	const struct container_parser *parser;
 	unsigned int size;
@@ -214,7 +215,7 @@ int container_builder_init(struct container_context *cntr,
 			   unsigned int verbose)
 {
 	const struct container_builder *builders[] = {
-		NULL,
+		[CONTAINER_FORMAT_RIFF_WAVE] = &container_builder_riff_wave,
 	};
 	const struct container_builder *builder;
 	int err;
diff --git a/axfer/container.h b/axfer/container.h
index 4a94d7f..5808933 100644
--- a/axfer/container.h
+++ b/axfer/container.h
@@ -25,6 +25,7 @@ enum container_type {
 };
 
 enum container_format {
+	CONTAINER_FORMAT_RIFF_WAVE = 0,
 	CONTAINER_FORMAT_COUNT,
 };
 
@@ -107,4 +108,7 @@ int container_recursive_write(struct container_context *cntr, void *buf,
 			      unsigned int byte_count);
 int container_seek_offset(struct container_context *cntr, off64_t offset);
 
+extern const struct container_parser container_parser_riff_wave;
+extern const struct container_builder container_builder_riff_wave;
+
 #endif
-- 
2.19.1



More information about the Alsa-devel mailing list