pak

pak.git
git clone git://git.lenczewski.org/pak.git
Log | Files | Refs | README | LICENSE

pakfmt.h (8596B)


      1 #ifndef PAKFMT_H
      2 #define PAKFMT_H
      3 
      4 #include <stddef.h>
      5 #include <stdint.h>
      6 
      7 #include "list.h"
      8 
      9 /* Our PAK0 format:
     10  * ---
     11  *  NOTE: the format is little-endian, so will not work naively across
     12  *        platforms with varying endianness; natively generated pak files
     13  *        will always be readable, but a pak file generated by a naive
     14  *        big-endian host will not be readable by a naive little-endian host.
     15  *
     16  *  NOTE: the on-disk format follows C member alignment rules, to allow reading
     17  *        and writing the format easily (via memcpy() or similar). this may
     18  *        leave small "reserved" fields in certain structures to ensure alignment,
     19  *        and the specific value of these fields MUST be ignored by implementations.
     20  *
     21  *  NOTE: the format limits the index segment to 4 GiB in size (by using a u32
     22  *        field for storing the uncompressed index size). it does not however
     23  *        limit the size of the data segment (by using a u64 field for storing
     24  *        the uncompressed data size). this is deemed acceptable, as each
     25  *        archive entry should contain no more than a small archive header (80
     26  *        bytes), and a variable, user-defined number of tags (again of
     27  *        variable, user-defined sizes).
     28  *
     29  *        if you have too many archives for a single pak file, it likely
     30  *        contains hundreds of thousands of archives (each with tens of
     31  *        thousands of tags), at which point the suggestion is to split the
     32  *        pak; parsing such a large index is guaranteed to be slow, so by
     33  *        necessity you will want to split it on a per-stage / per-level basis.
     34  *
     35  *  NOTE: the index can either precede the data, or follow it. preceding the
     36  *        data makes the format simpler to understand and parse (you can naively
     37  *        load the first N bytes instead of seeking to the end and rewinding),
     38  *        but following the data means that the contents can be easily modified
     39  *        (by stripping the index, appending to the data segment, and rewriting
     40  *        the index). this is a quality-of-implementation issue, and not
     41  *        specified by the format.
     42  *
     43  *  Compression type enum
     44  *  +---------+
     45  *  | None: 0 |
     46  *  +---------+
     47  *  | LZ4:  1 |
     48  *  +---------+
     49  *
     50  *  Header Format
     51  *  +--------------------------------+---------------------------------------+
     52  *  | Magic Value: char[4], "PAK0"   |                                       |
     53  *  +--------------------------------+---------------------------------------+
     54  *  | Header Length: u32             | Byte length of header                 |
     55  *  +--------------------------------+---------------------------------------+
     56  *  | Index File Offset: u64         |                                       |
     57  *  +--------------------------------+---------------------------------------+
     58  *  | Index Compression: u32         |                                       |
     59  *  +--------------------------------+---------------------------------------+
     60  *  | Index Compressed Length: u32   |                                       |
     61  *  +--------------------------------+---------------------------------------+
     62  *  | Index Uncompressed Length: u32 |                                       |
     63  *  +--------------------------------+---------------------------------------+
     64  *  | Index Uncompressed CRC32c: u32 |                                       |
     65  *  +--------------------------------+---------------------------------------+
     66  *  | Data File Offset: u64          |                                       |
     67  *  +--------------------------------+---------------------------------------+
     68  *  | Data Compressed Length: u64    |                                       |
     69  *  +--------------------------------+---------------------------------------+
     70  *
     71  *  Index Format
     72  *  +--------------------------+---------------------------------------------+
     73  *  | Index Entries: Archive[] | Array of archives contained in pak file     |
     74  *  +--------------------------+---------------------------------------------+
     75  *
     76  *  Archive Format
     77  *  +-----------------------------------+------------------------------------+
     78  *  | Archive Name: char[32]            | Short, human readable name         |
     79  *  +-----------------------------------+------------------------------------+
     80  *  | Archive Type: u32                 | User-defined archive type ID       |
     81  *  +-----------------------------------+------------------------------------+
     82  *  | Archive Length: u32               | Length of archive header + tags    |
     83  *  +-----------------------------------+------------------------------------+
     84  *  | Data Offset: u64                  | Byte offset of archive data from   |
     85  *  |                                   | start of data segment              |
     86  *  +-----------------------------------+------------------------------------+
     87  *  | Data Compressed Length: u64       |                                    |
     88  *  +-----------------------------------+------------------------------------+
     89  *  | Data Uncompressed Length: u64     |                                    |
     90  *  +-----------------------------------+------------------------------------+
     91  *  | Data Uncompressed CRC32c: u32     |                                    |
     92  *  +-----------------------------------+------------------------------------+
     93  *  | Data Compression: u32             |                                    |
     94  *  +-----------------------------------+------------------------------------+
     95  *  | Reserved: u32                     |                                    |
     96  *  +-----------------------------------+------------------------------------+
     97  *  | Tag Count: u32                    | Number of metadata tags            |
     98  *  +-----------------------------------+------------------------------------+
     99  *  | Tags: Tag[]                       |                                    |
    100  *  +-----------------------------------+------------------------------------+
    101  *
    102  *  Tag Format
    103  *  +-------------------------------------+----------------------------------+
    104  *  | Tag Name: char[32]                  | Short, human readable name       |
    105  *  +-------------------------------------+----------------------------------+
    106  *  | Tag Type: u16                       | User-defined tag type ID         |
    107  *  +-------------------------------------+----------------------------------+
    108  *  | Tag Length: u16                     | Byte length of tag data          |
    109  *  +-------------------------------------+----------------------------------+
    110  *  | Tag Data: u8[]                      |                                  |
    111  *  +-------------------------------------+----------------------------------+
    112  *
    113  *  Data Format
    114  *  +------------+-----------------------------------------------------------+
    115  *  | Data: u8[] | Raw (compressed) contents of an archive                   |
    116  *  +------------+-----------------------------------------------------------+
    117  *
    118  */
    119 
    120 enum pak_compression_type {
    121 	PAK_COMPRESSION_NONE,
    122 	PAK_COMPRESSION_LZ4,
    123 };
    124 
    125 #define PAK_MAGIC "PAK0"
    126 #define PAK_MAGIC_LEN 4
    127 
    128 #define PAK_HEADER_PRELUDE_SIZE \
    129 	(sizeof(char[PAK_MAGIC_LEN]) + sizeof(uint32_t))
    130 
    131 #define PAK_HEADER_SIZE sizeof(struct pakfmt_header)
    132 
    133 struct pakfmt_header {
    134 	char magic[PAK_MAGIC_LEN];
    135 	uint32_t header_length;
    136 
    137 	uint64_t index_file_offset;
    138 	uint32_t index_compression;
    139 	uint32_t index_compressed_length;
    140 	uint32_t index_uncompressed_length;
    141 	uint32_t index_uncompressed_crc32c;
    142 
    143 	uint64_t data_file_offset;
    144 	uint64_t data_compressed_length;
    145 };
    146 
    147 #define PAK_ARCHIVE_NAME_LEN 32
    148 #define PAK_ARCHIVE_PRELUDE_SIZE \
    149 	(sizeof(char[PAK_ARCHIVE_NAME_LEN]) + sizeof(uint32_t) + sizeof(uint32_t))
    150 
    151 #define PAK_ARCHIVE_SIZE sizeof(struct pakfmt_archive)
    152 
    153 struct pakfmt_archive {
    154 	char name[PAK_ARCHIVE_NAME_LEN];
    155 	uint32_t type;
    156 	uint32_t length;
    157 
    158 	uint64_t data_offset;
    159 	uint64_t data_compressed_length;
    160 	uint64_t data_uncompressed_length;
    161 	uint32_t data_uncompressed_crc32c;
    162 	uint32_t data_compression;
    163 
    164 	uint32_t reserved;
    165 
    166 	uint32_t tag_count;
    167 };
    168 
    169 #define PAK_TAG_NAME_LEN 32
    170 #define PAK_TAG_PRELUDE_SIZE \
    171 	(sizeof(char[PAK_TAG_NAME_LEN]) + sizeof(uint16_t) + sizeof(uint16_t))
    172 
    173 #define PAK_TAG_SIZE sizeof(struct pakfmt_tag)
    174 
    175 struct pakfmt_tag {
    176 	char name[PAK_TAG_NAME_LEN];
    177 	uint16_t type;
    178 	uint16_t length;
    179 };
    180 
    181 struct pak_archive {
    182 	struct pakfmt_archive header;
    183 
    184 	struct list_node tags;
    185 
    186 	uint8_t *data;
    187 
    188 	struct list_node list_node;
    189 };
    190 
    191 struct pak_tag {
    192 	struct pakfmt_tag header;
    193 
    194 	uint8_t *data;
    195 
    196 	struct list_node list_node;
    197 };
    198 
    199 struct pak {
    200 	struct pakfmt_header header;
    201 
    202 	struct list_node index;
    203 };
    204 
    205 #endif /* PAKFMT_H */