pakfmt.h (8596B)
1 #ifndef PAKFMT_H 2 #define PAKFMT_H 3 4 #include <stddef.h> 5 #include <stdint.h> 6 7 #include "list.h" 8 9 /* Our PAK0 format: 10 * --- 11 * NOTE: the format is little-endian, so will not work naively across 12 * platforms with varying endianness; natively generated pak files 13 * will always be readable, but a pak file generated by a naive 14 * big-endian host will not be readable by a naive little-endian host. 15 * 16 * NOTE: the on-disk format follows C member alignment rules, to allow reading 17 * and writing the format easily (via memcpy() or similar). this may 18 * leave small "reserved" fields in certain structures to ensure alignment, 19 * and the specific value of these fields MUST be ignored by implementations. 20 * 21 * NOTE: the format limits the index segement to 4 GiB in size (by using a u32 22 * field for storing the uncompressed index size). it does not however 23 * limit the size of the data segment (by using a u64 field for storing 24 * the uncompressed data size). this is deemed acceptable, as each 25 * archive entry should contain no more than a small archive header (80 26 * bytes), and a variable, user-defined number of tags (again of 27 * variable, user-defined sizes). 28 * 29 * if you have too many archives for a single pak file, you likely 30 * contain hundreds of thousands of archives (each with tens of 31 * thousands of tags), at which point the suggestion is to split the 32 * pak; parsing such a large index is guaranteed to be slow, so by 33 * necessity you will want to split it on a per-stage / per-level basis. 34 * 35 * NOTE: the index can either preceede the data, or follow it. preceeding the 36 * data makes the format simpler to understand and parse (you can naively 37 * load the first N bytes instead of seeking to the end and rewinding), 38 * but following the data means that the contents can be easily modified 39 * (by stripping the index, appending to the data segment, and rewriting 40 * the index). this is a quality-of-implementation issue, and not 41 * specified by the format. 42 * 43 * Compression type enum 44 * +---------+ 45 * | None: 0 | 46 * +---------+ 47 * | LZ4: 1 | 48 * +---------+ 49 * 50 * Header Format 51 * +--------------------------------+---------------------------------------+ 52 * | Magic Value: char[4], "PAK0" | | 53 * +--------------------------------+---------------------------------------+ 54 * | Header Length: u32 | Byte length of header | 55 * +--------------------------------+---------------------------------------+ 56 * | Index File Offset: u64 | | 57 * +--------------------------------+---------------------------------------+ 58 * | Index Compression: u32 | | 59 * +--------------------------------+---------------------------------------+ 60 * | Index Compressed Length: u32 | | 61 * +--------------------------------+---------------------------------------+ 62 * | Index Uncompressed Length: u32 | | 63 * +--------------------------------+---------------------------------------+ 64 * | Index Uncompressed CRC32c: u32 | | 65 * +--------------------------------+---------------------------------------+ 66 * | Data File Offset: u64 | | 67 * +--------------------------------+---------------------------------------+ 68 * | Data Compressed Length: u64 | | 69 * +--------------------------------+---------------------------------------+ 70 * 71 * Index Format 72 * +--------------------------+---------------------------------------------+ 73 * | Index Entries: Archive[] | Array of archives contained in pak file | 74 * +--------------------------+---------------------------------------------+ 75 * 76 * Archive Format 77 * +-----------------------------------+------------------------------------+ 78 * | Archive Name: char[32] | Short, human readable name | 79 * +-----------------------------------+------------------------------------+ 80 * | Archive Type: u32 | User-defined archive type ID | 81 * +-----------------------------------+------------------------------------+ 82 * | Archive Length: u32 | Length of archive header + tags | 83 * +-----------------------------------+------------------------------------+ 84 * | Data Offset: u64 | Byte offset of archive data from | 85 * | | start of data segment | 86 * +-----------------------------------+------------------------------------+ 87 * | Data Compressed Length: u64 | | 88 * +-----------------------------------+------------------------------------+ 89 * | Data Uncompressed Length: u64 | | 90 * +-----------------------------------+------------------------------------+ 91 * | Data Uncompressed CRC32c: u32 | | 92 * +-----------------------------------+------------------------------------+ 93 * | Data Compression: u32 | | 94 * +-----------------------------------+------------------------------------+ 95 * | Reserved: u32 | | 96 * +-----------------------------------+------------------------------------+ 97 * | Tag Count: u32 | Number of metadata tags | 98 * +-----------------------------------+------------------------------------+ 99 * | Tags: Tag[] | | 100 * +-----------------------------------+------------------------------------+ 101 * 102 * Tag Format 103 * +-------------------------------------+----------------------------------+ 104 * | Tag Name: char[32] | Short, human readable name | 105 * +-------------------------------------+----------------------------------+ 106 * | Tag Type: u16 | User-defined tag type ID | 107 * +-------------------------------------+----------------------------------+ 108 * | Tag Length: u16 | Byte length of tag data | 109 * +-------------------------------------+----------------------------------+ 110 * | Tag Data: u8[] | | 111 * +-------------------------------------+----------------------------------+ 112 * 113 * Data Format 114 * +------------+-----------------------------------------------------------+ 115 * | Data: u8[] | Raw (compressed) contents of an archive | 116 * +------------+-----------------------------------------------------------+ 117 * 118 */ 119 120 enum pak_compression_type { 121 PAK_COMPRESSION_NONE, 122 PAK_COMPRESSION_LZ4, 123 }; 124 125 #define PAK_MAGIC "PAK0" 126 #define PAK_MAGIC_LEN 4 127 128 #define PAK_HEADER_PRELUDE_SIZE \ 129 (sizeof(char[PAK_MAGIC_LEN]) + sizeof(uint32_t)) 130 131 #define PAK_HEADER_SIZE sizeof(struct pakfmt_header) 132 133 struct pakfmt_header { 134 char magic[PAK_MAGIC_LEN]; 135 uint32_t header_length; 136 137 uint64_t index_file_offset; 138 uint32_t index_compression; 139 uint32_t index_compressed_length; 140 uint32_t index_uncompressed_length; 141 uint32_t index_uncompressed_crc32c; 142 143 uint64_t data_file_offset; 144 uint64_t data_compressed_length; 145 }; 146 147 #define PAK_ARCHIVE_NAME_LEN 32 148 #define PAK_ARCHIVE_PRELUDE_SIZE \ 149 (sizeof(char[PAK_ARCHIVE_NAME_LEN]) + sizeof(uint32_t) + sizeof(uint32_t)) 150 151 #define PAK_ARCHIVE_SIZE sizeof(struct pakfmt_archive) 152 153 struct pakfmt_archive { 154 char name[PAK_ARCHIVE_NAME_LEN]; 155 uint32_t type; 156 uint32_t length; 157 158 uint64_t data_offset; 159 uint64_t data_compressed_length; 160 uint64_t data_uncompressed_length; 161 uint32_t data_uncompressed_crc32c; 162 uint32_t data_compression; 163 164 uint32_t reserved; 165 166 uint32_t tag_count; 167 }; 168 169 #define PAK_TAG_NAME_LEN 32 170 #define PAK_TAG_PRELUDE_SIZE \ 171 (sizeof(char[PAK_TAG_NAME_LEN]) + sizeof(uint16_t) + sizeof(uint16_t)) 172 173 #define PAK_TAG_SIZE sizeof(struct pakfmt_tag) 174 175 struct pakfmt_tag { 176 char name[PAK_TAG_NAME_LEN]; 177 uint16_t type; 178 uint16_t length; 179 }; 180 181 struct pak_archive { 182 struct pakfmt_archive header; 183 184 struct list_node tags; 185 186 uint8_t *data; 187 188 struct list_node list_node; 189 }; 190 191 struct pak_tag { 192 struct pakfmt_tag header; 193 194 uint8_t *data; 195 196 struct list_node list_node; 197 }; 198 199 struct pak { 200 struct pakfmt_header header; 201 202 struct list_node index; 203 }; 204 205 #endif /* PAKFMT_H */