#region Copyright & License Information
/*
 * Copyright 2007-2010 The OpenRA Developers (see AUTHORS)
 * This file is part of OpenRA, which is free software. It is made
 * available to you under the terms of the GNU General Public License
 * as published by the Free Software Foundation. For more information,
 * see LICENSE.
 */
#endregion

using System;
using System.IO;

namespace OpenRA.FileFormats
{
    // A reimplementation of the Blast routines included in zlib
    public static class Blast
    {
        public static readonly int MAXBITS = 13; // maximum code length
        public static readonly int MAXWIN = 4096; // maximum window size

        /*
         * Decode a code from the bit reader br using huffman table h and return
         * the decoded symbol. At most MAXBITS bits are consumed; if no code of
         * length 1..MAXBITS matches (an empty or incomplete code set), an
         * InvalidDataException is thrown.
         *
         * Format notes:
         *
         * - The codes as stored in the compressed data are bit-reversed relative to
         *   a simple integer ordering of codes of the same lengths. Hence below the
         *   bits are pulled from the compressed data one at a time and used to
         *   build the code value reversed from what is in the stream in order to
         *   permit simple integer comparisons for decoding.
         *
         * - The first code for the shortest length is all ones. Subsequent codes of
         *   the same length are simply integer decrements of the previous code. When
         *   moving up a length, a one bit is appended to the code. For a complete
         *   code, the last code of the longest length will be all zeros. To support
         *   this ordering, the bits pulled during decoding are inverted to apply the
         *   more "natural" ordering starting with all zeros and incrementing.
         */
        private static int Decode(Huffman h, BitReader br)
        {
            int code = 0;   // len bits being decoded
            int first = 0;  // first code of length len
            int index = 0;  // index of first code of length len in symbol table

            // h.Count has MAXBITS + 1 entries, so len must never exceed MAXBITS.
            for (int len = 1; len <= MAXBITS; len++)
            {
                code |= br.ReadBits(1) ^ 1; // invert code
                int count = h.Count[len];
                if (code < first + count)
                    return h.Symbol[index + (code - first)];

                // No match at this length: move on to the next, one bit longer.
                index += count;
                first += count;
                first <<= 1;
                code <<= 1;
            }

            throw new InvalidDataException("Invalid Huffman code");
        }

        /*
         * Decode PKWare Compression Library stream.
         *
         * Format notes:
         *
         * - First byte is 0 if literals are uncoded or 1 if they are coded. Second
         *   byte is 4, 5, or 6 for the number of extra bits in the distance code.
         *   This is the base-2 logarithm of the dictionary size minus six.
         *
         * - Compressed data is a combination of literals and length/distance pairs
         *   terminated by an end code. Literals are either Huffman coded or
         *   uncoded bytes. A length/distance pair is a coded length followed by a
         *   coded distance to represent a string that occurs earlier in the
         *   uncompressed data that occurs again at the current location.
         *
         * - A bit preceding a literal or length/distance pair indicates which comes
         *   next, 0 for literals, 1 for length/distance.
         *
         * - If literals are uncoded, then the next eight bits are the literal, in the
         *   normal bit order in the stream, i.e. no bit-reversal is needed. Similarly,
         *   no bit reversal is needed for either the length extra bits or the distance
         *   extra bits.
         *
         * - Literal bytes are simply written to the output. A length/distance pair is
         *   an instruction to copy previously uncompressed bytes to the output. The
         *   copy is from distance bytes back in the output stream, copying for length
         *   bytes.
         *
         * - Distances pointing before the beginning of the output data are not
         *   permitted.
         *
         * - Overlapped copies, where the length is greater than the distance, are
         *   allowed and common. For example, a distance of one and a length of 518
         *   simply copies the last byte 518 times. A distance of four and a length of
         *   twelve copies the last four bytes three times. A simple forward
         *   byte-by-byte copy ignoring whether the length is greater than the
         *   distance or not implements this correctly. Array.Copy does NOT: it
         *   preserves the original source bytes when the ranges overlap.
         */

        // Each table below is a compact list of (repeat count, code length) bytes
        // that the Huffman constructor expands into per-symbol code lengths.
        static readonly byte[] litlen = new byte[]
        {
            11, 124, 8, 7, 28, 7, 188, 13, 76, 4, 10, 8, 12, 10, 12, 10, 8, 23, 8,
            9, 7, 6, 7, 8, 7, 6, 55, 8, 23, 24, 12, 11, 7, 9, 11, 12, 6, 7, 22, 5,
            7, 24, 6, 11, 9, 6, 7, 22, 7, 11, 38, 7, 9, 8, 25, 11, 8, 11, 9, 12,
            8, 12, 5, 38, 5, 38, 5, 11, 7, 5, 6, 21, 6, 10, 53, 8, 7, 24, 10, 27,
            44, 253, 253, 253, 252, 252, 252, 13, 12, 45, 12, 45, 12, 61, 12, 45,
            44, 173
        };

        // bit lengths of length codes 0..15
        static readonly byte[] lenlen = new byte[] { 2, 35, 36, 53, 38, 23 };

        // bit lengths of distance codes 0..63
        static readonly byte[] distlen = new byte[] { 2, 20, 53, 230, 247, 151, 248 };

        // base for length codes
        static readonly short[] lengthbase = new short[]
        {
            3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264
        };

        // extra bits for length codes
        static readonly byte[] extra = new byte[]
        {
            0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8
        };

        // NOTE: these initializers run in textual order and rely on MAXBITS and the
        // tables above already being assigned, so keep them below those fields.
        static readonly Huffman litcode = new Huffman(litlen, litlen.Length, 256);
        static readonly Huffman lencode = new Huffman(lenlen, lenlen.Length, 16);
        static readonly Huffman distcode = new Huffman(distlen, distlen.Length, 64);

        /*
         * Decompress a complete PKWare DCL ("implode") stream held in src and
         * return the uncompressed bytes.
         *
         * Throws:
         *   ArgumentNullException   - src is null.
         *   NotImplementedException - the first header byte is neither 0 nor 1.
         *   InvalidDataException    - bad dictionary size, a distance that points
         *                             before the start of the output, or an
         *                             undecodable Huffman code.
         *   EndOfStreamException    - src ends before the end code is reached.
         */
        public static byte[] Decompress(byte[] src)
        {
            if (src == null)
                throw new ArgumentNullException("src");

            BitReader br = new BitReader(src);

            // Are literals coded?
            // NOTE(review): NotImplementedException is kept for backward
            // compatibility with existing callers; InvalidDataException would
            // describe this condition better.
            int coded = br.ReadBits(8);
            if (coded < 0 || coded > 1)
                throw new NotImplementedException("Invalid datastream");
            bool encodedLiterals = coded == 1;

            // log2(dictionary size) - 6
            int dict = br.ReadBits(8);
            if (dict < 4 || dict > 6)
                throw new InvalidDataException("Invalid dictionary size");

            // output state
            int next = 0;                           // index of next write location in outBuffer
            bool first = true;                      // true to check distances (for first 4K)
            byte[] outBuffer = new byte[MAXWIN];    // output buffer and sliding window

            using (var ms = new MemoryStream())
            {
                // decode literals and length/distance pairs
                while (true)
                {
                    if (br.ReadBits(1) == 1)
                    {
                        // Length
                        int symbol = Decode(lencode, br);
                        int len = lengthbase[symbol] + br.ReadBits(extra[symbol]);
                        if (len == 519) // Magic number for "done"
                        {
                            // Flush whatever is left in the window and stop.
                            ms.Write(outBuffer, 0, next);
                            break;
                        }

                        // Distance: a length of 2 always uses two extra bits
                        symbol = len == 2 ? 2 : dict;
                        int dist = Decode(distcode, br) << symbol;
                        dist += br.ReadBits(symbol);
                        dist++;
                        if (first && dist > next)
                            throw new InvalidDataException("Attempt to jump before data");

                        // copy length bytes from distance bytes back
                        do
                        {
                            int dest = next;
                            int source = dest - dist;

                            int copy = MAXWIN;
                            if (next < dist)
                            {
                                // Source wraps around to the end of the window;
                                // only copy up to the end of the window this pass.
                                source += copy;
                                copy = dist;
                            }

                            copy -= next;
                            if (copy > len)
                                copy = len;

                            len -= copy;
                            next += copy;

                            // Forward byte-by-byte copy: when the ranges overlap
                            // (length > distance) freshly written bytes must be
                            // re-read, which Array.Copy would not do.
                            while (copy-- > 0)
                                outBuffer[dest++] = outBuffer[source++];

                            // Flush window to outstream
                            if (next == MAXWIN)
                            {
                                ms.Write(outBuffer, 0, next);
                                next = 0;
                                first = false;
                            }
                        }
                        while (len != 0);
                    }
                    else
                    {
                        // literal value
                        int symbol = encodedLiterals ? Decode(litcode, br) : br.ReadBits(8);
                        outBuffer[next++] = (byte)symbol;

                        // Flush window to outstream
                        if (next == MAXWIN)
                        {
                            ms.Write(outBuffer, 0, next);
                            next = 0;
                            first = false;
                        }
                    }
                }

                return ms.ToArray();
            }
        }
    }

    /*
     * Reads a byte array as a stream of bits, taking the least significant bit
     * of each byte first.
     */
    class BitReader
    {
        readonly byte[] src;
        int offset = 0;     // index of the next unread byte in src
        int bitBuffer = 0;  // remaining bits of the current byte, next bit in bit 0
        int bitCount = 0;   // number of valid bits left in bitBuffer

        public BitReader(byte[] src)
        {
            this.src = src;
        }

        /*
         * Read count bits and return them as an integer whose bit i is the i-th
         * bit read (i.e. the first bit read is the least significant). A count of
         * zero reads nothing and returns 0. Throws EndOfStreamException if the
         * input runs out before count bits have been read.
         */
        public int ReadBits(int count)
        {
            int ret = 0;
            for (int filled = 0; filled < count; filled++)
            {
                if (bitCount == 0)
                {
                    if (offset >= src.Length)
                        throw new EndOfStreamException("Unexpected end of compressed data");

                    bitBuffer = src[offset++];
                    bitCount = 8;
                }

                ret |= (bitBuffer & 1) << filled;
                bitBuffer >>= 1;
                bitCount--;
            }

            return ret;
        }
    }

    /*
     * Given a list of repeated code lengths rep[0..n-1], where each byte is a
     * count (high four bits + 1) and a code length (low four bits), generate the
     * list of code lengths.  This compaction reduces the size of the object code.
     * Then given the list of code lengths length[0..n-1] representing a canonical
     * Huffman code for n symbols, construct the tables required to decode those
     * codes.  Those tables are the number of codes of each length, and the symbols
     * sorted by length, retaining their original order within each length.
     */
    class Huffman
    {
        public short[] Count;   // number of symbols of each length
        public short[] Symbol;  // canonically ordered symbols

        /*
         * rep         - compact (count, length) bytes as described above.
         * n           - ignored: it is overwritten with the number of expanded
         *               code lengths before it is first read.
         * SymbolCount - size of the Symbol table to allocate.
         *
         * Throws InvalidDataException if the lengths describe an over-subscribed
         * code set. If every length is zero, Symbol is left null.
         */
        public Huffman(byte[] rep, int n, short SymbolCount)
        {
            short[] length = new short[256];    // code lengths
            int s = 0;                          // current symbol

            // convert compact repeat counts into symbol bit length list
            foreach (byte code in rep)
            {
                int num = (code >> 4) + 1;      // Number of codes (top four bits plus 1)
                byte len = (byte)(code & 15);   // Code length (low four bits)
                do
                {
                    length[s++] = len;
                }
                while (--num > 0);
            }

            n = s;

            // count number of codes of each length
            Count = new short[Blast.MAXBITS + 1];
            for (int i = 0; i < n; i++)
                Count[length[i]]++;

            // no codes!
            if (Count[0] == n)
                return;

            // check for an over-subscribed or incomplete set of lengths
            int left = 1;   // one possible code of zero length
            for (int len = 1; len <= Blast.MAXBITS; len++)
            {
                left <<= 1;             // one more bit, double codes left
                left -= Count[len];     // deduct count from possible codes
                if (left < 0)
                    throw new InvalidDataException("over subscribed code set");
            }

            // generate offsets into symbol table for each length for sorting
            short[] offs = new short[Blast.MAXBITS + 1];
            for (int len = 1; len < Blast.MAXBITS; len++)
                offs[len + 1] = (short)(offs[len] + Count[len]);

            // put symbols in table sorted by length, by symbol order within each length
            Symbol = new short[SymbolCount];
            for (short i = 0; i < n; i++)
                if (length[i] != 0)
                    Symbol[offs[length[i]]++] = i;
        }
    }
}