#region Copyright & License Information
/*
 * Copyright 2007-2010 The OpenRA Developers (see AUTHORS)
 * This file is part of OpenRA, which is free software. It is made
 * available to you under the terms of the GNU General Public License
 * as published by the Free Software Foundation. For more information,
 * see LICENSE.
 */
#endregion

// Provenance: OpenRA.FileFormats/FileFormats/Blast.cs as introduced by patch
// 50157c43de15ef962a2b59da51dbb9d0534f6e92 ("Port Blast from zlib/contribute to c#",
// Paul Chote, 2010-11-09). The same patch also adds one line to
// OpenRA.FileFormats/OpenRA.FileFormats.csproj.

using System;
using System.IO;

namespace OpenRA.FileFormats
{
	// A reimplementation of the Blast routines included in zlib
	class Blast
	{
		public const int MAXBITS = 13; // maximum code length
		public const int MAXWIN = 4096; // maximum window size

		readonly Stream InStream;
		readonly Stream OutStream;

		/*
		 * Decompress the PKWare Compression Library stream read from inStream and
		 * write the decompressed bytes to outStream. The whole stream is processed
		 * by the constructor (it calls Decompress()).
		 *
		 * Throws ArgumentNullException if either stream is null, plus anything
		 * Decompress() throws.
		 */
		public Blast(Stream inStream, Stream outStream)
		{
			if (inStream == null)
				throw new ArgumentNullException("inStream");
			if (outStream == null)
				throw new ArgumentNullException("outStream");

			InStream = inStream;
			OutStream = outStream;
			Decompress();
		}

		/*
		 * Return `count' bits from the input stream. This always leaves less than
		 * eight bits in the buffer. GetBits() works properly for count == 0.
		 *
		 * Throws EndOfStreamException if the input ends before `count' bits could
		 * be read (Stream.ReadByte() reports that by returning -1).
		 *
		 * Format notes:
		 *
		 * - Bits are stored in bytes from the least significant bit to the most
		 *   significant bit. Therefore bits are dropped from the bottom of the bit
		 *   buffer, using shift right, and the result is assembled starting at its
		 *   least significant bit.
		 */
		int bitBuffer;
		byte bitCount;
		private int GetBits(int count)
		{
			int ret = 0;
			for (int filled = 0; filled < count; filled++)
			{
				if (bitCount == 0)
				{
					bitBuffer = InStream.ReadByte();

					// -1 means end of stream; treating it as data would feed an
					// endless run of 1 bits into the decoder.
					if (bitBuffer < 0)
						throw new EndOfStreamException("Unexpected end of compressed data");

					bitCount = 8;
				}

				ret |= (bitBuffer & 1) << filled;
				bitBuffer >>= 1;
				bitCount--;
			}

			return ret;
		}

		/*
		 * Decode a code from the input stream using huffman table h and return the
		 * symbol. If all of the lengths are zero, i.e. an empty code, or if the
		 * code is incomplete and an invalid code is received, then an
		 * InvalidDataException is thrown after reading MAXBITS bits.
		 *
		 * Format notes:
		 *
		 * - The codes as stored in the compressed data are bit-reversed relative to
		 *   a simple integer ordering of codes of the same lengths. Hence below the
		 *   bits are pulled from the compressed data one at a time and used to
		 *   build the code value reversed from what is in the stream in order to
		 *   permit simple integer comparisons for decoding.
		 *
		 * - The first code for the shortest length is all ones. Subsequent codes of
		 *   the same length are simply integer decrements of the previous code. When
		 *   moving up a length, a one bit is appended to the code. For a complete
		 *   code, the last code of the longest length will be all zeros. To support
		 *   this ordering, the bits pulled during decoding are inverted to apply the
		 *   more "natural" ordering starting with all zeros and incrementing.
		 */
		private int Decode(Huffman h)
		{
			int code = 0;  // len bits being decoded
			int first = 0; // first code of length len
			int index = 0; // index of first code of length len in symbol table

			for (int len = 1; len <= MAXBITS; len++)
			{
				code |= GetBits(1) ^ 1; // invert code
				int count = h.Count[len]; // number of codes of length len
				if (code < first + count)
					return h.Symbol[index + (code - first)];

				index += count;
				first += count;
				first <<= 1;
				code <<= 1;
			}

			// Ran out of code lengths without matching a symbol.
			throw new InvalidDataException("Invalid Huffman code in datastream");
		}

		// bit lengths of literal codes, in the compact repeat format
		// understood by the Huffman constructor
		static readonly byte[] litlen = new byte[] {
			11, 124, 8, 7, 28, 7, 188, 13, 76, 4,
			10, 8, 12, 10, 12, 10, 8, 23, 8, 9,
			7, 6, 7, 8, 7, 6, 55, 8, 23, 24,
			12, 11, 7, 9, 11, 12, 6, 7, 22, 5,
			7, 24, 6, 11, 9, 6, 7, 22, 7, 11,
			38, 7, 9, 8, 25, 11, 8, 11, 9, 12,
			8, 12, 5, 38, 5, 38, 5, 11, 7, 5,
			6, 21, 6, 10, 53, 8, 7, 24, 10, 27,
			44, 253, 253, 253, 252, 252, 252, 13, 12, 45,
			12, 45, 12, 61, 12, 45, 44, 173
		};

		// bit lengths of length codes 0..15
		static readonly byte[] lenlen = new byte[] { 2, 35, 36, 53, 38, 23 };

		// bit lengths of distance codes 0..63
		static readonly byte[] distlen = new byte[] { 2, 20, 53, 230, 247, 151, 248 };

		// base for length codes
		static readonly short[] lengthbase = new short[] {
			3, 2, 4, 5, 6, 7, 8, 9, 10, 12,
			16, 24, 40, 72, 136, 264
		};

		// extra bits for length codes
		static readonly byte[] extra = new byte[] {
			0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
			3, 4, 5, 6, 7, 8
		};

		// Decoding tables. Static field initializers run in textual order, so
		// these must stay below the length arrays they are built from.
		static readonly Huffman litcode = new Huffman(litlen, litlen.Length, 256);
		static readonly Huffman lencode = new Huffman(lenlen, lenlen.Length, 16);
		static readonly Huffman distcode = new Huffman(distlen, distlen.Length, 64);

		// Write the first `count' bytes of the window to the output stream.
		private void Flush(byte[] window, int count)
		{
			if (count > 0)
				OutStream.Write(window, 0, count);
		}

		/*
		 * Decode PKWare Compression Library stream.
		 *
		 * Throws NotImplementedException if the first header byte is not 0 or 1,
		 * InvalidDataException if the dictionary size byte is not 4, 5 or 6, if a
		 * distance points before the start of the output, or if an invalid code is
		 * read, and EndOfStreamException if the input ends before the end code.
		 *
		 * Format notes:
		 *
		 * - First byte is 0 if literals are uncoded or 1 if they are coded. Second
		 *   byte is 4, 5, or 6 for the number of extra bits in the distance code.
		 *   This is the base-2 logarithm of the dictionary size minus six.
		 *
		 * - Compressed data is a combination of literals and length/distance pairs
		 *   terminated by an end code. Literals are either Huffman coded or
		 *   uncoded bytes. A length/distance pair is a coded length followed by a
		 *   coded distance to represent a string that occurs earlier in the
		 *   uncompressed data that occurs again at the current location.
		 *
		 * - A bit preceding a literal or length/distance pair indicates which comes
		 *   next, 0 for literals, 1 for length/distance.
		 *
		 * - If literals are uncoded, then the next eight bits are the literal, in the
		 *   normal bit order in the stream, i.e. no bit-reversal is needed. Similarly,
		 *   no bit reversal is needed for either the length extra bits or the distance
		 *   extra bits.
		 *
		 * - Literal bytes are simply written to the output. A length/distance pair is
		 *   an instruction to copy previously uncompressed bytes to the output. The
		 *   copy is from distance bytes back in the output stream, copying for length
		 *   bytes.
		 *
		 * - Distances pointing before the beginning of the output data are not
		 *   permitted.
		 *
		 * - Overlapped copies, where the length is greater than the distance, are
		 *   allowed and common. For example, a distance of one and a length of 518
		 *   simply copies the last byte 518 times. A distance of four and a length of
		 *   twelve copies the last four bytes three times. A simple forward copy
		 *   ignoring whether the length is greater than the distance or not implements
		 *   this correctly.
		 */
		public void Decompress()
		{
			// Are literals coded?
			// (exception type kept as-is so existing callers see the same failure)
			int coded = InStream.ReadByte();
			if (coded < 0 || coded > 1)
				throw new NotImplementedException("Invalid datastream");
			bool encodedLiterals = coded == 1;

			// log2(dictionary size) - 6
			int dict = InStream.ReadByte();
			if (dict < 4 || dict > 6)
				throw new InvalidDataException("Invalid dictionary size");

			// output state
			int next = 0; // index of next write location in window[]
			bool first = true; // true to check distances (for first 4K)
			byte[] window = new byte[MAXWIN]; // output buffer and sliding window

			// decode literals and length/distance pairs
			while (true)
			{
				if (GetBits(1) == 1)
				{
					// Length
					int symbol = Decode(lencode);
					int len = lengthbase[symbol] + GetBits(extra[symbol]);
					if (len == 519) // Magic number for "done"
						break;

					// Distance: a length of 2 always uses 2 extra bits
					int distBits = len == 2 ? 2 : dict;
					int dist = Decode(distcode) << distBits;
					dist += GetBits(distBits);
					dist++;

					if (first && dist > next)
						throw new InvalidDataException("Attempt to jump before data");

					// copy length bytes from distance bytes back
					do
					{
						int dest = next;
						int source = dest - dist;

						// Limit this pass to what fits before either the source
						// or the destination reaches the end of the window.
						int copy = MAXWIN;
						if (next < dist)
						{
							source += copy;
							copy = dist;
						}

						copy -= next;
						if (copy > len)
							copy = len;

						len -= copy;
						next += copy;

						// Forward byte-by-byte copy: overlapped ranges must see
						// the bytes written earlier in this same copy, which
						// Array.Copy does not provide.
						while (copy-- > 0)
							window[dest++] = window[source++];

						// Flush window to outstream
						if (next == MAXWIN)
						{
							Flush(window, MAXWIN);
							next = 0;
							first = false;
						}
					} while (len != 0);
				}
				else
				{
					// literal value
					int symbol = encodedLiterals ? Decode(litcode) : GetBits(8);
					window[next++] = (byte)symbol;
					if (next == MAXWIN)
					{
						Flush(window, MAXWIN);
						next = 0;
						first = false;
					}
				}
			}

			// Write out whatever is left in the partially filled window.
			Flush(window, next);
		}
	}

	/*
	 * Given a list of repeated code lengths rep[], where each byte is a
	 * count (high four bits + 1) and a code length (low four bits), generate the
	 * list of code lengths. This compaction reduces the size of the object code.
	 * Then given the list of code lengths representing a canonical
	 * Huffman code for n symbols, construct the tables required to decode those
	 * codes. Those tables are the number of codes of each length, and the symbols
	 * sorted by length, retaining their original order within each length.
	 */
	class Huffman
	{
		public short[] Count; // number of symbols of each length
		public short[] Symbol; // canonically ordered symbols

		/*
		 * rep:         compact repeat-count/length list (see class comment).
		 * n:           kept for interface compatibility; every entry of rep is
		 *              processed and the symbol count is derived from it.
		 * SymbolCount: size of the Symbol table to allocate.
		 *
		 * Throws InvalidDataException if the set of lengths is over-subscribed.
		 * Symbol is left null when every code length is zero.
		 */
		public Huffman(byte[] rep, int n, short SymbolCount)
		{
			short[] length = new short[256]; // code lengths
			int symbols = 0; // number of symbols expanded so far

			// convert compact repeat counts into symbol bit length list
			foreach (byte code in rep)
			{
				byte len = (byte)(code & 15); // Code length (low four bits)

				// Number of codes (top four bits plus 1), so always at least one
				for (int num = (code >> 4) + 1; num > 0; num--)
					length[symbols++] = len;
			}

			n = symbols;

			// count number of codes of each length
			Count = new short[Blast.MAXBITS + 1];
			for (int i = 0; i < n; i++)
				Count[length[i]]++;

			// no codes!
			if (Count[0] == n)
				return;

			// check for an over-subscribed set of lengths
			int left = 1; // one possible code of zero length
			for (int len = 1; len <= Blast.MAXBITS; len++)
			{
				left <<= 1; // one more bit, double codes left
				left -= Count[len]; // deduct count from possible codes
				if (left < 0)
					throw new InvalidDataException("over subscribed code set");
			}

			// generate offsets into symbol table for each length for sorting
			short[] offs = new short[Blast.MAXBITS + 1];
			for (int len = 1; len < Blast.MAXBITS; len++)
				offs[len + 1] = (short)(offs[len] + Count[len]);

			// put symbols in table sorted by length, by symbol order within each length
			Symbol = new short[SymbolCount];
			for (int i = 0; i < n; i++)
				if (length[i] != 0)
					Symbol[offs[length[i]]++] = (short)i;
		}
	}
}