diff options
Diffstat (limited to 'doc/api-documentation/html/lzsscomprs_8cpp-source.html')
-rw-r--r-- | doc/api-documentation/html/lzsscomprs_8cpp-source.html | 679 |
1 files changed, 679 insertions, 0 deletions
diff --git a/doc/api-documentation/html/lzsscomprs_8cpp-source.html b/doc/api-documentation/html/lzsscomprs_8cpp-source.html new file mode 100644 index 0000000..6b34b6e --- /dev/null +++ b/doc/api-documentation/html/lzsscomprs_8cpp-source.html @@ -0,0 +1,679 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> +<html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"> +<title>lzsscomprs.cpp Source File</title> +<link href="doxygen.css" rel="stylesheet" type="text/css"> +</head><body> +<!-- Generated by Doxygen 1.2.15 --> +<center> +<a class="qindex" href="index.html">Main Page</a> <a class="qindex" href="namespaces.html">Namespace List</a> <a class="qindex" href="hierarchy.html">Class Hierarchy</a> <a class="qindex" href="classes.html">Alphabetical List</a> <a class="qindex" href="annotated.html">Compound List</a> <a class="qindex" href="files.html">File List</a> <a class="qindex" href="functions.html">Compound Members</a> </center> +<hr><h1>lzsscomprs.cpp</h1><div class="fragment"><pre>00001 <font class="comment">/******************************************************************************</font> +00002 <font class="comment"> * lzsscomprs.cpp - code for class 'LZSSCompress'- a driver class that</font> +00003 <font class="comment"> * provides LZSS compression</font> +00004 <font class="comment"> */</font> +00005 +00006 <font class="preprocessor">#include <string.h></font> +00007 <font class="preprocessor">#include <stdlib.h></font> +00008 <font class="preprocessor">#include <lzsscomprs.h></font> +00009 +00010 +00011 <font class="comment">/******************************************************************************</font> +00012 <font class="comment"> * LZSSCompress Statics</font> +00013 <font class="comment"> */</font> +00014 +00015 <font class="comment">// m_ring_buffer is a text buffer. It contains "nodes" of</font> +00016 <font class="comment">// uncompressed text that can be indexed by position. That is,</font> +00017 <font class="comment">// a substring of the ring buffer can be indexed by a position</font> +00018 <font class="comment">// and a length. When decoding, the compressed text may contain</font> +00019 <font class="comment">// a position in the ring buffer and a count of the number of</font> +00020 <font class="comment">// bytes from the ring buffer that are to be moved into the</font> +00021 <font class="comment">// uncompressed buffer. </font> +00022 <font class="comment">//</font> +00023 <font class="comment">// This ring buffer is not maintained as part of the compressed</font> +00024 <font class="comment">// text. Instead, it is reconstructed dynamically. That is,</font> +00025 <font class="comment">// it starts out empty and gets built as the text is decompressed.</font> +00026 <font class="comment">//</font> +00027 <font class="comment">// The ring buffer contain N bytes, with an additional F - 1 bytes</font> +00028 <font class="comment">// to facilitate string comparison.</font> +00029 +00030 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> LZSSCompress::m_ring_buffer[N + F - 1]; +00031 +00032 <font class="comment">// m_match_position and m_match_length are set by InsertNode().</font> +00033 <font class="comment">//</font> +00034 <font class="comment">// These variables indicate the position in the ring buffer </font> +00035 <font class="comment">// and the number of characters at that position that match</font> +00036 <font class="comment">// a given string.</font> +00037 +00038 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_match_position; +00039 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_match_length; +00040 +00041 <font class="comment">// m_lson, m_rson, and m_dad are the Japanese way of referring to</font> +00042 <font class="comment">// a tree structure. The dad is the parent and it has a right and</font> +00043 <font class="comment">// left son (child).</font> +00044 <font class="comment">//</font> +00045 <font class="comment">// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right </font> +00046 <font class="comment">// and left children of node i. </font> +00047 <font class="comment">//</font> +00048 <font class="comment">// For i = 0 to N-1, m_dad[i] is the parent of node i.</font> +00049 <font class="comment">//</font> +00050 <font class="comment">// For i = 0 to 255, rson[N + i + 1] is the root of the tree for </font> +00051 <font class="comment">// strings that begin with the character i. Note that this requires </font> +00052 <font class="comment">// one byte characters.</font> +00053 <font class="comment">//</font> +00054 <font class="comment">// These nodes store values of 0...(N-1). Memory requirements</font> +00055 <font class="comment">// can be reduces by using 2-byte integers instead of full 4-byte</font> +00056 <font class="comment">// integers (for 32-bit applications). Therefore, these are </font> +00057 <font class="comment">// defined as "short ints."</font> +00058 +00059 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_lson[N + 1]; +00060 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_rson[N + 257]; +00061 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_dad[N + 1]; +00062 +00063 +00064 <font class="comment">/******************************************************************************</font> +00065 <font class="comment"> * LZSSCompress Constructor - Initializes data for instance of LZSSCompress</font> +00066 <font class="comment"> *</font> +00067 <font class="comment"> */</font> +00068 +00069 LZSSCompress::LZSSCompress() : SWCompress() { +00070 } +00071 +00072 +00073 <font class="comment">/******************************************************************************</font> +00074 <font class="comment"> * LZSSCompress Destructor - Cleans up instance of LZSSCompress</font> +00075 <font class="comment"> */</font> +00076 +00077 LZSSCompress::~LZSSCompress() { +00078 } +00079 +00080 +00081 <font class="comment">/******************************************************************************</font> +00082 <font class="comment"> * LZSSCompress::InitTree - This function initializes the tree nodes to</font> +00083 <font class="comment"> * "empty" states. </font> +00084 <font class="comment"> */</font> +00085 +00086 <font class="keywordtype">void</font> LZSSCompress::InitTree(<font class="keywordtype">void</font>) { +00087 <font class="keywordtype">int</font> i; +00088 +00089 <font class="comment">// For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right</font> +00090 <font class="comment">// and left children of node i. These nodes need not be</font> +00091 <font class="comment">// initialized. However, for debugging purposes, it is nice to</font> +00092 <font class="comment">// have them initialized. Since this is only used for compression</font> +00093 <font class="comment">// (not decompression), I don't mind spending the time to do it.</font> +00094 <font class="comment">//</font> +00095 <font class="comment">// For the same range of i, m_dad[i] is the parent of node i.</font> +00096 <font class="comment">// These are initialized to a known value that can represent</font> +00097 <font class="comment">// a "not used" state.</font> +00098 +00099 <font class="keywordflow">for</font> (i = 0; i < N; i++) { +00100 m_lson[i] = NOT_USED; +00101 m_rson[i] = NOT_USED; +00102 m_dad[i] = NOT_USED; +00103 } +00104 +00105 <font class="comment">// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree</font> +00106 <font class="comment">// for strings that begin with the character i. This is why</font> +00107 <font class="comment">// the right child array is larger than the left child array.</font> +00108 <font class="comment">// These are also initialzied to a "not used" state.</font> +00109 <font class="comment">//</font> +00110 <font class="comment">// Note that there are 256 of these, one for each of the possible</font> +00111 <font class="comment">// 256 characters.</font> +00112 +00113 <font class="keywordflow">for</font> (i = N + 1; i <= (N + 256); i++) { +00114 m_rson[i] = NOT_USED; +00115 } +00116 } +00117 +00118 +00119 <font class="comment">/******************************************************************************</font> +00120 <font class="comment"> * LZSSCompress::InsertNode - This function inserts a string from the ring</font> +00121 <font class="comment"> * buffer into one of the trees. It loads the</font> +00122 <font class="comment"> * match position and length member variables</font> +00123 <font class="comment"> * for the longest match.</font> +00124 <font class="comment"> * </font> +00125 <font class="comment"> * The string to be inserted is identified by</font> +00126 <font class="comment"> * the parameter Pos, A full F bytes are</font> +00127 <font class="comment"> * inserted. So,</font> +00128 <font class="comment"> * m_ring_buffer[Pos ... Pos+F-1]</font> +00129 <font class="comment"> * are inserted.</font> +00130 <font class="comment"> *</font> +00131 <font class="comment"> * If the matched length is exactly F, then an</font> +00132 <font class="comment"> * old node is removed in favor of the new one</font> +00133 <font class="comment"> * (because the old one will be deleted</font> +00134 <font class="comment"> * sooner).</font> +00135 <font class="comment"> *</font> +00136 <font class="comment"> * Note that Pos plays a dual role. It is</font> +00137 <font class="comment"> * used as both a position in the ring buffer</font> +00138 <font class="comment"> * and also as a tree node.</font> +00139 <font class="comment"> * m_ring_buffer[Pos] defines a character that</font> +00140 <font class="comment"> * is used to identify a tree node.</font> +00141 <font class="comment"> *</font> +00142 <font class="comment"> * ENT: pos - position in the buffer</font> +00143 <font class="comment"> */</font> +00144 +00145 <font class="keywordtype">void</font> LZSSCompress::InsertNode(<font class="keywordtype">short</font> <font class="keywordtype">int</font> Pos) +00146 { +00147 <font class="keywordtype">short</font> <font class="keywordtype">int</font> i; +00148 <font class="keywordtype">short</font> <font class="keywordtype">int</font> p; +00149 <font class="keywordtype">int</font> cmp; +00150 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> * key; +00151 +00152 <font class="comment">/*</font> +00153 <font class="comment"> ASSERT(Pos >= 0);</font> +00154 <font class="comment"> ASSERT(Pos < N);</font> +00155 <font class="comment">*/</font> +00156 +00157 cmp = 1; +00158 key = &(m_ring_buffer[Pos]); +00159 +00160 <font class="comment">// The last 256 entries in m_rson contain the root nodes for</font> +00161 <font class="comment">// strings that begin with a letter. Get an index for the</font> +00162 <font class="comment">// first letter in this string.</font> +00163 +00164 p = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) (N + 1 + key[0]); +00165 +00166 <font class="comment">// Set the left and right tree nodes for this position to "not</font> +00167 <font class="comment">// used."</font> +00168 +00169 m_lson[Pos] = NOT_USED; +00170 m_rson[Pos] = NOT_USED; +00171 +00172 <font class="comment">// Haven't matched anything yet.</font> +00173 +00174 m_match_length = 0; +00175 +00176 <font class="keywordflow">for</font> ( ; ; ) { +00177 <font class="keywordflow">if</font> (cmp >= 0) { +00178 <font class="keywordflow">if</font> (m_rson[p] != NOT_USED) { +00179 p = m_rson[p]; +00180 } +00181 <font class="keywordflow">else</font> { +00182 m_rson[p] = Pos; +00183 m_dad[Pos] = p; +00184 <font class="keywordflow">return</font>; +00185 } +00186 } +00187 <font class="keywordflow">else</font> { +00188 <font class="keywordflow">if</font> (m_lson[p] != NOT_USED) { +00189 p = m_lson[p]; +00190 } +00191 <font class="keywordflow">else</font> { +00192 m_lson[p] = Pos; +00193 m_dad[Pos] = p; +00194 <font class="keywordflow">return</font>; +00195 } +00196 } +00197 +00198 <font class="comment">// Should we go to the right or the left to look for the</font> +00199 <font class="comment">// next match?</font> +00200 +00201 <font class="keywordflow">for</font> (i = 1; i < F; i++) { +00202 cmp = key[i] - m_ring_buffer[p + i]; +00203 <font class="keywordflow">if</font> (cmp != 0) +00204 <font class="keywordflow">break</font>; +00205 } +00206 +00207 <font class="keywordflow">if</font> (i > m_match_length) { +00208 m_match_position = p; +00209 m_match_length = i; +00210 +00211 <font class="keywordflow">if</font> (i >= F) +00212 <font class="keywordflow">break</font>; +00213 } +00214 } +00215 +00216 m_dad[Pos] = m_dad[p]; +00217 m_lson[Pos] = m_lson[p]; +00218 m_rson[Pos] = m_rson[p]; +00219 +00220 m_dad[ m_lson[p] ] = Pos; +00221 m_dad[ m_rson[p] ] = Pos; +00222 +00223 <font class="keywordflow">if</font> (m_rson[ m_dad[p] ] == p) { +00224 m_rson[ m_dad[p] ] = Pos; +00225 } +00226 <font class="keywordflow">else</font> { +00227 m_lson[ m_dad[p] ] = Pos; +00228 } +00229 +00230 <font class="comment">// Remove "p"</font> +00231 +00232 m_dad[p] = NOT_USED; +00233 } +00234 +00235 +00236 <font class="comment">/******************************************************************************</font> +00237 <font class="comment"> * LZSSCompress::DeleteNode - This function removes the node "Node" from the</font> +00238 <font class="comment"> * tree.</font> +00239 <font class="comment"> *</font> +00240 <font class="comment"> * ENT: node - node to be removed</font> +00241 <font class="comment"> */</font> +00242 +00243 <font class="keywordtype">void</font> LZSSCompress::DeleteNode(<font class="keywordtype">short</font> <font class="keywordtype">int</font> Node) +00244 { +00245 <font class="keywordtype">short</font> <font class="keywordtype">int</font> q; +00246 +00247 <font class="comment">/*</font> +00248 <font class="comment"> ASSERT(Node >= 0);</font> +00249 <font class="comment"> ASSERT(Node < (N+1));</font> +00250 <font class="comment">*/</font> +00251 +00252 <font class="keywordflow">if</font> (m_dad[Node] == NOT_USED) { <font class="comment">// not in tree, nothing to do</font> +00253 <font class="keywordflow">return</font>; +00254 } +00255 +00256 <font class="keywordflow">if</font> (m_rson[Node] == NOT_USED) { +00257 q = m_lson[Node]; +00258 } +00259 <font class="keywordflow">else</font> <font class="keywordflow">if</font> (m_lson[Node] == NOT_USED) { +00260 q = m_rson[Node]; +00261 } +00262 <font class="keywordflow">else</font> { +00263 q = m_lson[Node]; +00264 <font class="keywordflow">if</font> (m_rson[q] != NOT_USED) { +00265 <font class="keywordflow">do</font> { +00266 q = m_rson[q]; +00267 } <font class="keywordflow">while</font> (m_rson[q] != NOT_USED); +00268 +00269 m_rson[ m_dad[q] ] = m_lson[q]; +00270 m_dad[ m_lson[q] ] = m_dad[q]; +00271 m_lson[q] = m_lson[Node]; +00272 m_dad[ m_lson[Node] ] = q; +00273 } +00274 +00275 m_rson[q] = m_rson[Node]; +00276 m_dad[ m_rson[Node] ] = q; +00277 } +00278 +00279 m_dad[q] = m_dad[Node]; +00280 +00281 <font class="keywordflow">if</font> (m_rson[ m_dad[Node] ] == Node) { +00282 m_rson[ m_dad[Node] ] = q; +00283 } +00284 <font class="keywordflow">else</font> { +00285 m_lson[ m_dad[Node] ] = q; +00286 } +00287 +00288 m_dad[Node] = NOT_USED; +00289 } +00290 +00291 +00292 <font class="comment">/******************************************************************************</font> +00293 <font class="comment"> * LZSSCompress::Encode - This function "encodes" the input stream into the</font> +00294 <font class="comment"> * output stream.</font> +00295 <font class="comment"> * The GetChars() and SendChars() functions are</font> +00296 <font class="comment"> * used to separate this method from the actual</font> +00297 <font class="comment"> * i/o.</font> +00298 <font class="comment"> * NOTE: must set zlen for parent class to know length of</font> +00299 <font class="comment"> * compressed buffer.</font> +00300 <font class="comment"> */</font> +00301 +00302 <font class="keywordtype">void</font> LZSSCompress::Encode(<font class="keywordtype">void</font>) +00303 { +00304 <font class="keywordtype">short</font> <font class="keywordtype">int</font> i; <font class="comment">// an iterator</font> +00305 <font class="keywordtype">short</font> <font class="keywordtype">int</font> r; <font class="comment">// node number in the binary tree</font> +00306 <font class="keywordtype">short</font> <font class="keywordtype">int</font> s; <font class="comment">// position in the ring buffer</font> +00307 <font class="keywordtype">unsigned</font> <font class="keywordtype">short</font> <font class="keywordtype">int</font> len; <font class="comment">// len of initial string</font> +00308 <font class="keywordtype">short</font> <font class="keywordtype">int</font> last_match_length; <font class="comment">// length of last match</font> +00309 <font class="keywordtype">short</font> <font class="keywordtype">int</font> code_buf_pos; <font class="comment">// position in the output buffer</font> +00310 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> code_buf[17]; <font class="comment">// the output buffer</font> +00311 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> mask; <font class="comment">// bit mask for byte 0 of out buf</font> +00312 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> c; <font class="comment">// character read from string</font> +00313 +00314 <font class="comment">// Start with a clean tree.</font> +00315 +00316 InitTree(); +00317 direct = 0; <font class="comment">// set direction needed by parent [Get|Send]Chars()</font> +00318 +00319 <font class="comment">// code_buf[0] works as eight flags. A "1" represents that the</font> +00320 <font class="comment">// unit is an unencoded letter (1 byte), and a "0" represents</font> +00321 <font class="comment">// that the next unit is a <position,length> pair (2 bytes).</font> +00322 <font class="comment">//</font> +00323 <font class="comment">// code_buf[1..16] stores eight units of code. Since the best</font> +00324 <font class="comment">// we can do is store eight <position,length> pairs, at most 16 </font> +00325 <font class="comment">// bytes are needed to store this.</font> +00326 <font class="comment">//</font> +00327 <font class="comment">// This is why the maximum size of the code buffer is 17 bytes.</font> +00328 +00329 code_buf[0] = 0; +00330 code_buf_pos = 1; +00331 +00332 <font class="comment">// Mask iterates over the 8 bits in the code buffer. The first</font> +00333 <font class="comment">// character ends up being stored in the low bit.</font> +00334 <font class="comment">//</font> +00335 <font class="comment">// bit 8 7 6 5 4 3 2 1</font> +00336 <font class="comment">// | |</font> +00337 <font class="comment">// | first sequence in code buffer</font> +00338 <font class="comment">// |</font> +00339 <font class="comment">// last sequence in code buffer </font> +00340 +00341 mask = 1; +00342 +00343 s = 0; +00344 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) N - (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) F; +00345 +00346 <font class="comment">// Initialize the ring buffer with spaces...</font> +00347 +00348 <font class="comment">// Note that the last F bytes of the ring buffer are not filled.</font> +00349 <font class="comment">// This is because those F bytes will be filled in immediately</font> +00350 <font class="comment">// with bytes from the input stream.</font> +00351 +00352 memset(m_ring_buffer, <font class="charliteral">' '</font>, N - F); +00353 +00354 <font class="comment">// Read F bytes into the last F bytes of the ring buffer.</font> +00355 <font class="comment">//</font> +00356 <font class="comment">// This function loads the buffer with X characters and returns</font> +00357 <font class="comment">// the actual amount loaded.</font> +00358 +00359 len = GetChars((<font class="keywordtype">char</font> *) &(m_ring_buffer[r]), F); +00360 +00361 <font class="comment">// Make sure there is something to be compressed.</font> +00362 +00363 <font class="keywordflow">if</font> (len == 0) +00364 <font class="keywordflow">return</font>; +00365 +00366 <font class="comment">// Insert the F strings, each of which begins with one or more</font> +00367 <font class="comment">// 'space' characters. Note the order in which these strings</font> +00368 <font class="comment">// are inserted. This way, degenerate trees will be less likely</font> +00369 <font class="comment">// to occur.</font> +00370 +00371 <font class="keywordflow">for</font> (i = 1; i <= F; i++) { +00372 InsertNode((<font class="keywordtype">short</font> <font class="keywordtype">int</font>) (r - i)); +00373 } +00374 +00375 <font class="comment">// Finally, insert the whole string just read. The</font> +00376 <font class="comment">// member variables match_length and match_position are set.</font> +00377 +00378 InsertNode(r); +00379 +00380 <font class="comment">// Now that we're preloaded, continue till done.</font> +00381 +00382 <font class="keywordflow">do</font> { +00383 +00384 <font class="comment">// m_match_length may be spuriously long near the end of</font> +00385 <font class="comment">// text.</font> +00386 +00387 <font class="keywordflow">if</font> (m_match_length > len) { +00388 m_match_length = len; +00389 } +00390 +00391 <font class="comment">// Is it cheaper to store this as a single character? If so,</font> +00392 <font class="comment">// make it so.</font> +00393 +00394 <font class="keywordflow">if</font> (m_match_length < THRESHOLD) { +00395 <font class="comment">// Send one character. Remember that code_buf[0] is the</font> +00396 <font class="comment">// set of flags for the next eight items.</font> +00397 +00398 m_match_length = 1; +00399 code_buf[0] |= mask; +00400 code_buf[code_buf_pos++] = m_ring_buffer[r]; +00401 } +00402 +00403 <font class="comment">// Otherwise, we do indeed have a string that can be stored</font> +00404 <font class="comment">// compressed to save space.</font> +00405 +00406 <font class="keywordflow">else</font> { +00407 <font class="comment">// The next 16 bits need to contain the position (12 bits)</font> +00408 <font class="comment">// and the length (4 bits).</font> +00409 +00410 code_buf[code_buf_pos++] = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) m_match_position; +00411 code_buf[code_buf_pos++] = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) ( +00412 ((m_match_position >> 4) & 0xf0) | +00413 (m_match_length - THRESHOLD) ); +00414 } +00415 +00416 <font class="comment">// Shift the mask one bit to the left so that it will be ready</font> +00417 <font class="comment">// to store the new bit.</font> +00418 +00419 mask = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) (mask << 1); +00420 +00421 <font class="comment">// If the mask is now 0, then we know that we have a full set</font> +00422 <font class="comment">// of flags and items in the code buffer. These need to be</font> +00423 <font class="comment">// output.</font> +00424 +00425 <font class="keywordflow">if</font> (!mask) { +00426 <font class="comment">// code_buf is the buffer of characters to be output.</font> +00427 <font class="comment">// code_buf_pos is the number of characters it contains.</font> +00428 +00429 SendChars((<font class="keywordtype">char</font> *) code_buf, code_buf_pos); +00430 +00431 <font class="comment">// Reset for next buffer...</font> +00432 +00433 code_buf[0] = 0; +00434 code_buf_pos = 1; +00435 mask = 1; +00436 } +00437 +00438 last_match_length = m_match_length; +00439 +00440 <font class="comment">// Delete old strings and read new bytes...</font> +00441 +00442 <font class="keywordflow">for</font> (i = 0; i < last_match_length; i++) { +00443 <font class="comment">// Get next character...</font> +00444 +00445 <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) &c, 1) != 1) +00446 <font class="keywordflow">break</font>; +00447 +00448 <font class="comment">// Delete "old strings"</font> +00449 +00450 DeleteNode(s); +00451 +00452 <font class="comment">// Put this character into the ring buffer.</font> +00453 <font class="comment">// </font> +00454 <font class="comment">// The original comment here says "If the position is near</font> +00455 <font class="comment">// the end of the buffer, extend the buffer to make</font> +00456 <font class="comment">// string comparison easier."</font> +00457 <font class="comment">//</font> +00458 <font class="comment">// That's a little misleading, because the "end" of the </font> +00459 <font class="comment">// buffer is really what we consider to be the "beginning"</font> +00460 <font class="comment">// of the buffer, that is, positions 0 through F.</font> +00461 <font class="comment">//</font> +00462 <font class="comment">// The idea is that the front end of the buffer is duplicated</font> +00463 <font class="comment">// into the back end so that when you're looking at characters</font> +00464 <font class="comment">// at the back end of the buffer, you can index ahead (beyond</font> +00465 <font class="comment">// the normal end of the buffer) and see the characters</font> +00466 <font class="comment">// that are at the front end of the buffer wihtout having</font> +00467 <font class="comment">// to adjust the index.</font> +00468 <font class="comment">//</font> +00469 <font class="comment">// That is...</font> +00470 <font class="comment">//</font> +00471 <font class="comment">// 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234</font> +00472 <font class="comment">// | | |</font> +00473 <font class="comment">// position 0 end of buffer |</font> +00474 <font class="comment">// |</font> +00475 <font class="comment">// duplicate of front of buffer</font> +00476 +00477 m_ring_buffer[s] = c; +00478 +00479 <font class="keywordflow">if</font> (s < F - 1) { +00480 m_ring_buffer[s + N] = c; +00481 } +00482 +00483 <font class="comment">// Increment the position, and wrap around when we're at</font> +00484 <font class="comment">// the end. Note that this relies on N being a power of 2.</font> +00485 +00486 s = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (s + 1) & (N - 1) ); +00487 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) & (N - 1) ); +00488 +00489 <font class="comment">// Register the string that is found in </font> +00490 <font class="comment">// m_ring_buffer[r..r+F-1].</font> +00491 +00492 InsertNode(r); +00493 } +00494 +00495 <font class="comment">// If we didn't quit because we hit the last_match_length,</font> +00496 <font class="comment">// then we must have quit because we ran out of characters</font> +00497 <font class="comment">// to process.</font> +00498 +00499 <font class="keywordflow">while</font> (i++ < last_match_length) { +00500 DeleteNode(s); +00501 +00502 s = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (s + 1) & (N - 1) ); +00503 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) & (N - 1) ); +00504 +00505 <font class="comment">// Note that len hitting 0 is the key that causes the</font> +00506 <font class="comment">// do...while() to terminate. This is the only place</font> +00507 <font class="comment">// within the loop that len is modified.</font> +00508 <font class="comment">//</font> +00509 <font class="comment">// Its original value is F (or a number less than F for</font> +00510 <font class="comment">// short strings).</font> +00511 +00512 <font class="keywordflow">if</font> (--len) { +00513 InsertNode(r); <font class="comment">/* buffer may not be empty. */</font> +00514 } +00515 } +00516 +00517 <font class="comment">// End of do...while() loop. Continue processing until there</font> +00518 <font class="comment">// are no more characters to be compressed. The variable</font> +00519 <font class="comment">// "len" is used to signal this condition.</font> +00520 } <font class="keywordflow">while</font> (len > 0); +00521 +00522 <font class="comment">// There could still be something in the output buffer. Send it</font> +00523 <font class="comment">// now.</font> +00524 +00525 <font class="keywordflow">if</font> (code_buf_pos > 1) { +00526 <font class="comment">// code_buf is the encoded string to send.</font> +00527 <font class="comment">// code_buf_ptr is the number of characters.</font> +00528 +00529 SendChars((<font class="keywordtype">char</font> *) code_buf, code_buf_pos); +00530 } +00531 +00532 +00533 <font class="comment">// must set zlen for parent class to know length of compressed buffer</font> +00534 zlen = zpos; +00535 } +00536 +00537 +00538 <font class="comment">/******************************************************************************</font> +00539 <font class="comment"> * LZSSCompress::Decode - This function "decodes" the input stream into the</font> +00540 <font class="comment"> * output stream.</font> +00541 <font class="comment"> * The GetChars() and SendChars() functions are</font> +00542 <font class="comment"> * used to separate this method from the actual</font> +00543 <font class="comment"> * i/o.</font> +00544 <font class="comment"> */</font> +00545 +00546 <font class="keywordtype">void</font> LZSSCompress::Decode(<font class="keywordtype">void</font>) +00547 { +00548 <font class="keywordtype">int</font> k; +00549 <font class="keywordtype">int</font> r; <font class="comment">// node number</font> +00550 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> c[F]; <font class="comment">// an array of chars</font> +00551 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> flags; <font class="comment">// 8 bits of flags</font> +00552 <font class="keywordtype">int</font> flag_count; <font class="comment">// which flag we're on</font> +00553 <font class="keywordtype">short</font> <font class="keywordtype">int</font> pos; <font class="comment">// position in the ring buffer</font> +00554 <font class="keywordtype">short</font> <font class="keywordtype">int</font> len; <font class="comment">// number of chars in ring buffer</font> +00555 <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> totalLen = 0; +00556 +00557 direct = 1; <font class="comment">// set direction needed by parent [Get|Send]Chars()</font> +00558 +00559 <font class="comment">// Initialize the ring buffer with a common string.</font> +00560 <font class="comment">//</font> +00561 <font class="comment">// Note that the last F bytes of the ring buffer are not filled.</font> +00562 +00563 memset(m_ring_buffer, <font class="charliteral">' '</font>, N - F); +00564 +00565 r = N - F; +00566 +00567 flags = (char) 0; +00568 flag_count = 0; +00569 +00570 <font class="keywordflow">for</font> ( ; ; ) { +00571 +00572 <font class="comment">// If there are more bits of interest in this flag, then</font> +00573 <font class="comment">// shift that next interesting bit into the 1's position.</font> +00574 <font class="comment">//</font> +00575 <font class="comment">// If this flag has been exhausted, the next byte must </font> +00576 <font class="comment">// be a flag.</font> +00577 +00578 <font class="keywordflow">if</font> (flag_count > 0) { +00579 flags = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) (flags >> 1); +00580 flag_count--; +00581 } +00582 <font class="keywordflow">else</font> { +00583 <font class="comment">// Next byte must be a flag.</font> +00584 +00585 <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) &flags, 1) != 1) +00586 <font class="keywordflow">break</font>; +00587 +00588 <font class="comment">// Set the flag counter. While at first it might appear</font> +00589 <font class="comment">// that this should be an 8 since there are 8 bits in the</font> +00590 <font class="comment">// flag, it should really be a 7 because the shift must</font> +00591 <font class="comment">// be performed 7 times in order to see all 8 bits.</font> +00592 +00593 flag_count = 7; +00594 } +00595 +00596 <font class="comment">// If the low order bit of the flag is now set, then we know</font> +00597 <font class="comment">// that the next byte is a single, unencoded character.</font> +00598 +00599 <font class="keywordflow">if</font> (flags & 1) { +00600 <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) c, 1) != 1) +00601 <font class="keywordflow">break</font>; +00602 +00603 <font class="keywordflow">if</font> (SendChars((<font class="keywordtype">char</font> *) c, 1) != 1) { +00604 totalLen++; +00605 <font class="keywordflow">break</font>; +00606 } +00607 +00608 <font class="comment">// Add to buffer, and increment to next spot. Wrap at end.</font> +00609 +00610 m_ring_buffer[r] = c[0]; +00611 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) & (N - 1) ); +00612 } +00613 +00614 <font class="comment">// Otherwise, we know that the next two bytes are a</font> +00615 <font class="comment">// <position,length> pair. The position is in 12 bits and</font> +00616 <font class="comment">// the length is in 4 bits.</font> +00617 +00618 <font class="keywordflow">else</font> { +00619 <font class="comment">// Original code:</font> +00620 <font class="comment">// if ((i = getc(infile)) == EOF)</font> +00621 <font class="comment">// break;</font> +00622 <font class="comment">// if ((j = getc(infile)) == EOF)</font> +00623 <font class="comment">// break;</font> +00624 <font class="comment">// i |= ((j & 0xf0) << 4); </font> +00625 <font class="comment">// j = (j & 0x0f) + THRESHOLD;</font> +00626 <font class="comment">//</font> +00627 <font class="comment">// I've modified this to only make one input call, and</font> +00628 <font class="comment">// have changed the variable names to something more</font> +00629 <font class="comment">// obvious.</font> +00630 +00631 <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) c, 2) != 2) +00632 <font class="keywordflow">break</font>; +00633 +00634 <font class="comment">// Convert these two characters into the position and</font> +00635 <font class="comment">// length. Note that the length is always at least</font> +00636 <font class="comment">// THRESHOLD, which is why we're able to get a length</font> +00637 <font class="comment">// of 18 out of only 4 bits.</font> +00638 +00639 pos = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( c[0] | ((c[1] & 0xf0) << 4) ); +00640 +00641 len = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (c[1] & 0x0f) + THRESHOLD ); +00642 +00643 <font class="comment">// There are now "len" characters at position "pos" in</font> +00644 <font class="comment">// the ring buffer that can be pulled out. Note that</font> +00645 <font class="comment">// len is never more than F.</font> +00646 +00647 <font class="keywordflow">for</font> (k = 0; k < len; k++) { +00648 c[k] = m_ring_buffer[(pos + k) & (N - 1)]; +00649 +00650 <font class="comment">// Add to buffer, and increment to next spot. Wrap at end.</font> +00651 +00652 m_ring_buffer[r] = c[k]; +00653 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) & (N - 1) ); +00654 } +00655 +00656 <font class="comment">// Add the "len" :characters to the output stream.</font> +00657 +00658 <font class="keywordflow">if</font> (SendChars((<font class="keywordtype">char</font> *) c, len) != (<font class="keywordtype">unsigned</font> <font class="keywordtype">int</font>)len) { +00659 totalLen += len; +00660 <font class="keywordflow">break</font>; +00661 } +00662 } +00663 } +00664 slen = totalLen; +00665 } +</pre></div><hr><address align="right"><small>Generated on Thu Jun 20 22:12:59 2002 for The Sword Project by +<a href="http://www.doxygen.org/index.html"> +<img src="doxygen.png" alt="doxygen" align="middle" border=0 +width=110 height=53></a>1.2.15 </small></address> +</body> +</html> |