aboutsummaryrefslogtreecommitdiffstats
path: root/src/epy_reader/tools/KindleUnpack/mobi_uncompress.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/epy_reader/tools/KindleUnpack/mobi_uncompress.py')
-rw-r--r--src/epy_reader/tools/KindleUnpack/mobi_uncompress.py131
1 files changed, 131 insertions, 0 deletions
diff --git a/src/epy_reader/tools/KindleUnpack/mobi_uncompress.py b/src/epy_reader/tools/KindleUnpack/mobi_uncompress.py
new file mode 100644
index 0000000..c5fad85
--- /dev/null
+++ b/src/epy_reader/tools/KindleUnpack/mobi_uncompress.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+from .compatibility_utils import PY2, bchr, lmap, bstr
+
+if PY2:
+ range = xrange
+
+import struct
+# note: struct pack, unpack, unpack_from all require bytestring format
+# data all the way up to at least python 2.7.5, python 3 okay with bytestring
+
+
class unpackException(Exception):
    """Error raised by the readers in this module for malformed input.

    HuffcdicReader raises it when a HUFF or CDIC record does not start
    with the expected 8-byte header.
    """
+
class UncompressedReader:
    """Reader for records that are stored without any compression.

    Offers the same ``unpack`` method as the other readers in this
    module so it can be used interchangeably with them.
    """

    def unpack(self, data):
        """Return ``data`` exactly as received (the same object, no copy)."""
        return data
+
class PalmdocReader:
    """Decoder for PalmDOC-compressed records (byte-oriented LZ77 variant)."""

    def unpack(self, i):
        """Decompress one record and return the decoded bytes.

        Each control byte ``c`` of the input selects one of four actions:

        * ``1..8``      -- copy the next ``c`` input bytes verbatim.
        * ``0, 9..127`` -- emit ``c`` itself as a literal byte.
        * ``192..255``  -- emit a space followed by ``c ^ 0x80``.
        * ``128..191``  -- combine with the next byte into a 16-bit value
          holding an 11-bit distance (bits 3..13) and a 3-bit length
          (stored as length - 3); copy ``length`` bytes starting
          ``distance`` bytes back in the output produced so far.

        Malformed input is tolerated rather than rejected: a back-reference
        whose second byte is missing is dropped, and distances that reach
        before the start of the output copy whatever slicing yields
        (possibly nothing).

        :param i: compressed record (bytes-like).
        :return: decompressed data as ``bytes``.
        """
        # bytearray indexes as ints on both Python 2 and 3 and grows in
        # amortised O(1); the previous immutable-bytes "+=" re-copied the
        # whole output on every step.
        src = bytearray(i)
        end = len(src)
        out = bytearray()
        p = 0
        while p < end:
            c = src[p]
            p += 1
            if 1 <= c <= 8:
                # Run of c literal bytes (slicing clamps at end of input).
                out += src[p:p + c]
                p += c
            elif c < 128:
                out.append(c)
            elif c >= 192:
                out.append(0x20)
                out.append(c ^ 128)
            elif p < end:
                # Two-byte back-reference.
                c = (c << 8) | src[p]
                p += 1
                m = (c >> 3) & 0x07ff   # distance back into the output
                n = (c & 7) + 3         # number of bytes to copy
                if m > n:
                    # Source and destination do not overlap: one slice.
                    out += out[-m:n - m]
                elif m == 1:
                    # Run of the last byte (nothing if output is empty).
                    out += out[-1:] * n
                else:
                    # Overlapping copy: each appended byte may itself be
                    # read by a later iteration, so go one byte at a time.
                    for _ in range(n):
                        out += out[-m:-m + 1]
        return bytes(out)
+
class HuffcdicReader:
    """Decoder for HUFF/CDIC (Huffman code + phrase dictionary) records.

    Usage: call ``loadHuff`` once with the HUFF record, ``loadCdic`` once
    per CDIC record (each call appends phrases to the dictionary), then
    ``unpack`` for every compressed record.
    """

    # Reads one big-endian unsigned 64-bit integer from a buffer at an offset.
    q = struct.Struct(b'>Q').unpack_from

    def loadHuff(self, huff):
        """Load the code tables from a HUFF record.

        Sets ``self.dict1`` (256 entries, indexed by the top 8 bits of a
        32-bit code window), ``self.mincode`` / ``self.maxcode`` (33-entry
        tuples indexed by code length) and resets ``self.dictionary``.

        :param huff: raw bytes of the HUFF record.
        :raises unpackException: if the 8-byte header is not the expected one.
        :raises AssertionError: if a dict1 entry has code length 0, or a
            code length <= 8 without the terminal flag.
        """
        if huff[0:8] != b'HUFF\x00\x00\x00\x18':
            raise unpackException('invalid huff header')
        off1, off2 = struct.unpack_from(b'>LL', huff, 8)

        def dict1_unpack(v):
            # Low 5 bits: code length; bit 7: terminal flag; rest: max code.
            codelen, term, maxcode = v & 0x1f, v & 0x80, v >> 8
            # NOTE: kept as asserts so the exception type seen by callers
            # is unchanged (they are skipped when Python runs with -O).
            assert codelen != 0
            if codelen <= 8:
                assert term
            # Left-align to 32 bits with all lower bits set.
            maxcode = ((maxcode + 1) << (32 - codelen)) - 1
            return (codelen, term, maxcode)

        self.dict1 = [dict1_unpack(v)
                      for v in struct.unpack_from(b'>256L', huff, off1)]

        # dict2 interleaves (mincode, maxcode) for code lengths 1..32; a
        # dummy 0 is prepended so the tuples can be indexed by code length.
        dict2 = struct.unpack_from(b'>64L', huff, off2)
        self.mincode = tuple(
            lo << (32 - codelen)
            for codelen, lo in enumerate((0,) + dict2[0::2]))
        self.maxcode = tuple(
            ((hi + 1) << (32 - codelen)) - 1
            for codelen, hi in enumerate((0,) + dict2[1::2]))

        self.dictionary = []

    def loadCdic(self, cdic):
        """Append the phrases of one CDIC record to ``self.dictionary``.

        Each stored entry is a ``(phrase_bytes, flag)`` tuple; a false
        ``flag`` means the phrase is itself compressed and is expanded
        lazily by ``unpack``.

        :param cdic: raw bytes of the CDIC record.
        :raises unpackException: if the 8-byte header is not the expected one.
        """
        if cdic[0:8] != b'CDIC\x00\x00\x00\x10':
            raise unpackException('invalid cdic header')
        phrases, bits = struct.unpack_from(b'>LL', cdic, 8)
        # This record holds at most 2**bits phrases, and never more than
        # are still missing from the announced total.
        n = min(1 << bits, phrases - len(self.dictionary))
        h = struct.Struct(b'>H').unpack_from

        def getslice(off):
            # Offsets are relative to byte 16; each phrase is prefixed by a
            # 16-bit word: low 15 bits = length, top bit = flag.
            blen, = h(cdic, 16 + off)
            phrase = cdic[18 + off:18 + off + (blen & 0x7fff)]
            return (phrase, blen & 0x8000)

        # struct on old Python 2 releases requires a byte-string format.
        fmt = ('>%dH' % n).encode('ascii')
        self.dictionary += [getslice(off)
                            for off in struct.unpack_from(fmt, cdic, 16)]

    def unpack(self, data):
        """Decompress one record and return the decoded bytes.

        Requires ``loadHuff`` and ``loadCdic`` to have been called. Phrases
        whose flag is false are decompressed recursively on first use and
        cached back into ``self.dictionary``.

        :param data: compressed record bytes (not modified).
        :return: decompressed data as ``bytes``.
        """
        q = HuffcdicReader.q

        bitsleft = len(data) * 8
        # Pad so the 64-bit reads below never run past the buffer; rebinding
        # (instead of "+=") leaves a caller-supplied bytearray untouched.
        data = data + b"\x00\x00\x00\x00\x00\x00\x00\x00"
        pos = 0
        x, = q(data, pos)
        n = 32  # shift that exposes the current 32-bit window within x

        parts = []
        while True:
            if n <= 0:
                # First 32 bits of x are used up: slide forward 4 bytes.
                pos += 4
                x, = q(data, pos)
                n += 32
            code = (x >> n) & ((1 << 32) - 1)

            # Fast path: the top byte alone determines short codes.
            codelen, term, maxcode = self.dict1[code >> 24]
            if not term:
                # Longer code: grow the length until the window fits.
                while code < self.mincode[codelen]:
                    codelen += 1
                maxcode = self.maxcode[codelen]

            n -= codelen
            bitsleft -= codelen
            if bitsleft < 0:
                break

            r = (maxcode - code) >> (32 - codelen)
            phrase, flag = self.dictionary[r]
            if not flag:
                # Blank the entry while expanding it so that a phrase which
                # refers back to itself fails (TypeError on the tuple
                # unpack above) instead of recursing without bound.
                self.dictionary[r] = None
                phrase = self.unpack(phrase)
                self.dictionary[r] = (phrase, 1)
            parts.append(phrase)
        # Single join instead of quadratic repeated concatenation.
        return b''.join(parts)