I updated the PDF Booklet project and removed Python 2 dependencies so that it will run under Ubuntu 22.04.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

362 lines
13 KiB

2 years ago
  1. # vim: sw=4:expandtab:foldmethod=marker
  2. #
  3. # Copyright (c) 2006, Mathieu Fenniak
  4. # All rights reserved.
  5. #
  6. # Redistribution and use in source and binary forms, with or without
  7. # modification, are permitted provided that the following conditions are
  8. # met:
  9. #
  10. # * Redistributions of source code must retain the above copyright notice,
  11. # this list of conditions and the following disclaimer.
  12. # * Redistributions in binary form must reproduce the above copyright notice,
  13. # this list of conditions and the following disclaimer in the documentation
  14. # and/or other materials provided with the distribution.
  15. # * The name of the author may not be used to endorse or promote products
  16. # derived from this software without specific prior written permission.
  17. #
  18. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. # POSSIBILITY OF SUCH DAMAGE.
  29. """
  30. Implementation of stream filters for PDF.
  31. """
  32. __author__ = "Mathieu Fenniak"
  33. __author_email__ = "biziqe@mathieu.fenniak.net"
  34. from .utils import PdfReadError, ord_, chr_
  35. from sys import version_info
  36. if version_info < ( 3, 0 ):
  37. from cStringIO import StringIO
  38. else:
  39. from io import StringIO
  40. import struct
  41. try:
  42. import zlib
  43. def decompress(data):
  44. return zlib.decompress(data)
  45. def compress(data):
  46. return zlib.compress(data)
  47. except ImportError:
  48. # Unable to import zlib. Attempt to use the System.IO.Compression
  49. # library from the .NET framework. (IronPython only)
  50. import System
  51. from System import IO, Collections, Array
  52. def _string_to_bytearr(buf):
  53. retval = Array.CreateInstance(System.Byte, len(buf))
  54. for i in range(len(buf)):
  55. retval[i] = ord(buf[i])
  56. return retval
  57. def _bytearr_to_string(bytes):
  58. retval = ""
  59. for i in range(bytes.Length):
  60. retval += chr(bytes[i])
  61. return retval
  62. def _read_bytes(stream):
  63. ms = IO.MemoryStream()
  64. buf = Array.CreateInstance(System.Byte, 2048)
  65. while True:
  66. bytes = stream.Read(buf, 0, buf.Length)
  67. if bytes == 0:
  68. break
  69. else:
  70. ms.Write(buf, 0, bytes)
  71. retval = ms.ToArray()
  72. ms.Close()
  73. return retval
  74. def decompress(data):
  75. bytes = _string_to_bytearr(data)
  76. ms = IO.MemoryStream()
  77. ms.Write(bytes, 0, bytes.Length)
  78. ms.Position = 0 # fseek 0
  79. gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
  80. bytes = _read_bytes(gz)
  81. retval = _bytearr_to_string(bytes)
  82. gz.Close()
  83. return retval
  84. def compress(data):
  85. bytes = _string_to_bytearr(data)
  86. ms = IO.MemoryStream()
  87. gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
  88. gz.Write(bytes, 0, bytes.Length)
  89. gz.Close()
  90. ms.Position = 0 # fseek 0
  91. bytes = ms.ToArray()
  92. retval = _bytearr_to_string(bytes)
  93. ms.Close()
  94. return retval
  95. class FlateDecode(object):
  96. def decode(data, decodeParms):
  97. data = decompress(data)
  98. predictor = 1
  99. if decodeParms:
  100. try:
  101. predictor = decodeParms.get("/Predictor", 1)
  102. except AttributeError:
  103. pass # usually an array with a null object was read
  104. # predictor 1 == no predictor
  105. if predictor != 1:
  106. columns = decodeParms["/Columns"]
  107. # PNG prediction:
  108. if predictor >= 10 and predictor <= 15:
  109. output = StringIO()
  110. # PNG prediction can vary from row to row
  111. rowlength = columns + 1
  112. assert len(data) % rowlength == 0
  113. prev_rowdata = (0,) * rowlength
  114. for row in range(len(data) // rowlength):
  115. rowdata = [ord_(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
  116. filterByte = rowdata[0]
  117. if filterByte == 0:
  118. pass
  119. elif filterByte == 1:
  120. for i in range(2, rowlength):
  121. rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
  122. elif filterByte == 2:
  123. for i in range(1, rowlength):
  124. rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
  125. else:
  126. # unsupported PNG filter
  127. raise PdfReadError("Unsupported PNG filter %r" % filterByte)
  128. prev_rowdata = rowdata
  129. output.write(''.join([chr(x) for x in rowdata[1:]]))
  130. data = output.getvalue()
  131. else:
  132. # unsupported predictor
  133. raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
  134. return data
  135. decode = staticmethod(decode)
  136. def encode(data):
  137. return compress(data)
  138. encode = staticmethod(encode)
  139. class ASCIIHexDecode(object):
  140. def decode(data, decodeParms=None):
  141. retval = ""
  142. char = ""
  143. x = 0
  144. while True:
  145. c = data[x]
  146. if c == ">":
  147. break
  148. elif c.isspace():
  149. x += 1
  150. continue
  151. char += c
  152. if len(char) == 2:
  153. retval += chr(int(char, base=16))
  154. char = ""
  155. x += 1
  156. assert char == ""
  157. return retval
  158. decode = staticmethod(decode)
  159. class LZWDecode(object):
  160. """Taken from:
  161. http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
  162. """
  163. class decoder(object):
  164. def __init__(self, data):
  165. self.STOP=257
  166. self.CLEARDICT=256
  167. self.data=data
  168. self.bytepos=0
  169. self.bitpos=0
  170. self.dict=[""]*4096
  171. for i in range(256):
  172. self.dict[i]=chr(i)
  173. self.resetDict()
  174. def resetDict(self):
  175. self.dictlen=258
  176. self.bitspercode=9
  177. def nextCode(self):
  178. fillbits=self.bitspercode
  179. value=0
  180. while fillbits>0 :
  181. if self.bytepos >= len(self.data):
  182. return -1
  183. nextbits=ord(self.data[self.bytepos])
  184. bitsfromhere=8-self.bitpos
  185. if bitsfromhere>fillbits:
  186. bitsfromhere=fillbits
  187. value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) &
  188. (0xff >> (8-bitsfromhere))) <<
  189. (fillbits-bitsfromhere))
  190. fillbits -= bitsfromhere
  191. self.bitpos += bitsfromhere
  192. if self.bitpos >=8:
  193. self.bitpos=0
  194. self.bytepos = self.bytepos+1
  195. return value
  196. def decode(self):
  197. """ algorithm derived from:
  198. http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
  199. and the PDFReference
  200. """
  201. cW = self.CLEARDICT;
  202. baos=""
  203. while True:
  204. pW = cW;
  205. cW = self.nextCode();
  206. if cW == -1:
  207. raise PdfReadError("Missed the stop code in LZWDecode!")
  208. if cW == self.STOP:
  209. break;
  210. elif cW == self.CLEARDICT:
  211. self.resetDict();
  212. elif pW == self.CLEARDICT:
  213. baos+=self.dict[cW]
  214. else:
  215. if cW < self.dictlen:
  216. baos += self.dict[cW]
  217. p=self.dict[pW]+self.dict[cW][0]
  218. self.dict[self.dictlen]=p
  219. self.dictlen+=1
  220. else:
  221. p=self.dict[pW]+self.dict[pW][0]
  222. baos+=p
  223. self.dict[self.dictlen] = p;
  224. self.dictlen+=1
  225. if (self.dictlen >= (1 << self.bitspercode) - 1 and
  226. self.bitspercode < 12):
  227. self.bitspercode+=1
  228. return baos
  229. @staticmethod
  230. def decode(data,decodeParams=None):
  231. return LZWDecode.decoder(data).decode()
  232. class ASCII85Decode(object):
  233. def decode(data, decodeParms=None):
  234. if version_info < ( 3, 0 ):
  235. retval = ""
  236. group = []
  237. x = 0
  238. hitEod = False
  239. # remove all whitespace from data
  240. data = [y for y in data if not (y in ' \n\r\t')]
  241. while not hitEod:
  242. c = data[x]
  243. if len(retval) == 0 and c == "<" and data[x+1] == "~":
  244. x += 2
  245. continue
  246. #elif c.isspace():
  247. # x += 1
  248. # continue
  249. elif c == 'z':
  250. assert len(group) == 0
  251. retval += '\x00\x00\x00\x00'
  252. x += 1
  253. continue
  254. elif c == "~" and data[x+1] == ">":
  255. if len(group) != 0:
  256. # cannot have a final group of just 1 char
  257. assert len(group) > 1
  258. cnt = len(group) - 1
  259. group += [ 85, 85, 85 ]
  260. hitEod = cnt
  261. else:
  262. break
  263. else:
  264. c = ord(c) - 33
  265. assert c >= 0 and c < 85
  266. group += [ c ]
  267. if len(group) >= 5:
  268. b = group[0] * (85**4) + \
  269. group[1] * (85**3) + \
  270. group[2] * (85**2) + \
  271. group[3] * 85 + \
  272. group[4]
  273. assert b < (2**32 - 1)
  274. c4 = chr((b >> 0) % 256)
  275. c3 = chr((b >> 8) % 256)
  276. c2 = chr((b >> 16) % 256)
  277. c1 = chr(b >> 24)
  278. retval += (c1 + c2 + c3 + c4)
  279. if hitEod:
  280. retval = retval[:-4+hitEod]
  281. group = []
  282. x += 1
  283. return retval
  284. else:
  285. if isinstance(data, str):
  286. data = data.encode('ascii')
  287. n = b = 0
  288. out = bytearray()
  289. for c in data:
  290. if ord('!') <= c and c <= ord('u'):
  291. n += 1
  292. b = b*85+(c-33)
  293. if n == 5:
  294. out += struct.pack(b'>L',b)
  295. n = b = 0
  296. elif c == ord('z'):
  297. assert n == 0
  298. out += b'\0\0\0\0'
  299. elif c == ord('~'):
  300. if n:
  301. for _ in range(5-n):
  302. b = b*85+84
  303. out += struct.pack(b'>L',b)[:n-1]
  304. break
  305. return bytes(out)
  306. decode = staticmethod(decode)
  307. def decodeStreamData(stream):
  308. from .generic import NameObject
  309. filters = stream.get("/Filter", ())
  310. if len(filters) and not isinstance(filters[0], NameObject):
  311. # we have a single filter instance
  312. filters = (filters,)
  313. data = stream._data
  314. # If there is not data to decode we should not try to decode the data.
  315. if data:
  316. for filterType in filters:
  317. if filterType == "/FlateDecode" or filterType == "/Fl":
  318. data = FlateDecode.decode(data, stream.get("/DecodeParms"))
  319. elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
  320. data = ASCIIHexDecode.decode(data)
  321. elif filterType == "/LZWDecode" or filterType == "/LZW":
  322. data = LZWDecode.decode(data, stream.get("/DecodeParms"))
  323. elif filterType == "/ASCII85Decode" or filterType == "/A85":
  324. data = ASCII85Decode.decode(data)
  325. elif filterType == "/Crypt":
  326. decodeParams = stream.get("/DecodeParams", {})
  327. if "/Name" not in decodeParams and "/Type" not in decodeParams:
  328. pass
  329. else:
  330. raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
  331. else:
  332. # unsupported filter
  333. raise NotImplementedError("unsupported filter %s" % filterType)
  334. return data