I updated the PDF Booklet project and removed Python 2 dependencies so that it will run under Ubuntu 22.04.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

295 lines
7.6 KiB

2 years ago
  1. # Copyright (c) 2006, Mathieu Fenniak
  2. # All rights reserved.
  3. #
  4. # Redistribution and use in source and binary forms, with or without
  5. # modification, are permitted provided that the following conditions are
  6. # met:
  7. #
  8. # * Redistributions of source code must retain the above copyright notice,
  9. # this list of conditions and the following disclaimer.
  10. # * Redistributions in binary form must reproduce the above copyright notice,
  11. # this list of conditions and the following disclaimer in the documentation
  12. # and/or other materials provided with the distribution.
  13. # * The name of the author may not be used to endorse or promote products
  14. # derived from this software without specific prior written permission.
  15. #
  16. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  26. # POSSIBILITY OF SUCH DAMAGE.
  27. """
  28. Utility functions for PDF library.
  29. """
  30. __author__ = "Mathieu Fenniak"
  31. __author_email__ = "biziqe@mathieu.fenniak.net"
  32. import sys
  33. try:
  34. import __builtin__ as builtins
  35. except ImportError: # Py3
  36. import builtins
  37. xrange_fn = getattr(builtins, "xrange", range)
  38. _basestring = getattr(builtins, "basestring", str)
  39. bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X
  40. string_type = getattr(builtins, "unicode", str)
  41. int_types = (int, long) if sys.version_info[0] < 3 else (int,)
  42. # Make basic type tests more consistent
  43. def isString(s):
  44. """Test if arg is a string. Compatible with Python 2 and 3."""
  45. return isinstance(s, _basestring)
  46. def isInt(n):
  47. """Test if arg is an int. Compatible with Python 2 and 3."""
  48. return isinstance(n, int_types)
  49. def isBytes(b):
  50. """Test if arg is a bytes instance. Compatible with Python 2 and 3."""
  51. return isinstance(b, bytes_type)
  52. #custom implementation of warnings.formatwarning
  53. def formatWarning(message, category, filename, lineno, line=None):
  54. file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name
  55. return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno)
  56. def readUntilWhitespace(stream, maxchars=None):
  57. """
  58. Reads non-whitespace characters and returns them.
  59. Stops upon encountering whitespace or when maxchars is reached.
  60. """
  61. txt = b_("")
  62. while True:
  63. tok = stream.read(1)
  64. if tok.isspace() or not tok:
  65. break
  66. txt += tok
  67. if len(txt) == maxchars:
  68. break
  69. return txt
  70. def readNonWhitespace(stream):
  71. """
  72. Finds and reads the next non-whitespace character (ignores whitespace).
  73. """
  74. tok = WHITESPACES[0]
  75. while tok in WHITESPACES:
  76. tok = stream.read(1)
  77. return tok
  78. def skipOverWhitespace(stream):
  79. """
  80. Similar to readNonWhitespace, but returns a Boolean if more than
  81. one whitespace character was read.
  82. """
  83. tok = WHITESPACES[0]
  84. cnt = 0;
  85. while tok in WHITESPACES:
  86. tok = stream.read(1)
  87. cnt+=1
  88. return (cnt > 1)
  89. def skipOverComment(stream):
  90. tok = stream.read(1)
  91. stream.seek(-1, 1)
  92. if tok == b_('%'):
  93. while tok not in (b_('\n'), b_('\r')):
  94. tok = stream.read(1)
  95. def readUntilRegex(stream, regex, ignore_eof=False):
  96. """
  97. Reads until the regular expression pattern matched (ignore the match)
  98. Raise PdfStreamError on premature end-of-file.
  99. :param bool ignore_eof: If true, ignore end-of-line and return immediately
  100. """
  101. name = b_('')
  102. while True:
  103. tok = stream.read(16)
  104. if not tok:
  105. # stream has truncated prematurely
  106. if ignore_eof == True:
  107. return name
  108. else:
  109. raise PdfStreamError("Stream has ended unexpectedly")
  110. m = regex.search(tok)
  111. if m is not None:
  112. name += tok[:m.start()]
  113. stream.seek(m.start()-len(tok), 1)
  114. break
  115. name += tok
  116. return name
  117. class ConvertFunctionsToVirtualList(object):
  118. def __init__(self, lengthFunction, getFunction):
  119. self.lengthFunction = lengthFunction
  120. self.getFunction = getFunction
  121. def __len__(self):
  122. return self.lengthFunction()
  123. def __getitem__(self, index):
  124. if isinstance(index, slice):
  125. indices = xrange_fn(*index.indices(len(self)))
  126. cls = type(self)
  127. return cls(indices.__len__, lambda idx: self[indices[idx]])
  128. if not isInt(index):
  129. raise TypeError("sequence indices must be integers")
  130. len_self = len(self)
  131. if index < 0:
  132. # support negative indexes
  133. index = len_self + index
  134. if index < 0 or index >= len_self:
  135. raise IndexError("sequence index out of range")
  136. return self.getFunction(index)
  137. def RC4_encrypt(key, plaintext):
  138. S = [i for i in range(256)]
  139. j = 0
  140. for i in range(256):
  141. j = (j + S[i] + ord_(key[i % len(key)])) % 256
  142. S[i], S[j] = S[j], S[i]
  143. i, j = 0, 0
  144. retval = b_("")
  145. for x in range(len(plaintext)):
  146. i = (i + 1) % 256
  147. j = (j + S[i]) % 256
  148. S[i], S[j] = S[j], S[i]
  149. t = S[(S[i] + S[j]) % 256]
  150. retval += b_(chr(ord_(plaintext[x]) ^ t))
  151. return retval
  152. def matrixMultiply(a, b):
  153. return [[sum([float(i)*float(j)
  154. for i, j in zip(row, col)]
  155. ) for col in zip(*b)]
  156. for row in a]
  157. def markLocation(stream):
  158. """Creates text file showing current location in context."""
  159. # Mainly for debugging
  160. RADIUS = 5000
  161. stream.seek(-RADIUS, 1)
  162. outputDoc = open('PyPDF2_pdfLocation.txt', 'w')
  163. outputDoc.write(stream.read(RADIUS))
  164. outputDoc.write('HERE')
  165. outputDoc.write(stream.read(RADIUS))
  166. outputDoc.close()
  167. stream.seek(-RADIUS, 1)
  168. class PyPdfError(Exception):
  169. pass
  170. class PdfReadError(PyPdfError):
  171. pass
  172. class PageSizeNotDefinedError(PyPdfError):
  173. pass
  174. class PdfReadWarning(UserWarning):
  175. pass
  176. class PdfStreamError(PdfReadError):
  177. pass
  178. if sys.version_info[0] < 3:
  179. def b_(s):
  180. return s
  181. else:
  182. B_CACHE = {}
  183. def b_(s):
  184. bc = B_CACHE
  185. if s in bc:
  186. return bc[s]
  187. if type(s) == bytes:
  188. return s
  189. else:
  190. r = s.encode('latin-1')
  191. if len(s) < 2:
  192. bc[s] = r
  193. return r
  194. def u_(s):
  195. if sys.version_info[0] < 3:
  196. return unicode(s, 'unicode_escape')
  197. else:
  198. return s
  199. def str_(b):
  200. if sys.version_info[0] < 3:
  201. return b
  202. else:
  203. if type(b) == bytes:
  204. return b.decode('latin-1')
  205. else:
  206. return b
  207. def ord_(b):
  208. if sys.version_info[0] < 3 or type(b) == str:
  209. return ord(b)
  210. else:
  211. return b
  212. def chr_(c):
  213. if sys.version_info[0] < 3:
  214. return c
  215. else:
  216. return chr(c)
  217. def barray(b):
  218. if sys.version_info[0] < 3:
  219. return b
  220. else:
  221. return bytearray(b)
  222. def hexencode(b):
  223. if sys.version_info[0] < 3:
  224. return b.encode('hex')
  225. else:
  226. import codecs
  227. coder = codecs.getencoder('hex_codec')
  228. return coder(b)[0]
  229. def hexStr(num):
  230. return hex(num).replace('L', '')
  231. WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]