I updated the PDF Booklet project and removed Python 2 dependencies so that it will run under Ubuntu 22.04.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

152 lines
5.4 KiB

2 years ago
  1. #!/usr/bin/env python
  2. """
  3. Representation and utils for ranges of PDF file pages.
  4. Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
  5. All rights reserved. This software is available under a BSD license;
  6. see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
  7. """
  8. import re
  9. from .utils import isString
  10. _INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0".
  11. PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)
  12. # groups: 12 34 5 6 7 8
  13. class ParseError(Exception):
  14. pass
  15. PAGE_RANGE_HELP = """Remember, page indices start with zero.
  16. Page range expression examples:
  17. : all pages. -1 last page.
  18. 22 just the 23rd page. :-1 all but the last page.
  19. 0:3 the first three pages. -2 second-to-last page.
  20. :3 the first three pages. -2: last two pages.
  21. 5: from the sixth page onward. -3:-1 third & second to last.
  22. The third, "stride" or "step" number is also recognized.
  23. ::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
  24. 1:10:2 1 3 5 7 9 2::-1 2 1 0.
  25. ::-1 all pages in reverse order.
  26. """
  27. class PageRange(object):
  28. """
  29. A slice-like representation of a range of page indices,
  30. i.e. page numbers, only starting at zero.
  31. The syntax is like what you would put between brackets [ ].
  32. The slice is one of the few Python types that can't be subclassed,
  33. but this class converts to and from slices, and allows similar use.
  34. o PageRange(str) parses a string representing a page range.
  35. o PageRange(slice) directly "imports" a slice.
  36. o to_slice() gives the equivalent slice.
  37. o str() and repr() allow printing.
  38. o indices(n) is like slice.indices(n).
  39. """
  40. def __init__(self, arg):
  41. """
  42. Initialize with either a slice -- giving the equivalent page range,
  43. or a PageRange object -- making a copy,
  44. or a string like
  45. "int", "[int]:[int]" or "[int]:[int]:[int]",
  46. where the brackets indicate optional ints.
  47. {page_range_help}
  48. Note the difference between this notation and arguments to slice():
  49. slice(3) means the first three pages;
  50. PageRange("3") means the range of only the fourth page.
  51. However PageRange(slice(3)) means the first three pages.
  52. """
  53. if isinstance(arg, slice):
  54. self._slice = arg
  55. return
  56. if isinstance(arg, PageRange):
  57. self._slice = arg.to_slice()
  58. return
  59. m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
  60. if not m:
  61. raise ParseError(arg)
  62. elif m.group(2):
  63. # Special case: just an int means a range of one page.
  64. start = int(m.group(2))
  65. stop = start + 1 if start != -1 else None
  66. self._slice = slice(start, stop)
  67. else:
  68. self._slice = slice(*[int(g) if g else None
  69. for g in m.group(4, 6, 8)])
  70. # Just formatting this when there is __doc__ for __init__
  71. if __init__.__doc__:
  72. __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)
  73. @staticmethod
  74. def valid(input):
  75. """ True if input is a valid initializer for a PageRange. """
  76. return isinstance(input, slice) or \
  77. isinstance(input, PageRange) or \
  78. (isString(input)
  79. and bool(re.match(PAGE_RANGE_RE, input)))
  80. def to_slice(self):
  81. """ Return the slice equivalent of this page range. """
  82. return self._slice
  83. def __str__(self):
  84. """ A string like "1:2:3". """
  85. s = self._slice
  86. if s.step == None:
  87. if s.start != None and s.stop == s.start + 1:
  88. return str(s.start)
  89. indices = s.start, s.stop
  90. else:
  91. indices = s.start, s.stop, s.step
  92. return ':'.join("" if i == None else str(i) for i in indices)
  93. def __repr__(self):
  94. """ A string like "PageRange('1:2:3')". """
  95. return "PageRange(" + repr(str(self)) + ")"
  96. def indices(self, n):
  97. """
  98. n is the length of the list of pages to choose from.
  99. Returns arguments for range(). See help(slice.indices).
  100. """
  101. return self._slice.indices(n)
# The range of all pages (the expression ":" has no start, stop or step).
PAGE_RANGE_ALL = PageRange(":")
  103. def parse_filename_page_ranges(args):
  104. """
  105. Given a list of filenames and page ranges, return a list of
  106. (filename, page_range) pairs.
  107. First arg must be a filename; other ags are filenames, page-range
  108. expressions, slice objects, or PageRange objects.
  109. A filename not followed by a page range indicates all pages of the file.
  110. """
  111. pairs = []
  112. pdf_filename = None
  113. did_page_range = False
  114. for arg in args + [None]:
  115. if PageRange.valid(arg):
  116. if not pdf_filename:
  117. raise ValueError("The first argument must be a filename, " \
  118. "not a page range.")
  119. pairs.append( (pdf_filename, PageRange(arg)) )
  120. did_page_range = True
  121. else:
  122. # New filename or end of list--do all of the previous file?
  123. if pdf_filename and not did_page_range:
  124. pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
  125. pdf_filename = arg
  126. did_page_range = False
  127. return pairs