Merging two PDFs with PyPDF2 fails with error: Unexpected destination '/__WKANCHOR_2'

Question:

from PyPDF2 import PdfFileMerger, PdfFileReader

# Input PDFs, merged in list order.
filepath_list = ['/tmp/abc.pdf', '/tmp/xyz.pdf']
merger = PdfFileMerger()
for pdf_path in filepath_list:
    # Each source file is open only while it is being appended.
    with open(pdf_path, 'rb') as pdf_stream:
        merger.append(pdf_stream)

merger.write("result.pdf")

While merging two PDFs with the Python code above, I got the error Unexpected destination '/__WKANCHOR_2'. Please suggest a solution.

Asked By: Deval

||

Answers:

This is a temporary fix: when you pass the file to the append method, also pass import_bookmarks=False. This works for me.

from PyPDF2 import PdfFileMerger, PdfFileReader

filepath_list = ['/tmp/abc.pdf', '/tmp/xyz.pdf']
merger = PdfFileMerger()
try:
    for file_name in filepath_list:
        # Passing the path (instead of a handle that a `with` block closes
        # right away) lets the merger open the file itself and keep it
        # available until merger.close().
        # import_bookmarks=False skips reading the source outlines.
        merger.append(file_name, import_bookmarks=False)

    merger.write("result.pdf")
finally:
    # Release the input files held by the merger, even if merging failed.
    merger.close()
Answered By: dayxx369

If the method @Tonechas mentions doesn’t work for you, try the method @hannal mentions on GitHub. I implemented it into a separate file that I import like so:

from __pypdf2_fix import NewPdfFileReader as PdfFileReader, NewPdfFileMerger as  PdfFileMerger

The file:

from PyPDF2 import PdfFileReader, PdfFileMerger
from PyPDF2.pdf import ArrayObject, NameObject
from PyPDF2.utils import isString
from PyPDF2.merger import _MergedPage
from io import BytesIO
from io import FileIO as file
StreamIO = BytesIO

class NewPdfFileReader(PdfFileReader):
    """PdfFileReader subclass that exists so its methods can be overridden in this module."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through: every argument is forwarded to PdfFileReader unchanged.
        super(NewPdfFileReader, self).__init__(*args, **kwargs)

def _newBuildOutline(self, node):
    """
    Build the outline (bookmark) entry described by a single outline node.

    Intended as a replacement for ``PdfFileReader._buildOutline``. A name
    destination (for example ``/__WKANCHOR_2``) that is not present in
    ``self._namedDests`` is skipped instead of raising.

    :param node: outline item dictionary; only its ``/Title``, ``/A`` and
        ``/Dest`` entries are read.
    :return: the outline entry for ``node``, or ``None`` when the node has no
        title, no usable destination, or an unresolvable name destination.
    :raises PdfReadError: if the destination is of an unsupported kind.
    """
    dest, title = None, None

    if "/A" in node and "/Title" in node:
        # Action, section 8.5 (only type GoTo supported)
        title = node["/Title"]
        action = node["/A"]
        if action["/S"] == "/GoTo":
            dest = action["/D"]
    elif "/Dest" in node and "/Title" in node:
        # Destination, section 8.2.1
        title = node["/Title"]
        dest = node["/Dest"]

    # No (or empty) destination: nothing to build.
    if not dest:
        return None

    if isinstance(dest, ArrayObject):
        # Explicit destination array.
        return self._buildDestination(title, dest)

    if isString(dest) and dest in self._namedDests:
        # Named destination that the document actually defines.
        outline = self._namedDests[dest]
        outline[NameObject("/Title")] = title
        return outline

    if isinstance(dest, NameObject):
        # Name destination without a matching entry: skip this outline item.
        return None

    # Imported here because this file only imports ``isString`` from
    # PyPDF2.utils; the bare ``utils`` name used previously was undefined.
    from PyPDF2.utils import PdfReadError
    raise PdfReadError("Unexpected destination %r" % (dest,))

# Monkey-patch: NewPdfFileReader uses _newBuildOutline as its _buildOutline method.
NewPdfFileReader._buildOutline = _newBuildOutline

class NewPdfFileMerger(PdfFileMerger):
    """PdfFileMerger subclass that exists so its methods can be overridden in this module."""

    def __init__(self, *args, **kwargs):
        # Pure pass-through: every argument is forwarded to PdfFileMerger unchanged.
        super(NewPdfFileMerger, self).__init__(*args, **kwargs)

def newMerge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
    """
    Merges the pages from the given file into the output file at the
    specified page number.

    The input is read through ``NewPdfFileReader``.

    :param int position: The *page number* to insert this file. File will
        be inserted after the given number.

    :param fileobj: A File Object or an object that supports the standard read
        and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file.

    :param str bookmark: Optionally, you may specify a bookmark to be applied at
        the beginning of the included file by supplying the text of the bookmark.

    :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
        to merge only the specified range of pages from the source
        document into the output document.

    :param bool import_bookmarks: You may prevent the source document's bookmarks
        from being imported by specifying this as ``False``.

    :raises TypeError: if ``pages`` is not ``None``, a ``PageRange`` or a tuple.
    """
    # These names are used below but are not imported at the top of this
    # file, so they are brought into scope locally.
    from PyPDF2.generic import Bookmark, NumberObject, TextStringObject
    from PyPDF2.pagerange import PageRange

    # This flag is passed to self.inputs.append and means
    # that the stream used was created in this method.
    my_file = False

    # If the fileobj parameter is a string, assume it is a path
    # and create a file object at that location. If it is a file,
    # copy the file's contents into a BytesIO (or StreamIO) stream object; if
    # it is a PdfFileReader, copy that reader's stream into a
    # BytesIO (or StreamIO) stream.
    # If fileobj is none of the above types, it is not modified
    decryption_key = None
    if isString(fileobj):
        fileobj = file(fileobj, 'rb')
        my_file = True
    elif isinstance(fileobj, file):
        fileobj.seek(0)
        fileobj = StreamIO(fileobj.read())
        my_file = True
    elif isinstance(fileobj, PdfFileReader):
        # Read the key from the reader *before* fileobj is rebound to the
        # copied stream; the stream itself never carries the key.
        decryption_key = getattr(fileobj, '_decryption_key', None)
        source_stream = fileobj.stream
        orig_tell = source_stream.tell()
        source_stream.seek(0)
        copied_stream = StreamIO(source_stream.read())
        source_stream.seek(orig_tell)  # reset the stream to its original location
        fileobj = copied_stream
        my_file = True

    # Create a new reader instance using the stream
    # (either file or BytesIO or StringIO) created above
    pdfr = NewPdfFileReader(fileobj, strict=self.strict)
    if decryption_key is not None:
        pdfr._decryption_key = decryption_key

    # Find the range of pages to merge.
    if pages is None:
        pages = (0, pdfr.getNumPages())
    elif isinstance(pages, PageRange):
        pages = pages.indices(pdfr.getNumPages())
    elif not isinstance(pages, tuple):
        raise TypeError('"pages" must be a tuple of (start, stop[, step])')

    if bookmark:
        bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

    outline = []
    if import_bookmarks:
        outline = pdfr.getOutlines()
        outline = self._trim_outline(pdfr, outline, pages)

    # With a bookmark, the imported outline is added as a nested list right after it.
    if bookmark:
        self.bookmarks += [bookmark, outline]
    else:
        self.bookmarks += outline

    dests = pdfr.namedDestinations
    dests = self._trim_dests(pdfr, dests, pages)
    self.named_dests += dests

    # Gather all the pages that are going to be merged
    srcpages = []
    for page_index in range(*pages):
        page = pdfr.getPage(page_index)

        # Each merged page gets the next id from the merger's counter.
        page_id = self.id_count
        self.id_count += 1

        srcpages.append(_MergedPage(page, pdfr, page_id))

    self._associate_dests_to_pages(srcpages)
    self._associate_bookmarks_to_pages(srcpages)

    # Slice to insert the pages at the specified position
    self.pages[position:position] = srcpages

    # Keep track of our input files so we can close them later
    self.inputs.append((fileobj, pdfr, my_file))

# Monkey-patch: NewPdfFileMerger uses newMerge as its merge method.
NewPdfFileMerger.merge = newMerge
Answered By: LamerLink

Deval

You can simply concatenate files by using the append method. PyPDF2's PdfFileMerger class merges PDFs by simple file concatenation.
Here is a simple example that needs no patch:

from PyPDF2 import PdfFileMerger

# PDFs to concatenate, in output order.
pdf_files = ['pdf1.pdf', 'pdf2.pdf']

merger = PdfFileMerger()

# Paths are handed to the merger directly; no file handles are opened here.
for source_path in pdf_files:
    merger.append(source_path)

# Write the combined document, then release the merger.
merger.write("merge_pdf.pdf")
merger.close()

At the provided link you can find many more PDF-related options that can help you achieve this more precisely.

Thanks

Answered By: Dipen Shah
Categories: questions Tags: , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.