Set Author, Title, and Subject for PDF using Reportlab

Question:

How can you correctly set the Author, Title and Subject attributes for a PDF File using Reportlab?
I have found the methods in the Reportlab User Guide on page 56, but I am not sure how to implement them correctly.

Below, in my PDF cropping and scaling script, I have added the annotations method, but I don’t know where to call it from, or whether a whole new Canvas object is needed. Please excuse the lengthy code; the doc is only created after line 113, and everything above that is mostly auxiliary methods, including the annotations method on line 30.

# All the necessary parameters are accessible after line 92,
# but can of course be changed manually in the Code


# imports for the crop, rename to avoid conflict with reportlab Image import
from PIL import Image as imgPIL
from PIL import ImageChops, ImageOps, ImageFilter
import os.path, sys

# import for the PDF creation
import glob
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm
from reportlab.lib import utils
from reportlab.platypus import Image, SimpleDocTemplate, Spacer
from reportlab.pdfgen import canvas

# get os path for Cropping
path = (os.path.dirname(os.path.abspath("cropPDF.py")))
dirs = os.listdir(path)


def trim(im, border="white"):
   """Crop the uniform ``border``-colored margin off ``im``.

   A solid image of color ``border`` with the same mode and size as ``im``
   is built and compared against ``im``; the bounding box of that
   difference is used as the crop region.

   Returns the cropped image, or ``None`` when the difference has no
   bounding box (i.e. nothing in ``im`` differs from ``border``).
   """
   background = imgPIL.new(im.mode, im.size, border)
   content_box = ImageChops.difference(im, background).getbbox()
   if not content_box:
      return None
   return im.crop(content_box)

def annotations(canvas, author="the ReportLab Team",
                title="ReportLab PDF Generation User Guide",
                subject="How to Generate PDF files using the ReportLab modules"):
   """Set the Author, Title and Subject metadata through ``canvas``.

   ``canvas`` must provide ``setAuthor``, ``setTitle`` and ``setSubject``
   (as a ReportLab ``Canvas`` does). The three values default to the
   example strings that were previously hard-coded, so existing
   ``annotations(canvas)`` calls behave exactly as before.
   """
   canvas.setAuthor(author)
   canvas.setTitle(title)
   canvas.setSubject(subject)

def findMaxWidth():
   """Return the largest width among the images listed in ``dirs``.

   Every entry of the module-level ``dirs`` list is resolved against
   ``path``. Entries that are not regular files, or that cannot be opened
   as an image, are skipped. Returns 0 if no image could be opened.
   """
   maxWidth = 0
   for item in dirs:
      fullpath = os.path.join(path, item)
      if not os.path.isfile(fullpath):
         continue
      try:
         # The context manager releases the file handle that Image.open
         # would otherwise keep open.
         with imgPIL.open(fullpath) as im:
            maxWidth = max(maxWidth, im.size[0])
      except Exception:
         # Best effort: the directory also contains non-image files.
         # Unlike a bare ``except``, this lets Ctrl-C / SystemExit through.
         continue
   return maxWidth


def padImages(docHeight):
   """Pad images to a common width and save them as PNG.

   Every image file listed in ``dirs`` whose height is below ``docHeight``
   is pasted, flush left, onto a white canvas that is ``findMaxWidth()``
   wide and as tall as the image. The result is written next to the
   source file with a ``.png`` extension. Files that cannot be processed
   are skipped.
   """
   maxWidth = findMaxWidth()
   for item in dirs:
      fullpath = os.path.join(path, item)
      if not os.path.isfile(fullpath):
         continue
      try:
         # The context manager closes the source file handle when done.
         with imgPIL.open(fullpath) as im:
            height = im.size[1]  # needed for the blank image
            # Oversized images are not padded, so that they remain centered.
            if docHeight <= height:
               continue
            # White image with the max width.
            padded = imgPIL.new('RGB', (maxWidth, height), (255, 255, 255))
            # Paste the original over the blank one, flush on the left side.
            padded.paste(im, (0, 0))
            f, _ = os.path.splitext(fullpath)
            padded.save(f + ".png", "PNG", quality=100)
      except Exception:
         # Best effort: skip anything that is not a usable image.
         # Unlike a bare ``except``, this lets Ctrl-C / SystemExit through.
         continue


def crop():
   """Trim the white border off every image in ``dirs`` and save it as PNG.

   Each regular file listed in ``dirs`` is opened, passed through
   ``trim(im, "white")`` and written next to the source with a ``.png``
   extension. Files that cannot be opened as images, and images for which
   ``trim`` returns ``None``, are skipped.
   """
   for item in dirs:
      fullpath = os.path.join(path, item)
      if not os.path.isfile(fullpath):
         continue
      try:
         # The context manager closes the source file handle when done.
         with imgPIL.open(fullpath) as im:
            imCrop = trim(im, "white")
            if imCrop is None:
               # Nothing but border color was found; there is nothing to save.
               continue
            f, _ = os.path.splitext(fullpath)
            imCrop.save(f + ".png", "PNG", quality=100)
      except Exception:
         # Best effort: skip anything that is not a usable image.
         # Unlike a bare ``except``, this lets Ctrl-C / SystemExit through.
         continue


def add_page_number(canvas, doc):
   """Draw ``doc.page`` as text on ``canvas``.

   The number is set in Times-Roman at ``numberFontSize`` and centred on
   the point ``(pageNumberSpacing mm, pageNumberSpacing mm)``. The canvas
   state is saved before and restored after drawing, so the font change
   does not leak. The ``(canvas, doc)`` signature matches how it is passed
   to ``doc.build`` as ``onFirstPage`` / ``onLaterPages``.
   """
   canvas.saveState()
   canvas.setFont('Times-Roman', numberFontSize)
   label = "%d" % (doc.page)
   x_pos = pageNumberSpacing * mm
   y_pos = pageNumberSpacing * mm
   canvas.drawCentredString(x_pos, y_pos, label)
   canvas.restoreState()


#############################

# Run crop() on the images before the PDF is assembled.
executeCrop = True
# Run padImages() on the images before the PDF is assembled.
executePad = True

# Name of the PDF file that will be created.
outputName = "output.pdf"
# Author, title and subject: the three attributes shown in a PDF viewer's
# file info menu.
fileAuthor = "Roman Stadler"
fileTitle = ""
fileSubject = ""

# Page margin on all four sides; multiplied by ``mm`` when the document is created.
margin = 0.5
# Width given to each image in the PDF unless the image would become too tall.
# NOTE(review): presumably in points, ReportLab's default unit -- confirm.
imageWidthDefault = 550
# Height of the Spacer inserted after every image.
spacerHeight = 7
# Fraction of the document height used for images that would be too tall.
# Values larger than 0.95 can result in an empty page after the image.
scalingIfImageTooTall = 0.95

# Draw page numbers via add_page_number when building the PDF.
includePagenumbers = True
# Font size used for the page number.
numberFontSize = 10
# x and y position of the page number; multiplied by ``mm`` when drawn.
pageNumberSpacing = 5

############################

# Create the document template. Title, author and subject are passed here as
# keyword arguments; the doc template applies them to the canvas it creates,
# so no separate Canvas object or annotations() call is needed for the metadata.
doc = SimpleDocTemplate(
   outputName,
   topMargin=margin * mm,
   leftMargin=margin * mm,
   rightMargin=margin * mm,
   bottomMargin=margin * mm,
   pagesize=A4,
   title=fileTitle,
   author=fileAuthor,
   subject=fileSubject,
)

# Optional preprocessing of the image files before they are collected.
if executeCrop:
   crop()
if executePad:
   padImages(doc.height)

# All PNG files of the current directory, in sorted order.
filelist = sorted(glob.glob("*.png"))

# Flowables for the PDF: one Image followed by one Spacer per file.
story = []

for fn in filelist:
   # The pixel size is only needed for the aspect ratio.
   pixel_width, pixel_height = utils.ImageReader(fn).getSize()
   aspect = pixel_height / float(pixel_width)

   # Start from the default width and derive the height from it.
   imageWidth = imageWidthDefault
   imageHeight = imageWidth * aspect

   # Too tall for the page: fix the height instead and derive the width.
   if imageHeight > doc.height:
      imageHeight = doc.height * scalingIfImageTooTall
      imageWidth = imageHeight / aspect

   story.append(Image(fn, width=imageWidth, height=imageHeight))
   story.append(Spacer(width=0, height=spacerHeight))

if not filelist:  # to prevent an empty PDF that can't be opened
   print("no files found")
elif includePagenumbers:  # page numbers are desired
   doc.build(
      story,
      onFirstPage=add_page_number,
      onLaterPages=add_page_number,
   )
else:
   doc.build(story)
Asked By: Roman Stadler

||

Answers:

In the meantime, I have found another way that does not use reportlab, but instead relies on PyPDF2:

The following import is needed:

# PyPDF2 for the metadata modification
from PyPDF2 import PdfFileReader, PdfFileWriter

Then the metadata can be edited like this:

import io  # standard library; used to buffer the PDF in memory

author = "Roman Stadler"
title = "CropPDF"
subject = "Stackoverflow"

#rest of the script

# Attempt the metadata edit.
try:
    # Read the whole PDF into memory first. Writing the new PDF into the
    # same handle that is still being read from (mode 'rb+') starts at an
    # arbitrary file position and can leave stale bytes in the output.
    with open('output.pdf', 'rb') as source:
        reader = PdfFileReader(io.BytesIO(source.read()))

    writer = PdfFileWriter()
    writer.appendPagesFromReader(reader)

    # Carry over the existing document info, if there is any ...
    metadata = reader.getDocumentInfo()
    if metadata:
        writer.addMetadata(metadata)

    # ... then override the fields we care about.
    writer.addMetadata({
        '/Author': author,
        '/Title': title,
        '/Subject' : subject,
        '/Producer' : "CropPDF",
        '/Creator' : "CropPDF",
    })

    # Serialize completely before touching the file on disk, so a failure
    # while writing cannot leave a truncated output.pdf behind.
    buffer = io.BytesIO()
    writer.write(buffer)
    with open('output.pdf', 'wb') as target:
        target.write(buffer.getvalue())
except Exception:
    # Deliberately best effort, but no longer swallows KeyboardInterrupt/SystemExit.
    print("Error while editing metadata")

Answered By: Roman Stadler

You can define attributes like the author when defining the doc as a SimpleDocTemplate:

# Metadata is passed as keyword arguments when the document template is created.
doc = SimpleDocTemplate(
    outputName,
    topMargin=margin * mm,
    leftMargin=margin * mm,
    rightMargin=margin * mm,
    bottomMargin=margin * mm,
    pagesize=A4,
    title="This is the title of the document",  # exchange with your title
    author="John Smith",  # exchange with your author's name
    subject="Adding metadata to pdf via reportlab",  # exchange with your subject
)
Answered By: Matthias Wiedemann
Categories: questions Tags: , , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.