Converting multiple HTML files to PDF using PyQt5

Question:

I tried following this answer: How to use PyQT5 to convert multiple HTML docs to PDF in one loop

I modified it to convert all HTML files found in a local folder. For example, htmls is the list of HTML files to be converted: [Q:\Ray\test1.html, Q:\Ray\prac2.html]

This is the code. However, when I try to run it, Python just freezes and I have to stop the run.

import os
import glob
from PyQt5 import QtWidgets, QtWebEngineWidgets

class PdfPage(QtWebEngineWidgets.QWebEnginePage):
    """Web engine page intended to print a sequence of HTML files to PDF.

    NOTE(review): no method in this class calls ``load()``; the only call to
    ``printToPdf()`` is inside the ``loadFinished`` handler, and the only call
    to ``QApplication.quit()`` is inside the ``pdfPrintingFinished`` handler.
    Presumably this is why the script never terminates.
    """
    def __init__(self):
        super().__init__()
        # Source of paths still to be processed; replaced by an iterator in convert().
        self._htmls = []
        # Item most recently taken from self._htmls.
        self._current_path = ""

        self.setZoomFactor(1)
        # Chain the steps through signals: load finished -> print, print finished -> next.
        self.loadFinished.connect(self._handleLoadFinished)
        self.pdfPrintingFinished.connect(self._handlePrintingFinished)

    def convert(self, htmls):
        """Store an iterator over *htmls* and fetch the first item."""
        # zip() with a single argument yields 1-tuples, so every item taken
        # from this iterator is a tuple such as (path,), not a plain string.
        self._htmls = iter(zip(htmls))
        self._fetchNext()

    def _fetchNext(self):
        """Advance to the next item.

        Returns False when the iterator is exhausted. When an item was taken,
        the method falls off the end and therefore returns None.
        """
        try:
            self._current_path = next(self._htmls)
        except StopIteration:
            return False

    def _handleLoadFinished(self, ok):
        """Slot for ``loadFinished``: print the page to PDF when *ok* is true."""
        if ok:
            # The output path is the stored item itself (no extra suffix added).
            self.printToPdf(self._current_path)

    def _handlePrintingFinished(self, filePath, success):
        """Slot for ``pdfPrintingFinished``: report the result, then continue or quit."""
        print("finished:", filePath, success)
        # _fetchNext() returns None or False, never a truthy value, so this
        # branch is taken every time the slot runs.
        if not self._fetchNext():
            QtWidgets.QApplication.quit()


if __name__ == "__main__":

    # Directory that contains this script; the HTML files are searched there.
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # NOTE(review): '\*' is not a recognized escape sequence, so the backslash
    # stays in the string literally; the pattern uses a Windows-style separator.
    folder= current_dir+ '\*.HTML'
    htmls= glob.glob(folder)

    app = QtWidgets.QApplication([])
    page = PdfPage()
    page.convert(htmls)
    # Enter the Qt event loop; the only QApplication.quit() call in this script
    # is in PdfPage._handlePrintingFinished.
    app.exec_()

    print("finished")
Asked By: Ray234

||

Answers:

It seems that the OP has not understood the logic of my previous solution which is:

  1. Get the resource, in this case files,
  2. Load it on the page,
  3. When the load is finished then print the content of the page,
  4. When the printing is finished then execute step 1 with the next resource.

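In the OP's code step 2 is never performed: the HTML file is never loaded into the page, so neither the load nor the print signal ever fires. In addition, it is recommended that the path of the PDF has a name different from that of the HTML file.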

import os
import glob
from PyQt5.QtCore import QUrl
from PyQt5 import QtWidgets, QtWebEngineWidgets


class PdfPage(QtWebEngineWidgets.QWebEnginePage):
    """Web engine page that prints a series of local HTML files to PDF.

    The conversion is sequential and driven by signals:

      1. take the next HTML path,
      2. load it into the page,
      3. when loading finishes, print the page to ``<html path>.pdf``,
      4. when printing finishes, go back to step 1.

    When no paths remain, the running ``QApplication`` is asked to quit.
    """

    def __init__(self):
        super().__init__()
        # Start with an exhausted iterator (not a list) so that _fetchNext()
        # is safe to call even before convert() has been given any work.
        self._htmls = iter(())
        self._current_path = ""

        self.setZoomFactor(1)
        self.loadFinished.connect(self._handleLoadFinished)
        self.pdfPrintingFinished.connect(self._handlePrintingFinished)

    def convert(self, htmls):
        """Start converting the HTML files in *htmls*, one after another.

        Returns True when the first file has been queued for loading and
        False when *htmls* is empty. In the latter case no signal will fire,
        so the caller should not enter the event loop waiting for a quit.
        """
        self._htmls = iter(htmls)
        return self._fetchNext()

    def _fetchNext(self):
        """Load the next pending HTML file.

        Returns True when a file was queued, False when none are left.
        """
        try:
            self._current_path = next(self._htmls)
        except StopIteration:
            return False
        self.load(QUrl.fromLocalFile(self._current_path))
        return True

    def _advance(self):
        """Move on to the next file, or quit the application when done."""
        if not self._fetchNext():
            QtWidgets.QApplication.quit()

    def _handleLoadFinished(self, ok):
        """Slot for ``loadFinished``: print the page, or skip it on failure."""
        if ok:
            # Append ".pdf" so the output never overwrites the HTML source.
            self.printToPdf(self._current_path + ".pdf")
        else:
            # Without this branch a single unloadable file would stall the
            # whole chain: nothing would be printed, so pdfPrintingFinished
            # would never fire and the event loop would never be asked to quit.
            print("failed to load:", self._current_path)
            self._advance()

    def _handlePrintingFinished(self, filePath, success):
        """Slot for ``pdfPrintingFinished``: report the result and continue."""
        print("finished:", filePath, success)
        self._advance()


if __name__ == "__main__":
    # Convert every HTML file located next to this script.
    current_dir = os.path.dirname(os.path.realpath(__file__))
    # os.path.join replaces the string concatenation with '\*.HTML', which
    # contained an invalid escape sequence and a hard-coded Windows separator.
    pattern = os.path.join(current_dir, "*.HTML")
    # glob returns matches in arbitrary order; sort for a reproducible run.
    htmls = sorted(glob.glob(pattern))
    print(htmls)
    # Only start the event loop when there is work to do; with no files no
    # signal would ever fire and the application would never be asked to quit.
    if htmls:
        app = QtWidgets.QApplication([])
        page = PdfPage()
        page.convert(htmls)
        app.exec_()
    print("finished")
Answered By: eyllanesc

The code works fine for HTML files without images. However, the converted PDF files do not include the images from the HTML files. How can this be resolved?

Answered By: Guru karthik
Categories: questions Tags: , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with
at the top-right corner.