How to rename PDF file, with texts extracted from the PDF file?
Question:
I am trying to use Python to rename PDF file using part of the file content. Here is the situation.
The PDF file is a commercial invoice, contains wordings "Commercial Invoice" and "Department". I want to rename the file to "Commercial Invoice" and " Department ", such as "353624 HR".
Here is what I have so far:
from StringIO import StringIO
import pyPdf
import os
# a function here
def getPDFContent(path):
content = ""
num_pages = 10
p = file(path, "rb")
pdf = pyPdf.PdfFileReader(p)
for i in range(0, num_pages):
content += pdf.getPage(i).extractText() + "n"
content = " ".join(content.replace(u"xa0", " ").strip().split())
return content
# name of the source PDF file
PDF_name = '222'
# picking texts from the PDF file
pdfContent = StringIO(getPDFContent("C:\" + PDF_name + ".pdf").encode("ascii", "ignore"))
for line in pdfContent:
aaa = line.find(' Commercial Invoice ')
CIN = line[aaa + 28: aaa + 38]
bbb = line.find('Department')
Dpt = line [bbb+20 : bbb+26]
final_name = str(CIN + " " + Dpt)
print final_name
f = open("C:\" + PDF_name + ".pdf")
f.close()
os.rename("C:\" + PDF_name + ".pdf", "C:\" + final_name + ".pdf")
it works until print out the text extracted ‘ print final_name’, but at the last part when renaming the file, it gives an error " WindowsError: [Error 32] The process cannot access the file because it is being used by another process".
What went wrong here? it seems the file was once not closed properly?
Answers:
Add C:\
also to your PDF_name in the last line.
in def getPDFContent(path)
,
after p = file(path, "rb")
,
when the content has been copied,
you need to close the file.
p.close()
put this just after the for loop but in the function.
This can be done through mouse_event and cursor position
Below is the code :
Sub Run_report1()
'
' Run_report Macro
'
' Keyboard Shortcut: Ctrl+Shift+G
'
Application.Wait Now + TimeValue("0:00:01")
SendKeys "%{Tab}", True
Application.Wait Now + TimeValue("0:00:01")
Dim i As Integer
i = 1
Do Until i > 8
Application.Wait Now + TimeValue("0:00:01")
SetCursorPos 309, 253
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SendKeys "{Enter}", True
Application.Wait Now + TimeValue("0:00:03")
SetCursorPos 794, 771
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SetCursorPos 1068, 728
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SetCursorPos 746, 94
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SendKeys "%{Tab}", True
Application.Wait Now + TimeValue("0:00:01")
SetCursorPos 309, 253
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SendKeys "^V", True
SendKeys "{Enter}", True
Application.Wait Now + TimeValue("0:00:01")
SendKeys "{F5}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
i = i + 1
Loop
MsgBox "Task Completed"
End Sub
I am trying to use Python to rename PDF file using part of the file content. Here is the situation.
The PDF file is a commercial invoice, contains wordings "Commercial Invoice" and "Department". I want to rename the file to "Commercial Invoice" and " Department ", such as "353624 HR".
Here is what I have so far:
from StringIO import StringIO
import pyPdf
import os
# a function here
def getPDFContent(path):
content = ""
num_pages = 10
p = file(path, "rb")
pdf = pyPdf.PdfFileReader(p)
for i in range(0, num_pages):
content += pdf.getPage(i).extractText() + "n"
content = " ".join(content.replace(u"xa0", " ").strip().split())
return content
# name of the source PDF file
PDF_name = '222'
# picking texts from the PDF file
pdfContent = StringIO(getPDFContent("C:\" + PDF_name + ".pdf").encode("ascii", "ignore"))
for line in pdfContent:
aaa = line.find(' Commercial Invoice ')
CIN = line[aaa + 28: aaa + 38]
bbb = line.find('Department')
Dpt = line [bbb+20 : bbb+26]
final_name = str(CIN + " " + Dpt)
print final_name
f = open("C:\" + PDF_name + ".pdf")
f.close()
os.rename("C:\" + PDF_name + ".pdf", "C:\" + final_name + ".pdf")
it works until print out the text extracted ‘ print final_name’, but at the last part when renaming the file, it gives an error " WindowsError: [Error 32] The process cannot access the file because it is being used by another process".
What went wrong here? it seems the file was once not closed properly?
Add C:\
also to your PDF_name in the last line.
in def getPDFContent(path)
,
after p = file(path, "rb")
,
when the content has been copied,
you need to close the file.
p.close()
put this just after the for loop but in the function.
This can be done through mouse_event and cursor position
Below is the code :
Sub Run_report1()
'
' Run_report Macro
'
' Keyboard Shortcut: Ctrl+Shift+G
'
Application.Wait Now + TimeValue("0:00:01")
SendKeys "%{Tab}", True
Application.Wait Now + TimeValue("0:00:01")
Dim i As Integer
i = 1
Do Until i > 8
Application.Wait Now + TimeValue("0:00:01")
SetCursorPos 309, 253
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SendKeys "{Enter}", True
Application.Wait Now + TimeValue("0:00:03")
SetCursorPos 794, 771
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SetCursorPos 1068, 728
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SetCursorPos 746, 94
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SendKeys "%{Tab}", True
Application.Wait Now + TimeValue("0:00:01")
SetCursorPos 309, 253
mouse_event MOUSEEVENTF_LEFTDOWN, 0, 0, 0, 0
mouse_event MOUSEEVENTF_LEFTUP, 0, 0, 0, 0
Application.Wait Now + TimeValue("0:00:01")
SendKeys "^V", True
SendKeys "{Enter}", True
Application.Wait Now + TimeValue("0:00:01")
SendKeys "{F5}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
SendKeys "{PGUP}", True
i = i + 1
Loop
MsgBox "Task Completed"
End Sub