Python 3: urlextract package, PermissionError
Question:
I am using Windows 10 x64, with Python 3.6.1 x86.
I have this script from a few months ago which was working fine, but right now it gives me a weird error. The script is a simple one that extracts URLs from tweets saved in .csv files.
This is the script:
import datetime
from urlextract import URLExtract
# Names of the tweet files to process; the main block passes each one to
# url_extract() in turn.
twitter_files_list = ['File1.csv', 'File2.csv', 'File3.csv']
# Prefix that url_extract() concatenates directly in front of each file name,
# so it needs to end with a path separator.
# NOTE(review): `my_path` is not defined anywhere in this snippet -- it looks
# like a placeholder for the real input directory and must be set before
# the script can run.
input_path = my_path
# Find domain of URL
def find_domain(url):
    """Return the host part of a URL-like string.

    Everything up to and including the last ``//`` is discarded, then the
    result is cut at the first ``/``. A string without ``//`` is only cut
    at its first ``/``; a string without ``/`` is returned unchanged.

    Args:
        url: The URL (or any string containing one) to inspect.

    Returns:
        The text between the last ``//`` and the next ``/``.
    """
    # rpartition()[2] is the text after the LAST "//" (or the whole string
    # when "//" is absent); partition()[0] is the text before the FIRST "/".
    after_scheme = url.rpartition("//")[2]
    return after_scheme.partition("/")[0]
# Clean domain from useless chars
def clean_domain(domain):
    """Strip list-repr punctuation from a domain string.

    Removes every ``[``, ``]`` and ``'`` character; all other characters
    are kept in their original order.

    Args:
        domain: The string to clean.

    Returns:
        ``domain`` without square brackets and single quotes.
    """
    # A single translate() pass deletes all three characters at once.
    return domain.translate(str.maketrans("", "", "[]'"))
# Extract URLs from Tweets
def url_extract(filename):
    """Extract URL domains from one tweet file and append them to disk.

    Reads ``input_path + filename`` as UTF-8, takes the fifth tab-separated
    field of every line (the "text" column), and for each text in which
    ``URLExtract.find_urls`` reports at least one URL appends one domain
    per line to ``extracted_urls/urls_<filename>``.

    Args:
        filename: Name of the input file; it is appended to ``input_path``
            to form the path that is opened.
    """
    print('\n' + filename + ':')

    # ``with`` closes both files even if reading or extraction raises.
    with open('extracted_urls/urls_' + filename, 'a') as url_file:
        # Collect the contents of column "text"
        with open(input_path + filename, "r", encoding="utf8") as f:
            texts = []
            for line in f:
                fields = line.split('\t')
                # Rows without a fifth column (e.g. blank lines) are skipped
                # instead of aborting the whole file with an IndexError.
                if len(fields) > 4:
                    texts.append(fields[4])

        extractor = URLExtract()
        for text in texts:
            # Call find_urls() once per text and reuse the result. An empty
            # result is the "no URL found" case, so no try/except is needed;
            # a string such as 'Not Found' is not an exception class and
            # cannot be used in an ``except`` clause.
            urls = extractor.find_urls(text)
            if not urls:
                continue
            # find_domain() receives the repr of the whole list, e.g.
            # "['http://a.com/x']"; clean_domain() then strips the list
            # punctuation that is left over.
            # NOTE(review): with several URLs in one text only the part
            # after the last "//" survives -- confirm that is intended.
            domain = find_domain(str(urls))
            if " " not in domain:
                url_file.write(str(clean_domain(domain)) + "\n")
# Main
if __name__ == '__main__':
    # Script entry point: run url_extract() on every configured file and
    # report the total wall-clock time.
    print('\nURL Characterization:\n')

    # Start timer
    start = datetime.datetime.now()

    # Extract the URLs from every file
    for twitter_file in twitter_files_list:
        print('Searching ' + str(twitter_file) + '...')
        url_extract(twitter_file)

    # End timer
    end = datetime.datetime.now()

    # Print results
    print("\nProcess finished")
    print("Total time: " + str(end - start))
This gives me the following error:
Traceback (most recent call last):
File "C:/Users/Aventinus/url_analysis/url_extractor.py", line 77, in <module>
url_extract(twitter_file)
File "C:/Users/Aventinus/url_analysis/url_extractor.py", line 50, in url_extract
extractor = URLExtract()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\urlextract.py", line 65, in __init__
if not self._download_tlds_list():
File "C:\Program Files (x86)\Python36-32\lib\site-packages\urlextract.py", line 114, in _download_tlds_list
with open(self._tld_list_path, 'w') as ftld:
PermissionError: [Errno 13] Permission denied: 'C:\Program Files (x86)\Python36-32\lib\site-packages\.tlds'
I have no idea how to interpret this.
Answers:
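You can try running the script as administrator: the traceback shows urlextract trying to write its `.tlds` file into `site-packages` under `C:\Program Files (x86)`, a location a regular user account is not permitted to write to.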
from urlextract import URLExtract
This gives me a "No module named 'urlextract'" error.
How to rectify this?
I am using Windows 10 x64, with Python 3.6.1 x86.
I have this script from a few months ago which was working fine, but right now it gives me a weird error. The script is a simple one that extracts URLs from tweets saved in .csv files.
This is the script:
import datetime
from urlextract import URLExtract
# Names of the tweet files to process; the main block passes each one to
# url_extract() in turn.
twitter_files_list = ['File1.csv', 'File2.csv', 'File3.csv']
# Prefix that url_extract() concatenates directly in front of each file name,
# so it needs to end with a path separator.
# NOTE(review): `my_path` is not defined anywhere in this snippet -- it looks
# like a placeholder for the real input directory and must be set before
# the script can run.
input_path = my_path
# Find domain of URL
def find_domain(url):
    """Return the host part of a URL-like string.

    Everything up to and including the last ``//`` is discarded, then the
    result is cut at the first ``/``. A string without ``//`` is only cut
    at its first ``/``; a string without ``/`` is returned unchanged.

    Args:
        url: The URL (or any string containing one) to inspect.

    Returns:
        The text between the last ``//`` and the next ``/``.
    """
    # rpartition()[2] is the text after the LAST "//" (or the whole string
    # when "//" is absent); partition()[0] is the text before the FIRST "/".
    after_scheme = url.rpartition("//")[2]
    return after_scheme.partition("/")[0]
# Clean domain from useless chars
def clean_domain(domain):
    """Strip list-repr punctuation from a domain string.

    Removes every ``[``, ``]`` and ``'`` character; all other characters
    are kept in their original order.

    Args:
        domain: The string to clean.

    Returns:
        ``domain`` without square brackets and single quotes.
    """
    # A single translate() pass deletes all three characters at once.
    return domain.translate(str.maketrans("", "", "[]'"))
# Extract URLs from Tweets
def url_extract(filename):
    """Extract URL domains from one tweet file and append them to disk.

    Reads ``input_path + filename`` as UTF-8, takes the fifth tab-separated
    field of every line (the "text" column), and for each text in which
    ``URLExtract.find_urls`` reports at least one URL appends one domain
    per line to ``extracted_urls/urls_<filename>``.

    Args:
        filename: Name of the input file; it is appended to ``input_path``
            to form the path that is opened.
    """
    print('\n' + filename + ':')

    # ``with`` closes both files even if reading or extraction raises.
    with open('extracted_urls/urls_' + filename, 'a') as url_file:
        # Collect the contents of column "text"
        with open(input_path + filename, "r", encoding="utf8") as f:
            texts = []
            for line in f:
                fields = line.split('\t')
                # Rows without a fifth column (e.g. blank lines) are skipped
                # instead of aborting the whole file with an IndexError.
                if len(fields) > 4:
                    texts.append(fields[4])

        extractor = URLExtract()
        for text in texts:
            # Call find_urls() once per text and reuse the result. An empty
            # result is the "no URL found" case, so no try/except is needed;
            # a string such as 'Not Found' is not an exception class and
            # cannot be used in an ``except`` clause.
            urls = extractor.find_urls(text)
            if not urls:
                continue
            # find_domain() receives the repr of the whole list, e.g.
            # "['http://a.com/x']"; clean_domain() then strips the list
            # punctuation that is left over.
            # NOTE(review): with several URLs in one text only the part
            # after the last "//" survives -- confirm that is intended.
            domain = find_domain(str(urls))
            if " " not in domain:
                url_file.write(str(clean_domain(domain)) + "\n")
# Main
if __name__ == '__main__':
    # Script entry point: run url_extract() on every configured file and
    # report the total wall-clock time.
    print('\nURL Characterization:\n')

    # Start timer
    start = datetime.datetime.now()

    # Extract the URLs from every file
    for twitter_file in twitter_files_list:
        print('Searching ' + str(twitter_file) + '...')
        url_extract(twitter_file)

    # End timer
    end = datetime.datetime.now()

    # Print results
    print("\nProcess finished")
    print("Total time: " + str(end - start))
This gives me the following error:
Traceback (most recent call last):
File "C:/Users/Aventinus/url_analysis/url_extractor.py", line 77, in <module>
url_extract(twitter_file)
File "C:/Users/Aventinus/url_analysis/url_extractor.py", line 50, in url_extract
extractor = URLExtract()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\urlextract.py", line 65, in __init__
if not self._download_tlds_list():
File "C:\Program Files (x86)\Python36-32\lib\site-packages\urlextract.py", line 114, in _download_tlds_list
with open(self._tld_list_path, 'w') as ftld:
PermissionError: [Errno 13] Permission denied: 'C:\Program Files (x86)\Python36-32\lib\site-packages\.tlds'
I have no idea how to interpret this.
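You can try running the script as administrator: the traceback shows urlextract trying to write its `.tlds` file into `site-packages` under `C:\Program Files (x86)`, a location a regular user account is not permitted to write to.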
from urlextract import URLExtract
This gives me a "No module named 'urlextract'" error.
How to rectify this?