Download files from a site with requests by clicking the button
Question:
I’m trying to download a zip file from this site:
After clicking the button "download *.zip file" the download is performed.
I’m trying to do this with requests because I then want to automate it and just change "zn" and "se" in the URL.
The problem is that I am not able to download the zip file with requests.
This is the code I’m trying; can anyone help me?
import py7zr
import json
import requests
# Request the file directly by its full, already-known URL.
url = 'https://resultados.tse.jus.br/oficial/ele2022/arquivo-urna/407/dados/mg/40037/0001/0101/494a2b7171725964614e41336a4362695a32425276596447384e42434d644d73356241416e76797a6c45513d/o00407-4003700010101.logjez'
r = requests.get(url)
r.status_code
# The file does seem to come back, but this is probably not the right way to
# do it: once this is automated for another section ("se"), the correct file
# name will not be known in advance.
r.text
How can I automate something like this without knowing what the name of the next file will be when I change the URL?
Thanks!
Answers:
I’m not sure what you mean by
change "zn" and "se" in the url
but you might want to try this:
import os
import time
import urllib.parse
from pathlib import Path
from shutil import copyfileobj
import requests
# Request headers for the HTTP session: a desktop Firefox User-Agent and the
# results application page as Referer.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:104.0) "
        "Gecko/20100101 Firefox/104.0"
    ),
    "Referer": "https://resultados.tse.jus.br/oficial/app/index.html",
}
def download_file(source_url: str, directory: str, session=None) -> None:
    """Download ``source_url`` into ``directory`` under a timestamped name.

    The file is saved as
    ``LogDeUrna_Totalizado_<unix time>_<remote name>.vscmr.zip``, where
    ``<remote name>`` is the last path component of ``source_url`` with
    ``.logjez`` removed.

    Args:
        source_url: URL of the file to fetch.
        directory: Folder to save into; created if it does not exist.
        session: Optional ``requests.Session`` used for the request. When
            omitted, the module-level session ``s`` is used.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    http = s if session is None else session

    save_dir = Path(directory)
    save_dir.mkdir(parents=True, exist_ok=True)

    remote_name = source_url.rsplit('/', 1)[-1].replace('.logjez', '')
    file_name = (
        f"LogDeUrna_Totalizado_{int(time.time())}"
        f"_{remote_name}"
        f".vscmr.zip"
    )
    destination = save_dir / file_name

    with http.get(source_url, stream=True) as response:
        # Check the status before creating the file, so an error page is
        # never written to disk as if it were the archive.
        response.raise_for_status()
        with open(destination, "wb") as output:
            # iter_content() applies gzip/deflate content decoding; copying
            # response.raw verbatim would not.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                output.write(chunk)
# Go from the public results-page URL to the matching archive URL, then
# download that archive into ./zip_files.
with requests.session() as s:
    s.headers.update(headers)
    base_url = 'https://resultados.tse.jus.br/oficial/app/index.html#/m/dados-da-urna;e=e545;uf=mg;ufbu=mg;mubu=40037;zn=0001;se=0101/log-de-urna'

    # Remove the trailing "/log-de-urna" as a whole suffix. str.rstrip() takes
    # a *set of characters*, so it only worked here because the preceding
    # value ends in a digit.
    page_suffix = "/log-de-urna"
    if base_url.endswith(page_suffix):
        params_url = base_url[: -len(page_suffix)]
    else:
        params_url = base_url

    # deconstruct the url to get the query parameters: the last segment is
    # "dados-da-urna;e=...;uf=...;..." -- skip the segment name and the "e"
    # entry, keep the remaining key=value pairs
    url_parts = (
        urllib.parse
        .urlsplit(params_url.rsplit('/', 1)[-1])
        .path.split(';')[2:]
    )

    # build a map of the query parameters
    d = dict(part.split('=', 1) for part in url_parts)

    # Directory shared by the index file and the archive.
    # NOTE(review): "mg" is hard-coded here although d['uf'] is parsed above;
    # confirm before using this for another state.
    data_url = (
        "https://resultados.tse.jus.br/oficial/ele2022/arquivo-urna/407/dados/mg/"
        f"{d['mubu']}/{d['zn']}/{d['se']}/"
    )

    # rebuild the API url with the query parameters (built as one expression:
    # adjacent f-strings on separate lines are only joined inside parentheses)
    api_url = f"{data_url}p000407-mg-m{d['mubu']}-z{d['zn']}-s{d['se']}-aux.json"

    # get the API response and extract the hashes
    api_response = s.get(api_url)
    api_response.raise_for_status()
    hashes = api_response.json()['hashes'][0]

    # build the download url with the hashes and query parameters
    # assumes index 3 of 'nmarq' is the .logjez file -- TODO confirm against
    # the API response
    zip_url = f"{data_url}{hashes['hash']}/{hashes['nmarq'][3]}"

    # download the file
    download_file(zip_url, 'zip_files')
This should download a file like this:
LogDeUrna_Totalizado_1667737288_o00407-4003700010101.vscmr.zip
Which should contain this (after extraction it’s just a .dat file):
shortened for brevity
24/09/2022 16:16:19 INFO 67305985 LOGD Início das operações do logd 25DB7401100B3F08
24/09/2022 16:16:19 INFO 67305985 LOGD Urna ligada em 24/09/2022 às 16:15:04 43FBAE51FE13D6F8
24/09/2022 16:16:19 INFO 67305985 SCUE Iniciando aplicação - Oficial - 1º turno 46556A8AF2E2C406
24/09/2022 16:16:19 INFO 67305985 SCUE Versão da aplicação: 8.26.0.0 - Onça-pintada 7827EE0DB8CA11F0
24/09/2022 16:16:21 INFO 67305985 SCUE Urna operando com rede elétrica 239A67E5FCA5752B
24/09/2022 16:16:21 INFO 67305985 SCUE Bateria interna com carga plena 6EE1D58F43F1C55E
24/09/2022 16:16:27 INFO 67305985 SCUE Tamanho da mídia interna: 488.7 MB 3C1DD481446A0EB8
24/09/2022 16:16:30 INFO 67305985 SCUE Tamanho da memória: 489.4 MB D77DD903E0DDB80B
24/09/2022 16:16:30 INFO 67305985 SCUE Verificação de assinatura de aplicação por etapa [1] - [/bin/avbin.vst] - [SUCESSO] 07D0A24BF9E343B3
24/09/2022 16:16:31 INFO 67305985 SCUE Verificação de assinatura de aplicação por etapa [2] - [/uenux/bin/avusrbin.vst] - [SUCESSO]
To answer Leonardo’s question about reading the zip file: the zip files are compressed with 7-Zip. To open one, use:
# Open the downloaded file as a 7z archive (read mode) and unpack all of its
# members into temp_dir.
with py7zr.SevenZipFile(path_to_zipfile, mode='r') as archive:
    archive.extractall(path=temp_dir)
Baixe o arquivo de log e descompacte-o; dentro você terá o arquivo .logjez. Renomeie esse arquivo para .zip, descompacte-o e veja o logd.dat. Edite-o no UltraEdit e veja tudo o que está no log. Abraço.
I’m trying to download a zip file from this site:
After clicking the button "download *.zip file" the download is performed.
I’m trying to do this with requests because I then want to automate it and just change "zn" and "se" in the URL.
The problem is that I am not able to download the zip file with requests.
This is the code I’m trying; can anyone help me?
import py7zr
import json
import requests
# Request the file directly by its full, already-known URL.
url = 'https://resultados.tse.jus.br/oficial/ele2022/arquivo-urna/407/dados/mg/40037/0001/0101/494a2b7171725964614e41336a4362695a32425276596447384e42434d644d73356241416e76797a6c45513d/o00407-4003700010101.logjez'
r = requests.get(url)
r.status_code
# The file does seem to come back, but this is probably not the right way to
# do it: once this is automated for another section ("se"), the correct file
# name will not be known in advance.
r.text
How can I automate something like this without knowing what the name of the next file will be when I change the URL?
Thanks!
I’m not sure what you mean by
change "zn" and "se" in the url
but you might want to try this:
import os
import time
import urllib.parse
from pathlib import Path
from shutil import copyfileobj
import requests
# Request headers for the HTTP session: a desktop Firefox User-Agent and the
# results application page as Referer.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:104.0) "
        "Gecko/20100101 Firefox/104.0"
    ),
    "Referer": "https://resultados.tse.jus.br/oficial/app/index.html",
}
def download_file(source_url: str, directory: str, session=None) -> None:
    """Download ``source_url`` into ``directory`` under a timestamped name.

    The file is saved as
    ``LogDeUrna_Totalizado_<unix time>_<remote name>.vscmr.zip``, where
    ``<remote name>`` is the last path component of ``source_url`` with
    ``.logjez`` removed.

    Args:
        source_url: URL of the file to fetch.
        directory: Folder to save into; created if it does not exist.
        session: Optional ``requests.Session`` used for the request. When
            omitted, the module-level session ``s`` is used.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    http = s if session is None else session

    save_dir = Path(directory)
    save_dir.mkdir(parents=True, exist_ok=True)

    remote_name = source_url.rsplit('/', 1)[-1].replace('.logjez', '')
    file_name = (
        f"LogDeUrna_Totalizado_{int(time.time())}"
        f"_{remote_name}"
        f".vscmr.zip"
    )
    destination = save_dir / file_name

    with http.get(source_url, stream=True) as response:
        # Check the status before creating the file, so an error page is
        # never written to disk as if it were the archive.
        response.raise_for_status()
        with open(destination, "wb") as output:
            # iter_content() applies gzip/deflate content decoding; copying
            # response.raw verbatim would not.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                output.write(chunk)
# Go from the public results-page URL to the matching archive URL, then
# download that archive into ./zip_files.
with requests.session() as s:
    s.headers.update(headers)
    base_url = 'https://resultados.tse.jus.br/oficial/app/index.html#/m/dados-da-urna;e=e545;uf=mg;ufbu=mg;mubu=40037;zn=0001;se=0101/log-de-urna'

    # Remove the trailing "/log-de-urna" as a whole suffix. str.rstrip() takes
    # a *set of characters*, so it only worked here because the preceding
    # value ends in a digit.
    page_suffix = "/log-de-urna"
    if base_url.endswith(page_suffix):
        params_url = base_url[: -len(page_suffix)]
    else:
        params_url = base_url

    # deconstruct the url to get the query parameters: the last segment is
    # "dados-da-urna;e=...;uf=...;..." -- skip the segment name and the "e"
    # entry, keep the remaining key=value pairs
    url_parts = (
        urllib.parse
        .urlsplit(params_url.rsplit('/', 1)[-1])
        .path.split(';')[2:]
    )

    # build a map of the query parameters
    d = dict(part.split('=', 1) for part in url_parts)

    # Directory shared by the index file and the archive.
    # NOTE(review): "mg" is hard-coded here although d['uf'] is parsed above;
    # confirm before using this for another state.
    data_url = (
        "https://resultados.tse.jus.br/oficial/ele2022/arquivo-urna/407/dados/mg/"
        f"{d['mubu']}/{d['zn']}/{d['se']}/"
    )

    # rebuild the API url with the query parameters (built as one expression:
    # adjacent f-strings on separate lines are only joined inside parentheses)
    api_url = f"{data_url}p000407-mg-m{d['mubu']}-z{d['zn']}-s{d['se']}-aux.json"

    # get the API response and extract the hashes
    api_response = s.get(api_url)
    api_response.raise_for_status()
    hashes = api_response.json()['hashes'][0]

    # build the download url with the hashes and query parameters
    # assumes index 3 of 'nmarq' is the .logjez file -- TODO confirm against
    # the API response
    zip_url = f"{data_url}{hashes['hash']}/{hashes['nmarq'][3]}"

    # download the file
    download_file(zip_url, 'zip_files')
This should download a file like this:
LogDeUrna_Totalizado_1667737288_o00407-4003700010101.vscmr.zip
Which should contain this (after extraction it’s just a .dat file):
shortened for brevity
24/09/2022 16:16:19 INFO 67305985 LOGD Início das operações do logd 25DB7401100B3F08
24/09/2022 16:16:19 INFO 67305985 LOGD Urna ligada em 24/09/2022 às 16:15:04 43FBAE51FE13D6F8
24/09/2022 16:16:19 INFO 67305985 SCUE Iniciando aplicação - Oficial - 1º turno 46556A8AF2E2C406
24/09/2022 16:16:19 INFO 67305985 SCUE Versão da aplicação: 8.26.0.0 - Onça-pintada 7827EE0DB8CA11F0
24/09/2022 16:16:21 INFO 67305985 SCUE Urna operando com rede elétrica 239A67E5FCA5752B
24/09/2022 16:16:21 INFO 67305985 SCUE Bateria interna com carga plena 6EE1D58F43F1C55E
24/09/2022 16:16:27 INFO 67305985 SCUE Tamanho da mídia interna: 488.7 MB 3C1DD481446A0EB8
24/09/2022 16:16:30 INFO 67305985 SCUE Tamanho da memória: 489.4 MB D77DD903E0DDB80B
24/09/2022 16:16:30 INFO 67305985 SCUE Verificação de assinatura de aplicação por etapa [1] - [/bin/avbin.vst] - [SUCESSO] 07D0A24BF9E343B3
24/09/2022 16:16:31 INFO 67305985 SCUE Verificação de assinatura de aplicação por etapa [2] - [/uenux/bin/avusrbin.vst] - [SUCESSO]
To answer Leonardo’s question about reading the zip file: the zip files are compressed with 7-Zip. To open one, use:
# Open the downloaded file as a 7z archive (read mode) and unpack all of its
# members into temp_dir.
with py7zr.SevenZipFile(path_to_zipfile, mode='r') as archive:
    archive.extractall(path=temp_dir)
Baixe o arquivo de log e descompacte-o; dentro você terá o arquivo .logjez. Renomeie esse arquivo para .zip, descompacte-o e veja o logd.dat. Edite-o no UltraEdit e veja tudo o que está no log. Abraço.