I can't download the product image
Question:
I need to know how I can download the second product image, since its link is stored in quotes inside a tag.
Alternatively, just copying the image link and saving it in a DataFrame would also work.
import pandas as pd
import xlsxwriter
import pyautogui
import urllib.request
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
# Question script: open each product page listed in the spreadsheet and try to
# download the media referenced by the element's "src" attribute.
# NOTE(review): the Windows paths below appear to have lost their backslash
# separators somewhere along the way -- confirm the real locations before running.
options = webdriver.ChromeOptions()
# Hide the chromedriver "enable-logging" console noise.
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)
driver.get("https://shopee.com.br/Tapete-Sala-1-00-X-1-50-Peludo-Shaggy-Macio-Quarto-Felpudo-i.346235717.7779211526")
driver.maximize_window()

# The sheet is expected to provide a "links" column (page URL) and a "salvar"
# column (suffix used for the saved file name).
df = pd.read_excel(r"C:__Imagens e Planilhas PythonShopeeVideosVideos.xlsx")
for _, row in df.iterrows():
    driver.get(str(row["links"]))
    sleep(5)  # crude wait for the page to render
    video = driver.find_element(By.CLASS_NAME, "_1OPdfl")
    sleep(5)
    # NOTE(review): get_attribute returns None when the element has no "src";
    # the answers in this thread read the URL from the "style" attribute instead.
    atributoSrc = video.get_attribute("src")
    print(atributoSrc)
    try:
        urllib.request.urlretrieve(
            atributoSrc,
            r"C:__Imagens e Planilhas PythonShopeeVideos Baixadosnome" + str(row["salvar"]) + ".mp4",
        )
    except Exception as exc:
        # Best effort: report the failure and move on to the next row instead
        # of swallowing KeyboardInterrupt/SystemExit like a bare `except:` would.
        print("error", exc)
Answers:
This should download all the product images:
import requests  # was commented out in the snippet, but the loop below needs it

# Every matched div carries its picture as a CSS background-image inside the
# inline "style" attribute, so the URL is cut out of the url("...") wrapper.
imgDivs = driver.find_elements(By.CSS_SELECTOR, '._1OPdfl > ._2PWsS4')
for i, imgDiv in enumerate(imgDivs):
    d = imgDiv.get_attribute('style')
    imgUrl = d.split('url("')[1].split('")')[0]
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(f"img_{i}.jpeg", "wb") as f:
        f.write(requests.get(imgUrl).content)
    print(imgUrl)
The enlarged image gets downloaded twice. You can change the find_elements
arguments to be pickier about the images.
EDIT: to reduce duplicates you might try this:
def getImgUrl(imgDiv):
    """Return the full-size image URL stored in an element's inline style.

    The element is expected to expose ``get_attribute('style')`` (as a
    Selenium WebElement does) returning CSS that contains a ``url(...)``
    value. Double-quoted, single-quoted and unquoted forms are accepted.
    A trailing ``_tn`` (thumbnail marker) is removed so that a thumbnail and
    its enlarged version map to the same URL.

    Raises:
        ValueError: if the style is missing or contains no ``url(...)`` value.
    """
    import re  # local import keeps the snippet self-contained

    d = imgDiv.get_attribute('style') or ''
    # Group 1 is the optional opening quote, group 2 the URL itself; the
    # back-reference makes the closing quote match the opening one.
    found = re.search(r"""url\(\s*(["']?)(.*?)\1\s*\)""", d)
    if found is None:
        raise ValueError(f"no url(...) found in style attribute: {d!r}")
    iUrl = found.group(2)
    if iUrl.endswith('_tn'):
        iUrl = iUrl[:-len('_tn')]
    return iUrl
import requests  # needed for the downloads below

# Collect the image URLs of all matched divs, then drop duplicates.
imgDivs = driver.find_elements(By.CSS_SELECTOR, '._1OPdfl > ._2PWsS4')
imgUrls = [getImgUrl(iDiv) for iDiv in imgDivs]
# keep only unique urls; dict.fromkeys preserves first-seen order, so the
# img_<n> numbering is the same on every run (a set would scramble it)
imgUrls = list(dict.fromkeys(imgUrls))
imgTot = len(imgUrls)
for i, imgUrl in enumerate(imgUrls):
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(f"img_{i}.jpeg", "wb") as f:
        f.write(requests.get(imgUrl).content)
    print(f'{i+1} of {imgTot} links: ', imgUrl)
This works because the link to the thumbnail seems to be the same as the link to the enlarged version, but with '_tn' appended to the end. However, if the site changes its naming convention, this might cause errors.
Thanks! Following your logic, I wrote my own code, which worked:
import pandas as pd
import xlsxwriter
import pyautogui
import urllib.request
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
# Final script: visit every product page listed in the spreadsheet, read the
# image URL out of the element's inline style, and save all URLs to Excel.
# NOTE(review): the Windows paths below appear to have lost their backslash
# separators somewhere along the way -- confirm the real locations before running.
options = webdriver.ChromeOptions()
# Hide the chromedriver "enable-logging" console noise.
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)
driver.get("https://www.google.com.br")
driver.maximize_window()

# The sheet is expected to provide a "links" column holding the page URLs.
df = pd.read_excel(r"C:__Imagens e Planilhas PythonShopeeVideosLinksAfiliado.xlsx")
Link = []
for _, row in df.iterrows():
    driver.get(str(row["links"]))
    sleep(5)  # crude wait for the page to render
    classe = driver.find_element(By.CLASS_NAME, "_3DKwBj")
    sleep(5)
    atributo = classe.get_attribute("style")
    # The style holds the picture as url("..."); keep only the URL inside.
    link_imagem = atributo.split('url("')[1].split('")')[0]
    print(link_imagem)
    Link.append(link_imagem)

# Write the collected links as a single-column sheet.
data = {'Link Imagem': Link}
df = pd.DataFrame(data)
df.to_excel(r"C:__Imagens e Planilhas PythonShopeeVideoslinkimagens.xlsx",
            engine='xlsxwriter')
print(df)
I need to know how I can download the second product image, since its link is stored in quotes inside a tag.
Alternatively, just copying the image link and saving it in a DataFrame would also work.
import pandas as pd
import xlsxwriter
import pyautogui
import urllib.request
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
# Question script: open each product page listed in the spreadsheet and try to
# download the media referenced by the element's "src" attribute.
# NOTE(review): the Windows paths below appear to have lost their backslash
# separators somewhere along the way -- confirm the real locations before running.
options = webdriver.ChromeOptions()
# Hide the chromedriver "enable-logging" console noise.
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)
driver.get("https://shopee.com.br/Tapete-Sala-1-00-X-1-50-Peludo-Shaggy-Macio-Quarto-Felpudo-i.346235717.7779211526")
driver.maximize_window()

# The sheet is expected to provide a "links" column (page URL) and a "salvar"
# column (suffix used for the saved file name).
df = pd.read_excel(r"C:__Imagens e Planilhas PythonShopeeVideosVideos.xlsx")
for _, row in df.iterrows():
    driver.get(str(row["links"]))
    sleep(5)  # crude wait for the page to render
    video = driver.find_element(By.CLASS_NAME, "_1OPdfl")
    sleep(5)
    # NOTE(review): get_attribute returns None when the element has no "src";
    # the answers in this thread read the URL from the "style" attribute instead.
    atributoSrc = video.get_attribute("src")
    print(atributoSrc)
    try:
        urllib.request.urlretrieve(
            atributoSrc,
            r"C:__Imagens e Planilhas PythonShopeeVideos Baixadosnome" + str(row["salvar"]) + ".mp4",
        )
    except Exception as exc:
        # Best effort: report the failure and move on to the next row instead
        # of swallowing KeyboardInterrupt/SystemExit like a bare `except:` would.
        print("error", exc)
This should download all the product images:
import requests  # was commented out in the snippet, but the loop below needs it

# Every matched div carries its picture as a CSS background-image inside the
# inline "style" attribute, so the URL is cut out of the url("...") wrapper.
imgDivs = driver.find_elements(By.CSS_SELECTOR, '._1OPdfl > ._2PWsS4')
for i, imgDiv in enumerate(imgDivs):
    d = imgDiv.get_attribute('style')
    imgUrl = d.split('url("')[1].split('")')[0]
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(f"img_{i}.jpeg", "wb") as f:
        f.write(requests.get(imgUrl).content)
    print(imgUrl)
The enlarged image gets downloaded twice. You can change the find_elements
arguments to be pickier about the images.
EDIT: to reduce duplicates you might try this:
def getImgUrl(imgDiv):
    """Return the full-size image URL stored in an element's inline style.

    The element is expected to expose ``get_attribute('style')`` (as a
    Selenium WebElement does) returning CSS that contains a ``url(...)``
    value. Double-quoted, single-quoted and unquoted forms are accepted.
    A trailing ``_tn`` (thumbnail marker) is removed so that a thumbnail and
    its enlarged version map to the same URL.

    Raises:
        ValueError: if the style is missing or contains no ``url(...)`` value.
    """
    import re  # local import keeps the snippet self-contained

    d = imgDiv.get_attribute('style') or ''
    # Group 1 is the optional opening quote, group 2 the URL itself; the
    # back-reference makes the closing quote match the opening one.
    found = re.search(r"""url\(\s*(["']?)(.*?)\1\s*\)""", d)
    if found is None:
        raise ValueError(f"no url(...) found in style attribute: {d!r}")
    iUrl = found.group(2)
    if iUrl.endswith('_tn'):
        iUrl = iUrl[:-len('_tn')]
    return iUrl
import requests  # needed for the downloads below

# Collect the image URLs of all matched divs, then drop duplicates.
imgDivs = driver.find_elements(By.CSS_SELECTOR, '._1OPdfl > ._2PWsS4')
imgUrls = [getImgUrl(iDiv) for iDiv in imgDivs]
# keep only unique urls; dict.fromkeys preserves first-seen order, so the
# img_<n> numbering is the same on every run (a set would scramble it)
imgUrls = list(dict.fromkeys(imgUrls))
imgTot = len(imgUrls)
for i, imgUrl in enumerate(imgUrls):
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(f"img_{i}.jpeg", "wb") as f:
        f.write(requests.get(imgUrl).content)
    print(f'{i+1} of {imgTot} links: ', imgUrl)
This works because the link to the thumbnail seems to be the same as the link to the enlarged version, but with '_tn' appended to the end. However, if the site changes its naming convention, this might cause errors.
Thanks! Following your logic, I wrote my own code, which worked:
import pandas as pd
import xlsxwriter
import pyautogui
import urllib.request
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
# Final script: visit every product page listed in the spreadsheet, read the
# image URL out of the element's inline style, and save all URLs to Excel.
# NOTE(review): the Windows paths below appear to have lost their backslash
# separators somewhere along the way -- confirm the real locations before running.
options = webdriver.ChromeOptions()
# Hide the chromedriver "enable-logging" console noise.
options.add_experimental_option('excludeSwitches', ['enable-logging'])
driver = webdriver.Chrome(options=options)
driver.get("https://www.google.com.br")
driver.maximize_window()

# The sheet is expected to provide a "links" column holding the page URLs.
df = pd.read_excel(r"C:__Imagens e Planilhas PythonShopeeVideosLinksAfiliado.xlsx")
Link = []
for _, row in df.iterrows():
    driver.get(str(row["links"]))
    sleep(5)  # crude wait for the page to render
    classe = driver.find_element(By.CLASS_NAME, "_3DKwBj")
    sleep(5)
    atributo = classe.get_attribute("style")
    # The style holds the picture as url("..."); keep only the URL inside.
    link_imagem = atributo.split('url("')[1].split('")')[0]
    print(link_imagem)
    Link.append(link_imagem)

# Write the collected links as a single-column sheet.
data = {'Link Imagem': Link}
df = pd.DataFrame(data)
df.to_excel(r"C:__Imagens e Planilhas PythonShopeeVideoslinkimagens.xlsx",
            engine='xlsxwriter')
print(df)