python extracting values from lists inside a list
Question:
Using Selenium WebDriver, I search Google for an input value, then open the result links one by one and collect the email addresses found on each page.
[[], [], [], [], ['[email protected]', '[email protected]', '[email protected]'], ['[email protected]', '[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], [], [], ['[email protected]'], ['[email protected]'], ['[email protected]'], ['[email protected]'], ['[email protected]'], ['[email protected]'], [], [], [], []] [[], ['[email protected]', '[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]'], ['[email protected]'], ['[email protected]']]
The result is a list of lists. I need to flatten it so that the email
strings are the values of a single list.
#open google
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.keys import Keys
# Configure Chrome for the scraping session and open Google.
chrome_options = Options()
chrome_options.headless = False  # run with a visible browser window
chrome_options.add_argument("start-maximized")
# chrome_options.add_experimental_option("detach", True)
chrome_options.add_argument("--no-sandbox")
# add_experimental_option keeps a single value per option name, so a second
# call with "excludeSwitches" would replace the first one. Both switches are
# therefore passed together in one list.
chrome_options.add_experimental_option(
    "excludeSwitches", ["enable-automation", "enable-logging"]
)
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')

# webdriver_manager downloads/locates a matching chromedriver binary.
driver = webdriver.Chrome(
    service=ChromeService(ChromeDriverManager().install()),
    options=chrome_options,
)
driver.get('https://www.google.com/')
# Search term. It is hard-coded here; enable the input() line to prompt instead.
# var_inp = input('Write the name to search:')
var_inp = 'dermalog lf10'

# Wait (up to 10 s) for the search box named "q" to be clickable, then type
# the term and submit it with RETURN.
search_box = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.NAME, "q"))
)
search_box.send_keys(var_inp + Keys.RETURN)

# Wait (up to 10 s) for the <cite> elements of the results page.
res = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.TAG_NAME, 'cite'))
)
print(len(res))

# Keep only the text of each <cite>'s first child node.
res_lst = [
    driver.execute_script("return arguments[0].firstChild.textContent;", cite)
    for cite in res
]
print(res_lst)
# Visit every collected link and pull the email addresses out of its HTML.
import re

# Compiled once, outside the loop. The pattern is: local part, a literal "@",
# domain, an escaped dot, then a 2-4 letter TLD (letters only, no "|").
email_pattern = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}")

emails_lst = []  # one list of matches per visited page
try:
    for url in res_lst:
        # Skip entries that are not absolute http(s) URLs; driver.get() cannot
        # navigate to them.
        if not url or not url.startswith("http"):
            continue
        driver.get(url)
        # No implicit wait is set here: it only affects element lookups, and
        # this loop reads page_source directly.
        emails = email_pattern.findall(driver.page_source)
        print(emails)
        emails_lst.append(emails)
finally:
    # quit() ends the whole WebDriver session, even if a page load failed.
    driver.quit()
print(emails_lst)

# Flatten the per-page lists into one list of strings, then drop repeated
# addresses while keeping first-seen order (dict keys preserve insertion order).
flat_emails = [email for page_emails in emails_lst for email in page_emails]
no_duplicates = list(dict.fromkeys(flat_emails))
print(no_duplicates)
#send email
Answers:
To create a list out of a list of lists you can use the following logic:
# Flatten a list of lists with two explicit nested loops.
# The input is named nested_emails rather than `list` so the builtin `list`
# type is not shadowed for the rest of the module.
nested_emails = [
    ['[email protected]', '[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
]
flat_list = []
for sublist in nested_emails:
    for item in sublist:
        flat_list.append(item)
print(flat_list)
As an alternative you can have nested iterations in a single list comprehension as follows:
# Flatten a list of lists with a single nested list comprehension.
# The input is named nested_emails rather than `list` so the builtin `list`
# type is not shadowed for the rest of the module.
nested_emails = [
    ['[email protected]', '[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
]
# Outer loop first, inner loop second: same order as the nested for-loops.
flatened_list = [item for sublist in nested_emails for item in sublist]
print(flatened_list)
Console output:
['[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]']
Using Selenium WebDriver, I search Google for an input value, then open the result links one by one and collect the email addresses found on each page.
[[], [], [], [], ['[email protected]', '[email protected]', '[email protected]'], ['[email protected]', '[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], [], [], ['[email protected]'], ['[email protected]'], ['[email protected]'], ['[email protected]'], ['[email protected]'], ['[email protected]'], [], [], [], []] [[], ['[email protected]', '[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]', '[email protected]'], ['[email protected]'], ['[email protected]'], ['[email protected]']]
The result is a list of lists. I need to flatten it so that the email
strings are the values of a single list.
#open google
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.keys import Keys
# Configure Chrome for the scraping session and open Google.
chrome_options = Options()
chrome_options.headless = False  # run with a visible browser window
chrome_options.add_argument("start-maximized")
# chrome_options.add_experimental_option("detach", True)
chrome_options.add_argument("--no-sandbox")
# add_experimental_option keeps a single value per option name, so a second
# call with "excludeSwitches" would replace the first one. Both switches are
# therefore passed together in one list.
chrome_options.add_experimental_option(
    "excludeSwitches", ["enable-automation", "enable-logging"]
)
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')

# webdriver_manager downloads/locates a matching chromedriver binary.
driver = webdriver.Chrome(
    service=ChromeService(ChromeDriverManager().install()),
    options=chrome_options,
)
driver.get('https://www.google.com/')
# Search term. It is hard-coded here; enable the input() line to prompt instead.
# var_inp = input('Write the name to search:')
var_inp = 'dermalog lf10'

# Wait (up to 10 s) for the search box named "q" to be clickable, then type
# the term and submit it with RETURN.
search_box = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.NAME, "q"))
)
search_box.send_keys(var_inp + Keys.RETURN)

# Wait (up to 10 s) for the <cite> elements of the results page.
res = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.TAG_NAME, 'cite'))
)
print(len(res))

# Keep only the text of each <cite>'s first child node.
res_lst = [
    driver.execute_script("return arguments[0].firstChild.textContent;", cite)
    for cite in res
]
print(res_lst)
# Visit every collected link and pull the email addresses out of its HTML.
import re

# Compiled once, outside the loop. The pattern is: local part, a literal "@",
# domain, an escaped dot, then a 2-4 letter TLD (letters only, no "|").
email_pattern = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}")

emails_lst = []  # one list of matches per visited page
try:
    for url in res_lst:
        # Skip entries that are not absolute http(s) URLs; driver.get() cannot
        # navigate to them.
        if not url or not url.startswith("http"):
            continue
        driver.get(url)
        # No implicit wait is set here: it only affects element lookups, and
        # this loop reads page_source directly.
        emails = email_pattern.findall(driver.page_source)
        print(emails)
        emails_lst.append(emails)
finally:
    # quit() ends the whole WebDriver session, even if a page load failed.
    driver.quit()
print(emails_lst)

# Flatten the per-page lists into one list of strings, then drop repeated
# addresses while keeping first-seen order (dict keys preserve insertion order).
flat_emails = [email for page_emails in emails_lst for email in page_emails]
no_duplicates = list(dict.fromkeys(flat_emails))
print(no_duplicates)
#send email
To create a list out of a list of lists you can use the following logic:
# Flatten a list of lists with two explicit nested loops.
# The input is named nested_emails rather than `list` so the builtin `list`
# type is not shadowed for the rest of the module.
nested_emails = [
    ['[email protected]', '[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
]
flat_list = []
for sublist in nested_emails:
    for item in sublist:
        flat_list.append(item)
print(flat_list)
As an alternative you can have nested iterations in a single list comprehension as follows:
# Flatten a list of lists with a single nested list comprehension.
# The input is named nested_emails rather than `list` so the builtin `list`
# type is not shadowed for the rest of the module.
nested_emails = [
    ['[email protected]', '[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
    ['[email protected]', '[email protected]'],
]
# Outer loop first, inner loop second: same order as the nested for-loops.
flatened_list = [item for sublist in nested_emails for item in sublist]
print(flatened_list)
Console output:
['[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]', '[email protected]']