Web Scraping with Python – loop through list of URLs and convert to CSV

Question:

I have a list of URLs whose table data I would like to scrape and save as CSVs on a local drive. I would also like to use a substring of each URL as the filename. This is the code I currently have, but it is only writing the first URL's data to two separate files.

import csv
import requests
from bs4 import BeautifulSoup

link = ['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
        'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']

def get_data(link):
    for url in link:
        res = requests.get(url)
        soup = BeautifulSoup(res.text,"lxml")

        for items in soup.select("table.table tr"):
            td = [item.get_text(strip=True) for item in items.select("th,td")]
            writer.writerow(td)

if __name__ == '__main__':
    for f in link:        
        f2 = f.split('audit/')[-1].split('.html')[0]   
        with open(f2 + '.csv',"w",newline="") as infile: 
            writer = csv.writer(infile)
            get_data(link)
Asked By: 6114617


Answers:

You don't need to loop over link again inside get_data(); as written, every call to get_data(link) iterates over all of the URLs, so each CSV file ends up with the same combined rows. Instead, pass each URL, along with the open CSV writer, to get_data() from your main loop:

import csv
import requests
from bs4 import BeautifulSoup

link = ['https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_19.html',
        'https://www.health.ny.gov/statistics/sparcs/reports/audit/Emergency_Department_20.html']

def get_data(url, writer):
    # Fetch a single page and append its table rows to the given CSV writer.
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "lxml")

    for items in soup.select("table.table tr"):
        td = [item.get_text(strip=True) for item in items.select("th,td")]
        writer.writerow(td)

if __name__ == '__main__':
    for f in link:
        # Use the part of the URL between 'audit/' and '.html' as the filename.
        f2 = f.split('audit/')[-1].split('.html')[0]
        with open(f2 + '.csv', "w", newline="") as outfile:
            writer = csv.writer(outfile)
            get_data(f, writer)
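
As an aside, the chained split() calls work for these exact URLs, but the standard library can derive the filename stem without depending on the literal 'audit/' path segment. A minimal sketch, where url_to_stem is just an illustrative helper name:

from pathlib import PurePosixPath
from urllib.parse import urlparse

def url_to_stem(url):
    # '/statistics/.../Emergency_Department_19.html' -> 'Emergency_Department_19'
    return PurePosixPath(urlparse(url).path).stem

# url_to_stem(link[0]) == 'Emergency_Department_19'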
Answered By: Marcelo Paco