How to write all scraped data to csv?

Question

I have a PYTHON code to scrape data from a website and write it into a CSV file. But after running my code, only the last row(joblink) is displayed in my excel while other rows are empty with the headers only.

Please how do I fix? Below is my codeblock.



for x in range(1, 210):

    html_text = requests.get(f'https://www.timesjobs.com/candidate/job-search.html?from=submit&actualTxtKeywords=Python&searchBy=0&rdoOperator=OR&searchType=personalizedSearch&luceneResultSize=25&postWeek=60&txtKeywords=Python&pDate=I&sequence={x}&startPage=1').text

    soup = BeautifulSoup(html_text, 'lxml')

    jobs = soup.find_all('li', class_ = 'clearfix job-bx wht-shd-bx')

    with open('jobberman.csv', 'w+', newline = '',  encoding = 'utf-8') as f:
        header = ['Company Name', 'Keyskill', 'Joblink']
        writer = csv.writer(f, delimiter = '')
        
        writer.writerow(header)
    
    
        for job in jobs:
            company_name = job.find('h3', class_ = 'joblist-comp-name').text.replace(' ','')
            keyskill = job.find('span', class_ = 'srp-skills').text.replace(' ','')
            joblink = job.header.h2.a['href']


            print(f"Company Name: {company_name.strip()}")
            print(f"Required Skills: {keyskill.strip()}")
            print(f"Joblink: {joblink}")


            print('')

            joblist = [company_name, keyskill, joblink]
            writer.writerow(joblist)

Asked By: Miracle

||

Source

Answer 1

Main issue is, that you overwrite your contents in each iteration, so perform your outer for-loop while file is open.

...
with open('jobberman.csv', 'w+', newline = '',  encoding = 'utf-8') as f:
    header = ['Company Name', 'Keyskill', 'Joblink']
    writer = csv.writer(f)
    writer.writerow(header)

    for x in range(1, 120):
        html_text = requests.get(f'https://www.timesjobs.com/candidate/job-search.html?from=submit&actualTxtKeywords=Python&searchBy=0&rdoOperator=OR&searchType=personalizedSearch&luceneResultSize=25&postWeek=60&txtKeywords=Python&pDate=I&sequence={x}&startPage=1').text
        soup = BeautifulSoup(html_text, 'lxml')
        jobs = soup.find_all('li', class_ = 'clearfix job-bx wht-shd-bx')

        for job in jobs:
            company_name = job.find('h3', class_ = 'joblist-comp-name').get_text(strip=True)
            keyskill = job.find('span', class_ = 'srp-skills').get_text(strip=True)
            joblink = job.header.h2.a['href']

            joblist = [company_name, keyskill, joblink]
            writer.writerow(joblist)

Example

import csv
from csv import writer
from bs4 import BeautifulSoup

with open('jobberman.csv', 'w+', newline = '',  encoding = 'utf-8') as f:
    header = ['Company Name', 'Keyskill', 'Joblink']
    writer = csv.writer(f)
    writer.writerow(header)

    for x in range(1, 120):
        #### requesting and scraping info      
        joblist = ['Company Name'+str(x), 'Keyskill'+str(x), 'Joblink'+str(x)]
        writer.writerow(joblist)

Output

Company Name,Keyskill,Joblink
Company Name1,Keyskill1,Joblink1
Company Name2,Keyskill2,Joblink2
Company Name3,Keyskill3,Joblink3
Company Name4,Keyskill4,Joblink4
Company Name5,Keyskill5,Joblink5
Company Name6,Keyskill6,Joblink6
Company Name7,Keyskill7,Joblink7

Answered By: HedgeHog

Answer 2

Same here. Can’t access the site. But give this a try:

import requests
import pandas as pd
from bs4 import BeautifulSoup

df = pd.DataFrame([], columns = ['Company Name', 'Keyskill', 'Joblink'])
df.to_csv('jobberman.csv', index=False)   
for x in range(1, 210):
    html_text = requests.get(f'https://www.timesjobs.com/candidate/job-search.html?from=submit&actualTxtKeywords=Python&searchBy=0&rdoOperator=OR&searchType=personalizedSearch&luceneResultSize=25&postWeek=60&txtKeywords=Python&pDate=I&sequence={x}&startPage=1').text
    soup = BeautifulSoup(html_text, 'lxml')
    jobs = soup.find_all('li', class_ = 'clearfix job-bx wht-shd-bx')

    rows = []
    for job in jobs:
        company_name = job.find('h3', class_ = 'joblist-comp-name').text.replace(' ','')
        keyskill = job.find('span', class_ = 'srp-skills').text.replace(' ','')
        joblink = job.header.h2.a['href']

        row = {
            'Company Name':company_name.strip(), 
            'Keyskill': keyskill.strip(), 
            'Joblink': joblink}
        
        rows.append(row)
        
        print(f"Company Name: {company_name.strip()}")
        print(f"Required Skills: {keyskill.strip()}")
        print(f"Joblink: {joblink}")

        print('') 
    
    df = pd.DataFrame(rows)
    df.to_csv('jobberman.csv', mode='a', header=False, index=False)

Answered By: chitown88

How to write all scraped data to csv?

Question:

Answers:

Example

Output