Basic Python Questions

Question:

The way I’ve done this is rather cumbersome. How can I adjust the code below so that the URL is built from each country code in the list and the JSON for every country is loaded in a loop, rather than loading each country separately and unioning the resulting dataframes?

Your help is much appreciated!

import urllib.request
import json
import pandas as pd
from datetime import datetime

# Region codes of interest. Only "nl" and "us" are actually loaded below;
# the list itself is not used yet (that is what the question is about).
countries = ["nl", "us", "se"]

## load Dutch top episodes chart
with urllib.request.urlopen("https://podcastcharts.byspotify.com/api/charts/top_episodes?region=nl") as url_NL:
    # parse the JSON response body while the connection is still open
    dataFrameNL = json.load(url_NL)
print(dataFrameNL)

## load US top episodes chart
with urllib.request.urlopen("https://podcastcharts.byspotify.com/api/charts/top_episodes?region=us") as url_US:
    dataFrameUS = json.load(url_US)
print(dataFrameUS)

# creating the dataframe
## NL
dfNL = pd.json_normalize(dataFrameNL)
## US
dfUS = pd.json_normalize(dataFrameUS)

## add scraped_date
dfNL['scraped_date'] = pd.Timestamp.today().strftime('%Y-%m-%d')
dfUS['scraped_date'] = pd.Timestamp.today().strftime('%Y-%m-%d')

## add rank: 1-based row position within each chart
## (assumes the API returns the entries in chart order -- TODO confirm)
dfNL["rank"] = dfNL.index + 1
# each frame must be ranked from its own index, not from the NL frame
dfUS["rank"] = dfUS.index + 1

## add country
dfNL['country'] = 'NL'
dfUS['country'] = 'US'

## concatenate
union_dataframes = pd.concat([dfNL, dfUS])

## create file name with date output
file_name = 'mycsvfile' + str(datetime.today().strftime('%Y-%m-%d')) + '.csv'

# write the combined frame to a csv file
union_dataframes.to_csv(file_name, encoding='utf-8', index=False)

I’m loading the different datasets one by one and concatenating them, rather than looping over a list.

Asked By: jsb92

||

Answers:

Create a loop that processes each country and appends the resulting DataFrame to a list; then, outside the loop, join them all together with concat():

import json
import urllib.request
from pathlib import Path

import pandas as pd

# Region codes to download; each one is substituted into the query string.
countries = ['nl', 'us', 'se']
url_base = 'https://podcastcharts.byspotify.com/api/charts/top_episodes'
# Computed once so every row and the file name share the same date.
today = pd.Timestamp.today().strftime('%Y-%m-%d')

dfs = []
for country in countries:
    # dynamically set the country with an f-string
    with urllib.request.urlopen(f'{url_base}?region={country}') as url:
        dataFrame = json.load(url)

    df = pd.json_normalize(dataFrame)

    # add scraped_date
    df['scraped_date'] = today

    # add rank: 1-based row position within this country's own chart
    # (assumes the API returns the entries in chart order -- TODO confirm)
    df['rank'] = df.index + 1

    # add country, dynamically generate the uppercase country name
    df['country'] = country.upper()
    dfs.append(df)

# concatenate; ignore_index gives the combined frame a unique 0..n-1 index
# instead of repeating each chart's own 0-based index
union_dataframes = pd.concat(dfs, ignore_index=True)

# create file name with date output
file_path = Path(f'mycsvfile{today}.csv')

# write the combined frame to a csv file
union_dataframes.to_csv(file_path, encoding='utf-8', index=False)

EDIT: To loop over several chart categories as well, nest a second loop inside the country loop:

import json
import urllib.request
from pathlib import Path

import pandas as pd

# Region codes to download; each one is substituted into the query string.
countries = ['nl', 'us', 'se']
# Chart names appended to the ".../api/charts/" path. Only 'top_episodes' is
# known from the URLs above; add other chart names the API supports here.
categories = ['top_episodes']

# The scheme is required: urlopen() rejects a URL without "https://".
url_base = 'https://podcastcharts.byspotify.com/api/'
# Computed once so every row and the file name share the same date.
today = pd.Timestamp.today().strftime('%Y-%m-%d')

dfs = []
for country in countries:
    for category in categories:
        # dynamically set the category and country with an f-string
        with urllib.request.urlopen(f'{url_base}charts/{category}?region={country}') as url:
            dataFrame = json.load(url)

        df = pd.json_normalize(dataFrame)

        # add scraped_date
        df['scraped_date'] = today

        # add rank: 1-based row position within this chart
        # (assumes the API returns the entries in chart order -- TODO confirm)
        df['rank'] = df.index + 1

        # add country, dynamically generate the uppercase country name
        df['country'] = country.upper()
        df['category'] = category
        dfs.append(df)

# concatenate; ignore_index gives the combined frame a unique 0..n-1 index
# instead of repeating each chart's own 0-based index
union_dataframes = pd.concat(dfs, ignore_index=True)

# create file name with date output
file_path = Path(f'mycsvfile{today}.csv')

# write the combined frame to a csv file
union_dataframes.to_csv(file_path, encoding='utf-8', index=False)
Answered By: jezrael
Categories: questions Tags: , , ,
Answers are sorted by their score. The answer accepted by the question owner as the best is marked with an icon at the top-right corner.