Create a dictionary from a csv file where a specific column is the key and the rows are the values in Python
Question:
I have a csv file formatted as below:
jersey number, position, name, birth_date, birth_city, years_in_nba, team
23 , SF , Lebron, 12/30/84 , Akron , 19 , Lakers
30 , PG , Curry, 03/14/88 , Akron , 13 , Warriors
34 , PF , Giannis, 08/26/89 , Athens , 8 , Bucks
My goal is to group the rows by position, where each key is a distinct position and each value is a list of the players who share that position, like below:
{
"SF": [{..player1..}, {..player2..}],
"PF": [{..player1..}, {..player2..}],
"SG": [{..player1..}, {..player2..}],
"PG": [{..player1..}, {..player2..}],
}
This is what I have so far:
positions = {}


def players_position():
    """Read the players CSV and index its rows by the ``position`` column.

    Every position key holds exactly one row: when a later row has the same
    position, it replaces the row stored before it.

    Returns:
        The module-level ``positions`` dict.
    """
    with open(filename, 'r') as file_obj:
        for player in DictReader(file_obj, delimiter=","):
            key = player["position"]
            # Plain assignment: the row previously stored under this key is lost.
            positions[key] = player
    return positions


print(players_position())
With the code as it is, the current value always overrides the previous one. I don’t want that. I want each position to hold a list to which we can keep appending player info, as displayed above.
Answers:
Try this
positions = {}


def _clean(cell):
    """Return ``cell`` without surrounding whitespace; pass non-strings through."""
    return cell.strip() if isinstance(cell, str) else cell


def players_position(csv_path=None):
    """Group the rows of a players CSV file by their ``position`` column.

    Args:
        csv_path: Path of the CSV file to read. When omitted, the
            module-level ``filename`` is used.

    Returns:
        The module-level ``positions`` dict, mapping each position to the
        list of row dicts (in file order) that share it. Header names and
        cell values are stripped of surrounding whitespace.

    Raises:
        KeyError: If the file has no ``position`` column.
    """
    if csv_path is None:
        csv_path = filename

    # Start from a clean slate so that calling the function a second time
    # does not append every player again.
    positions.clear()

    with open(csv_path, 'r', newline='') as file_obj:
        dict_reader = DictReader(file_obj, delimiter=",")
        for row in dict_reader:
            # The sample file pads its cells ("23 , SF , Lebron"), so the raw
            # header is " position" and the raw value is " SF ". Strip both,
            # otherwise the lookup below raises KeyError.
            player = {_clean(key): _clean(value) for key, value in row.items()}
            # setdefault creates the list the first time a position is seen.
            positions.setdefault(player["position"], []).append(player)
    return positions


if __name__ == "__main__":
    print(players_position())
The function checks if the position is already in `positions`. If it is, we simply append the current row to the list of players already in that position. If it is not, we create a new list containing the current row and assign it to `positions[position]`.
Note that this is based on the code from your question.
Here you go. I originally tried basing this off what you had, but because the raw CSV data ends up with a lot of whitespace padding once it is split, I did a little extra work to clean up the keys and values.
import csv
CSV = """
jersey number, position, name, birth_date, birth_city, years_in_nba, team
23 , SF , Lebron, 12/30/84 , Akron , 19 , Lakers
30 , PG , Curry, 03/14/88 , Akron , 13 , Warriors
34 , PF , Giannis, 08/26/89 , Athens , 8 , Bucks
"""


def main(csv_text=CSV):
    """Group the players described by ``csv_text`` by their position.

    Args:
        csv_text: Comma-separated text whose first non-blank line is the
            header and which contains a ``position`` column. Defaults to the
            sample ``CSV`` above.

    Returns:
        A dict mapping each position to the list of players (one dict per
        row, keyed by the stripped header names) in that position. Positions
        and players keep the order in which they appear in the text. Text
        with no non-blank lines gives an empty dict. For the sample data::

            {
                'SF': [
                    {
                        'jersey number': '23',
                        'position': 'SF',
                        'name': 'Lebron',
                        'birth_date': '12/30/84',
                        'birth_city': 'Akron',
                        'years_in_nba': '19',
                        'team': 'Lakers'
                    }
                ],
                'PG': [
                    {
                        'jersey number': '30',
                        'position': 'PG',
                        'name': 'Curry',
                        'birth_date': '03/14/88',
                        'birth_city': 'Akron',
                        'years_in_nba': '13',
                        'team': 'Warriors'
                    }
                ],
                'PF': [
                    {
                        'jersey number': '34',
                        'position': 'PF',
                        'name': 'Giannis',
                        'birth_date': '08/26/89',
                        'birth_city': 'Athens',
                        'years_in_nba': '8',
                        'team': 'Bucks'
                    }
                ]
            }

    Raises:
        KeyError: If the header has no ``position`` column.
    """
    # The triple-quoted sample begins with a newline. Drop blank lines first,
    # otherwise that empty line would be read as an empty header row.
    lines = [line for line in csv_text.splitlines() if line.strip()]
    rows = csv.reader(lines)

    # Strip the header names because the cells are padded with whitespace.
    keys = [name.strip() for name in next(rows, [])]

    players_by_position = {}
    for row in rows:
        # Combine (zip) the keys with the stripped values to build one player.
        player = dict(zip(keys, (value.strip() for value in row)))
        # One pass over the rows; setdefault starts the list for a new position.
        players_by_position.setdefault(player['position'], []).append(player)
    return players_by_position


if __name__ == "__main__":
    print(main())
I have a csv file formatted as below:
jersey number, position, name, birth_date, birth_city, years_in_nba, team
23 , SF , Lebron, 12/30/84 , Akron , 19 , Lakers
30 , PG , Curry, 03/14/88 , Akron , 13 , Warriors
34 , PF , Giannis, 08/26/89 , Athens , 8 , Bucks
My goal is to group the rows by position, where each key is a distinct position and each value is a list of the players who share that position, like below:
{
"SF": [{..player1..}, {..player2..}],
"PF": [{..player1..}, {..player2..}],
"SG": [{..player1..}, {..player2..}],
"PG": [{..player1..}, {..player2..}],
}
This is what I have so far:
positions = {}


def players_position():
    """Read the players CSV and index its rows by the ``position`` column.

    Every position key holds exactly one row: when a later row has the same
    position, it replaces the row stored before it.

    Returns:
        The module-level ``positions`` dict.
    """
    with open(filename, 'r') as file_obj:
        for player in DictReader(file_obj, delimiter=","):
            key = player["position"]
            # Plain assignment: the row previously stored under this key is lost.
            positions[key] = player
    return positions


print(players_position())
With the code as it is, the current value always overrides the previous one. I don’t want that. I want each position to hold a list to which we can keep appending player info, as displayed above.
Try this
positions = {}


def _clean(cell):
    """Return ``cell`` without surrounding whitespace; pass non-strings through."""
    return cell.strip() if isinstance(cell, str) else cell


def players_position(csv_path=None):
    """Group the rows of a players CSV file by their ``position`` column.

    Args:
        csv_path: Path of the CSV file to read. When omitted, the
            module-level ``filename`` is used.

    Returns:
        The module-level ``positions`` dict, mapping each position to the
        list of row dicts (in file order) that share it. Header names and
        cell values are stripped of surrounding whitespace.

    Raises:
        KeyError: If the file has no ``position`` column.
    """
    if csv_path is None:
        csv_path = filename

    # Start from a clean slate so that calling the function a second time
    # does not append every player again.
    positions.clear()

    with open(csv_path, 'r', newline='') as file_obj:
        dict_reader = DictReader(file_obj, delimiter=",")
        for row in dict_reader:
            # The sample file pads its cells ("23 , SF , Lebron"), so the raw
            # header is " position" and the raw value is " SF ". Strip both,
            # otherwise the lookup below raises KeyError.
            player = {_clean(key): _clean(value) for key, value in row.items()}
            # setdefault creates the list the first time a position is seen.
            positions.setdefault(player["position"], []).append(player)
    return positions


if __name__ == "__main__":
    print(players_position())
The function checks if the position is already in `positions`. If it is, we simply append the current row to the list of players already in that position. If it is not, we create a new list containing the current row and assign it to `positions[position]`.
Note that this is based on the code from your question.
Here you go. I originally tried basing this off what you had, but because the raw CSV data ends up with a lot of whitespace padding once it is split, I did a little extra work to clean up the keys and values.
import csv
CSV = """
jersey number, position, name, birth_date, birth_city, years_in_nba, team
23 , SF , Lebron, 12/30/84 , Akron , 19 , Lakers
30 , PG , Curry, 03/14/88 , Akron , 13 , Warriors
34 , PF , Giannis, 08/26/89 , Athens , 8 , Bucks
"""


def main(csv_text=CSV):
    """Group the players described by ``csv_text`` by their position.

    Args:
        csv_text: Comma-separated text whose first non-blank line is the
            header and which contains a ``position`` column. Defaults to the
            sample ``CSV`` above.

    Returns:
        A dict mapping each position to the list of players (one dict per
        row, keyed by the stripped header names) in that position. Positions
        and players keep the order in which they appear in the text. Text
        with no non-blank lines gives an empty dict. For the sample data::

            {
                'SF': [
                    {
                        'jersey number': '23',
                        'position': 'SF',
                        'name': 'Lebron',
                        'birth_date': '12/30/84',
                        'birth_city': 'Akron',
                        'years_in_nba': '19',
                        'team': 'Lakers'
                    }
                ],
                'PG': [
                    {
                        'jersey number': '30',
                        'position': 'PG',
                        'name': 'Curry',
                        'birth_date': '03/14/88',
                        'birth_city': 'Akron',
                        'years_in_nba': '13',
                        'team': 'Warriors'
                    }
                ],
                'PF': [
                    {
                        'jersey number': '34',
                        'position': 'PF',
                        'name': 'Giannis',
                        'birth_date': '08/26/89',
                        'birth_city': 'Athens',
                        'years_in_nba': '8',
                        'team': 'Bucks'
                    }
                ]
            }

    Raises:
        KeyError: If the header has no ``position`` column.
    """
    # The triple-quoted sample begins with a newline. Drop blank lines first,
    # otherwise that empty line would be read as an empty header row.
    lines = [line for line in csv_text.splitlines() if line.strip()]
    rows = csv.reader(lines)

    # Strip the header names because the cells are padded with whitespace.
    keys = [name.strip() for name in next(rows, [])]

    players_by_position = {}
    for row in rows:
        # Combine (zip) the keys with the stripped values to build one player.
        player = dict(zip(keys, (value.strip() for value in row)))
        # One pass over the rows; setdefault starts the list for a new position.
        players_by_position.setdefault(player['position'], []).append(player)
    return players_by_position


if __name__ == "__main__":
    print(main())