Converting a csv to dict with multiple values
Question:
I have a csv that when loaded looks like this.
chicken, meat
veal, meat
rice, carbs
potato, carbs
carrot, veggies
mushroom, veggies
apples, fruits
I want to create a dictionary from it, so I’m using the code:
food = pd.read_csv('foods.csv', header=None, index_col=1, squeeze=False).to_dict()
When I print the dictionary, only one item is shown for each key. Instead, I would like all of them to show, like this:
{'carbs':['potato','rice'],
'meat':['chicken','veal'],
'veggies':['mushroom','carrot'],
'fruits':['apples']}
Answers:
You can skip Pandas and deal with the file directly. Since you actually have a two-character delimiter, ', ',
it is easier to skip the csv module too:
# Group the first field of every line under its second field.
di = {}
with open('/tmp/fruit.csv') as f:
    # Each line is expected to look like "chicken, meat"; splitting on the
    # two-character delimiter ', ' then gives the (item, category) pair.
    for x, y in (line.rstrip().split(', ') for line in f):
        # setdefault creates the list the first time a category is seen.
        di.setdefault(y, []).append(x)
>>> di
{'meat': ['chicken', 'veal'], 'carbs': ['rice', 'potato'], 'veggies': ['carrot', 'mushroom'], 'fruits': ['apples']}
Or use pandas:
# Read the header-less file (columns are numbered 0 and 1), gather column 0
# into one list per distinct value of column 1, and export the result as a dict.
df = (
    pd.read_csv('/tmp/fruit.csv', header=None, sep=', ', engine='python')
    .groupby([1])[0]
    .agg(list)
    .to_dict()
)
>>> df
{'carbs': ['rice', 'potato'], 'fruits': ['apples'], 'meat': ['chicken', 'veal'], 'veggies': ['carrot', 'mushroom']}
If you don’t need Pandas, this can be done easily enough using Python’s CSV reader.
import csv
from collections import defaultdict
# Maps each category to the list of foods filed under it.
category_food_map = defaultdict(list)

with open('foods.csv', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        food, category = row
        # Fields are separated by ", ", so the second field keeps a leading
        # space that has to be stripped before it is used as a key.
        category = category.strip()
        category_food_map[category].append(food)

# Print one line per category with all of its foods.
for category, foods in category_food_map.items():
    print(f'{category}: {foods}')
and I get:
meat: ['chicken', 'veal']
carbs: ['rice', 'potato']
veggies: ['carrot', 'mushroom']
fruits: ['apples']
There’s a pure-Python library, convtools, which provides many data-processing primitives and fosters a functional approach:
from convtools import conversion as c
from convtools.contrib.tables import Table
# Build the converter once so it can be reused later.
converter = (
    c.group_by(c.item(1))
    .aggregate(
        {
            # Swap in c.ReduceFuncs.ArrayDistinct(c.item(0)) to keep only
            # unique values in each list.
            c.item(1): c.ReduceFuncs.Array(c.item(0)),
        }
    )
    .gen_converter()
)

# Read the rows as an iterator so they can be streamed through the
# converter if needed.
rows = Table.from_csv(
    "tmp2.csv",
    dialect=Table.csv_dialect(skipinitialspace=True),
).into_iter_rows(list)
result = converter(rows)

assert result == [
    {'meat': ['chicken', 'veal']},
    {'carbs': ['rice', 'potato']},
    {'veggies': ['carrot', 'mushroom']},
    {'fruits': ['apples']},
]
# NOTE(review): df1 is not defined in this snippet. The column names used
# here ("chicken" and " meat") suggest it was read with the first CSV row
# taken as the header, which would explain why 'chicken' is missing from
# the ' meat' list in the output below; confirm before relying on it.
df1.groupby(" meat").agg(list).chicken.to_dict()
out:
{' carbs': ['rice', 'potato'],
' fruits': ['apples'],
' meat': ['veal'],
' veggies': ['carrot', 'mushroom']}
I have a csv that when loaded looks like this.
chicken, meat
veal, meat
rice, carbs
potato, carbs
carrot, veggies
mushroom, veggies
apples, fruits
I want to create a dictionary from it, so I’m using the code:
food = pd.read_csv('foods.csv', header=None, index_col=1, squeeze=False).to_dict()
When I print the dictionary, only one item is shown for each key. Instead, I would like all of them to show, like this:
{'carbs':['potato','rice'],
'meat':['chicken','veal'],
'veggies':['mushroom','carrot'],
'fruits':['apples']}
You can skip Pandas and deal with the file directly. Since you actually have a two-character delimiter, ', ',
it is easier to skip the csv module too:
# Group the first field of every line under its second field.
di = {}
with open('/tmp/fruit.csv') as f:
    # Each line is expected to look like "chicken, meat"; splitting on the
    # two-character delimiter ', ' then gives the (item, category) pair.
    for x, y in (line.rstrip().split(', ') for line in f):
        # setdefault creates the list the first time a category is seen.
        di.setdefault(y, []).append(x)
>>> di
{'meat': ['chicken', 'veal'], 'carbs': ['rice', 'potato'], 'veggies': ['carrot', 'mushroom'], 'fruits': ['apples']}
Or use pandas:
# Read the header-less file (columns are numbered 0 and 1), gather column 0
# into one list per distinct value of column 1, and export the result as a dict.
df = (
    pd.read_csv('/tmp/fruit.csv', header=None, sep=', ', engine='python')
    .groupby([1])[0]
    .agg(list)
    .to_dict()
)
>>> df
{'carbs': ['rice', 'potato'], 'fruits': ['apples'], 'meat': ['chicken', 'veal'], 'veggies': ['carrot', 'mushroom']}
If you don’t need Pandas, this can be done easily enough using Python’s CSV reader.
import csv
from collections import defaultdict
# Maps each category to the list of foods filed under it.
category_food_map = defaultdict(list)

with open('foods.csv', newline='') as f:
    reader = csv.reader(f)
    for row in reader:
        food, category = row
        # Fields are separated by ", ", so the second field keeps a leading
        # space that has to be stripped before it is used as a key.
        category = category.strip()
        category_food_map[category].append(food)

# Print one line per category with all of its foods.
for category, foods in category_food_map.items():
    print(f'{category}: {foods}')
and I get:
meat: ['chicken', 'veal']
carbs: ['rice', 'potato']
veggies: ['carrot', 'mushroom']
fruits: ['apples']
There’s a pure-Python library, convtools, which provides many data-processing primitives and fosters a functional approach:
from convtools import conversion as c
from convtools.contrib.tables import Table
# Build the converter once so it can be reused later.
converter = (
    c.group_by(c.item(1))
    .aggregate(
        {
            # Swap in c.ReduceFuncs.ArrayDistinct(c.item(0)) to keep only
            # unique values in each list.
            c.item(1): c.ReduceFuncs.Array(c.item(0)),
        }
    )
    .gen_converter()
)

# Read the rows as an iterator so they can be streamed through the
# converter if needed.
rows = Table.from_csv(
    "tmp2.csv",
    dialect=Table.csv_dialect(skipinitialspace=True),
).into_iter_rows(list)
result = converter(rows)

assert result == [
    {'meat': ['chicken', 'veal']},
    {'carbs': ['rice', 'potato']},
    {'veggies': ['carrot', 'mushroom']},
    {'fruits': ['apples']},
]
# NOTE(review): df1 is not defined in this snippet. The column names used
# here ("chicken" and " meat") suggest it was read with the first CSV row
# taken as the header, which would explain why 'chicken' is missing from
# the ' meat' list in the output below; confirm before relying on it.
df1.groupby(" meat").agg(list).chicken.to_dict()
out:
{' carbs': ['rice', 'potato'],
' fruits': ['apples'],
' meat': ['veal'],
' veggies': ['carrot', 'mushroom']}