How can I plot a dictionary with several keys and different values under each key?
Question:
I have a dictionary that prints out like this:
t_onset {'Accuracy': 0.7086659064994298, 'Precision': 0.6666666666666666, 'Recall': 0.00390625, 'F1': 0.007766990291262136, 'Accuracy Training': 0.7086659064994298, 'Precision Training': 0.6666666666666666, 'Recall Training': 0.00390625, 'F1 Training': 0.007766990291262136}
t_max {'Accuracy': 0.7080957810718358, 'Precision': 0.5, 'Recall': 0.00390625, 'F1': 0.007751937984496124, 'Accuracy Training': 0.7080957810718358, 'Precision Training': 0.5, 'Recall Training': 0.00390625, 'F1 Training': 0.007751937984496124}
I want to plot it as a histogram with a pair of adjacent columns for each metric: one for the test value and one for the train value. There should be two graphs in total, one for each of the keys (t_onset and t_max), and each graph should contain these paired columns.
For example, like this:
enter image description here
Whenever I try to iterate through the keys, I get errors. I can’t figure out how to nest the results. I have looked at some resources, but they all seem to plot dictionaries that have only one set of keys.
Example of histogram code I tried to write:
import matplotlib.pyplot as plt
import numpy as np
# Define the data for each key and metric
keys = ['t_onset', 't_max']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1']
data_train = [[0.7087, 0.6667, 0.0039, 0.0078], [0.7081, 0.5000, 0.0039, 0.0078]]
data_test = [[0.7087, 0.6667, 0.0039, 0.0078], [0.7081, 0.5000, 0.0039, 0.0078]]

# One subplot per metric; the loop below indexes into `axes`, so the figure
# and its axes must exist before any plotting call.
fig, axes = plt.subplots(nrows=1, ncols=len(metrics), figsize=(16, 4))

# `t_onset` and `t_max` are the two metric dictionaries printed in the
# question: '<metric>' holds the test score, '<metric> Training' the
# training score.
for i, metric in enumerate(metrics):
    # Training data
    train_data = [t_onset[f'{metric} Training'], t_max[f'{metric} Training']]
    axes[i].hist(train_data, color='blue', alpha=0.5, label='Training')
    # Test data
    test_data = [t_onset[metric], t_max[metric]]
    axes[i].hist(test_data, color='green', alpha=0.5, label='Test')
    # Add labels and titles
    axes[i].set_xlabel(metric)
    axes[i].set_ylabel('Frequency')
    axes[i].set_title(f'{metric} for Training and Test Data')
    axes[i].legend()

plt.tight_layout()
plt.show()
Answers:
With Pandas and Matplotlib, you can do something like:
import pandas as pd
import matplotlib.pyplot as plt
# Put one dictionary per row (labelled 'Onset' / 'Max'), then transpose so
# that each metric becomes a row and each key becomes a column.
records = [t_onset, t_max]
df = pd.DataFrame(records, index=['Onset', 'Max']).T

# Grouped bar chart: one group per metric, one bar per column of `df`.
bar_options = {
    'title': 'Comparison',
    'xlabel': 'Metrics',
    'ylabel': 'Value',
    'figsize': (10, 8),
    'rot': 45,
}
ax = df.plot.bar(**bar_options)
plt.tight_layout()
plt.show()

print(df)
# Output
                       Onset       Max
Accuracy            0.708666  0.708096
Precision           0.666667  0.500000
Recall              0.003906  0.003906
F1                  0.007767  0.007752
Accuracy Training   0.708666  0.708096
Precision Training  0.666667  0.500000
Recall Training     0.003906  0.003906
F1 Training         0.007767  0.007752
Output:
Update
Alternative:
# Hard-coded scores: one inner list per entry of `keys`, one value per entry
# of `metrics`.
keys = ['t_onset', 't_max']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1']
data_train = [[0.7087, 0.6667, 0.0039, 0.0078], [0.7081, 0.5000, 0.0039, 0.0078]]
data_test = [[0.7087, 0.6667, 0.0039, 0.0078], [0.7081, 0.5000, 0.0039, 0.0078]]

# One frame per split, stacked under a 'Train'/'Test' outer level and then
# transposed: rows are metrics, columns are (split, key) pairs.
df_train = pd.DataFrame(data_train, index=keys, columns=metrics)
df_test = pd.DataFrame(data_test, index=keys, columns=metrics)
stacked = pd.concat([df_train, df_test], keys=['Train', 'Test'], axis=0)
df = stacked.T

# One panel per key; dropping the key level leaves just the Train/Test
# columns, which are drawn as side-by-side bars for every metric.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
for panel, key in zip((ax1, ax2), keys):
    per_key = df.loc[:, (slice(None), key)].droplevel(1, axis=1)
    per_key.plot.bar(ax=panel, rot=45, title=key)
fig.tight_layout()
plt.show()
Output:
I have a dictionary that prints out like this:
t_onset {'Accuracy': 0.7086659064994298, 'Precision': 0.6666666666666666, 'Recall': 0.00390625, 'F1': 0.007766990291262136, 'Accuracy Training': 0.7086659064994298, 'Precision Training': 0.6666666666666666, 'Recall Training': 0.00390625, 'F1 Training': 0.007766990291262136}
t_max {'Accuracy': 0.7080957810718358, 'Precision': 0.5, 'Recall': 0.00390625, 'F1': 0.007751937984496124, 'Accuracy Training': 0.7080957810718358, 'Precision Training': 0.5, 'Recall Training': 0.00390625, 'F1 Training': 0.007751937984496124}
I want to plot it as a histogram with a pair of adjacent columns for each metric: one for the test value and one for the train value. There should be two graphs in total, one for each of the keys (t_onset and t_max), and each graph should contain these paired columns.
For example, like this:
enter image description here
Whenever I try to iterate through the keys, I get errors. I can’t figure out how to nest the results. I have looked at some resources, but they all seem to plot dictionaries that have only one set of keys.
Example of histogram code I tried to write:
import matplotlib.pyplot as plt
import numpy as np
# Define the data for each key and metric
keys = ['t_onset', 't_max']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1']
data_train = [[0.7087, 0.6667, 0.0039, 0.0078], [0.7081, 0.5000, 0.0039, 0.0078]]
data_test = [[0.7087, 0.6667, 0.0039, 0.0078], [0.7081, 0.5000, 0.0039, 0.0078]]

# One subplot per metric; the loop below indexes into `axes`, so the figure
# and its axes must exist before any plotting call.
fig, axes = plt.subplots(nrows=1, ncols=len(metrics), figsize=(16, 4))

# `t_onset` and `t_max` are the two metric dictionaries printed in the
# question: '<metric>' holds the test score, '<metric> Training' the
# training score.
for i, metric in enumerate(metrics):
    # Training data
    train_data = [t_onset[f'{metric} Training'], t_max[f'{metric} Training']]
    axes[i].hist(train_data, color='blue', alpha=0.5, label='Training')
    # Test data
    test_data = [t_onset[metric], t_max[metric]]
    axes[i].hist(test_data, color='green', alpha=0.5, label='Test')
    # Add labels and titles
    axes[i].set_xlabel(metric)
    axes[i].set_ylabel('Frequency')
    axes[i].set_title(f'{metric} for Training and Test Data')
    axes[i].legend()

plt.tight_layout()
plt.show()
With Pandas and Matplotlib, you can do something like:
import pandas as pd
import matplotlib.pyplot as plt
# Put one dictionary per row (labelled 'Onset' / 'Max'), then transpose so
# that each metric becomes a row and each key becomes a column.
records = [t_onset, t_max]
df = pd.DataFrame(records, index=['Onset', 'Max']).T

# Grouped bar chart: one group per metric, one bar per column of `df`.
bar_options = {
    'title': 'Comparison',
    'xlabel': 'Metrics',
    'ylabel': 'Value',
    'figsize': (10, 8),
    'rot': 45,
}
ax = df.plot.bar(**bar_options)
plt.tight_layout()
plt.show()

print(df)
# Output
                       Onset       Max
Accuracy            0.708666  0.708096
Precision           0.666667  0.500000
Recall              0.003906  0.003906
F1                  0.007767  0.007752
Accuracy Training   0.708666  0.708096
Precision Training  0.666667  0.500000
Recall Training     0.003906  0.003906
F1 Training         0.007767  0.007752
Output:
Update
Alternative:
# Hard-coded scores: one inner list per entry of `keys`, one value per entry
# of `metrics`.
keys = ['t_onset', 't_max']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1']
data_train = [[0.7087, 0.6667, 0.0039, 0.0078], [0.7081, 0.5000, 0.0039, 0.0078]]
data_test = [[0.7087, 0.6667, 0.0039, 0.0078], [0.7081, 0.5000, 0.0039, 0.0078]]

# One frame per split, stacked under a 'Train'/'Test' outer level and then
# transposed: rows are metrics, columns are (split, key) pairs.
df_train = pd.DataFrame(data_train, index=keys, columns=metrics)
df_test = pd.DataFrame(data_test, index=keys, columns=metrics)
stacked = pd.concat([df_train, df_test], keys=['Train', 'Test'], axis=0)
df = stacked.T

# One panel per key; dropping the key level leaves just the Train/Test
# columns, which are drawn as side-by-side bars for every metric.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))
for panel, key in zip((ax1, ax2), keys):
    per_key = df.loc[:, (slice(None), key)].droplevel(1, axis=1)
    per_key.plot.bar(ax=panel, rot=45, title=key)
fig.tight_layout()
plt.show()
Output: