Matplotlib plotting nan values

Question:

I am trying to plot a bar graph in matplotlib, but somehow it plots the nan values.

import pandas as pd
import matplotlib.pyplot as plt
import math 


# Load the 'nvPM Emissions' sheet of the emissions databank workbook.
engine_data = pd.read_excel(
    'edb-emissions-databank_v28c_web.xlsx',
    sheet_name='nvPM Emissions',
)

# Manufacturer names, taken from the index of the per-manufacturer row counts.
manufacturers = (
    engine_data
    .value_counts('Manufacturer')
    .index
    .values
)


def Average(engine_type, column, manufacturer, data=None):
    """Return the mean of ``column`` for each manufacturer and one engine type.

    Args:
        engine_type: Value matched against the table's column at position 5,
            e.g. ``'TF'`` or ``'MTF'``.
        column: Label of the column whose mean is computed.
        manufacturer: Manufacturer names to average over, matched against
            the table's column at position 2. A single string is treated
            as one name.
        data: Table to read from. Defaults to the module-level
            ``engine_data``.

    Returns:
        A list with one mean per name, in the order given. An entry is NaN
        when no row matches both the name and the engine type.
    """
    if data is None:
        data = engine_data

    # Iterating a bare string would walk its characters, so wrap it.
    if isinstance(manufacturer, str):
        manufacturer = [manufacturer]

    # The engine-type test does not depend on the manufacturer: build it once.
    type_mask = data.iloc[:, 5] == engine_type
    maker_column = data.iloc[:, 2]

    return [
        data.loc[type_mask & (maker_column == name), column].mean()
        for name in manufacturer
    ]


# Per-manufacturer mean of the 'nvPM LTO Total Mass (mg)' column,
# computed once for 'TF' engines and once for 'MTF' engines.
TF_particles = Average('TF','nvPM LTO Total Mass (mg)', manufacturers)
MTF_particles = Average('MTF','nvPM LTO Total Mass (mg)', manufacturers) 


# Pair each manufacturer with its average as (manufacturer, average) tuples.
results_TF = list(zip(manufacturers, TF_particles))
results_MTF = list(zip(manufacturers, MTF_particles))


# Attempt to drop the pairs whose average is NaN.
# NOTE(review): this loop removes items from the very list it is iterating.
# After a removal the next element shifts into the freed position and the
# iterator steps past it, so a NaN that directly follows a removed NaN is
# never tested and stays in the list.
for result in results_TF:
    if math.isnan(result[1]) == True:
        results_TF.remove(result)


# NOTE(review): same remove-while-iterating pattern as the loop above.
for result in results_MTF:
    if math.isnan(result[1]) == True:
        results_MTF.remove(result)
        
# One figure with two axes side by side (1 row, 2 columns).
fig ,ax = plt.subplots(1,2,figsize = (150,50))

# Left axes: one red bar per remaining (manufacturer, average) pair.
for result in results_TF:
    ax[0].bar(result[0], result[1], color = 'red')

# Right axes: one blue bar per remaining (manufacturer, average) pair.
for result in results_MTF:
    ax[1].bar(result[0], result[1], color = 'blue')

# Tick-label sizes: 70 on the left axes, 30 on the right axes.
ax[0].tick_params(axis='x', labelsize=70)
ax[0].tick_params(axis='y', labelsize=70)

ax[1].tick_params(axis='x', labelsize=30)
ax[1].tick_params(axis='y', labelsize=30)

plt.show()

I tried removing the nan values, but while I managed to remove some, others still remain. I am plotting the relationship between the engine manufacturers and their average emissions over an LTO cycle. I have two graphs because I separate the two engine types that I have: TF = turbofan and MTF = mixed turbofan. The NaNs arise because some manufacturers don’t have any TF engines, or vice versa, so the average for that manufacturer and engine type is NaN, and when I do the zip, I get some nan values in the results.

This is what I get

Asked By: Dan

||

Answers:

You’re removing items from results_TF and results_MTF while iterating over those same lists. Each removal shifts the remaining items down by one position, so the loop skips the item that follows a removed one and does not catch all the nan values. It is easier to build new, filtered lists instead of modifying the originals, and then pass those new lists to your bar plot.

# Build filtered copies instead of removing items from the lists in place.
results_TF_filtered = [pair for pair in results_TF if pd.notna(pair[1])]
results_MTF_filtered = [pair for pair in results_MTF if pd.notna(pair[1])]

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(150, 50))

# One bar() call per axes: manufacturer names on x, averages as bar heights.
ax[0].bar(
    [pair[0] for pair in results_TF_filtered],
    [pair[1] for pair in results_TF_filtered],
    color='red',
)
ax[1].bar(
    [pair[0] for pair in results_MTF_filtered],
    [pair[1] for pair in results_MTF_filtered],
    color='blue',
)

Updated Code:

import pandas as pd
import matplotlib.pyplot as plt


# Load the 'nvPM Emissions' sheet of the emissions databank workbook.
engine_data = pd.read_excel(
    'edb-emissions-databank_v28c_web.xlsx',
    sheet_name='nvPM Emissions',
)

# Manufacturer names, taken from the index of the per-manufacturer row counts.
manufacturers = (
    engine_data
    .value_counts('Manufacturer')
    .index
    .values
)


def Average(engine_type, column, manufacturer, data=None):
    """Return the mean of ``column`` for each manufacturer and one engine type.

    Args:
        engine_type: Value matched against the table's column at position 5,
            e.g. ``'TF'`` or ``'MTF'``.
        column: Label of the column whose mean is computed.
        manufacturer: Manufacturer names to average over, matched against
            the table's column at position 2. A single string is treated
            as one name.
        data: Table to read from. Defaults to the module-level
            ``engine_data``.

    Returns:
        A list with one mean per name, in the order given. An entry is NaN
        when no row matches both the name and the engine type.
    """
    if data is None:
        data = engine_data

    # Iterating a bare string would walk its characters, so wrap it.
    if isinstance(manufacturer, str):
        manufacturer = [manufacturer]

    # The engine-type test does not depend on the manufacturer: build it once.
    type_mask = data.iloc[:, 5] == engine_type
    maker_column = data.iloc[:, 2]

    return [
        data.loc[type_mask & (maker_column == name), column].mean()
        for name in manufacturer
    ]


# Mean of the 'nvPM LTO Total Mass (mg)' column per manufacturer,
# computed separately for each engine type.
TF_particles = Average(
    'TF',
    'nvPM LTO Total Mass (mg)',
    manufacturers,
)
MTF_particles = Average(
    'MTF',
    'nvPM LTO Total Mass (mg)',
    manufacturers,
)

# Pair each manufacturer with its average.
results_TF = [*zip(manufacturers, TF_particles)]
results_MTF = [*zip(manufacturers, MTF_particles)]

# Build filtered copies instead of removing items from the lists in place.
results_TF_filtered = [pair for pair in results_TF if pd.notna(pair[1])]
results_MTF_filtered = [pair for pair in results_MTF if pd.notna(pair[1])]

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(150, 50))

# One bar() call per axes: manufacturer names on x, averages as bar heights.
ax[0].bar(
    [pair[0] for pair in results_TF_filtered],
    [pair[1] for pair in results_TF_filtered],
    color='red',
)
ax[1].bar(
    [pair[0] for pair in results_MTF_filtered],
    [pair[1] for pair in results_MTF_filtered],
    color='blue',
)

# Tick-label sizes: 70 on the left axes, 30 on the right axes.
ax[0].tick_params(axis='both', labelsize=70)
ax[1].tick_params(axis='both', labelsize=30)

plt.show()
Answered By: tamarajqawasmeh