How to make a normal distribution graph from data frame in Python?
Question:
My question is how to make a normal distribution graph from a data frame in Python. I can find plenty of information on making such a graph from random numbers, but I don’t know how to make one from a data frame.
First, I generated random numbers and made a data frame.
import numpy as np
import pandas
from pandas import DataFrame
# Simulate 1000 grain weights for a single cultivar (normal, mean 50, SD 3)
# and store them next to a constant genotype label.
cv1 = np.random.normal(loc=50, scale=3, size=1000)
source = dict(Genotype=["CV1"] * 1000, AGW=cv1)
Cultivar_1 = DataFrame(data=source)
Then, I tried to make a normal distribution graph.
# NOTE: `sns` and `plt` are not imported in this snippet; it assumes
# `import seaborn as sns` and `import matplotlib.pyplot as plt` ran earlier.
# Density plot of the AGW column.
agw_values = Cultivar_1["AGW"]
sns.kdeplot(data=agw_values)
# Axis labels, fixed x-range and a faint dashed grid, then display.
plt.xlabel("Grain weight (mg)", size=12)
plt.ylabel("Frequency", size=12)
plt.xlim(30, 70)
plt.grid(True, linestyle="--", alpha=0.3)
plt.show()
However, this is a kernel density estimate of the data, not a normal distribution curve calculated from the mean and standard deviation.
Could you let me know what code I need to use to make a normal distribution graph?
Thanks!!
Answers:
I found one solution to make a normal distribution graph from a data frame.
#Library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
# Generating data frame
x = np.random.normal(50, 3, 1000)
source = {"Genotype": ["CV1"] * len(x), "AGW": x}
df = pd.DataFrame(source)

# Calculating mean and Stdev of AGW
df_mean = np.mean(df["AGW"])
df_std = np.std(df["AGW"])

# Calculating probability density function (PDF)
# Evaluate the fitted normal curve on an evenly spaced grid that spans the
# whole plotted range. Evaluating it only at the observed values would cut the
# curve off at the sample minimum/maximum and make its resolution depend on
# where the data happen to fall (a problem for small data frames).
x_min, x_max = 30, 70
grid = np.linspace(x_min, x_max, 500)
pdf = stats.norm.pdf(grid, df_mean, df_std)

# Drawing a graph
plt.plot(grid, pdf)
plt.xlim([x_min, x_max])
plt.xlabel("Grain weight (mg)", size=12)
plt.ylabel("Frequency", size=12)
plt.grid(True, alpha=0.3, linestyle="--")
plt.show()
#Loading dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
# Generating the dataframe
cv1 = np.random.normal(50, 3, 1000)
source = {"Genotype": ["CV1"] * len(cv1), "AGW": cv1}
dataframe = pd.DataFrame(source)

# Calculating the mean and standard deviation of the parameter "AGW";
# these two values define the normal curve drawn below.
mean = dataframe["AGW"].mean()
std = dataframe["AGW"].std()

# Sort the observed values once; they are reused for the histogram and as the
# x-coordinates of the scipy curve.
agw_sorted = dataframe["AGW"].sort_values()

# Creating the normal distribution graph for the column "AGW".
# The histogram is built from the data frame's own values (normalised to a
# density), not from a freshly drawn random sample, so the figure actually
# reflects the data and is reproducible for a given data frame.
_counts, bins, _patches = plt.hist(agw_sorted, bins=100, density=True)

# Mathematical representation/formula of the normal distribution,
# evaluated at the histogram bin edges.
plt.plot(
    bins,
    1 / (std * np.sqrt(2 * np.pi)) * np.exp(-((bins - mean) ** 2) / (2 * std**2)),
    linewidth=2,
    color='r',
)

# This is the direct function used in stats
pdf = stats.norm.pdf(agw_sorted, mean, std)
plt.plot(agw_sorted, pdf)
plt.xlabel("Grain weight (mg)", size=12)
plt.ylabel("Frequency", size=12)
plt.xlim([30,70])
plt.grid(True, alpha=0.3, linestyle="--")
plt.show()
My question is how to make a normal distribution graph from a data frame in Python. I can find plenty of information on making such a graph from random numbers, but I don’t know how to make one from a data frame.
First, I generated random numbers and made a data frame.
import numpy as np
import pandas
from pandas import DataFrame
# Simulate 1000 grain weights for a single cultivar (normal, mean 50, SD 3)
# and store them next to a constant genotype label.
cv1 = np.random.normal(loc=50, scale=3, size=1000)
source = dict(Genotype=["CV1"] * 1000, AGW=cv1)
Cultivar_1 = DataFrame(data=source)
Then, I tried to make a normal distribution graph.
# NOTE: `sns` and `plt` are not imported in this snippet; it assumes
# `import seaborn as sns` and `import matplotlib.pyplot as plt` ran earlier.
# Density plot of the AGW column.
agw_values = Cultivar_1["AGW"]
sns.kdeplot(data=agw_values)
# Axis labels, fixed x-range and a faint dashed grid, then display.
plt.xlabel("Grain weight (mg)", size=12)
plt.ylabel("Frequency", size=12)
plt.xlim(30, 70)
plt.grid(True, linestyle="--", alpha=0.3)
plt.show()
However, this is a kernel density estimate of the data, not a normal distribution curve calculated from the mean and standard deviation.
Could you let me know what code I need to use to make a normal distribution graph?
Thanks!!
I found one solution to make a normal distribution graph from a data frame.
#Library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
# Generating data frame
x = np.random.normal(50, 3, 1000)
source = {"Genotype": ["CV1"] * len(x), "AGW": x}
df = pd.DataFrame(source)

# Calculating mean and Stdev of AGW
df_mean = np.mean(df["AGW"])
df_std = np.std(df["AGW"])

# Calculating probability density function (PDF)
# Evaluate the fitted normal curve on an evenly spaced grid that spans the
# whole plotted range. Evaluating it only at the observed values would cut the
# curve off at the sample minimum/maximum and make its resolution depend on
# where the data happen to fall (a problem for small data frames).
x_min, x_max = 30, 70
grid = np.linspace(x_min, x_max, 500)
pdf = stats.norm.pdf(grid, df_mean, df_std)

# Drawing a graph
plt.plot(grid, pdf)
plt.xlim([x_min, x_max])
plt.xlabel("Grain weight (mg)", size=12)
plt.ylabel("Frequency", size=12)
plt.grid(True, alpha=0.3, linestyle="--")
plt.show()
#Loading dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
# Generating the dataframe
cv1 = np.random.normal(50, 3, 1000)
source = {"Genotype": ["CV1"] * len(cv1), "AGW": cv1}
dataframe = pd.DataFrame(source)

# Calculating the mean and standard deviation of the parameter "AGW";
# these two values define the normal curve drawn below.
mean = dataframe["AGW"].mean()
std = dataframe["AGW"].std()

# Sort the observed values once; they are reused for the histogram and as the
# x-coordinates of the scipy curve.
agw_sorted = dataframe["AGW"].sort_values()

# Creating the normal distribution graph for the column "AGW".
# The histogram is built from the data frame's own values (normalised to a
# density), not from a freshly drawn random sample, so the figure actually
# reflects the data and is reproducible for a given data frame.
_counts, bins, _patches = plt.hist(agw_sorted, bins=100, density=True)

# Mathematical representation/formula of the normal distribution,
# evaluated at the histogram bin edges.
plt.plot(
    bins,
    1 / (std * np.sqrt(2 * np.pi)) * np.exp(-((bins - mean) ** 2) / (2 * std**2)),
    linewidth=2,
    color='r',
)

# This is the direct function used in stats
pdf = stats.norm.pdf(agw_sorted, mean, std)
plt.plot(agw_sorted, pdf)
plt.xlabel("Grain weight (mg)", size=12)
plt.ylabel("Frequency", size=12)
plt.xlim([30,70])
plt.grid(True, alpha=0.3, linestyle="--")
plt.show()