How to calculate R^2 and RMSE for multiple points against the mean in Python
Question:
I want to compare each category's points against the mean for each key. Assume the predicted value is the mean across categories, while the actual values are the four separate category values.
# Long-format input: one row per (Key, category) pair. All values start out
# as strings and are converted to numbers below.
# NOTE: assumes `import pandas as pd` has already been run.
data = {
    'Key': ['1', '2', '3', '4', '1', '2', '3', '4',
            '1', '2', '3', '4', '1', '2', '3', '4'],
    'value': ['21', '24', '35', '30', '31', '34', '25', '19',
              '25', '14', '29', '23', '31', '34', '15', '25'],
    'category': ['M', 'M', 'M', 'M', 'K', 'K', 'K', 'K',
                 'T', 'T', 'T', 'T', 'Q', 'Q', 'Q', 'Q'],
}
# Create DataFrame
df = pd.DataFrame(data)
df["Key"] = pd.to_numeric(df["Key"])
df["value"] = pd.to_numeric(df["value"])
print(df)
# Reshape to wide format: one row per Key, one column per category.
# Keyword arguments are required here: pandas 2.0 removed positional
# arguments from DataFrame.pivot.
s = df.pivot(index='Key', columns='category', values='value')
ax = s.plot()
# Overlay the per-Key mean across categories as a dashed blue line.
s.mean(axis=1).plot(ax=ax, color='b', linestyle='--', label='Mean')
ax.legend()
# Attempted annotation, left disabled (y_test is not defined in this snippet):
#plt.annotate("r-squared = {:.3f}".format(r2_score(y_test, s.mean(1))), (0, 1))
Answers:
You can use this, which computes the RMSE and R^2 of each category against the per-key mean:
# Assumes `df` is the long-format DataFrame from the question, and that
# pandas (pd), numpy (np) and sklearn.metrics' mean_squared_error / r2_score
# are already imported.
df["Key"] = pd.to_numeric(df["Key"])
df["value"] = pd.to_numeric(df["value"])
# Wide format: rows are Keys, columns are categories. Keyword arguments are
# required: pandas 2.0 removed positional arguments from DataFrame.pivot.
s = df.pivot(index='Key', columns='category', values='value')
# Per-Key mean across categories; this plays the role of the predicted value.
mean = s.mean(axis=1)
# Repeat the mean once per category so the prediction has the same
# (n_keys, n_categories) shape as s.
predicted = np.tile(mean, (s.shape[1], 1)).T
# multioutput='raw_values' returns one score per column of s.
rmse = np.sqrt(mean_squared_error(s, predicted, multioutput='raw_values'))
print(rmse)
r_squared = r2_score(s, predicted, multioutput='raw_values')
print(r_squared)
I want to compare each category's points against the mean for each key. Assume the predicted value is the mean across categories, while the actual values are the four separate category values.
# Long-format input: one row per (Key, category) pair. All values start out
# as strings and are converted to numbers below.
# NOTE: assumes `import pandas as pd` has already been run.
data = {
    'Key': ['1', '2', '3', '4', '1', '2', '3', '4',
            '1', '2', '3', '4', '1', '2', '3', '4'],
    'value': ['21', '24', '35', '30', '31', '34', '25', '19',
              '25', '14', '29', '23', '31', '34', '15', '25'],
    'category': ['M', 'M', 'M', 'M', 'K', 'K', 'K', 'K',
                 'T', 'T', 'T', 'T', 'Q', 'Q', 'Q', 'Q'],
}
# Create DataFrame
df = pd.DataFrame(data)
df["Key"] = pd.to_numeric(df["Key"])
df["value"] = pd.to_numeric(df["value"])
print(df)
# Reshape to wide format: one row per Key, one column per category.
# Keyword arguments are required here: pandas 2.0 removed positional
# arguments from DataFrame.pivot.
s = df.pivot(index='Key', columns='category', values='value')
ax = s.plot()
# Overlay the per-Key mean across categories as a dashed blue line.
s.mean(axis=1).plot(ax=ax, color='b', linestyle='--', label='Mean')
ax.legend()
# Attempted annotation, left disabled (y_test is not defined in this snippet):
#plt.annotate("r-squared = {:.3f}".format(r2_score(y_test, s.mean(1))), (0, 1))
You can use this, which computes the RMSE and R^2 of each category against the per-key mean:
# Assumes `df` is the long-format DataFrame from the question, and that
# pandas (pd), numpy (np) and sklearn.metrics' mean_squared_error / r2_score
# are already imported.
df["Key"] = pd.to_numeric(df["Key"])
df["value"] = pd.to_numeric(df["value"])
# Wide format: rows are Keys, columns are categories. Keyword arguments are
# required: pandas 2.0 removed positional arguments from DataFrame.pivot.
s = df.pivot(index='Key', columns='category', values='value')
# Per-Key mean across categories; this plays the role of the predicted value.
mean = s.mean(axis=1)
# Repeat the mean once per category so the prediction has the same
# (n_keys, n_categories) shape as s.
predicted = np.tile(mean, (s.shape[1], 1)).T
# multioutput='raw_values' returns one score per column of s.
rmse = np.sqrt(mean_squared_error(s, predicted, multioutput='raw_values'))
print(rmse)
r_squared = r2_score(s, predicted, multioutput='raw_values')
print(r_squared)