How to plot the decision boundary of a polynomial logistic regression in Python?
Question:
I have looked into the example on this website: https://scipython.com/blog/plotting-the-decision-boundary-of-a-logistic-regression-model/
I understand how they plot the decision boundary for a linear feature vector. But how would I plot the decision boundary if I apply
from sklearn.preprocessing import PolynomialFeatures
...
poly = PolynomialFeatures(degree = 3, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X)
# Fit the data to a logistic regression model.
clf = sklearn.linear_model.LogisticRegression()
clf.fit(X_poly, Y)
to get a curved decision boundary? (I know it doesn't make a lot of sense for the example on the website, but it may be easier to talk about.)
I have tried to plot the resulting polynomial decision boundary by overlaying the polynomial plot but only got weird results like this:
So how could I do a curved decision boundary plot?
the edited code:
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model
plt.rc('text', usetex=True)
plt.figure(dpi=1200)
pts = np.loadtxt(r'C:\Users\stefa\OneDrive\Desktop\linpts.txt')
X = pts[:,:2]
Y = pts[:,2].astype('int')
poly = PolynomialFeatures(degree = 2, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X)
# Fit the data to a logistic regression model.
clf = sklearn.linear_model.LogisticRegression()
clf.fit(X_poly, Y)
# Retrieve the model parameters.
b = clf.intercept_[0]
w1, w2, w3, w4, w5 = clf.coef_.T
# In[]
def PolyCoefficients(x, coeffs):
    """ Returns a polynomial for ``x`` values for the ``coeffs`` provided.
    The coefficients must be in ascending order (``x**0`` to ``x**o``).
    """
    o = len(coeffs)
    print(f'# This is a polynomial of order {o}.')
    y = 0
    for i in range(o):
        y += coeffs[i]*x**i
    return y
x = np.linspace(0, 9, 100)
coeffs = [b, w1, w2, w3, w4, w5]
plt.plot(x, PolyCoefficients(x, coeffs))
plt.show()
# In[]
# Calculate the intercept and gradient of the decision boundary.
c = -b/w2
m = -w1/w2
# Plot the data and the classification with the decision boundary.
xmin, xmax = -1, 2
ymin, ymax = -1, 2.5
xd = np.array([xmin, xmax])
yd = m*xd + c
#plt.plot(xd, yd, 'k', lw=1, ls='--')
plt.plot(x, PolyCoefficients(x, coeffs))
plt.fill_between(xd, yd, ymin, color='tab:blue', alpha=0.2)
plt.fill_between(xd, yd, ymax, color='tab:orange', alpha=0.2)
plt.scatter(*X[Y==0].T, s=8, alpha=0.5)
plt.scatter(*X[Y==1].T, s=8, alpha=0.5)
plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax)
plt.ylabel(r'$x_2$')
plt.xlabel(r'$x_1$')
plt.show()
Answers:
The output of your PolyCoefficients function is a 5th-order polynomial in a single variable, made up of:
coeffs[0]*x^0 + coeffs[1]*x^1 + coeffs[2]*x^2 + coeffs[3]*x^3 + coeffs[4]*x^4 + coeffs[5]*x^5
What you actually fitted is a 2nd-order polynomial in two variables (specified by degree = 2 inside the sklearn.preprocessing.PolynomialFeatures object), which is really the following:
b + w1*x1 + w2*x2 + w3*x1^2 + w4*x1*x2 + w5*x2^2
This form is valid when you are using exactly two features x1 and x2; with features x1, x2, …, xN you would need all of the linear and quadratic terms in those variables.
You can find more details and examples here.
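As a minimal sketch of how that expression could be plotted (using X, Y, b and w1 … w5 as defined in your code, the feature order x1, x2, x1^2, x1*x2, x2^2, and plot limits chosen to match your figure, so treat the details as assumptions), you could evaluate the quadratic on a grid and draw its zero level set:
import numpy as np
import matplotlib.pyplot as plt
# Evaluate the fitted quadratic decision function on a grid and draw its
# zero contour, which is the curved decision boundary.
xx, yy = np.meshgrid(np.linspace(-1, 2, 200), np.linspace(-1, 2.5, 200))
zz = b + w1*xx + w2*yy + w3*xx**2 + w4*xx*yy + w5*yy**2
plt.contour(xx, yy, zz, levels=[0], colors='k', linewidths=1)
plt.scatter(*X[Y==0].T, s=8, alpha=0.5)
plt.scatter(*X[Y==1].T, s=8, alpha=0.5)
plt.show()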
Let me generate some demo data.
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model
X = np.random.normal(size=(1000, 2))
Y = ((X[:,0] - X[:,1] + 0.4*X[:,0]*X[:,1] + 0.7*X[:,0]**2 - 0.8*X[:,1]**2 +
np.random.normal(scale=0.1, size=(1000,))) >= 0).astype(int)
flg = (Y > 0)
plt.scatter(X[flg,0], X[flg,1], alpha=0.3, marker="o")
plt.scatter(X[~flg,0], X[~flg,1], alpha=0.3, marker="x")
Apart from the randomness, the data looks something like this.
Train the model like you did.
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X)
# Fit the data to a logistic regression model.
clf = sklearn.linear_model.LogisticRegression()
clf.fit(X_poly, Y)
print(poly.powers_)
#[[1 0]
# [0 1]
# [2 0]
# [1 1]
# [0 2]]
This tells us that the features are ordered as: x1, x2, x1^2, x1*x2, x2^2.
So collect the coefficients and the intercept and give them intuitive names.
w1, w2, w11, w12, w22 = clf.coef_[0]
b = clf.intercept_[0]
By definition, the decision boundary is the set of (x1, x2) for which the probability is even between the two classes. Mathematically, these points are the solutions to:
b + w1*x1 + w2*x2 + w11*x1^2 + w12*x1*x2 + w22*x2^2 = 0
If we fix x1, then this is a quadratic equation in x2, which we can solve analytically. The following function does this job.
def boundary(x1):
    # returns x2 on the boundary for a given x1
    # we solve the quadratic equation
    #   a x^2 + b x + c = 0
    #   --> x = (-b +- sqrt(b^2 - 4ac)) / 2a
    a_ = w22
    b_ = w2 + w12 * x1
    c_ = b + w1*x1 + w11*x1**2
    tmp = b_**2 - 4*a_*c_
    if tmp < 0:
        return None
    ans = [(-b_ + tmp**0.5) / (2*a_), (-b_ - tmp**0.5) / (2*a_)]
    ans.sort()  # smaller first
    return ans
# compute the boundaries
xs = np.linspace(X[:,0].min(), X[:,0].max(), num=100)
ys_1 = []
ys_2 = []
for x1 in xs:
    tmp = boundary(x1)
    if tmp is None:
        ys_1.append(None)
        ys_2.append(None)
    else:
        ys_1.append(tmp[0])  # smaller boundary
        ys_2.append(tmp[1])  # larger boundary
Now that we have the boundaries as data, we can visualize them easily.
flg = (Y > 0)
plt.scatter(X[flg,0], X[flg,1], alpha=0.3, marker="o")
plt.scatter(X[~flg,0], X[~flg,1], alpha=0.3, marker="x")
plt.plot(xs, ys_1, c="green")
plt.plot(xs, ys_2, c="gray")
# if ys contains None, need to skip them
plt.fill_between(xs, ys_1, ys_2, color='tab:blue', alpha=0.2)
plt.fill_between(xs, min(ys_1), ys_1, color='tab:orange', alpha=0.2)
plt.fill_between(xs, ys_2, max(ys_2), color='tab:orange', alpha=0.2)
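If the boundary does not exist for some x1 values, ys_1 and ys_2 contain None and the fill calls above fail; a sketch of one possible fix (my assumption, not part of the original answer) is to convert None to NaN so matplotlib simply leaves gaps there:
# Replace None with NaN so the fills skip the x1 values without a boundary.
ys_1_arr = np.array([np.nan if v is None else v for v in ys_1], dtype=float)
ys_2_arr = np.array([np.nan if v is None else v for v in ys_2], dtype=float)
plt.fill_between(xs, ys_1_arr, ys_2_arr, color='tab:blue', alpha=0.2)
plt.fill_between(xs, np.nanmin(ys_1_arr), ys_1_arr, color='tab:orange', alpha=0.2)
plt.fill_between(xs, ys_2_arr, np.nanmax(ys_2_arr), color='tab:orange', alpha=0.2)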
Notice that the boundaries can be explicitly computed because the model is quadratic. Different approaches are needed for more general, complex classifiers.
An easier, generally applicable approach is to create dummy data containing various combinations of the variables, let the classifier predict, and plot with the color given by the predicted class.
xs = np.linspace(X[:,0].min(), X[:,0].max(), num=100)
ys = np.linspace(X[:,1].min(), X[:,1].max(), num=100)
newX = []
for x1 in xs:
    for x2 in ys:
        newX.append((x1, x2))
newX = np.array(newX)
p = clf.predict(poly.transform(newX))
flg = (Y > 0)
plt.scatter(X[flg,0], X[flg,1], alpha=0.3, marker="o")
plt.scatter(X[~flg,0], X[~flg,1], alpha=0.3, marker="x")
flg = (p > 0)
plt.scatter(newX[flg,0], newX[flg,1], alpha=0.02, c="tab:blue", marker="s", s=20)
plt.scatter(newX[~flg,0], newX[~flg,1], alpha=0.02, c="tab:orange", marker="s", s=20)
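As a variation on the same idea (a sketch, not part of the original answer), the grid can be built with np.meshgrid and the predicted classes drawn with plt.contourf, which avoids the nested loops and the nearly transparent scatter markers:
# Build the grid with meshgrid, predict the class of every grid point,
# and shade the two predicted regions.
xx, yy = np.meshgrid(np.linspace(X[:,0].min(), X[:,0].max(), 200),
                     np.linspace(X[:,1].min(), X[:,1].max(), 200))
grid = np.column_stack([xx.ravel(), yy.ravel()])
zz = clf.predict(poly.transform(grid)).reshape(xx.shape)
plt.contourf(xx, yy, zz, levels=[-0.5, 0.5, 1.5], alpha=0.2)
plt.scatter(X[Y==1,0], X[Y==1,1], alpha=0.3, marker="o")
plt.scatter(X[Y==0,0], X[Y==0,1], alpha=0.3, marker="x")
plt.show()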