ValueError: shapes (120,6) and (7,) not aligned: 6 (dim 1) != 7 (dim 0)
Question:
I’m trying to implement multiclass classification with logistic regression on an Iris.csv dataset from Kaggle. This is my code.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
def standardize(X_tr): # (x-Mean(x))/std(X) Normalizes data
for i in range(X_tr.shape[1]):
X_tr[:, i] = (X_tr[:, i] - np.mean(X_tr[:, i])) / np.std(X_tr[:, i])
return X_tr
def sigmoid(z): #Sigmoid/Logistic function
sig = 1 / (1 + np.exp(-z))
return sig
def cost(theta, X, y):
z = np.dot(X, theta)
cost0 = y.T.dot(np.log(sigmoid(z)))
cost1 = (1 - y).T.dot(np.log(1 - sigmoid(z)))
cost = -((cost1 + cost0)) / len(y)
return cost
def initialize(X): #Initializing X feature matrix and Theta vector
thetas = np.zeros((X.shape[1] + 1, len(np.unique(y))))
X = np.c_[np.ones((X.shape[0], 1)), X] # adding 691 rows of ones as the first column in X
return thetas, X
def fit(X, y, alpha=0.01, iterations=1000): # Gradient Descent
thetas_list = []
X = np.c_[np.ones((X.shape[0], 1)), X]
for i in range(len(np.unique(y))):
y_one_vs_all = np.where(y == np.unique(y)[i], 1, 0)
thetas, _ = initialize(X)
for j in range(iterations):
z = np.dot(X, thetas[:, i])
h = sigmoid(z)
gradient = np.dot(X.T, (h - y_one_vs_all)) / len(y)
thetas[:, i] -= alpha * gradient
thetas_list.append(thetas[:, i])
global gthetas
gthetas = thetas_list
return None
def predict(X):
X = np.c_[np.ones((X.shape[0], 1)), X]
predictions = []
for sample in X:
probs = []
for thetas in gthetas:
z = np.dot(sample, thetas)
probs.append(sigmoid(z))
predictions.append(np.argmax(probs) + 1)
return predictions
# load data
df = pd.read_csv("Iris.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
# convert class categorical values to numerical values
df['Species'].replace('Iris-setosa', 1, inplace=True)
df['Species'].replace('Iris-versicolor', 2, inplace=True)
df['Species'].replace('Iris-virginica', 3, inplace=True)
# prepare one-vs-all labels for multiclass classification
y1 = pd.DataFrame(np.zeros((len(y), len(np.unique(y)))))
for i in range(len(np.unique(y))):
for j in range(len(y1)):
if y[j] == np.unique(y)[i]:
y1.iloc[j, i] = 1
else:
y1.iloc[j, i] = 0
# split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size=0.2, random_state=0)
# standardize features
X_train = standardize(X_train)
X_test = standardize(X_test)
# fit logistic regression model
fit(X_train, y_train, alpha=0.01, iterations=400)
# make predictions on test set
predictions = predict(X_test)
print(predictions)
and below is the error I’m getting.
ValueError Traceback (most recent call last)
~AppDataLocalTemp/ipykernel_14368/3997569506.py in <module>
1 # standardize features
2 X_train = standardize(X_train)
----> 3 thetas_list = fit(X_train, y_train)
4 plt.scatter(range(len(cost_list)), cost_list, c="blue")
5 plt.show()
~AppDataLocalTemp/ipykernel_14368/3827160719.py in fit(X, y, alpha, iter)
6 thetas, _ = initialize(X)
7 for j in range(iter):
----> 8 z = dot(X, thetas[:, i])
9 h = sigmoid(z)
10 gradient = dot(X.T, (h - y_one_vs_all)) / len(y)
<__array_function__ internals> in dot(*args, **kwargs)
ValueError: shapes (120,6) and (7,) not aligned: 6 (dim 1) != 7 (dim 0)
Any help in fixing this error will be appreciated. I’ve looked into other answers on stack overflow and I still can’t figure this out.
What I’m trying to accomplish: The following code is used as a base to create columns in y (outcome, Species class) for each class. So, the number of columns in y1 (given below) will be equal to the total number classes. That is 3 for iris flower dataset. For instance, for the first column (class is 1 which is Iris-setosa), whichever row in dataset is ‘Iris-setosa’ will be marked as 1 in y1’s corresponding row. Any other class (classes 2 and 3: Iris-versicolor and Iris-virginica) will be marked as 0 in y1’s first column.
for i in range(len(np.unique(y))):
for j in range(len(y1)):
if y[j] == np.unique(y)[i]:
y1.iloc[j, i] = 1 #one vs. all
else:
y1.iloc[j, i] = 0 #all others will be 0
Answers:
You should check how the theta array is built. You are initializing it with in the initialize
function with the shape (X.shape[1] + 1, nrows)
. The errors tells you that you can not calculate the dot product between an array X
with shape (a, b)
and theta with shape (b+1, c)
. You can try to remove the +1
in the theta definition. Then you will find another problem regarding the gradient, where you are trying to calculate again a dot product between two incompatible arrays. I hope this helps you, good luck!
I’m trying to implement multiclass classification with logistic regression on an Iris.csv dataset from Kaggle. This is my code.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
def standardize(X_tr): # (x-Mean(x))/std(X) Normalizes data
for i in range(X_tr.shape[1]):
X_tr[:, i] = (X_tr[:, i] - np.mean(X_tr[:, i])) / np.std(X_tr[:, i])
return X_tr
def sigmoid(z): #Sigmoid/Logistic function
sig = 1 / (1 + np.exp(-z))
return sig
def cost(theta, X, y):
z = np.dot(X, theta)
cost0 = y.T.dot(np.log(sigmoid(z)))
cost1 = (1 - y).T.dot(np.log(1 - sigmoid(z)))
cost = -((cost1 + cost0)) / len(y)
return cost
def initialize(X): #Initializing X feature matrix and Theta vector
thetas = np.zeros((X.shape[1] + 1, len(np.unique(y))))
X = np.c_[np.ones((X.shape[0], 1)), X] # adding 691 rows of ones as the first column in X
return thetas, X
def fit(X, y, alpha=0.01, iterations=1000): # Gradient Descent
thetas_list = []
X = np.c_[np.ones((X.shape[0], 1)), X]
for i in range(len(np.unique(y))):
y_one_vs_all = np.where(y == np.unique(y)[i], 1, 0)
thetas, _ = initialize(X)
for j in range(iterations):
z = np.dot(X, thetas[:, i])
h = sigmoid(z)
gradient = np.dot(X.T, (h - y_one_vs_all)) / len(y)
thetas[:, i] -= alpha * gradient
thetas_list.append(thetas[:, i])
global gthetas
gthetas = thetas_list
return None
def predict(X):
X = np.c_[np.ones((X.shape[0], 1)), X]
predictions = []
for sample in X:
probs = []
for thetas in gthetas:
z = np.dot(sample, thetas)
probs.append(sigmoid(z))
predictions.append(np.argmax(probs) + 1)
return predictions
# load data
df = pd.read_csv("Iris.csv")
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values
# convert class categorical values to numerical values
df['Species'].replace('Iris-setosa', 1, inplace=True)
df['Species'].replace('Iris-versicolor', 2, inplace=True)
df['Species'].replace('Iris-virginica', 3, inplace=True)
# prepare one-vs-all labels for multiclass classification
y1 = pd.DataFrame(np.zeros((len(y), len(np.unique(y)))))
for i in range(len(np.unique(y))):
for j in range(len(y1)):
if y[j] == np.unique(y)[i]:
y1.iloc[j, i] = 1
else:
y1.iloc[j, i] = 0
# split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size=0.2, random_state=0)
# standardize features
X_train = standardize(X_train)
X_test = standardize(X_test)
# fit logistic regression model
fit(X_train, y_train, alpha=0.01, iterations=400)
# make predictions on test set
predictions = predict(X_test)
print(predictions)
and below is the error I’m getting.
ValueError Traceback (most recent call last)
~AppDataLocalTemp/ipykernel_14368/3997569506.py in <module>
1 # standardize features
2 X_train = standardize(X_train)
----> 3 thetas_list = fit(X_train, y_train)
4 plt.scatter(range(len(cost_list)), cost_list, c="blue")
5 plt.show()
~AppDataLocalTemp/ipykernel_14368/3827160719.py in fit(X, y, alpha, iter)
6 thetas, _ = initialize(X)
7 for j in range(iter):
----> 8 z = dot(X, thetas[:, i])
9 h = sigmoid(z)
10 gradient = dot(X.T, (h - y_one_vs_all)) / len(y)
<__array_function__ internals> in dot(*args, **kwargs)
ValueError: shapes (120,6) and (7,) not aligned: 6 (dim 1) != 7 (dim 0)
Any help in fixing this error will be appreciated. I’ve looked into other answers on stack overflow and I still can’t figure this out.
What I’m trying to accomplish: The following code is used as a base to create columns in y (outcome, Species class) for each class. So, the number of columns in y1 (given below) will be equal to the total number classes. That is 3 for iris flower dataset. For instance, for the first column (class is 1 which is Iris-setosa), whichever row in dataset is ‘Iris-setosa’ will be marked as 1 in y1’s corresponding row. Any other class (classes 2 and 3: Iris-versicolor and Iris-virginica) will be marked as 0 in y1’s first column.
for i in range(len(np.unique(y))):
for j in range(len(y1)):
if y[j] == np.unique(y)[i]:
y1.iloc[j, i] = 1 #one vs. all
else:
y1.iloc[j, i] = 0 #all others will be 0
You should check how the theta array is built. You are initializing it with in the initialize
function with the shape (X.shape[1] + 1, nrows)
. The errors tells you that you can not calculate the dot product between an array X
with shape (a, b)
and theta with shape (b+1, c)
. You can try to remove the +1
in the theta definition. Then you will find another problem regarding the gradient, where you are trying to calculate again a dot product between two incompatible arrays. I hope this helps you, good luck!