Custom torch.nn.Module not learning, even though grad_fn=MmBackward

Question:

I am training a model to predict pose using a custom PyTorch model. However, V1 below never learns (its parameters don’t change), even though the output is connected to the backprop graph and has grad_fn=MmBackward.

I can’t understand why V1 isn’t learning but V2 is?

V1

class cam_pose_transform_V1(torch.nn.Module):
    """Left-multiply ``x`` by a 4x4 matrix built from three learnable angles.

    The three scalar parameters are angles in radians (per their names); their
    sines and cosines fill the upper-left 3x3 block of an otherwise
    identity-like 4x4 matrix (bottom-right entry 1, everything else 0).

    NOTE: this is the variant that does not learn, and the cause is left in
    place on purpose: ``torch.tensor([...])`` in ``forward`` copies the current
    values into a new leaf tensor, so autograd has no path from the output
    back to the three parameters and their ``.grad`` stays ``None``.
    """

    def __init__(self):
        # Zero-argument super(): the previous call named a class
        # (`cam_pose_transform`) that does not exist, which raised NameError
        # as soon as the module was constructed.
        super().__init__()
        # Scalar (0-dim) parameters, initialised very close to zero.
        self.elevation_x_rotation_radians = torch.nn.Parameter(torch.normal(0., 1e-6, size=()))
        self.azimuth_y_rotation_radians = torch.nn.Parameter(torch.normal(0., 1e-6, size=()))
        self.z_rotation_radians = torch.nn.Parameter(torch.normal(0., 1e-6, size=()))

    def forward(self, x):
        """Return ``exp_i @ x`` for the 4x4 matrix ``exp_i`` described above.

        ``x`` must be matmul-compatible with a 4x4 matrix on the left
        (presumably homogeneous coordinates with 4 rows -- confirm with caller).
        """
        exp_i = torch.zeros((4,4))

        c1 = torch.cos(self.elevation_x_rotation_radians)
        s1 = torch.sin(self.elevation_x_rotation_radians)
        c2 = torch.cos(self.azimuth_y_rotation_radians)
        s2 = torch.sin(self.azimuth_y_rotation_radians)
        c3 = torch.cos(self.z_rotation_radians)
        s3 = torch.sin(self.z_rotation_radians)

        # NOTE: torch.tensor(...) reads the *values* of c1..s3 and builds a
        # fresh leaf tensor. requires_grad=True makes that leaf track
        # gradients (hence grad_fn=MmBackward on the output), but the graph
        # ends here and never reaches the parameters.
        rotation_in_matrix = torch.tensor([
            [c2, s2 * s3, c3 * s2],
            [s1 * s2, c1 * c3 - c2 * s1 * s3, -c1 * s3 - c2 * c3 * s1],
            [-c1 * s2, c3 * s1 + c1 * c2 * s3, c1 * c2 * c3 - s1 * s3]
        ], requires_grad=True)

        # Embed the 3x3 block into the 4x4 matrix; the last row/column stay
        # zero except for the bottom-right 1.
        exp_i[:3, :3] = rotation_in_matrix
        exp_i[3, 3] = 1.

        return torch.matmul(exp_i, x)

However, this version learns as expected (params and loss change) and also has grad_fn=MmBackward on the output:

V2


def vec2ss_matrix(vector):
    """Build the 3x3 skew-symmetric matrix of a 3-component vector.

    For ``vector = (x, y, z)`` the result is::

        [[ 0, -z,  y],
         [ z,  0, -x],
         [-y,  x,  0]]

    Only the first three components of ``vector`` are read. The result is
    always allocated with ``torch.zeros((3, 3))``.
    """
    vx, vy, vz = vector[0], vector[1], vector[2]

    skew = torch.zeros((3, 3))
    # (row, col) -> value for the six off-diagonal entries; the diagonal
    # stays zero.
    off_diagonal = (
        ((0, 1), -vz),
        ((0, 2), vy),
        ((1, 0), vz),
        ((1, 2), -vx),
        ((2, 0), -vy),
        ((2, 1), vx),
    )
    for position, value in off_diagonal:
        skew[position] = value

    return skew


class cam_pose_transform_V2(torch.nn.Module):
    """Left-multiply ``x`` by a learnable 4x4 matrix built from ``w``, ``v``, ``theta``.

    Parameters (all initialised very close to zero):
      * ``w``     -- 3-vector; its skew-symmetric matrix drives the 3x3 block.
      * ``v``     -- 3-vector; drives the last column (rows 0..2).
      * ``theta`` -- scalar angle used in the sin/cos terms.

    Every entry of the matrix is produced by differentiable tensor
    operations on the parameters, so gradients reach ``w``, ``v`` and
    ``theta``.
    """

    def __init__(self):
        # Zero-argument super(): the previous call named a class
        # (`camera_transf`) that does not exist, which raised NameError as
        # soon as the module was constructed.
        super().__init__()
        self.w = torch.nn.Parameter(torch.normal(0., 1e-6, size=(3,)))
        self.v = torch.nn.Parameter(torch.normal(0., 1e-6, size=(3,)))
        self.theta = torch.nn.Parameter(torch.normal(0., 1e-6, size=()))

    def forward(self, x):
        """Return ``exp_i @ x``.

        ``x`` must be matmul-compatible with a 4x4 matrix on the left
        (presumably homogeneous coordinates with 4 rows -- confirm with caller).
        """
        exp_i = torch.zeros((4,4))
        w_skewsym = vec2ss_matrix(self.w)
        # NOTE(review): v_skewsym is computed but never used below.
        v_skewsym = vec2ss_matrix(self.v)
        # Upper-left 3x3 block: I + sin(theta) * W + (1 - cos(theta)) * W @ W,
        # i.e. the form of Rodrigues' rotation formula (w is not normalised
        # anywhere in this block).
        exp_i[:3, :3] = torch.eye(3) + torch.sin(self.theta) * w_skewsym + (1 - torch.cos(self.theta)) * torch.matmul(w_skewsym, w_skewsym)
        # Last column, rows 0..2:
        # (I * theta + (1 - cos(theta)) * W + (theta - sin(theta)) * W @ W) @ v
        exp_i[:3, 3] = torch.matmul(torch.eye(3) * self.theta + (1 - torch.cos(self.theta)) * w_skewsym + (self.theta - torch.sin(self.theta)) * torch.matmul(w_skewsym, w_skewsym), self.v)
        # Bottom row stays [0, 0, 0, 1].
        exp_i[3, 3] = 1.

        return torch.matmul(exp_i, x)

Update #1

In the training loop I printed the .grad attributes using:

# Gradients before the backward pass.
print([param.grad for param in cam_pose.parameters()])
loss.backward()
# Gradients after the backward pass.
print([param.grad for param in cam_pose.parameters()])

Results:

# V1
[None, None, None]
[None, None, None]

# V2
[None, None, None]
[tensor([-0.0032,  0.0025, -0.0053]), tensor([ 0.0016, -0.0013,  0.0054]), tensor(-0.0559)]

Nothing else in the code was changed, just swapped V1 model for V2.

Asked By: aktabit

||

Answers:

this is your problem right here:

# torch.tensor(...) copies the values of its elements into a new leaf tensor;
# it does not record an autograd link back to c1..s3.
rotation_in_matrix = torch.tensor([
    [c2, s2 * s3, c3 * s2],
    [s1 * s2, c1 * c3 - c2 * s1 * s3, -c1 * s3 - c2 * c3 * s1],
    [-c1 * s2, c3 * s1 + c1 * c2 * s3, c1 * c2 * c3 - s1 * s3]], requires_grad=True)

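You are creating a tensor out of a list of tensors, which is not a differentiable operation: torch.tensor(...) copies the current values of c1..s3 into a brand-new leaf tensor, so there is no gradient flow from rotation_in_matrix back to its elements, and therefore none to the parameters. Because that new leaf is created with requires_grad=True, the matmul output still shows grad_fn=MmBackward, but backward() stops at rotation_in_matrix, which is why every parameter's .grad stays None.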

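The solution is to build rotation_in_matrix with differentiable tensor operations such as torch.stack and torch.cat instead, for example:

rotation_in_matrix = torch.stack([
    torch.stack([c2, s2 * s3, c3 * s2]),
    torch.stack([s1 * s2, c1 * c3 - c2 * s1 * s3, -c1 * s3 - c2 * c3 * s1]),
    torch.stack([-c1 * s2, c3 * s1 + c1 * c2 * s3, c1 * c2 * c3 - s1 * s3]),
])

No requires_grad=True is needed here: the stacked result tracks gradients automatically because its inputs depend on the parameters.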

Answered By: KonstantinosKokos