Ray | AttributeError: 'BroadModel' object has no attribute 'model'

Question:

I am using Ray Tune to find the optimal hyperparameter values for this model:

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # suppress TensorFlow logging

from ray import tune
from sklearn.utils import shuffle
from tensorflow.keras.layers import (
    Input, Conv1D, MaxPooling1D, Bidirectional, LSTM,
    Dropout, Dense, concatenate,
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# load_data_train, load_data_test, Att_q and
# quaternion_mean_multiplicative_error are defined elsewhere in my script.

class BroadModel(tune.Trainable):
    def build_model(self, config):
        global convB2, drop2, convA2, poolA, poolB
        window_size = 200
        self.x_gyro, self.x_acc, x_mag, q = load_data_train()
        self.Att_quat = Att_q(q)
        self.x_gyro_t, self.x_acc_t, x_mag_t, q_t = load_data_test()
        self.Att_quat_t = Att_q(q_t)

        self.x_gyro, self.x_acc, self.Att_quat = shuffle(
            self.x_gyro, self.x_acc, self.Att_quat)

        x1 = Input((window_size, 3), name='x1')
        x2 = Input((window_size, 3), name='x2')

        # Branch A: Conv1D stack, depth controlled by Conv1DAn
        convA1 = Conv1D(config["Conv1DA"], 11, padding='same', activation='relu')(x1)
        for i in range(config["Conv1DAn"]):
            if i > 0:
                convA2 = Conv1D(config[f'Conv1DAn_{i}'], 11, padding='same', activation='relu')(convA1)
        poolA = MaxPooling1D(3)(convA1)

        # Branch B: same structure, depth controlled by Conv1DBn
        convB1 = Conv1D(config["Conv1DB"], 11, padding='same', activation='relu')(x2)
        for i in range(config["Conv1DBn"]):
            if i > 0:
                convB2 = Conv1D(config[f'Conv1DBn_{i}'], 11, padding='same', activation='relu')(convB1)
        poolB = MaxPooling1D(3)(convB1)

        AB = concatenate([poolA, poolB])

        # Bidirectional LSTM stack, depth controlled by LSTMn
        lstm1 = Bidirectional(LSTM(config["LSTM1"], return_sequences=True))(AB)
        drop1 = Dropout(config['dropout'])(lstm1)
        for i in range(config['LSTMn']):
            if i > 0:
                lstm2 = Bidirectional(LSTM(config[f'LSTMn_{i}'], return_sequences=True))(drop1)
                drop1 = Dropout(config['dropout'])(lstm2)
        lstm2 = Bidirectional(LSTM(config['LSTMn_l']))(drop1)
        drop2 = Dropout(config['dropout'])(lstm2)
        y1_pred = Dense(4, kernel_regularizer='l2')(drop2)
        model = Model(inputs=[x1, x2], outputs=[y1_pred])
        return model

    def setup(self, config):
        model = self.build_model(config)
        model.compile(
            optimizer=Adam(learning_rate=config['lr']),
            loss=quaternion_mean_multiplicative_error,
            metrics=[quaternion_mean_multiplicative_error],
        )
        self.model = model
        return model

But whenever I scale up the network, either by increasing the size of each layer from 50 to 100 or more, or by increasing the number of iterations from 10–20 to more than 40, I get errors such as:

> Failure # 1 (occurred at 2022-09-05_12-04-07)
> ray::ResourceTrainable.train() (pid=35719, ip=192.168.91.120, repr=<ray.tune.trainable.util.BroadModel object at 0x7f478f107c40>)
>   File "/home/ssrc/asq/lib/python3.8/site-packages/ray/tune/trainable/trainable.py", line 347, in train
>     result = self.step()
>   File "ray_test.py", line 258, in step
>     self.model.fit(
> AttributeError: 'BroadModel' object has no attribute 'model'

This is the tuning code:

if __name__ == "__main__":
    import ray
    from ray.tune.schedulers import PopulationBasedTraining
    

    pbt = PopulationBasedTraining(
        perturbation_interval=600,
        hyperparam_mutations={
            "dropout": tune.uniform(0.1,0.5),
            "lr": tune.uniform(1e-5,1e-3),
            "Conv1DA": tune.randint(10,15),
            "Conv1DAn": tune.choice([0,1,2,3]),
            "Conv1DAn_1": tune.randint(10,15),
            "Conv1DAn_2": tune.randint(10,15),
            "Conv1DAn_3": tune.randint(10,15),
            "Conv1DB": tune.randint(10,15),
            "Conv1DBn": tune.choice([0,1,2,3]),
            "Conv1DBn_1": tune.randint(10,15),
            "Conv1DBn_2": tune.randint(10,15),
            "Conv1DBn_3": tune.randint(10,15),
            "LSTM1": tune.randint(10,15),
            "LSTMn": tune.choice([0,1,2,3]),
            "LSTMn_1": tune.randint(10,15),
            "LSTMn_2": tune.randint(10,15),
            "LSTMn_3": tune.randint(10,15),
            "LSTMn_l": tune.randint(10,15),
           },
        
    )
    resources_per_trial = {"cpu": 10 , "gpu": 0}
    tuner = tune.Tuner(
         tune.with_resources(
        BroadModel,
        resources=resources_per_trial),
        run_config=air.RunConfig(
            name="BroadPBT"+timestr,
            stop={"training_iteration": 50},
        ),
        tune_config=tune.TuneConfig(
            reuse_actors=True,
            scheduler=pbt,
            metric="loss",
            mode="min",
            num_samples=2 ,
            
        ),
        param_space={
            "finish_fast": False,
            "batch_size": 1000,
            "epochs": 200,
            "dropout": tune.uniform(0.1,0.5),
            "lr": tune.uniform(1e-5,1e-3),
            "Conv1DA": tune.randint(10,15),
            "Conv1DAn": tune.choice([0,1,2,3]),
            "Conv1DAn_1": tune.randint(10,15),
            "Conv1DAn_2": tune.randint(10,15),
            "Conv1DAn_3": tune.randint(10,15),
            "Conv1DB": tune.randint(10,15),
            "Conv1DBn": tune.choice([0,1,2,3]),
            "Conv1DBn_1": tune.randint(10,15),
            "Conv1DBn_2": tune.randint(10,15),
            "Conv1DBn_3": tune.randint(10,15),
            "LSTM1": tune.randint(10,15),
            "LSTMn": tune.choice([0,1,2,3]),
            "LSTMn_1": tune.randint(10,15),
            "LSTMn_2": tune.randint(10,15),
            "LSTMn_3": tune.randint(10,15),
            "LSTMn_l": tune.randint(10,15),
            
            
        },
    )
    #tune.run(resources_per_trial={'gpu': 1}, tuner)
    tuner.fit()

What should I do?
As I mentioned above, if I keep the iteration count below 20, I don't get any errors.

Asked By: Arman Asgharpoor


Answers:

The problem with using PBT to tune the network size is that it tries to modify these parameters mid-run, which is usually undefined behavior. When downscaling, you drop layers/nodes that potentially contain relevant information; when upscaling, you add randomly initialized nodes that contain no information. In either case, the perturbation usually renders the rest of the network useless.

With PBT, you can mutate any hyperparameters except those that define the network architecture.
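In practice, that means listing only the training hyperparameters in hyperparam_mutations and leaving the architecture parameters in param_space, where they are sampled once per trial and never perturbed mid-run. A minimal sketch of that split, using the parameter names from the question (the ranges are illustrative):

    from ray import tune
    from ray.tune.schedulers import PopulationBasedTraining

    # Only training hyperparameters are perturbed mid-run by PBT ...
    pbt = PopulationBasedTraining(
        perturbation_interval=600,
        hyperparam_mutations={
            "dropout": tune.uniform(0.1, 0.5),
            "lr": tune.uniform(1e-5, 1e-3),
        },
    )

    # ... while the architecture is sampled once at trial start and then fixed.
    param_space = {
        "dropout": tune.uniform(0.1, 0.5),
        "lr": tune.uniform(1e-5, 1e-3),
        "Conv1DA": tune.randint(10, 15),       # fixed for the lifetime of a trial
        "Conv1DAn": tune.choice([0, 1, 2, 3]),
        "LSTM1": tune.randint(10, 15),
        # ... remaining architecture parameters as in the question
    }

Parameters that appear in param_space but not in hyperparam_mutations are still tuned across trials; they just keep their sampled value for the whole run of each trial.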

Answered By: Kai

Add this method to your class (note: calling setup, rather than just build_model, ensures the rebuilt model is also compiled and assigned to self.model):

    def reset_config(self, new_config):
        self.config = new_config
        self.setup(new_config)  # rebuild and recompile the model for the new config
        return True
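With reuse_actors=True in TuneConfig, Tune reuses the same actor for successive trial configurations and calls reset_config with the new config instead of tearing the actor down; returning True signals that the in-place reset succeeded. Without this method, the reused actor can end up running step() on a trainable whose model was never (re)built, which is the AttributeError in the traceback.

For reference, the step() method that the traceback points at would look roughly like this. This is only a sketch: the data attributes come from build_model in the question, while the one-epoch-per-step choice and the reported metric key are assumptions (the key must match metric="loss" in TuneConfig):

    def step(self):
        history = self.model.fit(
            [self.x_gyro, self.x_acc],
            self.Att_quat,
            batch_size=self.config["batch_size"],
            epochs=1,      # one training epoch per Tune iteration (assumption)
            verbose=0,
        )
        # Report the training loss under the key the scheduler optimizes
        return {"loss": history.history["loss"][-1]}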
Answered By: Arman_Asq