How to write/load machine learning model to/from S3 bucket through joblib?
Question:
I have an ml model which I want to save on an S3 bucket.
from lightgbm.sklearn import LGBMClassifier
# Initialize model
# boosting_type='rf' makes LightGBM behave like a random forest; objective='binary' for binary classification
mdl_lightgbm = LGBMClassifier(boosting_type='rf', objective='binary')
# Fit data
# NOTE(review): X and Y are assumed to be defined earlier (feature matrix / labels) — not shown here
mdl_lightgbm.fit(X,Y)
# Save model to dictionary
# Wrapping the fitted estimator in a dict so extra metadata can be stored alongside it later
mdl_dict = {'mdl_fitted':mdl_lightgbm}
For various reasons, I’m storing the fitted model in a dictionary. The idea is to dump/load the model through joblib to/from an S3 bucket.
Answers:
Save model to S3
Based on the idea of this question, the following function lets you save the model to an s3 bucket or locally through joblib:
import boto3
import joblib
from io import BytesIO
def write_joblib(file, path):
'''
Function to write a joblib file to an s3 bucket or local directory.
Arguments:
* file: The file that you want to save
* path: an s3 bucket or local directory path.
'''
# Path is an s3 bucket
if path[:5] == 's3://':
s3_bucket, s3_key = path.split('/')[2], path.split('/')[3:]
s3_key = '/'.join(s3_key)
with BytesIO() as f:
joblib.dump(file, f)
f.seek(0)
boto3.client("s3").upload_fileobj(Bucket=s3_bucket, Key=s3_key, Fileobj=f)
# Path is a local directory
else:
with open(path, 'wb') as f:
joblib.dump(file, f)
In your example, if you want to save your model to an s3 bucket, just type
write_joblib(mdl_dict, 's3://bucket_name/mdl_dict.joblib')
Load model from s3
Additionally, following the idea in this question, the following function lets you load the model from an s3 bucket or a local file
def read_joblib(path):
'''
Function to load a joblib file from an s3 bucket or local directory.
Arguments:
* path: an s3 bucket or local directory path where the file is stored
Outputs:
* file: Joblib file loaded
'''
# Path is an s3 bucket
if path[:5] == 's3://':
s3_bucket, s3_key = path.split('/')[2], path.split('/')[3:]
s3_key = '/'.join(s3_key)
with BytesIO() as f:
boto3.client("s3").download_fileobj(Bucket=s3_bucket, Key=s3_key, Fileobj=f)
f.seek(0)
file = joblib.load(f)
# Path is a local directory
else:
with open(path, 'rb') as f:
file = joblib.load(f)
return file
In your case, to load the file from the same s3 bucket use the following line of code
mdl_lightgbm = read_joblib('s3://bucket_name/mdl_dict.joblib')
mdl_lightgbm = mdl_lightgbm['mdl_fitted']
I have an ml model which I want to save on an S3 bucket.
from lightgbm.sklearn import LGBMClassifier
# Initialize model
# boosting_type='rf' makes LightGBM behave like a random forest; objective='binary' for binary classification
mdl_lightgbm = LGBMClassifier(boosting_type='rf', objective='binary')
# Fit data
# NOTE(review): X and Y are assumed to be defined earlier (feature matrix / labels) — not shown here
mdl_lightgbm.fit(X,Y)
# Save model to dictionary
# Wrapping the fitted estimator in a dict so extra metadata can be stored alongside it later
mdl_dict = {'mdl_fitted':mdl_lightgbm}
For various reasons, I’m storing the fitted model in a dictionary. The idea is to dump/load the model through joblib to/from an S3 bucket.
Save model to S3
Based on the idea of this question, the following function lets you save the model to an s3 bucket or locally through joblib:
import boto3
from io import BytesIO
def write_joblib(file, path):
'''
Function to write a joblib file to an s3 bucket or local directory.
Arguments:
* file: The file that you want to save
* path: an s3 bucket or local directory path.
'''
# Path is an s3 bucket
if path[:5] == 's3://':
s3_bucket, s3_key = path.split('/')[2], path.split('/')[3:]
s3_key = '/'.join(s3_key)
with BytesIO() as f:
joblib.dump(file, f)
f.seek(0)
boto3.client("s3").upload_fileobj(Bucket=s3_bucket, Key=s3_key, Fileobj=f)
# Path is a local directory
else:
with open(path, 'wb') as f:
joblib.dump(file, f)
In your example, if you want to save your model to an s3 bucket, just type
write_joblib(mdl_dict, 's3://bucket_name/mdl_dict.joblib')
Load model from s3
Additionally, following the idea in this question, the following function lets you load the model from an s3 bucket or a local file
def read_joblib(path):
'''
Function to load a joblib file from an s3 bucket or local directory.
Arguments:
* path: an s3 bucket or local directory path where the file is stored
Outputs:
* file: Joblib file loaded
'''
# Path is an s3 bucket
if path[:5] == 's3://':
s3_bucket, s3_key = path.split('/')[2], path.split('/')[3:]
s3_key = '/'.join(s3_key)
with BytesIO() as f:
boto3.client("s3").download_fileobj(Bucket=s3_bucket, Key=s3_key, Fileobj=f)
f.seek(0)
file = joblib.load(f)
# Path is a local directory
else:
with open(path, 'rb') as f:
file = joblib.load(f)
return file
In your case, to load the file from the same s3 bucket use the following line of code
mdl_lightgbm = read_joblib('s3://bucket_name/mdl_dict.joblib')
mdl_lightgbm = mdl_lightgbm['mdl_fitted']