Get data from .pickle
Question:
I have a MultinomialNB model wrapped in a Pipeline:
text_clf_NB = Pipeline([('vect', CountVectorizer()),
('tfidf', TfidfTransformer()),
('clf', MultinomialNB()),
])
text_clf_NB.fit(Train_X_NB, Train_Y_NB)
I save it to a .pickle file:
pickle.dump(text_clf_NB, open("NB_classification.pickle", "wb"))
Elsewhere (e.g. in another script), I load this model:
clf = pickle.load(open("NB_classification.pickle", "rb"))
Can you please help me: how can I get the sparse matrix of the training data? That is, I want to get the values of Train_X_NB after the TfidfTransformer step, using the loaded clf.
Answers:
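Try something like this. Note that the pickled pipeline stores only the fitted steps, not the training data, so the original Train_X_NB must still be available; you re-apply the fitted transformers to it: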
import pickle
# Load the model
clf = pickle.load(open("NB_classification.pickle", "rb"))
# Access the CountVectorizer and TfidfTransformer
count_vectorizer = clf.named_steps['vect']
tfidf_transformer = clf.named_steps['tfidf']
# Transform the training data
Train_X_counts = count_vectorizer.transform(Train_X_NB)
Train_X_tfidf = tfidf_transformer.transform(Train_X_counts)
Update
If you want to save the transformed training data while fitting the model, you can do the following:
#Fit the model and save it:
text_clf_NB.fit(Train_X_NB, Train_Y_NB)
pickle.dump(text_clf_NB, open("NB_classification.pickle", "wb"))
#Transform the training data and save it:
vect = text_clf_NB.named_steps['vect']
tfidf = text_clf_NB.named_steps['tfidf']
Train_X_counts = vect.transform(Train_X_NB)
Train_X_tfidf = tfidf.transform(Train_X_counts)
pickle.dump(Train_X_tfidf, open("Train_X_tfidf.pickle", "wb"))
Now, in your other script, you can load both the trained model and the transformed training data:
# Load the model
clf = pickle.load(open("NB_classification.pickle", "rb"))
# Load the transformed matrix
Train_X_tfidf = pickle.load(open("Train_X_tfidf.pickle", "rb"))
This way, you don’t need the original Train_X_NB data to get the transformed matrix in the other script.
I have a MultinomialNB model wrapped in a Pipeline:
text_clf_NB = Pipeline([('vect', CountVectorizer()),
('tfidf', TfidfTransformer()),
('clf', MultinomialNB()),
])
text_clf_NB.fit(Train_X_NB, Train_Y_NB)
I save it to a .pickle file:
pickle.dump(text_clf_NB, open("NB_classification.pickle", "wb"))
Elsewhere (e.g. in another script), I load this model:
clf = pickle.load(open("NB_classification.pickle", "rb"))
Can you please help me: how can I get the sparse matrix of the training data? That is, I want to get the values of Train_X_NB after the TfidfTransformer step, using the loaded clf.
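Try something like this. Note that the pickled pipeline stores only the fitted steps, not the training data, so the original Train_X_NB must still be available; you re-apply the fitted transformers to it: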
import pickle
# Load the model
clf = pickle.load(open("NB_classification.pickle", "rb"))
# Access the CountVectorizer and TfidfTransformer
count_vectorizer = clf.named_steps['vect']
tfidf_transformer = clf.named_steps['tfidf']
# Transform the training data
Train_X_counts = count_vectorizer.transform(Train_X_NB)
Train_X_tfidf = tfidf_transformer.transform(Train_X_counts)
Update
If you want to save the transformed training data while fitting the model, you can do the following:
#Fit the model and save it:
text_clf_NB.fit(Train_X_NB, Train_Y_NB)
pickle.dump(text_clf_NB, open("NB_classification.pickle", "wb"))
#Transform the training data and save it:
vect = text_clf_NB.named_steps['vect']
tfidf = text_clf_NB.named_steps['tfidf']
Train_X_counts = vect.transform(Train_X_NB)
Train_X_tfidf = tfidf.transform(Train_X_counts)
pickle.dump(Train_X_tfidf, open("Train_X_tfidf.pickle", "wb"))
Now, in your other script, you can load both the trained model and the transformed training data:
# Load the model
clf = pickle.load(open("NB_classification.pickle", "rb"))
# Load the transformed matrix
Train_X_tfidf = pickle.load(open("Train_X_tfidf.pickle", "rb"))
This way, you don’t need the original Train_X_NB data to get the transformed matrix in the other script.