Kernel keeps dying while using BERT-based sentiment analysis model

Question:

I’m trying to use the German BERT sentiment analysis model in a Jupyter Notebook. I have installed PyTorch correctly, but the kernel keeps dying. I’m on a MacBook Pro ’21 with macOS Monterey 12.3.1 and have installed Python 3.10.4. PyTorch does not show up in the list of installed packages in that environment, even though the terminal tells me that the ‘requirement is already satisfied’.
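A minimal check of whether the notebook kernel and the terminal’s pip point at the same Python interpreter (a sketch to run inside the notebook) looks roughly like this:

import sys

print(sys.executable)   # interpreter the notebook kernel is running on
print(sys.prefix)       # environment that interpreter belongs to

try:
    import torch
    print("torch", torch.__version__, "found at", torch.__file__)
except ImportError as err:
    print("torch is not importable in this kernel:", err)

Comparing sys.executable with the output of which python3 (or python3 -m pip show torch) in the terminal shows whether pip’s ‘requirement already satisfied’ refers to the same environment the kernel actually uses.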

I tried to run the example code posted on Hugging Face:
https://huggingface.co/oliverguhr/german-sentiment-bert

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from typing import List
import torch
import re

class SentimentModel():
    def __init__(self, model_name: str):
        # Use a GPU if one is available, otherwise fall back to the CPU.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.model.to(self.device)
        self.model.eval()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        self.clean_chars = re.compile(r'[^A-Za-züöäÖÜÄß ]', re.MULTILINE)
        self.clean_http_urls = re.compile(r'https*\S+', re.MULTILINE)
        self.clean_at_mentions = re.compile(r'@\S+', re.MULTILINE)

    def predict_sentiment(self, texts: List[str]) -> List[str]:
        texts = [self.clean_text(text) for text in texts]
        # add_special_tokens takes care of adding [CLS], [SEP], <s>, ... tokens
        # in the right way for each model.
        encoded = self.tokenizer.batch_encode_plus(
            texts,
            padding=True,
            add_special_tokens=True,
            truncation=True,
            return_tensors="pt",
        )
        encoded = encoded.to(self.device)
        with torch.no_grad():
            output = self.model(**encoded)

        label_ids = torch.argmax(output.logits, dim=1)
        return [self.model.config.id2label[label_id.item()] for label_id in label_ids]

    def replace_numbers(self, text: str) -> str:
        # Spell out digits as German number words so they survive the character filter below.
        return (text.replace("0", " null").replace("1", " eins").replace("2", " zwei")
                    .replace("3", " drei").replace("4", " vier").replace("5", " fünf")
                    .replace("6", " sechs").replace("7", " sieben").replace("8", " acht")
                    .replace("9", " neun"))

    def clean_text(self, text: str) -> str:
        text = text.replace("\n", " ")          # replace line breaks with spaces
        text = self.clean_http_urls.sub('', text)
        text = self.clean_at_mentions.sub('', text)
        text = self.replace_numbers(text)
        text = self.clean_chars.sub('', text)   # keep only text characters
        text = ' '.join(text.split())           # collapse multiple whitespace into one space
        text = text.strip().lower()
        return text

texts = ["Mit keinem guten Ergebniss","Das war unfair", "Das ist gar nicht mal so gut",
        "Total awesome!","nicht so schlecht wie erwartet", "Das ist gar nicht mal so schlecht",
        "Der Test verlief positiv.","Sie fährt ein grünes Auto.", "Der Fall wurde an die Polzei übergeben."]

model = SentimentModel(model_name = "oliverguhr/german-sentiment-bert")

print(model.predict_sentiment(texts))



Asked By: Valeriomerio


Answers:

Could you please try this code? It’s the same model wrapped in a simpler-to-use library.

pip install germansentiment

from germansentiment import SentimentModel

model = SentimentModel()

texts = [
    "Mit keinem guten Ergebniss","Das ist gar nicht mal so gut",
    "Total awesome!","nicht so schlecht wie erwartet",
    "Der Test verlief positiv.","Sie fährt ein grünes Auto."]
       
result = model.predict_sentiment(texts)
print(result)
Answered By: Oliver

For me, a similar issue was solved simply by moving import torch above the transformers imports. Could you try changing your imports to:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from typing import List
import re
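
As a quick sanity check (a minimal sketch; the MPS line assumes a reasonably recent PyTorch build on Apple silicon), you can confirm that torch itself imports and runs before any transformers model is loaded:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

print(torch.__version__)

# A trivial tensor operation: if the kernel dies already at this point, the
# problem is the torch installation itself rather than the model code.
print(torch.ones(2, 2) @ torch.ones(2, 2))

# Optional: on Apple-silicon Macs, newer PyTorch builds expose an MPS backend.
if hasattr(torch.backends, "mps"):
    print("MPS available:", torch.backends.mps.is_available())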