Why is the spaCy Scorer returning None for the entity scores but the model is extracting entities?

Question:

I am really confused about why Scorer.score is returning ents_p, ents_r, and ents_f as None for the example below. I am seeing something very similar with my own custom model and want to understand why it is returning None.

Example Scorer Code – Returning None for ents_p, ents_r, ents_f

import spacy
from spacy.scorer import Scorer
from spacy.tokens import Doc
from spacy.training.example import Example

# Evaluation data: one (text, annotations) pair per sentence, where the
# annotations are a set of (start, end, label) character-offset entity spans.
examples = [
    (
        'Who is Talha Tayyab?',
        {(7, 19, 'PERSON')},
    ),
    (
        'I like London and Berlin.',
        {(7, 13, 'LOC'), (18, 24, 'LOC')},
    ),
    (
        'Agra is famous for Tajmahal, The CEO of Facebook will visit India shortly to meet Murari Mahaseth and to visit Tajmahal.',
        {
            (0, 4, 'LOC'),
            (40, 48, 'ORG'),
            (60, 65, 'GPE'),
            (82, 97, 'PERSON'),
            (111, 119, 'GPE'),
        },
    ),
]

def my_evaluate(ner_model, examples):
    """Run ``ner_model`` on each text in ``examples`` and score the results.

    ``examples`` is iterated as ``(input_, annotations)`` pairs. Each text is
    processed by ``ner_model`` and wrapped in an ``Example``; the collected
    examples are passed to ``Scorer.score`` and the resulting scores returned.
    """
    scorer = Scorer()
    example = []
    for input_, annotations in examples:
        pred = ner_model(input_)
        print(pred,annotations)
        # dict.fromkeys(annotations) builds a dict keyed by the entity tuples
        # themselves (every value is None), so it carries no "entities" key.
        temp = Example.from_dict(pred, dict.fromkeys(annotations))
        example.append(temp)
    scores = scorer.score(example)
    return scores

# Load the pipeline, evaluate it on the examples defined above, and print the
# resulting score dictionary.
ner_model = spacy.load('en_core_web_sm') # for spaCy's pretrained use 'en_core_web_sm'
results = my_evaluate(ner_model, examples)
print(results)

Scorer Results

{'token_acc': 1.0, 'token_p': 1.0, 'token_r': 1.0, 'token_f': 1.0, 'sents_p': None, 'sents_r': None, 'sents_f': None, 'tag_acc': None, 'pos_acc': None, 'morph_acc': None, 'morph_micro_p': None, 'morph_micro_r': None, 'morph_micro_f': None, 'morph_per_feat': None, 'dep_uas': None, 'dep_las': None, 'dep_las_per_type': None, 'ents_p': None, 'ents_r': None, 'ents_f': None, 'ents_per_type': None, 'cats_score': 0.0, 'cats_score_desc': 'macro F', 'cats_micro_p': 0.0, 'cats_micro_r': 0.0, 'cats_micro_f': 0.0, 'cats_macro_p': 0.0, 'cats_macro_r': 0.0, 'cats_macro_f': 0.0, 'cats_macro_auc': 0.0, 'cats_f_per_type': {}, 'cats_auc_per_type': {}}

It is clearly picking out entities from the text

# Run the loaded pipeline on a single sentence and print the text and label of
# every entity it predicts.
doc = ner_model('Agra is famous for Tajmahal, The CEO of Facebook will visit India shortly to meet Murari Mahaseth and to visit Tajmahal.')
for ent in doc.ents:
    print(ent.text, ent.label_)

Output

Agra PERSON
Tajmahal ORG
Facebook ORG
India GPE
Murari Mahaseth PERSON
Tajmahal ORG
Asked By: scarpacci

||

Answers:

This line is the issue. The annotations are not added to the reference docs because they’re not in the right format:

Example.from_dict(pred, dict.fromkeys(annotations))

The expected format is:

Example.from_dict(pred, {"entities": [(start, end, label), (start, end, label), ...]})

You can also use the built-in Language.evaluate if you create examples where Example.predicted is unannotated. It also creates the scorer based on your pipeline, so you don’t end up with a lot of irrelevant None scores:

Example.from_dict(nlp.make_doc(text), {"entities": [(start, end, label), (start, end, label), ...]})

Once you have these kinds of examples, run:

```python
scores = ner_model.evaluate(examples)
```
Answered By: aab

For those that are interested, here is the complete example that matches the input from @aab.

import spacy
from spacy.scorer import Scorer
from spacy.tokens import Doc
from spacy.training.example import Example

# Evaluation data: one (text, annotations) pair per sentence. The annotations
# dict lists the gold entity spans under the "entities" key as
# (start, end, label) character offsets.
examples = [
    (
        'Who is Talha Tayyab?',
        {"entities": [(7, 19, 'PERSON')]},
    ),
    (
        'I like London and Berlin.',
        {"entities": [(7, 13, 'LOC'), (18, 24, 'LOC')]},
    ),
    (
        'Agra is famous for Tajmahal, The CEO of Facebook will visit India shortly to meet Murari Mahaseth and to visit Tajmahal.',
        {
            "entities": [
                (0, 4, 'LOC'),
                (40, 48, 'ORG'),
                (60, 65, 'GPE'),
                (82, 97, 'PERSON'),
                (111, 119, 'GPE'),
            ],
        },
    ),
]

def my_evaluate(ner_model, examples):
    """Score the pipeline's predictions against the supplied annotations.

    Every ``(text, annotations)`` pair in ``examples`` is processed by
    ``ner_model`` and combined with its annotations into an ``Example``.
    The full batch is then scored with a fresh ``Scorer`` and the resulting
    scores are returned.
    """
    scorer = Scorer()
    scored_pairs = []
    for text, gold in examples:
        predicted_doc = ner_model(text)
        # Echo the processed doc next to its gold annotations for inspection.
        print(predicted_doc, gold)
        scored_pairs.append(Example.from_dict(predicted_doc, gold))
    return scorer.score(scored_pairs)

# Load the pipeline, evaluate it on the examples defined above, and print the
# resulting score dictionary.
ner_model = spacy.load('en_core_web_sm') # for spaCy's pretrained use 'en_core_web_sm'
results = my_evaluate(ner_model, examples)
print(results)

Output

Who is Talha Tayyab? {'entities': [(7, 19, 'PERSON')]}
I like London and Berlin. {'entities': [(7, 13, 'LOC'), (18, 24, 'LOC')]}
Agra is famous for Tajmahal, The CEO of Facebook will visit India shortly to meet Murari Mahaseth and to visit Tajmahal. {'entities': [(0, 4, 'LOC'), (40, 48, 'ORG'), (60, 65, 'GPE'), (82, 97, 'PERSON'), (111, 119, 'GPE')]}
{'token_acc': 1.0, 'token_p': 1.0, 'token_r': 1.0, 'token_f': 1.0, 'sents_p': None, 'sents_r': None, 'sents_f': None, 'tag_acc': None, 'pos_acc': None, 'morph_acc': None, 'morph_micro_p': None, 'morph_micro_r': None, 'morph_micro_f': None, 'morph_per_feat': None, 'dep_uas': None, 'dep_las': None, 'dep_las_per_type': None, 'ents_p': 0.4444444444444444, 'ents_r': 0.5, 'ents_f': 0.47058823529411764, 'ents_per_type': {'PERSON': {'p': 0.6666666666666666, 'r': 1.0, 'f': 0.8}, 'GPE': {'p': 0.3333333333333333, 'r': 0.5, 'f': 0.4}, 'LOC': {'p': 0.0, 'r': 0.0, 'f': 0.0}, 'ORG': {'p': 0.3333333333333333, 'r': 1.0, 'f': 0.5}}, 'cats_score': 0.0, 'cats_score_desc': 'macro F', 'cats_micro_p': 0.0, 'cats_micro_r': 0.0, 'cats_micro_f': 0.0, 'cats_macro_p': 0.0, 'cats_macro_r': 0.0, 'cats_macro_f': 0.0, 'cats_macro_auc': 0.0, 'cats_f_per_type': {}, 'cats_auc_per_type': {}}
Answered By: scarpacci