44 lines
1.1 KiB
Python
44 lines
1.1 KiB
Python
"""
|
|
The implementations are adapted from https://github.com/tylin/coco-caption/
|
|
blob/master/pycocoevalcap/bleu/bleu.py
|
|
"""
|
|
|
|
from federatedscope.nlp.metric.bleu.bleu_scorer import BleuScorer
|
|
|
|
|
|
class Bleu(object):
|
|
"""
|
|
The implementation of BLEU refer to 'Bleu: a method for automatic
|
|
evaluation of machine translation.' [Papineni et al., 2002]
|
|
(https://aclanthology.org/P02-1040.pdf)
|
|
"""
|
|
def __init__(self, n=4):
|
|
# default compute Blue score up to 4
|
|
self._n = n
|
|
self._hypo_for_image = {}
|
|
self.ref_for_image = {}
|
|
|
|
def compute_score(self, gts, res):
|
|
|
|
assert (gts.keys() == res.keys())
|
|
imgIds = gts.keys()
|
|
|
|
bleu_scorer = BleuScorer(n=self._n)
|
|
for id in imgIds:
|
|
hypo = res[id]
|
|
ref = gts[id]
|
|
|
|
# Sanity check.
|
|
assert (type(hypo) is list)
|
|
assert (len(hypo) == 1)
|
|
assert (type(ref) is list)
|
|
assert (len(ref) >= 1)
|
|
|
|
bleu_scorer += (hypo[0], ref)
|
|
|
|
score, scores = bleu_scorer.compute_score(option='closest', verbose=0)
|
|
return score, scores
|
|
|
|
def method(self):
|
|
return "Bleu"
|