"""ROUGE-N and ROUGE-L scores for comparing a created summary with a reference summary."""
import utils
# Weight of precision in the denominator of the ROUGE-L F-measure computed in
# rouge_l: F = (1 + beta^2) * R * P / (R + beta^2 * P). With beta = 1 this is
# the harmonic mean of precision and recall.
beta = 1

# ROUGE reference: https://aclanthology.org/W04-1013/
def rouge_n(reference_summary, created_summary, n, pp_options=None, extended_results=False):
    """
    Calculates the ROUGE-N score of a created summary against a reference summary.

    The overlap is the clipped n-gram match count: for every n-gram of the
    reference, the smaller of its counts in the two summaries. Recall divides
    the overlap by the number of n-grams in the reference, precision divides
    it by the number of n-grams in the created summary.

    :param reference_summary: gold standard summary
    :param created_summary: summary to evaluate
    :param n: size of n-grams
    :param pp_options: list of options for preprocessing, if None then no preprocessing will be done
    :param extended_results: indicates whether precision, recall and f-measure should be returned
    :return: the recall, or (precision, recall, f-measure) if extended results are wanted.
        All values are 0 when the corresponding denominator would be 0.
    """
    if pp_options is not None:
        reference_summary = utils.preprocess_text(reference_summary, pp_options)
        created_summary = utils.preprocess_text(created_summary, pp_options)
    else:
        # No preprocessing requested: only detach sentence marks from the
        # preceding token so that they become tokens of their own.
        for sentence_mark in utils.sentence_marks:
            reference_summary = reference_summary.replace(sentence_mark, ' ' + sentence_mark)
            created_summary = created_summary.replace(sentence_mark, ' ' + sentence_mark)

    # Count the occurrences of every n-gram of size n in both summaries.
    reference_ngrams, ref_complete_count = count_n_grams(reference_summary, n)
    created_ngrams, created_complete_count = count_n_grams(created_summary, n)

    # Clipped overlap: an n-gram counts at most as often as it occurs in either text.
    overlapping_count = sum(
        min(ref_count, created_ngrams[ngram])
        for ngram, ref_count in reference_ngrams.items()
        if ngram in created_ngrams
    )

    if ref_complete_count == 0:
        # Nothing to recall; keep the documented return shape in both modes.
        return (0, 0, 0) if extended_results else 0

    recall = overlapping_count / ref_complete_count
    if not extended_results:
        return recall

    # Guard both divisions: an empty created summary has no n-grams, and
    # without any overlap precision + recall is 0.
    precision = overlapping_count / created_complete_count if created_complete_count else 0.0
    if precision + recall == 0:
        f_measure = 0.0
    else:
        f_measure = (2 * precision * recall) / (precision + recall)
    return precision, recall, f_measure
def count_n_grams(pp_summary, n):
    """
    Counts the n-grams of the given size in a summary.

    The summary is split on single spaces. Empty tokens (produced by leading,
    trailing or repeated spaces) are dropped before the n-grams are built, so
    they can never become part of an n-gram.

    :param pp_summary: Pre-processed summary
    :param n: n for the size of ngrams
    :return: ({ngram: count}, total) where the dict maps every n-gram (words
        joined by a single space) to its number of occurrences and total is
        the overall number of n-grams. ({}, 0) if n < 1 or the summary has
        fewer than n words.
    """
    n_grams = {}
    if n < 1:
        # There is no such thing as an n-gram of non-positive size.
        return n_grams, 0

    words = [word for word in pp_summary.split(' ') if word]
    complete_count = 0
    # The last n-gram starts at index len(words) - n.
    for start in range(len(words) - n + 1):
        n_gram = ' '.join(words[start:start + n])
        n_grams[n_gram] = n_grams.get(n_gram, 0) + 1
        complete_count += 1
    return n_grams, complete_count
def rouge_l(reference_summary, created_summary, pp_options=None, extended_results=False):
    """
    Calculates the rouge-l value of a summary and its gold standard summary

    Every reference sentence is compared with every created sentence via
    get_subsequence. The matched word indices are collected per sentence
    (as sets, so a word is counted once however often it is matched), and
    the smaller of the two totals is used as the subsequence length.

    :param reference_summary: Gold standard summary
    :param created_summary: Created summary to compare
    :param pp_options: options for preprocessing, if None then there will be no preprocessing
    :param extended_results: if True, precision, recall and f-score will be returned
    :return: The calculated f-measure, or (precision, recall, f-measure) if
        extended results are wanted. All values are 0 if nothing matches.
    """
    if pp_options is not None:  # otherwise the text is taken as already preprocessed
        reference_summary = utils.preprocess_text(reference_summary, pp_options)
        created_summary = utils.preprocess_text(created_summary, pp_options)

    # Word totals are taken from the full texts before they are split into sentences.
    m_reference_word_number = len(reference_summary.split(' '))
    n_created_word_number = len(created_summary.split(' '))
    # NOTE(review): split_into_sentences is assumed to return a list of
    # sentence strings - confirm against utils.
    reference_sentences = utils.split_into_sentences(reference_summary)
    created_sentences = utils.split_into_sentences(created_summary)

    # One independent index set per sentence, so every word is used only once.
    used_gold_indices = [set() for _ in reference_sentences]
    used_created_indices = [set() for _ in created_sentences]
    for j, ref_sentence in enumerate(reference_sentences):
        # Union of the subsequences with every created sentence.
        for i, created_sentence in enumerate(created_sentences):
            indices_a, indices_b = get_subsequence(ref_sentence, created_sentence)
            used_gold_indices[j].update(indices_a)
            used_created_indices[i].update(indices_b)

    gold_total = sum(len(indices) for indices in used_gold_indices)
    created_total = sum(len(indices) for indices in used_created_indices)
    # Take the smaller side so that no word is credited twice.
    total_sum_subsequences = min(gold_total, created_total)

    if total_sum_subsequences == 0:
        # Keep the documented return shape in both modes.
        return (0, 0, 0) if extended_results else 0

    p_lcs = total_sum_subsequences / n_created_word_number
    r_lcs = total_sum_subsequences / m_reference_word_number
    f_lcs = ((1 + beta * beta) * r_lcs * p_lcs) / (r_lcs + beta * beta * p_lcs)
    if extended_results:
        return p_lcs, r_lcs, f_lcs
    return f_lcs
def get_subsequence(sent_a, sent_b):
    """
    Finds a longest common (not necessarily consecutive) subsequence of words
    of two sentences.

    Both sentences are split on single spaces and the returned indices refer
    to positions in those splits. Empty tokens (from repeated spaces or empty
    sentences) are never matched. Every word of either sentence is matched at
    most once, so both returned sets have the same size.

    :param sent_a: Sentence to find subsequences from
    :param sent_b: Sentence to find subsequence in
    :return: (ind_a, ind_b) two sets of indices of sent_a and sent_b of the longest subsequence
    """
    words_a = sent_a.split(' ')
    words_b = sent_b.split(' ')
    len_a, len_b = len(words_a), len(words_b)

    # lcs[i][j] is the length of the longest common subsequence of
    # words_a[i:] and words_b[j:]; the extra row/column is the empty suffix.
    lcs = [[0] * (len_b + 1) for _ in range(len_a + 1)]
    for i in range(len_a - 1, -1, -1):
        word_a = words_a[i]
        for j in range(len_b - 1, -1, -1):
            if word_a and word_a == words_b[j]:
                lcs[i][j] = lcs[i + 1][j + 1] + 1
            else:
                lcs[i][j] = max(lcs[i + 1][j], lcs[i][j + 1])

    # Walk the table from the front to recover the matched positions.
    result_a, result_b = set(), set()
    i = j = 0
    while i < len_a and j < len_b:
        if words_a[i] and words_a[i] == words_b[j]:
            result_a.add(i)
            result_b.add(j)
            i += 1
            j += 1
        elif lcs[i + 1][j] >= lcs[i][j + 1]:
            # Skipping the current word of a loses nothing.
            i += 1
        else:
            j += 1
    return result_a, result_b
# Script entry point: running the module directly only prints a completion marker.
if __name__ == "__main__":

    print('Done')