You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

148 lines
6.1 KiB

import utils
# Weight used by rouge_l when combining LCS precision and recall into the
# F-measure: F = (1 + beta^2) * R * P / (R + beta^2 * P).
beta = 1
def rouge_n(reference_summary, created_summary, n, pp_options=None, extended_results=False):
    """
    Calculates the ROUGE-N score of a created summary against a reference summary.

    The score is based on the clipped n-gram overlap: every n-gram of the
    reference contributes the smaller of its counts in the two summaries.

    :param reference_summary: gold standard summary
    :param created_summary: summary to evaluate
    :param n: size of n-grams
    :param pp_options: list of options for preprocessing, if None then no preprocessing will be done
    :param extended_results: indicates, whether, precision, recall and f-measure should be returned
    :return: the recall, or (precision, recall, f-measure) if extended results are wanted.
             The plain value 0 is returned whenever the reference contains no n-grams.
    """
    if pp_options is not None:
        reference_summary = utils.preprocess_text(reference_summary, pp_options)
        created_summary = utils.preprocess_text(created_summary, pp_options)
    else:
        # No preprocessing requested: only detach sentence marks from the
        # token in front of them so they are counted as separate tokens.
        for sentence_mark in utils.sentence_marks:
            reference_summary = reference_summary.replace(sentence_mark, ' ' + sentence_mark)
            created_summary = created_summary.replace(sentence_mark, ' ' + sentence_mark)

    # Count the occurrences of every n-gram in both summaries.
    reference_ngrams, ref_complete_count = count_n_grams(reference_summary, n)
    created_ngrams, created_complete_count = count_n_grams(created_summary, n)

    # Clipped overlap: an n-gram cannot match more often than it occurs
    # in either of the two summaries.
    overlapping_count = sum(
        min(ref_count, created_ngrams[n_gram])
        for n_gram, ref_count in reference_ngrams.items()
        if n_gram in created_ngrams
    )

    if ref_complete_count == 0:
        # NOTE: kept as a scalar (even for extended results) so existing
        # callers that compare the result against 0 keep working.
        return 0

    recall = overlapping_count / ref_complete_count
    if not extended_results:
        return recall

    # An empty created summary has no n-grams; its precision is defined as 0
    # instead of raising ZeroDivisionError.
    if created_complete_count == 0:
        precision = 0.0
    else:
        precision = overlapping_count / created_complete_count

    # Without any overlap both precision and recall are 0 and the harmonic
    # mean is undefined; report 0 instead of dividing by zero.
    if precision + recall == 0:
        f_measure = 0.0
    else:
        f_measure = (2 * precision * recall) / (precision + recall)
    return precision, recall, f_measure
def count_n_grams(pp_summary, n):
    """
    Counts the n-grams of the given size in a summary.

    Tokens are obtained by splitting on any run of whitespace, so repeated,
    leading or trailing blanks never produce empty tokens or n-grams that
    contain them.

    :param pp_summary: Pre-processed summary (a single string)
    :param n: n for the size of ngrams; values below 1 yield no n-grams
    :return: ({ngram: count}, total) where every ngram is the space-joined
             token sequence and total is the number of n-gram occurrences
    """
    if n < 1:
        # A non-positive size has no meaningful n-grams.
        return {}, 0

    words = pp_summary.split()
    # Number of sliding windows of size n; zero if the text is too short.
    complete_count = max(len(words) - n + 1, 0)

    n_grams = {}
    for start in range(complete_count):
        n_gram = ' '.join(words[start:start + n])
        n_grams[n_gram] = n_grams.get(n_gram, 0) + 1
    return n_grams, complete_count
def rouge_l(reference_summary, created_summary, pp_options=None, extended_results=False):
    """
    Calculates the rouge-l value of a summary and its gold standard summary.

    Every reference sentence is compared with every created sentence; the
    matched word indices are collected per sentence and the smaller of the
    two totals is used as the common subsequence length.

    :param reference_summary: Gold standard summary
    :param created_summary: Created summary to compare
    :param pp_options: options for preprocessing, if None then there will be no preprocessing
    :param extended_results: if True, precision, recall and f-score will be returned
    :return: The f-measure, or (precision, recall, f-measure) if extended results
             are wanted. The plain value 0 is returned when nothing matches.
    """
    if pp_options is not None:  # text is taken as-is otherwise
        reference_summary = utils.preprocess_text(reference_summary, pp_options)
        created_summary = utils.preprocess_text(created_summary, pp_options)

    # Word totals are taken from the full text before it is cut into sentences.
    reference_word_total = len(reference_summary.split(' '))
    reference_sentences = utils.split_into_sentences(reference_summary)
    created_word_total = len(created_summary.split(' '))
    created_sentences = utils.split_into_sentences(created_summary)

    # One index set per sentence, so a word is counted at most once no
    # matter how many sentences of the other summary it matches.
    matched_in_reference = [set() for _ in reference_sentences]
    matched_in_created = [set() for _ in created_sentences]
    for ref_pos, ref_sentence in enumerate(reference_sentences):
        for created_pos, created_sentence in enumerate(created_sentences):
            ref_hits, created_hits = get_subsequence(ref_sentence, created_sentence)
            matched_in_reference[ref_pos].update(ref_hits)
            matched_in_created[created_pos].update(created_hits)

    reference_matches = sum(len(hits) for hits in matched_in_reference)
    created_matches = sum(len(hits) for hits in matched_in_created)
    lcs_total = min(reference_matches, created_matches)
    if lcs_total == 0:
        return 0

    precision = lcs_total / created_word_total
    recall = lcs_total / reference_word_total
    beta_squared = beta * beta
    f_score = ((1 + beta_squared) * recall * precision) / (recall + beta_squared * precision)

    if extended_results:
        return precision, recall, f_score
    return f_score
def get_subsequence(sent_a, sent_b):
    """
    Finds the longest common (not necessarily consecutive) word subsequence of a and b.

    Both sentences are split on whitespace and compared word by word with
    the classic dynamic-programming LCS table, so every word of either
    sentence takes part in at most one match and the relative order of the
    matched words is the same in both sentences.

    :param sent_a: Sentence to find subsequences from
    :param sent_b: Sentence to find subsequence in
    :return: (ind_a, ind_b) two sets of word indices of sent_a and sent_b of the longest subsequence
    """
    words_a = sent_a.split()
    words_b = sent_b.split()
    len_a = len(words_a)
    len_b = len(words_b)

    # lengths[i][j] is the LCS length of words_a[i:] and words_b[j:].
    lengths = [[0] * (len_b + 1) for _ in range(len_a + 1)]
    for i in range(len_a - 1, -1, -1):
        for j in range(len_b - 1, -1, -1):
            if words_a[i] == words_b[j]:
                lengths[i][j] = lengths[i + 1][j + 1] + 1
            else:
                lengths[i][j] = max(lengths[i + 1][j], lengths[i][j + 1])

    # Walk the table from the front to recover which positions were matched.
    result_a = set()
    result_b = set()
    i = 0
    j = 0
    while i < len_a and j < len_b:
        if words_a[i] == words_b[j]:
            result_a.add(i)
            result_b.add(j)
            i += 1
            j += 1
        elif lengths[i + 1][j] >= lengths[i][j + 1]:
            # Skipping the word of a loses nothing.
            i += 1
        else:
            j += 1
    return result_a, result_b
if __name__ == "__main__":