ICAIL_2023/rouge.py


								import utils


								# Weight used in the F-measure of rouge_l:
								#   f = (1 + beta^2) * recall * precision / (recall + beta^2 * precision)
								# With beta = 1 this reduces to 2 * precision * recall / (precision + recall).
								beta = 1

								# Reference link kept from the original file (presumably the paper the ROUGE
								# implementations below follow - confirm):
								# https://aclanthology.org/W04-1013/


								def rouge_n(reference_summary, created_summary, n, pp_options=None, extended_results=False):
								    """
								    Calculates the ROUGE-N score of a created summary against a reference summary.

								    The score is based on the clipped n-gram overlap: every n-gram of the reference
								    contributes the smaller of its counts in the reference and in the created summary.

								    :param reference_summary: gold standard summary
								    :param created_summary: summary to evaluate
								    :param n: size of n-grams
								    :param pp_options: list of options handed to utils.preprocess_text; if None, no
								        preprocessing is done and only a blank is inserted in front of every entry of
								        utils.sentence_marks
								    :param extended_results: indicates whether precision, recall and f-measure should
								        be returned instead of the recall alone
								    :return: the recall, or (precision, recall, f-measure) if extended results are
								        wanted. If the reference contains no n-grams the result is 0, respectively
								        (0.0, 0.0, 0.0).
								    """
								    if pp_options is not None:
								        reference_summary = utils.preprocess_text(reference_summary, pp_options)
								        created_summary = utils.preprocess_text(created_summary, pp_options)
								    else:
								        # text is treated as already preprocessed: only separate sentence marks from tokens
								        for sentence_mark in utils.sentence_marks:
								            reference_summary = reference_summary.replace(sentence_mark, ' ' + sentence_mark)
								            created_summary = created_summary.replace(sentence_mark, ' ' + sentence_mark)

								    # count the occurrences of every n-gram in both texts
								    reference_ngrams, ref_complete_count = count_n_grams(reference_summary, n)
								    created_ngrams, created_complete_count = count_n_grams(created_summary, n)

								    # clipped overlap: an n-gram cannot be matched more often than it occurs in either text
								    overlapping_count = sum(
								        min(ref_count, created_ngrams[n_gram])
								        for n_gram, ref_count in reference_ngrams.items()
								        if n_gram in created_ngrams
								    )

								    if ref_complete_count == 0:
								        # keep the promised return shape even for an empty reference
								        return (0.0, 0.0, 0.0) if extended_results else 0

								    recall = overlapping_count / ref_complete_count
								    if not extended_results:
								        return recall

								    # an empty created summary has no n-grams, so its precision is defined as 0
								    precision = overlapping_count / created_complete_count if created_complete_count else 0.0
								    if precision + recall == 0:
								        # no overlap at all: the harmonic mean would divide by zero
								        return precision, recall, 0.0
								    return precision, recall, (2 * precision * recall) / (precision + recall)


								def count_n_grams(pp_summary, n):
								    """
								    Counts the n-grams of the given size in a summary.

								    The summary is split at single blanks. Empty tokens, which appear when several
								    blanks follow each other, are dropped for every n so that they can neither be
								    counted themselves nor become part of a longer n-gram.

								    :param pp_summary: Pre-processed summary
								    :param n: n for the size of ngrams; for n < 1 no n-grams are produced
								    :return: ({ngram: count}, total number of ngrams) for all ngrams in the summary,
								        where an n-gram is the blank-joined string of its tokens
								    """
								    if n < 1:
								        # slices of non-positive width would yield empty or meaningless n-grams
								        return {}, 0

								    tokens = [token for token in pp_summary.split(' ') if token]
								    n_grams = {}
								    # there are len(tokens) - n + 1 windows of width n (none if the text is too short)
								    for start in range(len(tokens) - n + 1):
								        n_gram = ' '.join(tokens[start:start + n])
								        n_grams[n_gram] = n_grams.get(n_gram, 0) + 1
								    return n_grams, max(len(tokens) - n + 1, 0)


								def rouge_l(reference_summary, created_summary, pp_options=None, extended_results=False):
								    """
								    Calculates the rouge-l value of a summary and its gold standard summary

								    Every reference sentence is compared with every created sentence via
								    get_subsequence. The matched word indices are collected per sentence as sets, so
								    a word that is matched in several comparisons is counted only once. The number of
								    matched words is the smaller of the totals on the reference and the created side.

								    :param reference_summary: Gold standard summary
								    :param created_summary: Created summary to compare
								    :param pp_options: options handed to utils.preprocess_text, if None then there
								        will be no preprocessing
								    :param extended_results: if True, precision, recall and f-score will be returned
								    :return: The f-measure (weighted by the module-level beta), or
								        (precision, recall, f-measure) if extended results are wanted. Without any
								        matched word the result is 0, respectively (0.0, 0.0, 0.0).
								    """
								    if pp_options is not None:  # otherwise the text counts as already preprocessed
								        reference_summary = utils.preprocess_text(reference_summary, pp_options)
								        created_summary = utils.preprocess_text(created_summary, pp_options)

								    # word totals are taken from the whole text (split at single blanks) before it
								    # is broken up into sentences
								    m_reference_word_number = len(reference_summary.split(' '))
								    reference_sentences = utils.split_into_sentences(reference_summary)
								    n_created_word_number = len(created_summary.split(' '))
								    created_sentences = utils.split_into_sentences(created_summary)

								    # one independent index set per sentence, so that every word is used only once
								    used_gold_indices = [set() for _ in reference_sentences]
								    used_created_indices = [set() for _ in created_sentences]
								    for j, ref_sentence in enumerate(reference_sentences):
								        # union of the subsequences of this reference sentence with all created sentences
								        for i, created_sentence in enumerate(created_sentences):
								            indices_a, indices_b = get_subsequence(ref_sentence, created_sentence)
								            used_gold_indices[j].update(indices_a)
								            used_created_indices[i].update(indices_b)

								    matched_gold = sum(len(indices) for indices in used_gold_indices)
								    matched_created = sum(len(indices) for indices in used_created_indices)
								    # the smaller total ensures that no word on either side is credited twice
								    total_sum_subsequences = min(matched_gold, matched_created)

								    if total_sum_subsequences == 0:
								        # keep the promised return shape even if nothing matched
								        return (0.0, 0.0, 0.0) if extended_results else 0

								    p_lcs = total_sum_subsequences / n_created_word_number
								    r_lcs = total_sum_subsequences / m_reference_word_number
								    f_lcs = ((1 + beta * beta) * r_lcs * p_lcs) / (r_lcs + beta * beta * p_lcs)
								    if extended_results:
								        return p_lcs, r_lcs, f_lcs
								    return f_lcs


								def get_subsequence(sent_a, sent_b):
								    """
								    Finds a longest common (not necessarily consecutive) subsequence of the words of
								    a and b.

								    Both sentences are split at single blanks and the returned indices refer to the
								    positions in these splits. Empty tokens (from consecutive blanks) never match.
								    Every word of either sentence is matched at most once and the matched words
								    appear in the same order in both sentences.

								    :param sent_a: Sentence to find subsequences from
								    :param sent_b: Sentence to find subsequence in
								    :return: (ind_a, ind_b) two sets of indices of sent_a and sent_b of the longest
								        subsequence; both sets have the same size
								    """
								    words_a = sent_a.split(' ')
								    words_b = sent_b.split(' ')
								    len_a, len_b = len(words_a), len(words_b)

								    # lcs_len[i][j] = length of the longest common subsequence of words_a[i:] and words_b[j:]
								    lcs_len = [[0] * (len_b + 1) for _ in range(len_a + 1)]
								    for i in range(len_a - 1, -1, -1):
								        for j in range(len_b - 1, -1, -1):
								            if words_a[i] and words_a[i] == words_b[j]:
								                lcs_len[i][j] = lcs_len[i + 1][j + 1] + 1
								            else:
								                lcs_len[i][j] = max(lcs_len[i + 1][j], lcs_len[i][j + 1])

								    # walk through the table from the front to recover the positions that were matched
								    result_a, result_b = set(), set()
								    i = j = 0
								    while i < len_a and j < len_b:
								        if words_a[i] and words_a[i] == words_b[j]:
								            result_a.add(i)
								            result_b.add(j)
								            i += 1
								            j += 1
								        elif lcs_len[i + 1][j] >= lcs_len[i][j + 1]:
								            i += 1  # skipping the word of a loses nothing
								        else:
								            j += 1
								    return result_a, result_b


								# Script entry point: running this module directly only prints a confirmation.
								if __name__ == "__main__":


								    print('Done')