commit 13d30ecd6fabcc757af24610f6dff9a46ea6c3f3 Author: Bianca Steffes Date: Wed Apr 26 14:43:45 2023 +0200 Added Code and data diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9eafcfe --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.idea +__pycache__ +data/dataframes +rouge_evalauation/dataframes +rouge_evalauation/figures +rouge_evalauation/evaluated_data/second_eval/AufeinanderfolgendeSätze.xlsx +rouge_evalauation/manual_evaluation diff --git a/data/download_rii.py b/data/download_rii.py new file mode 100644 index 0000000..662d134 --- /dev/null +++ b/data/download_rii.py @@ -0,0 +1,214 @@ +import time +import xml.etree.ElementTree as ET +import urllib.request as request +import zipfile +import os + +import pandas as pd + +import settings +import utils +from utils import time_convert + +base_dir_bgh = 'raw_data/BGH_Data' +extended_dir_bgh = base_dir_bgh + '/senates' +dataframe_dir_bgh = 'dataframes/bgh/' +pickle_name_bgh = 'bgh_data.pkl' +simple_attributes = ["doknr", "ecli", "gertyp", "gerort", "spruchkoerper", "entsch-datum", + "aktenzeichen", "doktyp", "norm", "vorinstanz", "mitwirkung", "titelzeile", + "leitsatz", "sonstosatz", "tenor", "tatbestand", "entscheidungsgruende", + "gruende", "abwmeinung", "sonstlt", "identifier", "coverage", "language", + "publisher", "accessRights"] +nested_attributes = ["region_abk", "region_long"] +text_attributes = ["titelzeile", "leitsatz", "sonstosatz", "tenor", "tatbestand", + "entscheidungsgruende", "gruende", "abwmeinung", "sonstlt"] +stopword_extension = '_no_stopwords' +current_path = 'data' + + +def get_file_list(): + """ + Makes http request for the files + :return: the web page with all current cases as an xml-tree + """ + xml_file, https_message = request.urlretrieve('https://www.rechtsprechung-im-internet.de/rii-toc.xml') + tree = ET.parse(xml_file) + root = tree.getroot() + return root + + +def count_cases(root, tag): + """ + counts all cases belonging to the given tag and returns the count + :param root: downloaded xml-tree with all files + :param tag: tag to find in the name + :return: number of cases belonging to the BGH + """ + count = 0 + for child in root: + if tag in child[0].text: + count += 1 + return count + + +def download(base_dir, extended_dir, tag): + """ + download all cases to a folder related to their senats + :param base_dir: Name of the directory for the data + :param extended_dir: name of the subdirectory for saving + :param tag: tag to recognize the court (BGH, BVerwG) + """ + # set up directories + utils.create_dir(current_path=current_path, directory_name=base_dir) + utils.create_dir(current_path=current_path, directory_name=extended_dir) + # do the download + root = get_file_list() # 0 ist gericht, 3 ist link + max_cases = count_cases(root, tag) + downloaded = 0 + for child in root: + while True: + try: + if tag in child[0].text: + filename, http = request.urlretrieve(child[3].text) + with zipfile.ZipFile(filename, 'r') as zip_ref: + zip_ref.extractall( + utils.server_path(current_path=current_path, + path=extended_dir + '/' + child[0].text.replace('\n', '') + '/')) + os.remove(filename) + downloaded += 1 + print("\rDownloaded %d of %d " % (downloaded, max_cases) + tag + "Cases", end="") + finally: + break + print("\nDone!") + + +def read_file_data(file): + """ + Reads the data of one case / file. 
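+    While joining the XML text nodes, whitespace before the sentence marks '.', ',', ';', '!' and '?' is removed.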
+ + :param file: package containing (filename, directory, directory extension) to address the file + :return: a dictionary with key: attribute_name and value: attribute_value + """ + filename, directory, extended_dir = file + tree = ET.parse(utils.server_path(current_path=current_path, path=os.path.join(extended_dir, directory, filename))) + root = tree.getroot() + res = {} + for attribute in simple_attributes: + attr = root.find(attribute) # leitsatz überprüfen: zwei Worte zusammen, aber leerzeichen immer noch da! + text = '' + for t in attr.itertext(): + if t == '.' or t == ',' or t == ';' or t == '!' or t == '?': + text = text.strip() # remove space before these characters + text += t + ' ' + text = text.strip() + if text == '': + res[attribute] = None + else: + res[attribute] = text + + for attribute in nested_attributes: + nesting = attribute.split('_') + xml_tag = root + # find nested attribute + for i in range(len(nesting)): + xml_tag = xml_tag.find(nesting[i]) + text = "" + for t in xml_tag.itertext(): + if t == '.' or t == ',' or t == ';' or t == '!' or t == '?': + text = text.strip() # remove space before these characters + text += t + ' ' + text = text.strip() + if text == '': + res[attribute] = None + else: + res[attribute] = text + + for attribute in utils.rii_text_columns: + if res[attribute] is not None: + if settings.remove_brackets: + res[attribute] = utils.remove_brackets(res[attribute]) + res[attribute] = utils.remove_spaces_before_sentence_marks(res[attribute]) + + return pd.DataFrame(res, index=[0]) + + +def create_pickle(extended_dir, pickle_name, steps): + """ + Combines all downloaded files of the given extended directory into one pickle + + :param extended_dir: extended dir to find the files + :param pickle_name: name of the pickle to save + :param steps: how many cases should be worked on now + """ + utils.create_dir(current_path=current_path, directory_name=dataframe_dir_bgh, delete=False) + start_time = time.time() + extension = '' + if settings.remove_brackets: + extension = settings.no_brackets_suffix + + files = [(filename, directory, extended_dir) for directory in + utils.list_dir_files(current_path=current_path, path=extended_dir) for filename in + utils.list_dir_files(current_path=current_path, path=os.path.join(extended_dir, directory)) + if filename.endswith(".xml")] + + original_length = len(files) + data = pd.DataFrame(columns=simple_attributes + nested_attributes) + + pickle_path = dataframe_dir_bgh+extension+pickle_name + + files, data = utils.get_step_subset_raw(steps=steps, + path_to_dest_dataframe=pickle_path, + source_data=files, + dest_data=data, + call_path=current_path) + + result = utils.parallel_imap(read_file_data, files) + for row in result: + data = pd.concat([data, row], ignore_index=True) + with utils.open_file(current_path=current_path, path=pickle_path, modes='wb') as f: + data.to_pickle(f) + + print('Resulting dataframes have length ' + str(data.shape[0]) + + ' (' + str(data.shape[0] / original_length * 100) + '%)') + end_time = time.time() + time_lapsed = end_time - start_time + time_convert(time_lapsed) + + +def get_selected_bgh_data(directory='.\\'): + """ + Shortcut for getting the BGH data currently needed. 
+    Selects all data from the civil senates that contain 'Urteile'
+
+    :param directory: directory offset from current position, with ending slashes
+    :return: the data
+    """
+    return get_data(pickle_name_bgh, directory, spruchkoerper='Zivilsenat', doktyp='Urteil')
+
+
+def get_data(pickle_name, directory='../data/', spruchkoerper=None, doktyp=None):
+    """
+    Method for access to the BGH pickle
+    :param pickle_name: name to identify the data
+    :param directory: directory path to the data file (with ending slash)
+    :param spruchkoerper: Parameter can be used to select the senates (checks whether the given string is contained
+        in the data's spruchkoerper)
+    :param doktyp: can be used to select specific documents (like 'Urteil', 'Beschluss', etc.), must contain the word
+    :return: The data as a pandas dataframe
+    """
+    extension = ''
+    if settings.remove_brackets:
+        extension = settings.no_brackets_suffix
+    data = utils.df_from_pickle(current_path=current_path, path=directory + dataframe_dir_bgh + extension + pickle_name)
+    if spruchkoerper is not None:
+        data = data[data['spruchkoerper'].notnull()]
+        data = data[data['spruchkoerper'].str.contains(spruchkoerper)]
+    if doktyp is not None:
+        data = data[data['doktyp'].str.lower().str.contains(doktyp.lower())]
+    data = data.dropna(axis=1, how='all')  # drop all columns with no value
+    data = data.drop_duplicates()
+    return data
+
+
+# if __name__ == "__main__":
+    # download(base_dir=base_dir_bgh, extended_dir=extended_dir_bgh, tag='BGH')
+    # create_pickle(extended_dir=extended_dir_bgh, pickle_name=pickle_name_bgh, steps=2)
diff --git a/pm_summary/annotation_evaluation.py b/pm_summary/annotation_evaluation.py
new file mode 100644
index 0000000..87adfba
--- /dev/null
+++ b/pm_summary/annotation_evaluation.py
@@ -0,0 +1,47 @@
+import os
+
+import pandas as pd
+
+
+import utils
+
+pm_sent_no = 'pm_sent_number'
+pm_sent = 'Sätze der Pressemitteilung'
+judgement_sent_no = 'judgement_sent_number'
+judgement_sent = 'Dazu passende Sätze des Urteils'
+keywords = 'Schlagworte'
+comments = 'Anmerkung'
+duration = 'Wie lange hast Du für die Bearbeitung dieses Urteils gebraucht?'
+bad_pm = 'Ist diese Pressemitteilung eine schlechte Darstellung / Zusammenfassung des Urteils?'
+current_dir = 'pm_summary/'
+
+
+def prepare_file(path):
+    """
+    Reads a file and converts it into a uniform format.
+
+    :param path: path to the file.
+    :return: dictionary holding the results. For every sentence of the press release there is a sub-dict (number as key).
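+    Illustrative shape of the result (hypothetical sentence texts):
+    {1: {'pm_sent_number': 1, 'Sätze der Pressemitteilung': 'Erster Satz der PM.'},
+     2: {'pm_sent_number': 2, 'Sätze der Pressemitteilung': 'Zweiter Satz der PM.'}}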
+    """
+    res = {}
+    raw_data = pd.read_excel(path, names=[pm_sent_no, pm_sent], header=None)
+    for index, row in raw_data.iterrows():
+        current_sentence = {pm_sent_no: row[pm_sent_no], pm_sent: row[pm_sent]}
+        res[current_sentence[pm_sent_no]] = current_sentence
+    return res
+
+
+def get_all_pm_files():
+    """
+    Returns the list of all annotated pm-files
+
+    :return: [(pm_filename, file_data)*]
+    """
+    file_path_base = utils.server_path(current_path=current_dir,
+                                       path='../rouge_evalauation/evaluated_data/extractive_judgments')
+    res = []
+    for judgment in os.listdir(file_path_base):
+        if '.xlsx' in judgment:
+            filename = file_path_base + '/' + judgment
+            res.append((judgment, prepare_file(filename)))
+    return res
diff --git a/rouge.py b/rouge.py
new file mode 100644
index 0000000..42f924f
--- /dev/null
+++ b/rouge.py
@@ -0,0 +1,148 @@
+import utils
+
+beta = 1
+# https://aclanthology.org/W04-1013/
+
+
+def rouge_n(reference_summary, created_summary, n, pp_options=None, extended_results=False):
+    """
+    Calculates the ROUGE-N score
+
+    :param reference_summary: gold standard summary
+    :param created_summary: summary to evaluate
+    :param n: size of n-grams
+    :param pp_options: list of options for preprocessing, if None then no preprocessing will be done
+    :param extended_results: indicates whether precision, recall and f-measure should be returned
+    :return: the score, or (precision, recall, f-measure) if extended results are wanted
+    """
+    # preprocess
+    if pp_options is not None:  # otherwise don't preprocess. Text is already preprocessed
+        reference_summary = utils.preprocess_text(reference_summary, pp_options)
+        created_summary = utils.preprocess_text(created_summary, pp_options)
+    else:  # separate sentence marks from tokens
+        for sentence_mark in utils.sentence_marks:
+            reference_summary = reference_summary.replace(sentence_mark, ' ' + sentence_mark)
+            created_summary = created_summary.replace(sentence_mark, ' ' + sentence_mark)
+    # split into n-grams of size n
+    # count occurrences of single n-grams
+    reference_ngrams, ref_complete_count = count_n_grams(reference_summary, n)
+    created_ngrams, created_complete_count = count_n_grams(created_summary, n)
+
+    overlapping_count = 0
+    for ref_key in reference_ngrams.keys():
+        created_count = created_ngrams.get(ref_key)
+        if created_count is not None:  # ngrams in both dicts
+            ref_count = reference_ngrams[ref_key]
+            overlapping_count += min(ref_count, created_count)
+
+    # calculate score
+    if ref_complete_count == 0:
+        return 0
+    recall = overlapping_count / ref_complete_count
+    if extended_results:
+        if created_complete_count == 0 or overlapping_count == 0:  # guard against division by zero
+            return 0, recall, 0
+        precision = overlapping_count / created_complete_count
+        return precision, recall, (2 * precision * recall) / (precision + recall)
+    return recall
+
+
+def count_n_grams(pp_summary, n):
+    """
+    Counts the n-grams of the given size in a summary.
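+    For example (illustrative): count_n_grams('a b a b', 2) returns ({'a b': 2, 'b a': 1}, 3).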
+ + :param pp_summary: Pre-processed summary + :param n: n for the size of ngrams + :return: {ngram:count} for all ngrams in the summary + """ + words = pp_summary.split(' ') + complete_count = 0 + n_grams = {} + for i in range(len(words)-(n-1)): + n_gram = ' '.join(words[i:i+n]) + if n_gram != '': + complete_count += 1 + count = n_grams.get(n_gram) + if count is None: + count = 0 + n_grams[n_gram] = count + 1 + return n_grams, complete_count + + +def rouge_l(reference_summary, created_summary, pp_options=None, extended_results=False): + """ + Calculates the rouge-l value of a summary and its gold standard summary + + :param reference_summary: Gold standard summary + :param created_summary: Created summary to compare + :param pp_options: options for preprocessing, if None then there will be no preprocessing + :param extended_results: if True, precision, recall and f-score will be returned + :return: The calculated score, if extended results are wanted (precision, recall, f-measure) + """ + # preprocess + if pp_options is not None: # otherwise don't preprocess. Text is already preprocessed + reference_summary = utils.preprocess_text(reference_summary, pp_options) + created_summary = utils.preprocess_text(created_summary, pp_options) + # seperate sentence marks from words + # split into sentences + m_reference_word_number = len(reference_summary.split(' ')) + reference_summary = utils.split_into_sentences(reference_summary) + n_created_word_number = len(created_summary.split(' ')) + created_summary = utils.split_into_sentences(created_summary) + total_sum_subsequences = 0 + # to make sure every word in the created summary is used only once + used_created_indices = [set()]*len(created_summary) + used_gold_indices = [set()]*len(reference_summary) + for j in range(len(reference_summary)): + ref_sentence = reference_summary[j] + # calculate union longest subsequence + for i in range(len(created_summary)): + created_sentence = created_summary[i] + indices_a, indices_b = get_subsequence(ref_sentence, created_sentence) + used_gold_indices[j] = (used_gold_indices[j]).union(indices_a) + used_created_indices[i] = (used_created_indices[i]).union(indices_b) + # used indices of b here to ensure words arent used twice + used_created_indices = [len(sent_set) for sent_set in used_created_indices] + used_gold_indices = [len(sent_set) for sent_set in used_gold_indices] + total_sum_subsequences = min(sum(used_gold_indices), sum(used_created_indices)) + + if total_sum_subsequences == 0: + return 0 + p_lcs = total_sum_subsequences / n_created_word_number + r_lcs = total_sum_subsequences / m_reference_word_number + f_lcs = ((1 + beta * beta) * r_lcs*p_lcs) / (r_lcs + beta * beta * p_lcs) + if extended_results: + return p_lcs, r_lcs, f_lcs + return f_lcs + + +def get_subsequence(sent_a, sent_b): + """ + Finds all (not necessarily consecutive) subsequences of a in b. 
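+    Only the longest subsequence found by the greedy scan is returned. Example (illustrative): for
+    sent_a='der Kläger obsiegt' and sent_b='der beklagte Kläger obsiegt' the result is ({0, 1, 2}, {0, 2, 3}).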
+ :param sent_a: Sentence to find subsequences from + :param sent_b: Sentence to find subsequence in + :return: (ind_a, ind_b) two sets of indices of sent_a and sent_b of the longest subsequence + """ + result_a = set() + words_a = sent_a.split(' ') + words_b = sent_b.split(' ') + for word_index_a in range(len(words_a)): + word_result = set() + char_index_b = 0 + while word_index_a < len(words_a): + # word is contained + try: + found_index = words_b.index(words_a[word_index_a], char_index_b) + word_result.add(word_index_a) + char_index_b = found_index + word_index_a += 1 + except ValueError: + # word not in b contained, do nth + word_index_a += 1 + if len(word_result) > len(result_a): + result_a = word_result + result_b = set([words_b.index(words_a[a_ind]) for a_ind in result_a]) + return result_a, result_b + + +if __name__ == "__main__": + + print('Done') diff --git a/rouge_evalauation/create_evaluation_files.py b/rouge_evalauation/create_evaluation_files.py new file mode 100644 index 0000000..c059f84 --- /dev/null +++ b/rouge_evalauation/create_evaluation_files.py @@ -0,0 +1,1439 @@ +import math + +import numpy as np +import pandas as pd +import xlsxwriter as xlsxwriter +import seaborn as sns +import matplotlib.pyplot as plt +from nlp_rake import Rake + +import data.download_rii +import pm_summary.annotation_evaluation +import rouge +import settings +import utils +from pm_summary.annotation_evaluation import pm_sent + +current_dir = 'rouge_evalauation/' +eval_path = 'manual_evaluation/' +eval_run_two_path = 'second_eval/' +evaluated_path = 'evaluated_data/' +picture_path = 'figures/' +summary_sentences_path = 'summary_sentences/' +data_path = 'dataframes/' +all_summaries_df_name = 'all_summaries.json' +sum_sentences_df_name = 'summary_sentences.json' +issues_str = 'legal_issue' +abstr_complete_str = 'abstr_complete' +content_complete_str = 'content_complete' +type_str = 'ls_pm' +sent_no_str = 'sentence_number' +original_str = 'original' +interval_start_str = 'interval_start' +sentence_str = 'sentence' +evaluation_str = 'eval' +summary_id_str = 'summary_id:' +summary_str = 'summary' +rouge_r_str = 'rouge_recall' +rouge_p_str = 'rouge_precision' +rouge_f_str = 'rouge_f_measure' +rouge1_str = 'rouge1' +rougel_str = 'rougel' +rouge_type_str = rouge1_str + ' or ' + rougel_str +pp_options = [utils.pp_option_stopwords, utils.pp_option_lemmatize] +cases_one_list = ['I_ZR_23-18', 'I_ZR_139-15', 'III_ZR_35-18', 'III_ZR_42-19', + 'III_ZR_55-19', 'III_ZR_67-18', 'III_ZR_113-18', 'III_ZR_292-17', + 'III_ZR_391-17', 'V_ZR_112-18', 'V_ZR_176-17', 'V_ZR_218-18', + 'V_ZR_254-17', 'V_ZR_273-17', 'VI_ZR_506-17', 'VII_ZR_151-18', + 'VIII_ZR_94-17', 'VIII_ZR_277-16', 'X_ZR_96-17', 'XII_ZR_13-19'] +cases_two_list = ['I_ZR_146-20', 'I_ZR_153-17', 'II_ZR_84-20', 'II_ZR_152-20', + 'III_ZR_25-20', 'III_ZR_79-21', 'IV_ZR_144-21', 'IV_ZR_253-20', + 'V_ZR_8-19', 'V_ZR_299-19', 'VI_ZR_128-20', 'VI_ZR_252-19', + 'VIa_ZR_418-21', 'VII_ZR_78-20', 'VII_ZR_192-20', 'VIII_ZR_21-19', + 'VIII_ZR_66-17', 'X_ZR_107-16', 'XI_ZR_7-19', 'XI_ZR_345-18'] + + +def select_greedy_summary(split_text, gold_summary, interval_aim, eval_func): + """ + Selects a summary from a text in a greedy fashion. + + :param split_text: List of sentences, Text to choose sentences from, already split into sentences! 
+    :param gold_summary: ideal summary
+    :param interval_aim: (start, end) of interval for the intended final rouge score
+    :param eval_func: rouge score to evaluate the summary, as a function with the arguments (created, gold)
+    :return: [(rouge, sentence)*] with sentence the sentence added in that step and rouge the score reached after
+        adding it, one entry for every added sentence
+    """
+    start_aim, end_aim = interval_aim
+    result = []
+    result_summary = ''
+    current_split_text = [sent for sent in split_text]
+    max_rouge = eval_func(result_summary, gold_summary)
+    while max_rouge <= start_aim and len(current_split_text) > 0:
+        new_sent = ''
+        for sent in list(current_split_text):  # iterate over a copy, elements may be removed below
+            var_result_summary = result_summary + ' ' + sent
+            var_result_summary = var_result_summary.strip()
+            new_rouge = eval_func(var_result_summary, gold_summary)
+            if new_rouge > end_aim:
+                current_split_text.remove(sent)
+            elif max_rouge < new_rouge:
+                max_rouge = new_rouge
+                new_sent = sent
+        if new_sent != '':
+            result_summary += ' ' + new_sent
+            result_summary = result_summary.strip()
+            current_split_text.remove(new_sent)
+            result.append((max_rouge, new_sent))
+        else:
+            break
+
+    return result
+
+
+def get_evaluation_data(case_list):
+    """
+    Loads the judgement data and the press release evaluation files, combines and returns them.
+    Removes press releases without Leitsatz or judgement data.
+
+    :param case_list: list of cases for the run
+    :return: [(pm, judg_row_data(aktenzeichen, leitsatz, tenor, entsch_gr))]
+    """
+    ls_data = data.download_rii.get_selected_bgh_data(directory='..//data//')
+    pm_data_list = pm_summary.annotation_evaluation.get_all_pm_files()
+    pm_prepared_list = []
+    for pm_filename, pm_file_data in pm_data_list:
+        if not any(case in pm_filename for case in case_list):
+            continue
+        aktenzeichen = pm_filename.replace('.xlsx', '').replace('_', ' ').replace('-', '/')
+        original_pm = ''
+        for i in range(len(pm_file_data)):
+            sent_dict = pm_file_data[i + 1]
+            original_pm += ' ' + str(sent_dict[pm_sent])
+        pm_prepared_list.append((aktenzeichen, original_pm))
+
+    pm_prepared_list = [(pm,
+                         ls_data[ls_data[utils.aktenzeichen_str] == aktenzeichen]
+                         [[utils.aktenzeichen_str, utils.leitsatz_str, utils.tenor_str,
+                           utils.entscheidungsgruende_str]].squeeze())
+                        for (aktenzeichen, pm) in pm_prepared_list]
+    # remove pms without ls
+    pm_prepared_list = [(pm, row) for (pm, row)
+                        in pm_prepared_list if row[utils.leitsatz_str] is not None]
+
+    return pm_prepared_list
+
+
+def elaborated_sentence_splitting(text_to_split):
+    """
+    Sentence splitting for entscheidungsgruende, with readjusting of the splitting if something went wrong
+
+    :param text_to_split: Raw text
+    :return: split text as list
+    """
+    res = []
+    for sentence in utils.split_into_sentences(text_to_split):
+        first, rest = utils.split_leading_listing(sentence)
+        if first is not None:
+            res.append(first)
+        res.append(rest)
+    res = rejoin_wrong_splitting(res)
+    res = readjust_splitting(res)
+    return res
+
+
+def prepare_sentences(row_data):
+    """
+    Prepares leitsatz, entscheidungsgruende and tenor. Splits them into sentences, removes listings, etc.
+
+    :param row_data: series containing the data
+    :return: l_fin, l_list, eg_list, combined_list with l_fin the leitsatz as a string, l_list leitsatz as list of
+        strings, eg_list the list of entscheidungsgruende sentences and combined_list the list of sentences of
+        entscheidungsgruende and tenor
+    """
+    l_list = utils.prepare_leitsatz(row_data[utils.leitsatz_str])
+    l_fin = ' '.join(l_list)
+
+    eg_list = elaborated_sentence_splitting(row_data[utils.entscheidungsgruende_str])
+    # select entscheidungsgruende II as split sentences
+    eg_list = utils.select_list_subset(eg_list, utils.entsch_gr_start_sentences)
+    eg_list = [sent for sent in eg_list if len(sent.split()) > 1]
+
+    combined_list = eg_list + elaborated_sentence_splitting(row_data[utils.tenor_str])
+    return l_fin, l_list, eg_list, combined_list
+
+
+def rejoin_wrong_splitting(sent_list):
+    """
+    Some sentences are split wrongly. They are reconnected here.
+
+    :param sent_list: list to check
+    :return: updated list
+    """
+    res = []
+    combined = ''
+    for string in sent_list:
+        combined += ' ' + string
+        combined = combined.strip()
+        if not string.endswith('für die Bemessung des Nutzungsvorteils:') and \
+                not string.endswith('GB, Stand:') and not string.endswith('Probefahrt:') \
+                and not string.strip() == '§ 89 Abs. 1 II.' and not string.endswith('Medizinprodukte, A. VI.')\
+                and not combined.endswith('te, A. VI. 2.') \
+                and not string.endswith('Gemeinschaft:') and not string.endswith('InfoV.'):  # no special cases
+            res.append(combined)
+            combined = ''
+    return res
+
+
+def update_summaries_if_needed(interval_id, existings_summaries, possible_sentences, gold_sum, rouge_1, max_intervals,
+                               max_interval_index):
+    """
+    Tries to create a summary for the given interval. By keeping track of all rouge values along the way, summaries
+    of lower rouge may also be found. This information is then updated in max_intervals and existings_summaries
+
+    :param interval_id: interval_id for the interval to check. Interval start = (interval_id - 1) / 10
+    :param existings_summaries: already existing summaries to intervals for this task
+    :param possible_sentences: possible sentences to choose
+    :param gold_sum: gold summary to compare
+    :param rouge_1: True if ROUGE-1 should be calculated, False otherwise
+    :param max_intervals: list for keeping track of the maximum reached intervals. By starting with the highest
+        possible rouge interval, unreachable rouge values can be detected and skipped.
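+        A value of -1 means that no summary has been created for this task yet, so every interval is still tried.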
+ :param max_interval_index: index in list for current task + :return: existings_summaries, max_intervals with updated values + """ + if rouge_1: + rouge_index = 1 + else: + rouge_index = 2 + + if existings_summaries[interval_id - 1][rouge_index] == '' and \ + (interval_id - 1 < max_intervals[max_interval_index] or max_intervals[max_interval_index] == -1): + + # no summary yet + if rouge_1: + result_list = select_greedy_summary(split_text=possible_sentences, gold_summary=gold_sum, + interval_aim=[(interval_id - 1) / 10, interval_id / 10], + eval_func=lambda created, gold: + rouge.rouge_n(reference_summary=gold, + created_summary=created, + pp_options=pp_options, n=1)) + else: + result_list = select_greedy_summary(split_text=possible_sentences, gold_summary=gold_sum, + interval_aim=[(interval_id - 1) / 10, interval_id / 10], + eval_func=lambda created, gold: + rouge.rouge_l(reference_summary=gold, + created_summary=created, + pp_options=pp_options)) + + summary = '' + for (rouge_v, sentence) in result_list: + summary += ' ' + sentence + summary = summary.strip() + index = math.floor(rouge_v * 10) + if existings_summaries[index][rouge_index] == '': # summary found + if rouge_1: + existings_summaries[index] = (existings_summaries[index][0], summary, existings_summaries[index][2]) + else: + existings_summaries[index] = \ + (existings_summaries[index][0], existings_summaries[index][1], summary) + if index > max_intervals[max_interval_index]: + max_intervals[max_interval_index] = index + return existings_summaries, max_intervals + + +def preselect_sentences(sentence_list, gold_sum): + """ + Makes a preselection of sentences. Removes sentences without two keywords or with a given phrase and combines + konjunktiv until next indicative. + + :param sentence_list: list tu preselect from + :param gold_sum: gold summary to create keywords from + :return: resulting sentences + """ + # combine konjunktiv + res = combine_modus(sentence_list) + # keywords + rake = Rake( + min_chars=1, + max_words=4, + language_code='de', + stopwords=settings.nlp.Defaults.stop_words, + ) + keywords = rake.apply(gold_sum) + wordlist = set() + for keywordstring, _ in keywords: + for token in settings.nlp(keywordstring): + wordlist.add(token.lemma_) + res_var = [] + for sent in res: + keyword_counts = [1 for word in sent.split(' ') if (len(settings.nlp(word)) > 0) + and settings.nlp(word)[0].lemma_ in wordlist] + if sum(keyword_counts) >= 2: + res_var.append(sent) + res = res_var + + # remove sentences with bad phrases + phrases_list = ['Es kann dahinstehen', 'Es kann dahingestellt bleiben', + 'Dabei kann dahingestellt bleiben ', + 'Es kann offenbleiben, dass', 'Es kann offenbleiben, ob', + 'Es bedarf keiner Entscheidung, ob', + 'Das Berufungsgericht hat zu hohe Anforderungen gestellt,', + 'Entgegen der Auffassung der Revision', + 'Entgegen der Auffassung des Berufungsgerichts', + 'Jedenfalls greift die Argumentation des Berufungsgerichts nicht', + 'Jedenfalls greift die Argumentation des Berufungsgerichts zu kurz', 'Selbst wenn'] + res = [sentence for sentence in res if not sentence.startswith(tuple(phrases_list))] + return res + + +def write_files_for_one_judgement(case): + """ + Writes the three files for one judgement. 
+    Goes through all intervals from 0.0-0.1 to 0.9-1.0,
+    creates summaries and writes the results to the files
+
+    :param case: (pm, row, improved), with pm and row as resulting from get_evaluation_data; improved indicates
+        whether the improved version should be used
+    """
+    pm, row, improved = case
+    l_fin, l_list, eg_list, combined_list = prepare_sentences(row)
+    if improved:
+        eg_list = preselect_sentences(eg_list, l_fin)
+        combined_list = preselect_sentences(combined_list, pm)
+    max_intervals = [-1] * 4
+    ls_sums = [(str((i - 1) / 10) + '-' + str(i / 10), '', '') for i in range(1, 12, 1)]
+    pm_sums = [(str((i - 1) / 10) + '-' + str(i / 10), '', '') for i in range(1, 12, 1)]
+    for i in range(11, 1, -1):
+        ls_sums, max_intervals = update_summaries_if_needed(interval_id=i, gold_sum=l_fin, existings_summaries=ls_sums,
+                                                            max_interval_index=0, max_intervals=max_intervals,
+                                                            possible_sentences=eg_list, rouge_1=True)
+
+        ls_sums, max_intervals = update_summaries_if_needed(interval_id=i, gold_sum=l_fin, existings_summaries=ls_sums,
+                                                            max_interval_index=1, max_intervals=max_intervals,
+                                                            possible_sentences=eg_list, rouge_1=False)
+
+        pm_sums, max_intervals = update_summaries_if_needed(interval_id=i, gold_sum=pm, existings_summaries=pm_sums,
+                                                            max_interval_index=2, max_intervals=max_intervals,
+                                                            possible_sentences=combined_list, rouge_1=True)
+
+        pm_sums, max_intervals = update_summaries_if_needed(interval_id=i, gold_sum=pm, existings_summaries=pm_sums,
+                                                            max_interval_index=3, max_intervals=max_intervals,
+                                                            possible_sentences=combined_list, rouge_1=False)
+
+    # ROUGE Overviews
+    write_rouge_overview((l_fin, ls_sums, eg_list), (pm, pm_sums, combined_list), row[utils.aktenzeichen_str],
+                         improved=improved)
+    # Evaluation Files
+    write_evaluation_files((l_fin, ls_sums), (pm, pm_sums), row[utils.aktenzeichen_str], (eg_list, combined_list),
+                           improved)
+
+
+def combine_consecutive_sentences(sentences_to_combine, original_list):
+    """
+    In case sentences in the first list are consecutive sentences in the second list, they are combined into one string
+
+    :param sentences_to_combine: strings here might be combined
+    :param original_list: original list for getting the order
+    :return: updated list
+    """
+    indices = sorted([(get_index_in_list(sent, original_list), sent) for sent in sentences_to_combine])
+    res = []
+    old_index = -1
+    current_package = ''
+    for index, sent in indices:
+        if index - old_index == 1:  # consecutive sentences
+            current_package += ' ' + sent
+            current_package = current_package.strip()
+        else:  # old package is done
+            if current_package != '':
+                res.append(current_package)
+            current_package = sent
+        old_index = index
+    if current_package != '':
+        res.append(current_package)
+    return res
+
+
+def write_evaluation_files(ls_data, pm_data, aktenzeichen, sent_lists, improved):
+    """
+    Writes the excel files for legal evaluation.
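+    One workbook per case is written, with one sheet for the leitsatz sentences and one for the press release
+    sentences; the sentences are sorted alphabetically so that their order gives no hint of any ranking.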
+
+    :param improved: if True, the improved version is used
+    :param ls_data: ls, ls_sums with ls the leitsatz and ls_sums the created summaries and intervals
+    :param pm_data: pm, pm_sums with pm the press release and pm_sums the created summaries and intervals
+    :param aktenzeichen: aktenzeichen of the case
+    :param sent_lists: (eg_list, combined_list) with the original sentences for finding consecutive sentences
+    """
+    eg_list, combined_list = sent_lists
+    ls_sentences = []
+    pm_sentences = []
+    ls, ls_sums = ls_data
+    pm, pm_sums = pm_data
+    for _, r1, rl in ls_sums:
+        r1_sents = elaborated_sentence_splitting(r1)
+        if improved:
+            r1_sents = combine_modus(r1_sents)
+            r1_sents = combine_consecutive_sentences(r1_sents, eg_list)
+        r1_sents = [sent for sent in r1_sents if sent not in ls_sentences]
+        ls_sentences += r1_sents
+        rl_sents = elaborated_sentence_splitting(rl)
+        if improved:
+            rl_sents = combine_modus(rl_sents)
+            rl_sents = combine_consecutive_sentences(rl_sents, eg_list)
+        rl_sents = [sent for sent in rl_sents if sent not in ls_sentences]
+        ls_sentences += rl_sents
+    for _, r1, rl in pm_sums:
+        r1_sents = elaborated_sentence_splitting(r1)
+        if improved:
+            r1_sents = combine_modus(r1_sents)
+            r1_sents = combine_consecutive_sentences(r1_sents, combined_list)
+        r1_sents = [sent for sent in r1_sents if sent not in pm_sentences]
+        pm_sentences += r1_sents
+        rl_sents = elaborated_sentence_splitting(rl)
+        if improved:
+            rl_sents = combine_modus(rl_sents)
+            rl_sents = combine_consecutive_sentences(rl_sents, combined_list)
+        rl_sents = [sent for sent in rl_sents if sent not in pm_sentences]
+        pm_sentences += rl_sents
+
+    if improved:
+        savepath = eval_path + eval_run_two_path + 'sentences/'
+    else:
+        savepath = eval_path + 'sentences/'
+    utils.create_dir(current_path=current_dir, directory_name=savepath, delete=False)
+    workbook = xlsxwriter.Workbook(
+        utils.server_path(current_path=current_dir,
+                          path=savepath + aktenzeichen.replace('/', '-') + '.xlsx'))
+
+    # sorting so the order gives no indication of any ranking
+    ls_sentences = sorted(ls_sentences)
+    pm_sentences = sorted(pm_sentences)
+    write_one_evaluation_worksheet(workbook, 'Leitsatz', ls_sentences)
+    write_one_evaluation_worksheet(workbook, 'Pressemitteilung', pm_sentences)
+    workbook.close()
+
+
+def write_one_evaluation_worksheet(workbook, worksheetname, sentences):
+    """
+    Writes one excel sheet, either for the press release or the leitsatz
+
+    :param workbook: excel workbook to write in
+    :param worksheetname: name of the sheet
+    :param sentences: the sentences to write
+    """
+    worksheet = workbook.add_worksheet(name=worksheetname)
+    cell_format = workbook.add_format()
+    cell_format.set_text_wrap()
+    worksheet.set_column(2, 20, 20)
+    worksheet.set_column(1, 1, 55)
+
+    # description line
+    worksheet.write(1, 0, 'Nummer')
+    worksheet.write(1, 1, 'Satz')
+    for i in range(0, 10, 2):
+        worksheet.write(0, 2 + i, 'rechtliche Aussage:')
+        # split line
+        worksheet.write(1, 2 + i, 'Kategorie')
+        worksheet.write(1, 2 + i + 1, 'Dopplung')
+    # sentences with numbers
+    for i in range(len(sentences)):
+        worksheet.write(2 + i, 0, i + 1)
+        worksheet.write(2 + i, 1, sentences[i], cell_format)
+    # ending line
+    for i in range(0, 10, 2):
+        worksheet.write(2 + len(sentences), 2 + i,
+                        'Falls der Inhalt der rechtlichen Aussage vollständig abgebildet wurde, welche Sätze '
+                        'werden dazu benötigt?', cell_format)
+        worksheet.write(2 + len(sentences) + 1, 2 + i,
+                        'Falls der Inhalt der rechtlichen Aussage insgesamt in einem passenden Abstraktionsniveau '
+                        'angegeben wurden, welche Sätze werden dazu benötigt?', cell_format)
+
+
+def write_one_overview_worksheet(workbook, worksheet_name, sum_data):
+    """
+    Writes one worksheet for the rouge overview files.
+
+    :param workbook: workbook to write in
+    :param worksheet_name: name of the sheet
+    :param sum_data: (gold, created, original_list, improved) summaries
+    """
+    worksheet = workbook.add_worksheet(name=worksheet_name)
+    gold, created, original_list, improved = sum_data
+    # original text
+    worksheet.write(0, 0, gold)
+    row = 2
+    current = ''
+    sentences = utils.split_into_sentences(created)
+    if improved:
+        sentences = combine_consecutive_sentences(sentences_to_combine=sentences, original_list=original_list)
+    for i in range(len(sentences)):
+        # sentence
+        current += ' ' + sentences[i]
+        current = current.strip()
+        worksheet.write(row, 0, sentences[i])
+        row += 1
+
+
+def write_rouge_overview(ls_data, pm_data, aktenzeichen, improved):
+    """
+    Writes the rouge overview files
+
+    :param ls_data: ls, ls_sums, eg_list with ls the leitsatz and ls_sums the created summaries
+    :param pm_data: pm, pm_sums, comb_list with pm the press release and pm_sums the created summaries
+    :param aktenzeichen: aktenzeichen of the case
+    :param improved: True if the improved version should be run
+    """
+    if improved:
+        savepath = eval_path + eval_run_two_path + 'rouge_overview/'
+    else:
+        savepath = eval_path + 'rouge_overview/'
+    utils.create_dir(current_path=current_dir, directory_name=savepath, delete=False)
+    workbook = xlsxwriter.Workbook(
+        utils.server_path(current_path=current_dir,
+                          path=savepath + aktenzeichen.replace('/', '-') + '.xlsx'))
+    ls, ls_sums, eg_list = ls_data
+    for interval, sum_r1, sum_rl in ls_sums:
+        write_one_overview_worksheet(workbook, 'ls rouge1 ' + interval, sum_data=(ls, sum_r1, eg_list, improved))
+        write_one_overview_worksheet(workbook, 'ls rougel ' + interval, sum_data=(ls, sum_rl, eg_list, improved))
+    pm, pm_sums, comb_list = pm_data
+    for interval, sum_r1, sum_rl in pm_sums:
+        write_one_overview_worksheet(workbook, 'pm rouge1 ' + interval, sum_data=(pm, sum_r1, comb_list, improved))
+        write_one_overview_worksheet(workbook, 'pm rougel ' + interval, sum_data=(pm, sum_rl, comb_list, improved))
+    workbook.close()
+
+
+def read_or_load_summaries():
+    """
+    Reads or loads the summaries from their files, for every interval, for ROUGE-1 and ROUGE-L, and for pm and ls.
+    Loads if the cached dataframes exist, reads otherwise.
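+    The combined dataframes are cached as JSON under dataframes/ (all_summaries.json and summary_sentences.json);
+    deleting those files forces a rebuild from the rouge_overview excel files.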
+ + :return: (summaries, sentences) dataframes with summaries the summary data, sentences the corresponding + sentences and counts the counts of existing summaries for each combination + """ + try: + summaries = utils.df_from_json(current_path=current_dir, path=data_path + all_summaries_df_name) + sentences = utils.df_from_json(current_path=current_dir, path=data_path + sum_sentences_df_name) + except Exception: + sentences = pd.DataFrame(columns=[type_str, utils.aktenzeichen_str, rouge_type_str, sentence_str]) + summaries = pd.DataFrame(columns=[type_str, utils.aktenzeichen_str, original_str, summary_id_str, summary_str, + rouge_p_str, rouge_r_str, rouge_f_str, rouge_type_str]) + rouge_overview_path = eval_path + 'rouge_overview/' + + for file in utils.list_dir_files(current_path=current_dir, path=rouge_overview_path): + current_summary_id = 0 + file_sentences = pd.DataFrame( + columns=[type_str, utils.aktenzeichen_str, rouge_type_str, sentence_str]) + + aktenzeichen = file.replace('.xlsx', '') + # intervalle durchgehen + for i in range(1, 11, 1): + # pm und leitsatz durchgehen + for identifier in ['pm', 'ls']: + # rouge 1+ l durchgehen + for rouge_metric in [rouge1_str, rougel_str]: + sheetname = identifier + ' ' + rouge_metric + ' ' + \ + str((i - 1) / 10) + '-' + str(i / 10) + df_sheet_data = pd.read_excel(rouge_overview_path + '/' + file, + sheet_name=sheetname) + if df_sheet_data.shape[0] == 0: + continue + my_summary = '' + my_sentences = [False] * file_sentences.shape[0] + original_summary = df_sheet_data.columns.values[0] + for index, row in df_sheet_data.iterrows(): + if index == 0: # first row is empty + continue + sent = row[original_summary] + my_summary += ' ' + sent + my_summary = my_summary.strip() + # add sent to sentences or mark the old index + existing_sent = file_sentences.loc[(file_sentences[utils.aktenzeichen_str] == + aktenzeichen) & + (file_sentences[sentence_str] == sent) & + (file_sentences[type_str] == identifier) & + (file_sentences[ + rouge_type_str] == rouge_metric)] + if existing_sent.shape[0] > 0: + my_sentences[existing_sent.index.values[0]] = True + else: + file_sentences.loc[len(file_sentences.index)] = [identifier, aktenzeichen, + rouge_metric, sent] + \ + [False] * ( + file_sentences.shape[ + 1] - 4) + my_sentences.append(True) + + file_sentences[current_summary_id] = my_sentences + file_sentences = file_sentences.T.drop_duplicates().T + if file_sentences.shape[1] <= (4 + current_summary_id): # duplicate summary + continue + if rouge_metric == rouge1_str: + r_p, r_r, r_f = rouge.rouge_n(reference_summary=original_summary, + created_summary=my_summary, + pp_options=pp_options, n=1, extended_results=True) + else: + r_p, r_r, r_f = rouge.rouge_l(reference_summary=original_summary, + created_summary=my_summary, + pp_options=pp_options, extended_results=True) + summaries.loc[len(summaries.index)] = [identifier, aktenzeichen, original_summary, + current_summary_id, my_summary, r_p, r_r, + r_f, + rouge_metric] + current_summary_id += 1 + + sentences = pd.concat([sentences, file_sentences], ignore_index=True) + + sentences = sentences.fillna(False) + utils.create_dir(current_path=current_dir, directory_name=data_path, delete=False) + utils.df_to_json(current_path=current_dir, path=data_path + all_summaries_df_name, dataframe=summaries) + utils.df_to_json(current_path=current_dir, path=data_path + sum_sentences_df_name, dataframe=sentences) + + return summaries, sentences + + +def read_or_load_summaries_run_two(): + """ + Reads or loads the the summaries from 
their files. For every intervalm rouge-l and rouge-1 and pm and ls. + Loads if exists, reads otherwise. + + :return: (summaries, sentences) dataframes with summaries the summary data, sentences the corresponding + sentences and counts the counts of existing summaries for each combination + """ + try: + summaries = utils.df_from_json(current_path=current_dir, + path=data_path + eval_run_two_path + all_summaries_df_name) + sentences = utils.df_from_json(current_path=current_dir, + path=data_path + eval_run_two_path + sum_sentences_df_name) + except Exception: + sentences = pd.DataFrame(columns=[type_str, utils.aktenzeichen_str, rouge_type_str, sentence_str]) + summaries = pd.DataFrame(columns=[type_str, utils.aktenzeichen_str, original_str, summary_id_str, summary_str, + rouge_p_str, rouge_r_str, rouge_f_str, rouge_type_str]) + rouge_overview_path = eval_path + eval_run_two_path + 'rouge_overview/' + + for file in utils.list_dir_files(current_path=current_dir, path=rouge_overview_path): + current_summary_id = 0 + file_sentences = pd.DataFrame( + columns=[type_str, utils.aktenzeichen_str, rouge_type_str, sentence_str]) + + aktenzeichen = file.replace('.xlsx', '') + # intervalle durchgehen + for i in range(1, 11, 1): + # pm und leitsatz durchgehen + for identifier in ['pm', 'ls']: + # rouge 1+ l durchgehen + for rouge_metric in [rouge1_str, rougel_str]: + sheetname = identifier + ' ' + rouge_metric + ' ' + \ + str((i - 1) / 10) + '-' + str(i / 10) + df_sheet_data = pd.read_excel(rouge_overview_path + '/' + file, + sheet_name=sheetname) + if df_sheet_data.shape[0] == 0: + continue + my_summary = '' + my_sentences = [False] * file_sentences.shape[0] + original_summary = df_sheet_data.columns.values[0] + for index, row in df_sheet_data.iterrows(): + if index == 0: # first row is empty + continue + sent = row[original_summary] + my_summary += ' ' + sent + my_summary = my_summary.strip() + # add sent to sentences or mark the old index + existing_sent = file_sentences.loc[(file_sentences[utils.aktenzeichen_str] == + aktenzeichen) & + (file_sentences[sentence_str] == sent) & + (file_sentences[type_str] == identifier) & + (file_sentences[ + rouge_type_str] == rouge_metric)] + if existing_sent.shape[0] > 0: + my_sentences[existing_sent.index.values[0]] = True + else: + file_sentences.loc[len(file_sentences.index)] = [identifier, aktenzeichen, + rouge_metric, sent] + \ + [False] * ( + file_sentences.shape[ + 1] - 4) + my_sentences.append(True) + + file_sentences[current_summary_id] = my_sentences + file_sentences = file_sentences.T.drop_duplicates().T + if file_sentences.shape[1] <= (4 + current_summary_id): # duplicate summary + continue + if rouge_metric == rouge1_str: + r_p, r_r, r_f = rouge.rouge_n(reference_summary=original_summary, + created_summary=my_summary, + pp_options=pp_options, n=1, extended_results=True) + else: + r_p, r_r, r_f = rouge.rouge_l(reference_summary=original_summary, + created_summary=my_summary, + pp_options=pp_options, extended_results=True) + summaries.loc[len(summaries.index)] = [identifier, aktenzeichen, original_summary, + current_summary_id, my_summary, r_p, r_r, + r_f, + rouge_metric] + current_summary_id += 1 + + sentences = pd.concat([sentences, file_sentences], ignore_index=True) + + sentences = sentences.fillna(False) + utils.create_dir(current_path=current_dir, directory_name=data_path + eval_run_two_path, delete=False) + utils.df_to_json(current_path=current_dir, path=data_path + eval_run_two_path + all_summaries_df_name, + dataframe=summaries) + 
utils.df_to_json(current_path=current_dir, path=data_path + eval_run_two_path + sum_sentences_df_name, + dataframe=sentences) + + return summaries, sentences + + +def get_evaluated_sentences(run_two=False): + """ + Reads the evaluated sentences + + :return: (sentences, info) two dataframes containing all sentences and additional info about sentences needed for a + complete representation of the original + """ + path = evaluated_path + summary_sentences_path + if run_two: + path = evaluated_path + eval_run_two_path + summary_sentences_path + + result_sentences = pd.DataFrame() + result_info = pd.DataFrame() + + for file in utils.list_dir_files(current_path=current_dir, path=path): + ls_data = pd.read_excel(utils.server_path(current_path=current_dir, path=path + file), + sheet_name='Leitsatz') + pm_data = pd.read_excel(utils.server_path(current_path=current_dir, path=path + file), + sheet_name='Pressemitteilung') + aktenzeichen = file.replace('.xlsx', '') + + sentences, info = extract_one_type_data(ls_data, 'ls', aktenzeichen) + sentences_pm, info_pm = extract_one_type_data(pm_data, 'pm', aktenzeichen) + result_sentences = pd.concat([result_sentences, sentences, sentences_pm]) + result_info = pd.concat([result_info, info, info_pm], ignore_index=True) + + return result_sentences, result_info + + +def extract_one_type_data(dataframe, current_type, aktenzeichen): + """ + Extracts the data of one worksheet + + :param dataframe: raw dataframe of the sheet + :param current_type: ls or pm + :param aktenzeichen: aktenzeichen of the judgement + :return: sentences, info with sentences the evaluated sentences and info a dataframe of legal issues and the + sentences needed to complete them + """ + # check if column empty + data_row_count = dataframe.shape[0] - 3 + + drop_columns = [] + for i in range(len(dataframe.columns)): + column = dataframe.columns.values[i] + if (i % 2) == 0: + if dataframe[column][1:-2].isnull().sum() == data_row_count: + # everything empty + index = dataframe.columns.get_loc(column) + drop_columns.append(index) + if index + 1 < len(dataframe.columns): + drop_columns.append(index + 1) + + dataframe.drop(dataframe.columns[drop_columns], axis=1, inplace=True) # also dop dopplung + + # sentence duplicates + duplicates = [] + duplicate_rows = dataframe.iloc[1:-2, 3::2].dropna(how='all') + for index, row in duplicate_rows.iterrows(): + for dup_index in row.unique(): + if not pd.isna(dup_index): + duplicates.append((index, dup_index)) + + # refill values for duplicates if not given + for a, b in duplicates: + a_v = dataframe.iloc[a].iloc[2::2].dropna().unique() + b_v = dataframe.iloc[b].iloc[2::2].dropna().unique() + if len(b_v) == 0: # missing value + dataframe.iloc[b, 2] = a_v[0] + if len(a_v) == 0: # missing value + dataframe.iloc[a, 2] = b_v[0] + + # Sentence evaluations + sentences = dataframe.iloc[1:-2, :2] + sentences.columns = [sent_no_str, sentence_str] + legal_issues = dataframe.iloc[1:-2, 2:].T.apply(lambda col: col.iloc[::2].dropna()).T + column_names = [issues_str + str(i) for i in range(1, legal_issues.shape[1] + 1)] + legal_issues.columns = column_names + sentences = pd.concat([sentences, legal_issues], axis=1) + # Kombination für vollständige Abbildung finden + abstr_matching = [] + content_matching = [] + subset = dataframe.iloc[-2:, 2:] + for i in range(1, subset.shape[1], 2): + if not pd.isna(subset.iloc[0, i]): + content_matching.append(subset.iloc[0, i]) + else: + content_matching.append('') + if not pd.isna(subset.iloc[1, i]): + 
+            abstr_matching.append(subset.iloc[1, i])
+        else:
+            abstr_matching.append('')
+
+    sentences[type_str] = current_type
+
+    sentences[utils.aktenzeichen_str] = aktenzeichen
+    abstr_matching = insert_duplicates(abstr_matching, duplicates)
+    content_matching = insert_duplicates(content_matching, duplicates)
+    info = pd.DataFrame(columns=[utils.aktenzeichen_str, type_str, issues_str, abstr_complete_str,
+                                 content_complete_str])
+    for i in range(len(abstr_matching)):
+        info.loc[len(info.index)] = [aktenzeichen, current_type, str(i), abstr_matching[i], content_matching[i]]
+
+    return sentences, info
+
+
+def insert_duplicates(match, duplicates):
+    """
+    In the list of sentences needed for a complete content or abstraction level, the duplicates are inserted
+
+    :param match: list of either abstraction or content matchings
+    :param duplicates: [(a,b)*] the list of duplicates
+    :return: the updated input list
+    """
+    for i in range(len(match)):
+        matching = str(match[i]).replace(',', '').split(' ')
+        for a, b in duplicates:
+            if str(a) in matching:
+                matching.remove(str(a))
+                matching.append(str(a) + ',' + str(b))
+            if str(b) in matching:
+                matching.remove(str(b))
+                matching.append(str(a) + ',' + str(b))
+        match[i] = ' '.join(matching)
+    return match
+
+
+def get_interval_counts(summaries):
+    """
+    Counts all summaries for the intervals
+
+    :param summaries: all summaries
+    :return: dataframe with the counted results
+    """
+    res = pd.DataFrame()
+    for type_id in ['ls', 'pm']:
+        for rouge_type in [rouge1_str, rougel_str]:
+            selected_summaries = summaries[(summaries[type_str] == type_id) &
+                                           (summaries[rouge_type_str] == rouge_type)]
+            counts = selected_summaries[interval_start_str].value_counts()
+            counts.name = rouge_type + '_' + type_id
+            res = pd.concat([res, counts], axis=1)
+    res = res.sort_index()
+
+    return res
+
+
+def get_interval_mean_ranking(summaries):
+    """
+    Gets the mean ranking of all summaries for the intervals
+
+    :param summaries: all summaries
+    :return: dataframe with the mean rankings
+    """
+    res = pd.DataFrame()
+    for type_id in ['ls', 'pm']:
+        for rouge_type in [rouge1_str, rougel_str]:
+            selected_summaries = summaries[(summaries[type_str] == type_id) &
+                                           (summaries[rouge_type_str] == rouge_type)]
+            means = selected_summaries.groupby(interval_start_str)[evaluation_str].mean()
+            means.name = rouge_type + '_' + type_id
+            res = pd.concat([res, means], axis=1)
+    res = res.sort_index()
+
+    return res
+
+
+def sentences_complete(number_list, sentence_index_list):
+    """
+    Determines whether all sentences needed for completion are contained.
+
+    :param number_list: abstr_ or content_ list
+    :param sentence_index_list: all sentence indices of the summary
+    :return: True if all sentences are contained, False otherwise
+    """
+    if number_list == '':
+        return False
+    else:
+        numbers = number_list.split(' ')
+        for number in numbers:
+            if ',' in number:  # duplicate numbering, 'or'
+                a, b = number.split(',')
+                if a not in sentence_index_list and b not in sentence_index_list:
+                    return False
+            else:
+                if number not in sentence_index_list:
+                    return False
+    return True
+
+
+def get_cat_values(all_values):
+    """
+    Reads all evaluated categories and writes them to a set
+
+    :param all_values: raw evaluation
+    :return: set of all letters contained
+    """
+    all_cats = set()
+    for cat in all_values:
+        if not pd.isna(cat):
+            for char in cat:
+                all_cats.add(char)
+    return all_cats
+
+
+def get_one_summary_evaluation(package):
+    """
+    Evaluates one summary.
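+    The category scores of all legal issues are accumulated and normalised by the number of issues and by 4 (the
+    per-issue maximum), e.g. (illustrative) a single issue rated 'V' and 'G' yields (2 + 2) / 1 / 4 = 1.0.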
+ + :param package: my_info, my_sum_sents, sum_index + :return: sum_index, evaluation. + """ + my_info, my_sum_sents, sum_index = package + # for each legal issue + result = 0 + num_legal_issues = my_info.shape[0] + for issue in range(num_legal_issues): + content_list = my_info[my_info[issues_str] == str(issue)][content_complete_str].iloc[0] + sentence_list = my_sum_sents[sent_no_str].unique() + sentence_list = [str(v) for v in sentence_list] + cat_v_content_comp = sentences_complete(content_list, sentence_list) + all_categories = get_cat_values(my_sum_sents[issues_str + str(issue + 1)].unique()) + if 'F' in all_categories: + result = -4 + elif 'L' in all_categories: + result -= 2 + + if 'V' in all_categories or cat_v_content_comp: + result += 2 + elif 'P' in all_categories or 'E' in all_categories: + result += 1 + + if 'S' in all_categories: # as soon as there is one S, only one point + result += 1 + elif 'E' in all_categories or 'G' in all_categories: # if no S, but G or E, then 2 points + result += 2 + return sum_index, result / num_legal_issues / 4 # divide with 4 for range + + +def evaluate_all_summaries(info, sents, sums): + """ + Coordinates calculation of all summary evaluations + + :param info: infos to the summaries + :param sents: sentences of all summaries + :param sums: summarie overviews of all summaries + :return: sums with an appended column cointaining the evaluation + """ + packaged_info = [(info[(info[utils.aktenzeichen_str] == row[utils.aktenzeichen_str]) & + (info[type_str] == row[type_str])], + sents[(sents[utils.aktenzeichen_str] == row[utils.aktenzeichen_str]) + & (sents[str(row[summary_id_str])] == True)], index) + for index, row in sums.iterrows()] + res = utils.parallel_imap(get_one_summary_evaluation, packaged_args=packaged_info) + var = [] + for content in res: + index, evaluation = content + var.append((index, evaluation)) + + idx, values = zip(*var) + evaluations = pd.Series(values, idx) + evaluations.name = evaluation_str + res_summaries = pd.concat([sums, evaluations], axis=1) + return res_summaries + + +def get_category_overview_data(sentences, name): + """ + Gets the data for plotting a bar plot of categories + + :param sentences: sentences to include + :param name: name for the column + :return: dataframe containing prepared data + """ + sent_count = sentences.shape[0] + evals = sentences.apply(pd.Series.value_counts).sum(axis=1) + res = {} + for i in evals.index: + for char in i: + if char != ' ': + vals = res.get(char) + if vals is None: + vals = 0 + vals += evals[i] + res[char] = vals + for key in res.keys(): + res[key] = [res[key] / sent_count] + res_df = pd.DataFrame.from_dict(res).T.sort_index() + res_df.columns = [name] + return res_df + + +def get_category_counts(sentences, name, sums, evaluation_cols): + """ + Gets the data for plotting a bar plot of categories + + :param sentences: sentences to include + :param name: name for the column + :param evaluation_cols: number of evaluation columns + :param sums: all summaries + :return: dataframe containing prepared data + """ + summary_count = sums.shape[0] + sentences_all = pd.DataFrame() + for _, row in sentences.iterrows(): + for _ in range(len(elaborated_sentence_splitting(row[sentence_str]))): + occurance_count = row[[str(x) for x in range(0, sums[summary_id_str].max() + 1)]].value_counts()[True] + for _ in range(occurance_count): + sentences_all = pd.concat([sentences_all, row], axis=1) + sentences_all = sentences_all.T[[issues_str + str(i) for i in range(1, evaluation_cols + 1)]] + sent_count 
= sentences_all.shape[0] + evals = sentences_all.apply(pd.Series.value_counts).sum(axis=1) + res = {} + for i in evals.index: + for char in i: + if char != ' ': + vals = res.get(char) + if vals is None: + vals = 0 + vals += evals[i] + res[char] = vals + for key in res.keys(): + res[key] = [res[key] / sent_count] + res['avg number of sentences'] = [sent_count / summary_count] + res_df = pd.DataFrame.from_dict(res).T.sort_index() + res_df.columns = [name] + return res_df + + +def draw_pics(sents, sums, info, run_two=False): + """ + Draws the images for the visual evaluation. + + :param sents: sentences to visualize + :param sums: summaries to visualize + :param info: corresponding info + :param run_two: if True, then files are written to dedicated directory for second run + """ + my_picture_path = picture_path + if run_two: + my_picture_path += eval_run_two_path + utils.create_dir(current_path=current_dir, directory_name=my_picture_path, delete=False) + + # sent_subset = sents[~sents[[issues_str+str(i) for i in range(1, int(info[issues_str].max())+2)]] + # .isin(['R', 'T']).any(axis=1)] + # sum_subset = pd.DataFrame() + # for _, row in sums.iterrows(): + # subset = sent_subset[(sent_subset[type_str] == row[type_str]) & + # (sent_subset[rouge_type_str] == row[rouge_type_str]) & + # (sent_subset[utils.aktenzeichen_str] == row[utils.aktenzeichen_str]) + # & (sent_subset[str(row[summary_id_str])] == True)] + # if subset.shape[0] > 0: + # sum_subset = pd.concat([sum_subset, row], axis=1) + # sum_subset = sum_subset.T + # sents = sent_subset + # sums = sum_subset + + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(11, 6)) + for sum_type in ['ls', 'pm']: + for r_type in [rouge1_str, rougel_str]: + x_ticks = ['0.1 -\n0.2', '0.2 -\n0.3', '0.3 -\n0.4', '0.4 -\n0.5', '0.5 -\n0.6', '0.6 -\n0.7', + '0.7 -\n0.8', '0.8 -\n0.9', '0.9 -\n1.0'] + if sum_type == 'ls': + if r_type == rouge1_str: + ax = ax1 + x_label = 'ROUGE-1' + y_label = 'Guiding principles' + else: + ax = ax2 + x_label = 'ROUGE-L' + y_label = '' + else: + if r_type == rouge1_str: + ax = ax3 + x_label = '' + y_label = 'Press releases' + else: + ax = ax4 + x_label = '' + y_label = '' + x_ticks = [tick for tick in x_ticks if float(tick[:3]) in sums[(sums[type_str] == sum_type) & + (sums[rouge_type_str] == + r_type)][interval_start_str].unique()] + sums[(sums[type_str] == sum_type) & (sums[rouge_type_str] == r_type)][ + ['eval', interval_start_str]].plot(kind='box', ax=ax, by=interval_start_str, + color=dict(boxes='black', whiskers='black', + medians='black', caps='black'), + ylabel=y_label) + ax.set_title(x_label) + ax.set_ylim(0, 1) + ax.set_xticklabels(x_ticks) + + fig.savefig(my_picture_path + 'boxplots.png') + xticklabels = ['ROUGE-1\ngp', 'ROUGE-L\ngp', 'ROUGE-1\npr', + 'ROUGE-L\npr'] + yticklabels = ['0.1-0.2', '0.2-0.3', '0.3-0.4', '0.4-0.5', '0.5-0.6', '0.6-0.7', '0.7-0.8', '0.8-0.9', '0.9-1.0'] + fig, (ax, ax2) = plt.subplots(ncols=2, figsize=(12, 5)) + mean_rankings = get_interval_mean_ranking(sums).fillna(0) + sns.heatmap(mean_rankings, annot=True, ax=ax, cmap='Greys', vmax=1, xticklabels=xticklabels, + yticklabels=yticklabels) + + all_counts = get_interval_counts(sums).fillna(0) + sns.heatmap(all_counts, annot=True, ax=ax2, cmap='Greys_r', xticklabels=xticklabels, yticklabels=yticklabels) + ax2.tick_params(rotation=0) + ax.tick_params(rotation=0) + ax2.set_ylabel('Interval') + ax.set_ylabel('Interval') + fig.savefig(my_picture_path + 'heatmaps.png') + + evaluations_cols = info[issues_str].astype(int).max() + 1 + evals = 
sents[[issues_str + str(i) for i in range(1, evaluations_cols + 1)]]
+ res_df = get_category_overview_data(evals, 'all')
+ for sum_type in ['ls', 'pm']:
+ for r_type in [rouge1_str, rougel_str]:
+ evals = sents[(sents[rouge_type_str] == r_type) & (sents[type_str] == sum_type)]
+ evals = evals[[issues_str + str(i) for i in range(1, evaluations_cols + 1)]]
+ res_df = pd.concat([res_df, get_category_overview_data(evals, sum_type + ' ' + r_type)], axis=1)
+ fig = res_df.plot(kind='bar').get_figure()
+ fig.savefig(utils.server_path(current_path=current_dir, path=my_picture_path + 'cat_perc_types.png'))
+
+ evals = sents[[issues_str + str(i) for i in range(1, evaluations_cols + 1)]]
+ res_df_intervals = get_category_overview_data(evals, 'all')
+ for interval in sums[interval_start_str].unique():
+ selection_list = sums[sums[interval_start_str] == interval][[utils.aktenzeichen_str, summary_id_str]].values
+ evals = pd.DataFrame()
+ for aktenzeichen, sum_id in selection_list:
+ evals = pd.concat([evals, sents[(sents[str(sum_id)] == True) &
+ (sents[utils.aktenzeichen_str] == aktenzeichen)]])
+ evals = evals[[issues_str + str(i) for i in range(1, evaluations_cols + 1)]]
+ res_df_intervals = pd.concat([res_df_intervals, get_category_overview_data(evals, interval)], axis=1)
+ fig = res_df_intervals.plot(kind='bar').get_figure()
+ fig.savefig(utils.server_path(current_path=current_dir, path=my_picture_path + 'cat_perc_intervals.png'))
+
+ # table overview
+ for sum_type in ['ls', 'pm']:
+ for r_type in [rouge1_str, rougel_str]:
+ print(sum_type + ' ' + r_type)
+ my_sents = sents[(sents[type_str] == sum_type) & (sents[rouge_type_str] == r_type)]
+ my_sums = sums[(sums[type_str] == sum_type) & (sums[rouge_type_str] == r_type)]
+ table_data = pd.DataFrame()
+ for interval in my_sums[interval_start_str].unique():
+ my_current_sums = my_sums[my_sums[interval_start_str] == interval]
+ selection_list = my_current_sums[[utils.aktenzeichen_str, summary_id_str]].values
+ evals = pd.DataFrame()
+ for aktenzeichen, sum_id in selection_list:
+ evals = pd.concat([evals, my_sents[(my_sents[str(sum_id)] == True) &
+ (my_sents[utils.aktenzeichen_str] == aktenzeichen)]])
+ table_data = pd.concat([table_data, get_category_counts(evals, interval, my_current_sums,
+ evaluations_cols).T], axis=0)
+ print_data = pd.DataFrame()
+ print_data['R+T'] = table_data['R'] + table_data['T']
+ print_data['U'] = table_data['U']
+ print_data['Rest'] = 1 - (print_data['U'] + print_data['R+T'])
+ print_data['avg sentences'] = table_data['avg number of sentences']
+ print(print_data.to_string())
+
+
+def readjust_consecutive_sentences(sents, sums, info):
+ """
+ Reads in the additional consecutive sentences from the evaluation file
+
+ :param sents: all existing sentences
+ :param sums: all existing summaries
+ :param info: all existing infos
+ :return: the sentences including the new sentence packages
+ """
+ res = pd.DataFrame()
+ comb_sents = pd.read_excel(evaluated_path + eval_run_two_path + 'AufeinanderfolgendeSätze.xlsx')
+ comb_sents.columns = [utils.aktenzeichen_str, type_str, sentence_str, evaluation_str,
+ evaluation_str + '_new']
+
+ new_sent_indices = {}
+ for _, comb_row in comb_sents.iterrows():
+ sentences = elaborated_sentence_splitting(comb_row[sentence_str])
+ if len(sentences) != len(comb_row[evaluation_str].split()):
+ print('Wrong splitting!')
+ r1_sents = sents[(sents[type_str] == comb_row[type_str]) &
+ (sents[utils.aktenzeichen_str] == comb_row[utils.aktenzeichen_str].replace('/', '-')) &
+ (sents[rouge_type_str] == rouge1_str) & sents[sentence_str].isin(sentences)]
+ rl_sents = sents[(sents[type_str] == comb_row[type_str]) &
+ (sents[utils.aktenzeichen_str] == comb_row[utils.aktenzeichen_str].replace('/', '-')) &
+ (sents[rouge_type_str] == rougel_str) & sents[sentence_str].isin(sentences)]
+ r1_sums = []
+ rl_sums = []
+
+ # find summaries containing the new sentence package
+ for col_name in [str(i) for i in range(1, sums[summary_id_str].max() + 1)]:
+ var_r1 = r1_sents[col_name].unique()
+ if len(var_r1) == 1 and var_r1[0] == True:
+ r1_sums.append(col_name)
+ var_rl = rl_sents[col_name].unique()
+ if len(var_rl) == 1 and var_rl[0] == True:
+ rl_sums.append(col_name)
+
+ if len(r1_sums) > 0:
+ new_index, new_sent_indices = get_new_sent_index(new_sent_indices, comb_row[utils.aktenzeichen_str], sents)
+ res = pd.concat([res, get_new_row(comb_row=comb_row, my_sents=r1_sents, r_string=rouge1_str,
+ sums=sums, sum_list=r1_sums, info=info, sent_no=new_index)])
+ if len(rl_sums) > 0:
+ new_index, new_sent_indices = get_new_sent_index(new_sent_indices, comb_row[utils.aktenzeichen_str], sents)
+ res = pd.concat([res, get_new_row(comb_row=comb_row, my_sents=rl_sents, r_string=rougel_str,
+ sums=sums, sum_list=rl_sums, info=info, sent_no=new_index)])
+ for index in r1_sents.index.values:
+ for sum_id in r1_sums:
+ sents.at[index, sum_id] = False
+ for index in rl_sents.index.values:
+ for sum_id in rl_sums:
+ sents.at[index, sum_id] = False
+
+ return pd.concat([sents, res])
+
+
+def get_new_sent_index(index_dict, aktenzeichen, sentences):
+ """
+ Gets a new number for a sentence to add that wasn't used for that aktenzeichen before
+
+ :param index_dict: dict with existing new indices
+ :param aktenzeichen: aktenzeichen to look for
+ :param sentences: sentences to find existing numbers in
+ :return: a new sentence number to use
+ """
+ new_index = index_dict.get(aktenzeichen)
+ if new_index is None:
+ new_index = sentences[(sentences[utils.aktenzeichen_str] ==
+ aktenzeichen.replace('/', '-'))][sent_no_str].max() + 1
+ index_dict[aktenzeichen] = new_index + 1
+ return new_index, index_dict
+
+
+def get_new_row(comb_row, my_sents, r_string, sums, sum_list, info, sent_no):
+ """
+ Creates a new sentence (package) row
+
+ :param comb_row: information concerning the sentence
+ :param my_sents: sentences appearing in that sentence package
+ :param r_string: rouge type string
+ :param sums: all existing summaries
+ :param sum_list: index list of summaries for that sentence
+ :param info: all infos
+ :param sent_no: sentence number for the row
+ :return: a row as dataframe
+ """
+ row_data = {type_str: [comb_row[type_str]], rouge_type_str: [r_string],
+ utils.aktenzeichen_str: [my_sents.iloc[0][utils.aktenzeichen_str]],
+ sentence_str: [comb_row[sentence_str]], sent_no_str: [sent_no]}
+ for col_name in [str(i) for i in range(1, sums[summary_id_str].max() + 1)]: # summary ids run from 1 to max
+ if col_name not in sum_list:
+ row_data[col_name] = [False]
+ else:
+ row_data[col_name] = [True]
+
+ # legal issue
+ legal_issues = my_sents[[issues_str + str(i + 1) for i in range(int(info[issues_str].max()) + 1)]].dropna(axis=1,
+ how='all')
+ for col in legal_issues.columns:
+ row_data[col] = comb_row[evaluation_str + '_new']
+ if 'P' in str(comb_row[evaluation_str + '_new']):
+ # completeness
+ old_sentences = info[(info[utils.aktenzeichen_str] == comb_row[utils.aktenzeichen_str].replace('/', '-')) &
+ (info[type_str] == comb_row[type_str]) &
+ (info[issues_str] == str(int(col[-1]) - 1))][content_complete_str].values[0].split(' ')
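+ # Rebuild the completeness string: numbers of the sentences that were merged into
+ # the new package are collapsed into one alternative group '(old1-old2,new_no)',
+ # all other sentence numbers are kept unchanged.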
+ new_sentences = ''
+ or_string = ''
+ for sentence_no in old_sentences:
+ if sentence_no in [str(x) for x in my_sents[sent_no_str].values]:
+ or_string += ' ' + sentence_no
+ or_string = or_string.strip()
+ else:
+ new_sentences += ' ' + sentence_no
+ new_sentences = new_sentences.strip()
+ if or_string != '':
+ or_string = '(' + or_string.replace(' ', '-') + ',' + str(sent_no) + ')'
+ new_sentences += ' ' + or_string
+ else:
+ new_sentences += ' ' + str(sent_no)
+ new_sentences = new_sentences.strip()
+ info.at[info[(info[utils.aktenzeichen_str] == comb_row[utils.aktenzeichen_str].replace('/', '-')) &
+ (info[type_str] == comb_row[type_str]) &
+ (info[issues_str] == str(int(col[-1]) - 1))]
+ [content_complete_str].index.values[0], content_complete_str] = new_sentences
+ return pd.DataFrame.from_dict(row_data)
+
+
+def remove_bad_cats(sums, sents, infos):
+ """
+ Removes all sentences whose legal-issue evaluations contain only 'R', 'T' or no value and
+ rebuilds the affected summaries from the remaining sentences with freshly computed ROUGE scores.
+
+ :param sums: all existing summaries
+ :param sents: all existing sentences
+ :param infos: all existing infos
+ :return: the remaining sentences and the rebuilt summaries
+ """
+ res_sums = pd.DataFrame()
+ res_sents = sents[~sents[[issues_str + str(i + 1) for i in range(int(infos[issues_str].max()) + 1)]].isin(
+ ['R', 'T', np.nan]).all(axis=1)]
+ for _, row in sums.iterrows():
+ my_sents = res_sents[(res_sents[utils.aktenzeichen_str] == row[utils.aktenzeichen_str]) &
+ (res_sents[type_str] == row[type_str]) &
+ (res_sents[rouge_type_str] == row[rouge_type_str]) &
+ (res_sents[str(row[summary_id_str])] == True)]
+ if my_sents.shape[0] > 0:
+ summary = ' '.join(my_sents[sentence_str].values)
+ new_row = {type_str: [row[type_str]], utils.aktenzeichen_str: [row[utils.aktenzeichen_str]],
+ original_str: [row[original_str]], summary_id_str: [row[summary_id_str]],
+ rouge_type_str: [row[rouge_type_str]], summary_str: [summary]}
+ if row[rouge_type_str] == rouge1_str:
+ p, r, f = rouge.rouge_n(reference_summary=row[original_str], created_summary=summary, n=1,
+ pp_options=pp_options, extended_results=True)
+ else:
+ p, r, f = rouge.rouge_l(reference_summary=row[original_str], created_summary=summary,
+ pp_options=pp_options, extended_results=True)
+ new_row['rouge_precision'] = p
+ new_row['rouge_recall'] = r
+ new_row['rouge_f_measure'] = f
+ res_sums = pd.concat([res_sums, pd.DataFrame.from_dict(new_row)], ignore_index=True)
+
+ return res_sents, res_sums
+
+
+def evaluate_summaries_run_two():
+ """
+ Coordinates evaluation of the summaries from the sentences evaluated in the second run
+ """
+ all_summaries, all_summary_sentences = read_or_load_summaries_run_two()
+ evaluated_sentences, evaluated_infos = get_evaluated_sentences(run_two=True)
+ final_summaries = all_summaries[all_summaries[utils.aktenzeichen_str].isin(evaluated_infos[utils.aktenzeichen_str])]
+ final_sentences = all_summary_sentences.merge(evaluated_sentences, how='inner',
+ on=[utils.aktenzeichen_str, type_str, sentence_str])
+
+ # final_sentences, final_summaries = remove_bad_cats(sents=final_sentences,
+ # sums=final_summaries, infos=evaluated_infos)
+ final_summaries = evaluate_all_summaries(evaluated_infos, final_sentences, final_summaries)
+ final_summaries[interval_start_str] = final_summaries.apply(
+ lambda row: (np.ceil(row['rouge_recall'] * 10) - 1) / 10
+ if row[rouge_type_str] == rouge1_str
+ else (np.ceil(row['rouge_f_measure'] * 10) - 1) / 10, axis=1)
+ final_summaries = final_summaries.drop_duplicates([utils.aktenzeichen_str, type_str, rouge_type_str,
+ interval_start_str])
+ final_summaries = final_summaries[final_summaries[interval_start_str] > 0.0]
+ draw_pics(sents=final_sentences, sums=final_summaries, info=evaluated_infos, run_two=True)
+
+
+def evaluate_summaries():
+ """
+ Coordinates evaluation of the summaries from the sentences evaluated
+ """
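+ # Pipeline: load the generated summaries, join the manually evaluated sentences,
+ # recompute the summary-level scores and bucket each summary by the left edge of
+ # its 0.1-wide ROUGE interval, e.g. a recall of 0.53 gives (np.ceil(5.3) - 1) / 10 = 0.5;
+ # finally draw the figures for the kept intervals.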
""" + all_summaries, all_summary_sentences = read_or_load_summaries() + evaluated_sentences, evaluated_infos = get_evaluated_sentences() + final_sentences = all_summary_sentences.merge(evaluated_sentences, how='inner', + on=[utils.aktenzeichen_str, type_str, sentence_str]) + final_summaries = all_summaries[all_summaries[utils.aktenzeichen_str].isin(evaluated_infos[utils.aktenzeichen_str])] + + # final_sentences, final_summaries = remove_bad_cats(sents=final_sentences, sums=final_summaries, + # infos=evaluated_infos) + final_summaries = evaluate_all_summaries(evaluated_infos, final_sentences, final_summaries) + final_summaries[interval_start_str] = final_summaries.apply( + lambda row: (np.ceil(row['rouge_recall'] * 10) - 1) / 10 + if row[rouge_type_str] == rouge1_str + else (np.ceil(row['rouge_f_measure'] * 10) - 1) / 10, axis=1) + final_summaries = final_summaries.drop_duplicates([utils.aktenzeichen_str, type_str, rouge_type_str, + interval_start_str]) + final_summaries = final_summaries[final_summaries[interval_start_str] > 0.0] + draw_pics(sents=final_sentences, sums=final_summaries, info=evaluated_infos) + + +def combine_modus(list_of_string): + """ + For the input list of consecutive strings, konjunktiv sentences are combined with all following sentences until + there is an indicative sentence + + :param list_of_string: strings for comibning + :return: updated list + """ + result_strings = [] + + current_string = '' + for string in list_of_string: + nlp_string = settings.nlp(string) + ind = False + sub = False + verb_mod_tag_list = ['VVFIN', 'VAFIN', 'VMFIN', 'VVIMP', 'VAIMP'] + for token in nlp_string: + if token.tag_ in verb_mod_tag_list: + mood = token.morph.get('Mood') + if 'Ind' in mood: + if token.head.tag_ in verb_mod_tag_list: + head_mood = token.head.morph.get('Mood') + if 'Sub' not in head_mood: + ind = True + else: + ind = True + if 'Sub' in mood: + sub = True + current_string += ' ' + string + current_string = current_string.strip() + if ind or not sub: + result_strings.append(current_string) + current_string = '' + + return result_strings + + +def get_index_in_list(sentence, string_list): + """ + Returns index of a string in a list with some leniance. + + :param sentence: sentence to find + :param string_list: list to look in + :return: index or None, if nothing was found + """ + try: + return string_list.index(sentence) + except ValueError: + short_length = 200 + for i in range(100): + result_list = [string_list.index(l_item) for l_item in string_list + if sentence[:min(100, len(sentence))] in l_item] + if len(result_list) == 1: + return result_list[0] + if len(result_list) == 0: + short_length -= 10 + if len(result_list) > 1: + short_length += 50 + return None + + +def readjust_splitting(old_list): + """ + Some sentences might not be split correctly + + :param old_list: original split sentences + :return: new split sentences + """ + res = [] + for string in old_list: + if 'InfoV. ' in string: + split_list = string.split('InfoV. ') + for i in range(len(split_list) - 1): + split_list[i] = split_list[i] + 'InfoV.' 
+ res += split_list + else: + res.append(string) + return res + + +if __name__ == "__main__": + # pm_judgments = get_evaluation_data(case_list=cases_one_list) + # data = [(a, b, False) for (a, b) in pm_judgments] + # utils.parallel_imap(write_files_for_one_judgement, data) + # evaluate_summaries() + + # pm_judgments = get_evaluation_data(case_list=cases_two_list) + # print_following_sentences() + # data = [(a, b, c, True) for (a, b, c) in pm_judgments] + evaluate_summaries_run_two() + print('Done') diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_113-18.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_113-18.xlsx new file mode 100644 index 0000000..dcf27e2 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_113-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_25-20.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_25-20.xlsx new file mode 100644 index 0000000..35e4097 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_25-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_292-17.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_292-17.xlsx new file mode 100644 index 0000000..8dd36bd Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_292-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_35-18.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_35-18.xlsx new file mode 100644 index 0000000..cdb6204 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_35-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_391-17.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_391-17.xlsx new file mode 100644 index 0000000..26551d4 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_391-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_42-19.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_42-19.xlsx new file mode 100644 index 0000000..df65db2 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_42-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_55-19.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_55-19.xlsx new file mode 100644 index 0000000..379b85f Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_55-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_67-18.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_67-18.xlsx new file mode 100644 index 0000000..7951f40 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_67-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_79-21.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_79-21.xlsx new file mode 100644 index 0000000..daf1b10 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/III_ZR_79-21.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/II_ZR_152-20.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/II_ZR_152-20.xlsx new file mode 100644 index 0000000..fd5d683 Binary files /dev/null and 
b/rouge_evalauation/evaluated_data/extractive_judgments/II_ZR_152-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/II_ZR_84-20.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/II_ZR_84-20.xlsx new file mode 100644 index 0000000..08a7728 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/II_ZR_84-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/IV_ZR_144-21.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/IV_ZR_144-21.xlsx new file mode 100644 index 0000000..7f2665a Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/IV_ZR_144-21.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/IV_ZR_253-20.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/IV_ZR_253-20.xlsx new file mode 100644 index 0000000..0280506 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/IV_ZR_253-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_139-15.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_139-15.xlsx new file mode 100644 index 0000000..692e522 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_139-15.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_146-20.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_146-20.xlsx new file mode 100644 index 0000000..9f8e3d0 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_146-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_153-17.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_153-17.xlsx new file mode 100644 index 0000000..d6b9c02 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_153-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_23-18.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_23-18.xlsx new file mode 100644 index 0000000..135de89 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/I_ZR_23-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_21-19.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_21-19.xlsx new file mode 100644 index 0000000..1eae348 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_21-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_277-16.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_277-16.xlsx new file mode 100644 index 0000000..52a315c Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_277-16.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_66-17.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_66-17.xlsx new file mode 100644 index 0000000..a37e1b2 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_66-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_94-17.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_94-17.xlsx new file mode 100644 index 0000000..5c19665 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VIII_ZR_94-17.xlsx differ diff --git 
a/rouge_evalauation/evaluated_data/extractive_judgments/VII_ZR_151-18.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VII_ZR_151-18.xlsx new file mode 100644 index 0000000..774a8ff Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VII_ZR_151-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VII_ZR_192-20.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VII_ZR_192-20.xlsx new file mode 100644 index 0000000..a6e6bc4 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VII_ZR_192-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VII_ZR_78-20.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VII_ZR_78-20.xlsx new file mode 100644 index 0000000..4d9b92a Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VII_ZR_78-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VI_ZR_128-20.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VI_ZR_128-20.xlsx new file mode 100644 index 0000000..6e26093 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VI_ZR_128-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VI_ZR_252-19.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VI_ZR_252-19.xlsx new file mode 100644 index 0000000..c8a3013 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VI_ZR_252-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VI_ZR_506-17.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VI_ZR_506-17.xlsx new file mode 100644 index 0000000..51aeefc Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VI_ZR_506-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/VIa_ZR_418-21.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/VIa_ZR_418-21.xlsx new file mode 100644 index 0000000..9b49871 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/VIa_ZR_418-21.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_112-18.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_112-18.xlsx new file mode 100644 index 0000000..4a014cb Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_112-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_176-17.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_176-17.xlsx new file mode 100644 index 0000000..b13de77 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_176-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_218-18.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_218-18.xlsx new file mode 100644 index 0000000..a28f3b8 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_218-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_254-17.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_254-17.xlsx new file mode 100644 index 0000000..656ea75 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_254-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_273-17.xlsx 
b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_273-17.xlsx new file mode 100644 index 0000000..d6ae081 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_273-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_299-19.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_299-19.xlsx new file mode 100644 index 0000000..c202d38 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_299-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_8-19.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_8-19.xlsx new file mode 100644 index 0000000..f2236e7 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/V_ZR_8-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/XII_ZR_13-19.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/XII_ZR_13-19.xlsx new file mode 100644 index 0000000..1a61d1a Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/XII_ZR_13-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/XI_ZR_345-18.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/XI_ZR_345-18.xlsx new file mode 100644 index 0000000..b65241d Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/XI_ZR_345-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/XI_ZR_7-19.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/XI_ZR_7-19.xlsx new file mode 100644 index 0000000..f9dcfe0 Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/XI_ZR_7-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/X_ZR_107-16.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/X_ZR_107-16.xlsx new file mode 100644 index 0000000..3c64b7c Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/X_ZR_107-16.xlsx differ diff --git a/rouge_evalauation/evaluated_data/extractive_judgments/X_ZR_96-17.xlsx b/rouge_evalauation/evaluated_data/extractive_judgments/X_ZR_96-17.xlsx new file mode 100644 index 0000000..62fbddb Binary files /dev/null and b/rouge_evalauation/evaluated_data/extractive_judgments/X_ZR_96-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/I ZR 146-20.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/I ZR 146-20.xlsx new file mode 100644 index 0000000..1331532 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/I ZR 146-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/I ZR 153-17.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/I ZR 153-17.xlsx new file mode 100644 index 0000000..aa268f4 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/I ZR 153-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/II ZR 152-20.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/II ZR 152-20.xlsx new file mode 100644 index 0000000..2ddff83 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/II ZR 152-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/II ZR 84-20.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/II ZR 
84-20.xlsx new file mode 100644 index 0000000..dfb10f6 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/II ZR 84-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/III ZR 25-20.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/III ZR 25-20.xlsx new file mode 100644 index 0000000..11c5b50 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/III ZR 25-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/III ZR 79-21.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/III ZR 79-21.xlsx new file mode 100644 index 0000000..08a32ad Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/III ZR 79-21.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/IV ZR 144-21.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/IV ZR 144-21.xlsx new file mode 100644 index 0000000..90e8c16 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/IV ZR 144-21.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/IV ZR 253-20.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/IV ZR 253-20.xlsx new file mode 100644 index 0000000..589956e Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/IV ZR 253-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/V ZR 299-19.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/V ZR 299-19.xlsx new file mode 100644 index 0000000..482f717 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/V ZR 299-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/V ZR 8-19.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/V ZR 8-19.xlsx new file mode 100644 index 0000000..8674bb7 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/V ZR 8-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VI ZR 128-20.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VI ZR 128-20.xlsx new file mode 100644 index 0000000..f5e6de6 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VI ZR 128-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VI ZR 252-19.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VI ZR 252-19.xlsx new file mode 100644 index 0000000..e4cfe6a Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VI ZR 252-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VII ZR 192-20.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VII ZR 192-20.xlsx new file mode 100644 index 0000000..fabe9de Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VII ZR 192-20.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VII ZR 78-20.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VII ZR 78-20.xlsx new file mode 100644 index 0000000..e28c8c6 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VII ZR 78-20.xlsx differ diff --git 
a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VIII ZR 21-19.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VIII ZR 21-19.xlsx new file mode 100644 index 0000000..a63a49f Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VIII ZR 21-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VIII ZR 66-17.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VIII ZR 66-17.xlsx new file mode 100644 index 0000000..dfde839 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VIII ZR 66-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VIa ZR 418-21.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VIa ZR 418-21.xlsx new file mode 100644 index 0000000..c10fd45 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/VIa ZR 418-21.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/X ZR 107-16.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/X ZR 107-16.xlsx new file mode 100644 index 0000000..2826344 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/X ZR 107-16.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/XI ZR 345-18.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/XI ZR 345-18.xlsx new file mode 100644 index 0000000..c77e3f5 Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/XI ZR 345-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/second_eval/summary_sentences/XI ZR 7-19.xlsx b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/XI ZR 7-19.xlsx new file mode 100644 index 0000000..4c3ef0a Binary files /dev/null and b/rouge_evalauation/evaluated_data/second_eval/summary_sentences/XI ZR 7-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/I ZR 139-15.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/I ZR 139-15.xlsx new file mode 100644 index 0000000..9a42bf0 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/I ZR 139-15.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/I ZR 23-18.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/I ZR 23-18.xlsx new file mode 100644 index 0000000..fde1ae4 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/I ZR 23-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/III ZR 113-18.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 113-18.xlsx new file mode 100644 index 0000000..59a978c Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 113-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/III ZR 292-17.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 292-17.xlsx new file mode 100644 index 0000000..918b89e Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 292-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/III ZR 35-18.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 35-18.xlsx new file mode 100644 index 0000000..d676d29 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 35-18.xlsx differ diff --git 
a/rouge_evalauation/evaluated_data/summary_sentences/III ZR 391-17.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 391-17.xlsx new file mode 100644 index 0000000..a535a94 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 391-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/III ZR 42-19.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 42-19.xlsx new file mode 100644 index 0000000..0099a33 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 42-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/III ZR 55-19.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 55-19.xlsx new file mode 100644 index 0000000..ccc4a67 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 55-19.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/III ZR 67-18.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 67-18.xlsx new file mode 100644 index 0000000..9b092e4 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/III ZR 67-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/V ZR 112-18.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 112-18.xlsx new file mode 100644 index 0000000..fd575c6 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 112-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/V ZR 176-17.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 176-17.xlsx new file mode 100644 index 0000000..06c89f4 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 176-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/V ZR 218-18.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 218-18.xlsx new file mode 100644 index 0000000..3c7fdd2 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 218-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/V ZR 254-17.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 254-17.xlsx new file mode 100644 index 0000000..57be3ab Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 254-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/V ZR 273-17.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 273-17.xlsx new file mode 100644 index 0000000..1acc22e Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/V ZR 273-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/VI ZR 506-17.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/VI ZR 506-17.xlsx new file mode 100644 index 0000000..94a3e00 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/VI ZR 506-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/VII ZR 151-18.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/VII ZR 151-18.xlsx new file mode 100644 index 0000000..af9d165 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/VII ZR 151-18.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/VIII ZR 277-16.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/VIII ZR 277-16.xlsx new file mode 100644 index 0000000..c3d5aab Binary files /dev/null and 
b/rouge_evalauation/evaluated_data/summary_sentences/VIII ZR 277-16.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/VIII ZR 94-17.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/VIII ZR 94-17.xlsx new file mode 100644 index 0000000..113418f Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/VIII ZR 94-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/X ZR 96-17.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/X ZR 96-17.xlsx new file mode 100644 index 0000000..e20e6d3 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/X ZR 96-17.xlsx differ diff --git a/rouge_evalauation/evaluated_data/summary_sentences/XII ZR 13-19.xlsx b/rouge_evalauation/evaluated_data/summary_sentences/XII ZR 13-19.xlsx new file mode 100644 index 0000000..72af682 Binary files /dev/null and b/rouge_evalauation/evaluated_data/summary_sentences/XII ZR 13-19.xlsx differ diff --git a/settings.py b/settings.py new file mode 100644 index 0000000..1c60355 --- /dev/null +++ b/settings.py @@ -0,0 +1,12 @@ +# creating BGH data +import spacy + +remove_brackets = False +server = False + +no_brackets_suffix = "no_br_" +nlp = spacy.load("de_core_news_sm") # small one +# nlp = spacy.load("de_dep_news_trf") # big one, CUDA Problems on Server... + + + diff --git a/test/test_rouge.py b/test/test_rouge.py new file mode 100644 index 0000000..00d569b --- /dev/null +++ b/test/test_rouge.py @@ -0,0 +1,192 @@ +from unittest import TestCase + +import rouge +import utils + + +def get_file_data(filename): + text_file = open(filename, "r", encoding='utf-8') + data = text_file.read() + text_file.close() + return data + + +def run_tests_with_data(self, test_data): + for original, generated_one, generated_two, description in test_data: + print(description) + print('original: ' + original) + rouge_one = rouge.rouge_n(original, generated_one, n=1) + rouge_two = rouge.rouge_n(original, generated_two, n=1) + print('Rouge for one: ' + str(rouge_one) + ' ' + generated_one) + print('Rouge for two: ' + str(rouge_two) + ' ' + generated_two) + combined = generated_two + ' ' + generated_one + combined_rouge = rouge.rouge_n(original, combined, n=1) + print('Rouge for combined: ' + str(combined_rouge) + ' ' + combined) + self.assertGreaterEqual(combined_rouge, rouge_one) + self.assertGreaterEqual(combined_rouge, rouge_two) + + +class RougeTest(TestCase): + + def tests_from_paper(self): + s1 = 'police killed the gunman' + s2 = 'police kill the gunman' + s3 = 'the gunman kill police' + score_s2 = rouge.rouge_l(s1, s2) + self.assertEqual(score_s2, 0.75) + score_s3 = rouge.rouge_l(s1, s3) + self.assertEqual(score_s3, 0.5) + + reference = 'affe birne club düne essen' + summary = 'affe birne feder geld himmel. affe club insel jagd essen.' + lcs = 4 + p = lcs / 12 + r = lcs / 5 + f = 2*(r*p)/(r+p) + r_p, r_r, r_f = rouge.rouge_l(reference_summary=reference, created_summary=summary, pp_options=[utils.pp_option_stopwords], + extended_results=True) + self.assertEqual(r_r, r) + self.assertEqual(r_p, p) + self.assertEqual(r_f, f) + + score_equal = rouge.rouge_l(summary, summary) + self.assertEqual(score_equal, 1) + + def test_one(self): + original = 'Für die Frage, ob alle in Art. 6 Abs. 
1 der Richtlinie 2011/83/EU genannten Informationen objektiv in einem Werbemittel dargestellt werden können, ist erheblich, welchen Anteil diese Informationen am verfügbaren Raum des vom Unternehmer ausgewählten Werbeträgers einnehmen würden; die Werbebotschaft muss gegenüber den Verbraucherinformationen nicht zurücktreten.' + sent_1 = '(1) Zwar ist für die nach der Vorabentscheidung des Gerichtshofs der Europäischen Union maßgebliche Frage, ob alle in Art. 6 Abs. 1 der Richtlinie 2011/83/EU genannten Informationen objektiv in einem Werbemittel dargestellt werden können, erheblich, welchen Anteil diese Informationen am verfügbaren Raum des vom Unternehmer ausgewählten Werbeträgers einnehmen würden.' + sent_2 = 'Aus der Anforderung, die Informationen objektiv in der Werbebotschaft darstellen zu können, ist zu schließen, dass die Werbebotschaft gegenüber den Verbraucherinformationen nicht zurücktreten muss.' + rouge_v1 = rouge.rouge_n(original, sent_1, 1, pp_options=[utils.pp_option_stopwords]) + rouge_v2 = rouge.rouge_n(original, sent_1 + ' ' + sent_2, 1, pp_options=[utils.pp_option_stopwords]) + self.assertGreater(rouge_v2, rouge_v1) + + def test_one_match(self): + original = 'a b c d e.' + score = rouge.rouge_n(original, 'd.', n=1) + self.assertGreater(score, 0) + + def test_extension(self): + original_short = 'a b c d e.' + original_medi = 'a b c d e f g h i j k l m n o.' + original_long = 'a b c d e f g h i j k l m n o p q r s t u v w x y z.' + test_data = [[original_short, 'a b.', 'a b d.', 'small extension short sentence'], + [original_short, 'a.', 'a b c d.', 'large extension short sentence'], + [original_medi, 'a b c d e f g h i.', 'a b c d e f g h i j.', 'small extension medi sentence'], + [original_medi, 'a b c d e f g h i.', 'a b c d e f g h i m n o l.', + 'large extension medi sentence'], + [original_long, 'a b c d e f g h i j k l m n o p q r s t u v.', + 'a b c d e f g h i j k l m n o p q r s t u v w.', 'small extension long sentence'], + [original_long, 'a b c d e f g h i j k.', + 'a b c d e f g h i j k l m n o p q r s t u v w.', 'large extension long sentence'], + ] + print('Test extensions') + run_tests_with_data(self, test_data) + + def test_differing(self): + original_short = 'a b c d e.' + original_medi = 'a b c d e f g h i j k l m n o.' + original_long = 'a b c d e f g h i j k l m n o p q r s t u v w x y z.' + + test_data = [[original_short, 'a b c.', 'a b d.', 'small difference short sentence'], + [original_short, 'a e.', 'a b c.', 'large difference short sentence'], + [original_medi, 'a b c d e f g h i.', 'a b c d e f g h j.', 'small difference medi sentence'], + [original_medi, 'a b c d e f g h i.', 'a b c d j k l m.', + 'large difference medi sentence'], + [original_long, 'a b c d e f g h i j k l m n o p q r s t u v.', + 'a b c d e f g h i j k l m n o p q r s t u w.', 'small difference long sentence'], + [original_long, 'a b c d e f g h i j k.', + 'a b l m n o p q r s t u v.', 'large difference long sentence'], + ] + print('Test differences') + run_tests_with_data(self, test_data) + + def test_rougel_high_precision_or_recall(self): + gold = 'Boot.' + created = 'Boot. Boot.' + r_p, r_r, r_f = rouge.rouge_l(created_summary=created, reference_summary=gold, extended_results=True, + pp_options=[utils.pp_option_stopwords, utils.pp_option_lemmatize]) + self.assertEqual(r_p, 1/2) + self.assertEqual(r_r, 1) + self.assertEqual(r_f, 2/3) + + gold = 'Affe Boot. Boot Club.' + created = 'Boot.' 
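+ # after preprocessing the reference has six tokens ('Affe', 'Boot', '.', 'Boot',
+ # 'Club', '.') and the created summary two ('Boot', '.'), both of which match:
+ # precision 2/2, recall 2/6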
+ r_p, r_r, r_f = rouge.rouge_l(created_summary=created, reference_summary=gold, extended_results=True, + pp_options=[utils.pp_option_stopwords, utils.pp_option_lemmatize]) + self.assertEqual(r_p, 1) + self.assertEqual(r_r, 2/6) + self.assertEqual(r_f, 1/2) + + + gold = 'Im Rahmen der bei Prüfung der Schutzschranke der Berichterstattung über Tagesereignisse gemäß § 50 ' \ + 'UrhG vorzunehmenden Grundrechtsabwägung ist im Falle der Veröffentlichung eines bislang ' \ + 'unveröffentlichten Werks auch das vom Urheberpersönlichkeitsrecht geschützte Interesse an einer ' \ + 'Geheimhaltung des Werks zu berücksichtigen. Dieses schützt das urheberrechtsspezifische Interesse des ' \ + 'Urhebers, darüber zu bestimmen, ob er mit der erstmaligen Veröffentlichung den Schritt von der ' \ + 'Privatsphäre in die Öffentlichkeit tut und sich und sein Werk damit der öffentlichen Kenntnisnahme ' \ + 'und Kritik aussetzt. Nicht zu berücksichtigen ist bei dieser Abwägung dagegen das Interesse an der ' \ + 'Geheimhaltung von Umständen, deren Offenlegung Nachteile für die Interessen des Staates und seiner ' \ + 'Einrichtungen haben könnten. Dieses Interesse ist nicht durch das Urheberpersönlichkeitsrecht, ' \ + 'sondern durch andere Vorschriften - etwa das Sicherheitsüberprüfungsgesetz, § 3 Nr. 1 Buchst. b IFG ' \ + 'und die strafrechtlichen Bestimmungen gegen Landesverrat und die Gefährdung der äußeren Sicherheit ' \ + 'gemäß §§ 93 ff. StGB - geschützt. ' + created = 'Dieses Interesse ist vielmehr durch die allgemeinen Vorschriften - etwa das ' \ + 'Sicherheitsüberprüfungsgesetz, § 3 Nr. 1 Buchst. b IFG und die strafrechtlichen Bestimmungen gegen ' \ + 'Landesverrat und die Gefährdung der äußeren Sicherheit gemäß §§ 93 ff. ' + r_p, r_r, r_f = rouge.rouge_l(created_summary=created, reference_summary=gold, extended_results=True, + pp_options=[utils.pp_option_stopwords, utils.pp_option_lemmatize]) + self.assertLessEqual(r_p, 1) + + gold = 'Der Eigentümer eines Grundstücks ist hinsichtlich der von einem darauf befindlichen Baum (hier: ' \ + 'Birken) ausgehenden natürlichen Immissionen auf benachbarte Grundstücke Störer i.S.d. § 1004 Abs. 1 ' \ + 'BGB, wenn er sein Grundstück nicht ordnungsgemäß bewirtschaftet. Hieran fehlt es in aller Regel, ' \ + 'wenn die für die Anpflanzung bestehenden landesrechtlichen Abstandsregelungen eingehalten sind. 1b. ' \ + 'Ein Anspruch auf Beseitigung des Baums lässt sich in diesem Fall regelmäßig auch nicht aus dem ' \ + 'nachbarlichen Gemeinschaftsverhältnis herleiten. Hält der Grundstückseigentümer die für die ' \ + 'Anpflanzung bestehenden landesrechtlichen Abstandsregelungen ein, hat der Eigentümer des ' \ + 'Nachbargrundstücks wegen der Beeinträchtigungen durch die von den Anpflanzungen ausgehenden ' \ + 'natürlichen Immissionen weder einen Ausgleichsanspruch gemäß § 906 Abs. 2 Satz 2 BGB in unmittelbarer ' \ + 'Anwendung noch einen nachbarrechtlichen Ausgleichsanspruch gemäß § 906 Abs. 2 Satz 2 analog (' \ + 'Abgrenzung zu Senat, Urteil vom 27. Oktober 2017 - V ZR 8/17, ZfIR 2018, 190). ' + created = "Für die Entscheidung des Meinungsstreits ist von dem oben dargelegten Grundsatz auszugehen, " \ + "dass der Eigentümer eines Grundstücks hinsichtlich der von einem darauf befindlichen Baum " \ + "ausgehenden natürlichen Immissionen auf benachbarte Grundstücke Störer i.S.d. § 1004 Abs. 1 BGB " \ + "ist, wenn er sein Grundstück nicht ordnungsgemäß bewirtschaftet. 
Hält der Grundstückseigentümer " \ + "die für die Anpflanzung bestehenden landes-rechtlichen Abstandsregelungen ein, hat der Eigentümer " \ + "des Nachbargrund-stücks wegen der Beeinträchtigungen durch die von den Anpflanzungen ausgehenden " \ + "natürlichen Immissionen weder einen Ausgleichsanspruch gemäß § 906 Abs. 2 Satz 2 BGB in " \ + "unmittelbarer Anwendung noch einen nachbarrechtlichen Ausgleichsanspruch gemäß § 906 Abs. 2 Satz 2 " \ + "analog. Sind die für die Anpflanzung bestehenden landesrechtlichen Abstandsregelungen eingehalten, " \ + "lässt sich ein Anspruch auf Beseitigung der Bäume in aller Regel - und so auch hier - nicht aus " \ + "dem nachbarlichen Gemeinschaftsverhältnis herleiten. Gemäß § 907 Abs. 2 BGB gehören aber Bäume und " \ + "Sträucher nicht zu den Anlagen i.S.d. § 907 Abs. 1 BGB. Ob den Grundstückseigentümer für " \ + "natürliche Immissionen eine „Sicherungspflicht“ trifft und er damit Störer i.S.d. § 1004 Abs. 1 " \ + "BGB ist, ist jeweils anhand der Umstände des Einzelfalls zu prüfen. Rechtsfehlerhaft ist jedoch " \ + "die Auffassung des Berufungsgerichts, der Beklagte sei als Störer i.S.d. § 1004 Abs. 1 BGB für die " \ + "von den Birken ausgehenden Immissionen auf das Grundstück des Klägers verantwortlich. In diesem " \ + "Fall ist er regelmäßig schon nicht Störer, so dass es bereits an einem Beseitigungsanspruch gemäß " \ + "§ 1004 Abs. 1 BGB fehlt und der von dem Berufungsgericht beschriebene Konflikt zwischen den Regeln " \ + "des Bürgerlichen Gesetzbuchs und den landesrechtlichen Vorschriften nicht besteht. Voraussetzung " \ + "hierfür ist jedoch, dass der in Anspruch genommene Grundstückseigentümer für die " \ + "Eigentumsbeeinträchtigung verantwortlich und damit Störer i.S.d. § 1004 Abs. 1 BGB ist. " + + r_p, r_r, r_f = rouge.rouge_l(created_summary=created, reference_summary=gold, extended_results=True, + pp_options=[utils.pp_option_stopwords, utils.pp_option_lemmatize]) + self.assertLessEqual(r_p, 1) + self.assertLessEqual(r_r, 1) + + def test_specific(self): + gold = 'Diese Voraussetzungen hat der XII. Zivilsenat für den vorliegenden Fall bejaht.' + created = 'Dies ist insbesondere der Fall, wenn die Sanktion außer Verhältnis zum Gewicht des Vertragsverstoßes und den Folgen für den Schuldner der Vertragsstrafe steht.' + created_2 = 'Deren Untergrenze ist mit 30 € angegeben.' 
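+ # 'created' shares content words with the gold sentence (e.g. 'Fall'), 'created_2'
+ # shares next to nothing, so its F-score should be lower; concatenating both is
+ # expected to score higher than 'created' alone, mirroring the extension tests above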
+ r_p, r_r, r_f = rouge.rouge_l(created_summary=created, reference_summary=gold, extended_results=True,
+ pp_options=[utils.pp_option_stopwords, utils.pp_option_lemmatize])
+ r_p_2, r_r_2, r_f_2 = rouge.rouge_l(created_summary=created_2, reference_summary=gold, extended_results=True,
+ pp_options=[utils.pp_option_stopwords, utils.pp_option_lemmatize])
+
+ r_p_c, r_r_c, r_f_c = rouge.rouge_l(created_summary=created + ' ' + created_2, reference_summary=gold,
+ extended_results=True,
+ pp_options=[utils.pp_option_stopwords, utils.pp_option_lemmatize])
+
+ self.assertGreater(r_f, r_f_2)
+ self.assertGreater(r_f_c, r_f)
+
diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..1b2deb8 --- /dev/null +++ b/utils.py @@ -0,0 +1,475 @@
+import json
+import multiprocessing
+import os
+import re
+import shutil
+
+import pandas as pd
+
+import settings
+
+pool_processes = 8
+pool_maxtask = 10
+pool_chunksize = 30
+leitsatz_str = 'leitsatz'
+tenor_str = 'tenor'
+tatbestand_str = 'tatbestand'
+entscheidungsgruende_str = 'entscheidungsgruende'
+aktenzeichen_str = 'aktenzeichen'
+rii_text_columns = [leitsatz_str, tenor_str, tatbestand_str, entscheidungsgruende_str]
+sentence_marks = ['.', ',', ';', '!', '?']
+pp_option_lemmatize = 'preprocessing: lemmatize the text'
+pp_option_stopwords = 'preprocessing: remove stopwords'
+pp_option_case_normalize = 'preprocessing: normalize cases / put to lower'
+pp_option_remove_qout_marks_sing = 'preprocessing: remove quotation marks around single words'
+no_stopword_list = ['nicht', 'kein']
+entsch_gr_start_sentences = ['II.', 'B.', 'B']
+
+
+def server_path(current_path, path):
+ """
+ Method to add the path prefix in case the code is run on the server.
+
+ :param current_path: Path to add when run on server
+ :param path: Path for local use
+ :return: Final path for local or server use
+ """
+ if settings.server:
+ path = current_path + '/' + path
+ return path
+
+
+def open_file(current_path, path, modes, encoding=None, newline=None):
+ """
+ Wraps the builtin open function to adjust to server settings
+
+ :param current_path: path of the calling file to adjust for server (without /)
+ :param path: Path for file loading relative to calling file
+ :param modes: Modes to apply
+ :param newline: newline option of the original method, if None nothing will be passed
+ :param encoding: encoding option of the original method, if None nothing will be passed
+ :return: the opened file
+ """
+ # check the combined case first, otherwise it is unreachable
+ if encoding is not None and newline is not None:
+ return open(server_path(current_path=current_path, path=path), modes, encoding=encoding, newline=newline)
+ if encoding is not None:
+ return open(server_path(current_path=current_path, path=path), modes, encoding=encoding)
+ if newline is not None:
+ return open(server_path(current_path=current_path, path=path), modes, newline=newline)
+ return open(server_path(current_path=current_path, path=path), modes)
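+# For illustration: with settings.server set, open_file('data', 'dataframes/x.pkl', 'rb')
+# opens 'data/dataframes/x.pkl'; locally the relative path is used unchanged.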
+
+
+def file_exists(current_path, path):
+ """
+ Wraps the builtin exists function to adjust to server settings
+
+ :param current_path: path of the calling file to adjust for server (without /)
+ :param path: Path for file loading relative to calling file
+ :return: True if the file exists
+ """
+ return os.path.exists(server_path(current_path=current_path, path=path))
+
+
+def list_dir_files(current_path, path):
+ """
+ Wraps the builtin os.listdir function to adjust to server settings
+
+ :param current_path: path of the calling file to adjust for server (without /)
+ :param path: Path for file loading relative to calling file
+ :return: The filenames of the directory
+ """
+ return os.listdir(server_path(current_path=current_path, path=path))
+
+
+def df_from_pickle(current_path, path):
+ """
+ Wraps the pd.read_pickle function to adjust to server settings
+
+ :param current_path: path of the calling file to adjust for server (without /)
+ :param path: Path for file loading relative to calling file
+ :return: The loaded dataframe
+ """
+ return pd.read_pickle(server_path(current_path=current_path, path=path))
+
+
+def df_to_json(current_path, path, dataframe):
+ """
+ Wraps the df.to_json function to adjust to server settings
+
+ :param current_path: path of the calling file to adjust for server (without /)
+ :param path: Path for file loading relative to calling file
+ :param dataframe: The dataframe to save
+ """
+ dataframe.to_json(server_path(current_path=current_path, path=path))
+
+
+def df_from_json(current_path, path):
+ """
+ Wraps the json.load function in combination with a dataframe creation to adjust to server settings
+
+ :param current_path: path of the calling file to adjust for server (without /)
+ :param path: Path for file loading relative to calling file
+ :return: The loaded dataframe
+ """
+ return pd.DataFrame(json.load(open_file(current_path=current_path, path=path, modes="r")))
+
+
+def time_convert(sec):
+ """
+ Pretty-prints a time span. Format: Time Lapsed = hh:mm:ss
+
+ :param sec: the time to show in seconds
+ """
+ mins = sec // 60
+ sec = sec % 60
+ hours = mins // 60
+ mins = mins % 60
+ print("Time Lapsed = {0}:{1}:{2}".format(int(hours), int(mins), sec))
+
+
+def parallel_imap(function, packaged_args):
+ """
+ Executes the given function in parallel over list data.
+
+ :param function: Function to run in parallel.
+ :param packaged_args: Iterable of argument packages, one per run.
+ :return: Result of the parallel work
+ """
+ if settings.server:
+ pool_obj = multiprocessing.Pool(maxtasksperchild=pool_maxtask)
+ result = pool_obj.imap(function, packaged_args, chunksize=pool_chunksize)
+ else:
+ pool_obj = multiprocessing.Pool(processes=pool_processes)
+ result = pool_obj.imap(function, packaged_args)
+ pool_obj.close()
+ pool_obj.join()
+ return result
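+# imap keeps the input order, e.g. parallel_imap(len, [[1, 2], [3]]) yields 2, then 1.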
+
+
+def get_step_subset_raw(steps, path_to_dest_dataframe, source_data, dest_data, call_path):
+    """
+    Method for stepwise work on datasets. Reads in the already present data and starts
+    where the last run ended. Used for raw pickle files as the destination.
+
+    :param steps: How many rows should be selected now
+    :param path_to_dest_dataframe: Path from which to load the destination data
+    :param source_data: Source dataframe to select the rows from
+    :param dest_data: empty dataframe to load the data into
+    :param call_path: path from which the method was called, for the server path
+    :return: the subset of the source data and the loaded destination data (source, dest)
+    """
+    if steps > 0:
+        try:
+            try:
+                var = df_from_pickle(current_path=call_path, path=path_to_dest_dataframe)
+            except Exception:
+                var = df_from_json(current_path=call_path, path=path_to_dest_dataframe)
+            dest_data = pd.concat([dest_data, var], ignore_index=True)
+            start = dest_data.shape[0]
+        except Exception:
+            # no (readable) destination data yet, start from the beginning
+            start = 0
+        end = start + steps
+        try:  # case: source is a dataframe
+            if end >= source_data.shape[0]:
+                return source_data.iloc[start:], dest_data  # subset
+            else:
+                return source_data.iloc[start:end], dest_data  # subset
+        except AttributeError:  # case: source is a plain list
+            if end >= len(source_data):
+                return source_data[start:], dest_data  # subset
+            else:
+                return source_data[start:end], dest_data  # subset
+
+
+def remove_spaces_before_sentence_marks(text):
+    """
+    Removes unnecessary spaces before '.' and the other sentence marks.
+
+    :param text: Text to replace in
+    :return: The cleaned text
+    """
+    for sentence_mark in sentence_marks:
+        while ' ' + sentence_mark in text:
+            text = text.replace(' ' + sentence_mark, sentence_mark)
+    return text
+
+
+def remove_brackets(text):
+    """
+    Removes all matching round bracket pairs () together with their content. Always takes the
+    first bracket pair that appears in the text, so the closing bracket could also belong to an
+    enumeration like 'a)'.
+
+    :param text: Text to remove the brackets from.
+    :return: Resulting text
+    """
+    startindex = text.find('(')
+    res = ''
+    while startindex > -1:
+        rel_endindex = text[startindex:].find(')')
+        if rel_endindex > -1:
+            endindex = startindex + rel_endindex
+            # in case there is a ' ' in front of the brackets, remove one space
+            if startindex > 0 and text[startindex - 1] == ' ':
+                startindex -= 1
+            res += text[:startindex]
+            text = text[endindex + 1:]
+        else:
+            # unmatched opening bracket, keep the rest of the text as-is
+            break
+        startindex = text.find('(')
+    res += text
+    return res
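+
+
+# Worked examples (editor's addition, made-up inputs):
+#
+#   remove_spaces_before_sentence_marks('Der Antrag , so das Gericht , ist zulässig .')
+#   # -> 'Der Antrag, so das Gericht, ist zulässig.'
+#
+#   remove_brackets('Die Revision (vgl. oben) wird zurückgewiesen.')
+#   # -> 'Die Revision wird zurückgewiesen.'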
+
+
+def remove_leading_keywords_and_listing_sentences(sentences):
+    """
+    Method intended for Leitsätze. Some of them start with a single keyword in the first line;
+    this is removed. Additionally, sentences which are only a listing ('1.') are removed.
+
+    :param sentences: List of sentences in the original order to remove these things from
+    :return: the list of sentences after removing
+    """
+    # remove leading keywords and sentences which are only enumerations
+    sentences_var = list()
+    sentence_var = ''
+    for i in range(len(sentences)):
+        sentence = sentences[i].strip()
+        if len(sentence) == 0:  # guard against empty sentences
+            continue
+        if len(sentence) > 1 and sentence[-1] == '.' and ' ' not in sentence:  # at least two chars
+            if any(char.isdigit() for char in sentence) and sentence[0].isdigit():
+                # most likely an enumeration like '1.'
+                continue
+        if i > 0 or (i == 0 and len(sentence) > 20):
+            # most likely not a short keyword at the beginning
+            if sentence[-1] in ['.', ',', ':', ';', '!', '?']:
+                # sentence end
+                sentence_var += sentence
+                sentences_var.append(remove_spaces_before_sentence_marks(sentence_var))
+                sentence_var = ''
+            else:
+                # continuing sentence
+                sentence_var += sentence + ' '
+    return sentences_var
+
+
+def prepare_leitsatz(l_satz):
+    """
+    Does the preparation for Leitsätze: first splits into sentences, then removes leading
+    keywords and single listing sentences, and finally removes leading listings of sentences.
+
+    :param l_satz: Original Leitsatz as one string
+    :return: prepared Leitsatz as a list of strings
+    """
+    sentences = split_into_sentences(l_satz)
+    sentences = remove_leading_keywords_and_listing_sentences(sentences)
+    sentences = [remove_leading_listing(sentence) for sentence in sentences]
+    return sentences
+
+
+def select_list_subset(list_of_string, start_strings, end_string=None):
+    """
+    Selects a subset of a list of strings (case-sensitive). Copying starts at the first string
+    that matches any of the start_strings and stops at end_string. If no start string is found,
+    the whole original list is returned. Passing several start strings is useful because, for
+    example, section II. of the Entscheidungsgründe is sometimes opened by 'B.' instead of 'II.'.
+
+    :param list_of_string: List to get the subset from
+    :param start_strings: List of strings at which to start copying
+    :param end_string: First string at which to stop copying; if None, copy to the end
+    :return: Selected subset
+    """
+    result_list = []
+    copy = False
+    for i in range(len(list_of_string)):
+        string = list_of_string[i]
+        if string in start_strings:
+            copy = True
+        if end_string is not None and string == end_string:
+            copy = False
+        if copy:
+            result_list.append(string)
+    # if nothing or very little was found, fall back to the whole list
+    if len(result_list) == 0 or len(result_list) / len(list_of_string) < 0.2:
+        return list_of_string
+    return result_list
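+
+
+# Worked example (editor's addition, made-up input): copy from the first matching start
+# string up to, but excluding, the end string.
+#
+#   select_list_subset(['Rubrum', 'II.', 'Satz eins.', 'III.', 'Satz zwei.'],
+#                      start_strings=['II.', 'B.'], end_string='III.')
+#   # -> ['II.', 'Satz eins.']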
+
+
+def abbreviation_ending(text):
+    """
+    Checks whether an input text ends with a known legal abbreviation.
+    Known issue: numbers and roman numerals followed by a dot aren't matched.
+
+    :param text: Input text
+    :return: True if it ends with such an abbreviation, False otherwise
+    """
+    abbrev_list = ['A.', ' a.', 'a.A.', 'a.a.O.', 'ABl.', ' abl.', 'Abs.', ' abs.', 'Abschn.', 'Abse.',
+                   ' abzgl.', 'a.D.', 'a.E.', ' a.F.', ' ähnl.', 'a.l.i.c.', ' allg.', ' allgem.',
+                   'Alt.', 'AmtsBl.', ' and.', ' angef.', 'Anh.', 'Anl.', 'Anm.', ' Art.', '(Art.', ' aufgeh.',
+                   'Aufl.', ' ausf.', 'Ausn.', 'BAnz.', 'BArbBl.', 'BayJMBl.', 'Bd.', 'Bde.', 'Bdg.',
+                   'Bearb.', ' begr.', 'Beil.', 'Bek.', ' ber.', ' bes.', 'Beschl.', ' best.', ' bestr.',
+                   'Betr.', ' betr.', 'Bf.', 'BGBl.', ' bish.', ' Bl.', 'BPräs.', 'BReg.', 'Bsp.', 'Bst.',
+                   'BStBl.', 'BT-Drucks.', 'Buchst.', 'bzgl.', 'bzw.', 'c.i.c.', 'Co.', 'c.p.c.',
+                   'c.s.q.n.', 'Ct.', ' dar.', 'Darst.', ' ders.', 'd.h.', 'Diss.', ' div.', 'Dr.',
+                   'Drucks.', ' dto.', 'DVBl.', ' ebd.', ' Ed.', 'E.G.', ' eingef.', 'Einf.', 'Einl.',
+                   ' einschl.', 'Erg.', ' erk.Sen.', ' erk.', ' Erl.', 'etc.', 'E.U.', ' e.V.',
+                   'EVertr.', ' evtl.', 'E.W.G.', ' F.', ' f.', ' Fa.', ' Festschr.', ' ff.', ' Fn.',
+                   ' form.', ' fr.', ' fr.Rspr.', ' Fz.', 'GBl.', ' geänd.', 'Gedschr.', ' geg.',
+                   ' gem.', 'Ges.', ' gg.', ' ggf.', ' ggü.', ' ggüb.', ' Gl.', ' GMBl.', 'G.o.A.',
+                   'Grds.', ' grdsl.', 'Großkomm.', 'Großkomm.z.', 'GVBl.', 'GVOBl.', ' h.A.', 'Halbs.',
+                   ' h.c.', 'Hdlg.', 'Hess.', ' heut.', ' heut.Rspr.', ' hins.', ' h.L.', ' h.Lit.',
+                   ' h.M.', 'Hrsg.', ' h.Rspr.', 'HS.', 'Hs.', ' i.A.', ' ib.', ' ibd.', ' ibid.',
+                   'i.d.', 'i.d.F.', 'i.d.R.', 'i.d.S.', 'i.E.', 'i.e.', 'i.e.S.', 'i.H.d.', 'i.H.v.',
+                   'i.K.', ' incl.', ' inkl.', 'inkl.MwSt.', ' insb.', ' insbes.', 'Int.', ' i.O.',
+                   ' i.R.', ' i.R.d.', 'i.S.', 'i.S.d.', 'i.S.e.', 'i.S.v.', 'i.ü.', ' iur.', 'i.V.',
+                   'i.V.m.', 'i.W.', 'i.Wes.', 'i.w.S.', 'i.Zw.', 'Jahrb.', ' jew.', ' Jh.', 'JMBl.',
+                   ' jur.', ' Kap.', ' Ko.', ' krit.', ' kzfr.', 'Lb.', 'Lfg.', 'lgfr.', ' Lief.',
+                   'Lit.', ' lit.', ' lt.', 'Ltd.', 'M.A.', 'm.Änd.', 'MABl.', 'mat.', 'm.a.W.', 'm.E.',
+                   ' med.', 'mgl.', 'Mglkt.', 'MinBl.', 'Mio.', ' Mot.', 'M.M.', 'm.N.', 'Mod.',
+                   ' mögl.', 'Mot.', 'MünchKomm.', 'm.w.', 'm.w.N.', 'MwSt.', 'Mwst.', 'm.W.v.',
+                   'm.zust.Anm.', 'Nachw.', 'Nachw.b.', ' nat.', 'Nds.', 'Neubearb.', 'Neuf.',
+                   ' neugef.', 'n.F.', 'Nr.', 'Nrn.', ' o.', 'o.Ä.', ' od.', ' oec.', ' öff.',
+                   ' o.g.', ' österr.', 'p.F.V.', ' pharm.', ' phil.', ' pol.', 'Postf.', ' pp.',
+                   ' ppA.', ' ppa.', 'Prof.', 'Prot.', ' publ.', ' p.V.', 'p.V.V.', 'q.e.d.',
+                   'RdErl.', 'Rdn.', 'Rdnr.', 'RdSchr.', ' rel.', ' rer.', 'RGBl.', 'Rn.', 'Rspr.',
+                   'Rz.', 'S.', ' s.', 's.a.', 'Schr.', ' scil.', 'Sen.', ' sinngem.', 'SiZess.',
+                   'Slg.', 's.o.', ' sog.', 'Sonderbeil.', 'Stpfl.', ' str.', ' st.', 'st.Rspr.',
+                   ' st. Rspr.', 'stud.iur.', 's.u.', ' teilw.', ' theol.', 'Thür.', ' TO.', ' tw.',
+                   'Tz.', ' u.', 'u.a.', 'UAbs.', 'u.a.m.', ' umstr.', ' unmgl.', 'Unmglkt.', ' unmögl.',
+                   'Urt.', ' usw.', ' u.U.', ' V.', ' v.', 'Var.', 'Ver.', ' vgl.', 'V.m.', 'VOBl.',
+                   'Vor.', 'Vorbem.', 'Warn.', ' weg.', ' wg.', 'W.G.G.', 'w.z.b.w.', 'z.B.', 'z.Hd.',
+                   'Ziff.', 'z.T.', ' zust.', 'zust.Anm.', ' zw.', 'z.Z.', ' zzgl.', ';', 'II.1.a.', '(s.',
+                   ]
+    for abbrev in abbrev_list:
+        if text.endswith(abbrev):
+            return True
+    # single character followed by a dot, e.g. an initial
+    if len(text) >= 3 and re.search(" .\\.", text[-3:]):
+        return True
+    return False
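+
+
+# Worked examples (editor's addition, made-up inputs):
+#
+#   abbreviation_ending('Das folgt aus dem Gesetz, z.B.')   # -> True
+#   abbreviation_ending('Der Vertrag ist nichtig.')         # -> False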
+
+
+def remove_leading_listing(sentence):
+    """
+    Removes leading listings / enumerations like '1.' or 'a)'.
+
+    :param sentence: Sentence to remove from
+    :return: Processed sentence
+    """
+    return split_leading_listing(sentence)[1]
+
+
+def split_leading_listing(sentence):
+    """
+    Splits the sentence from a possible listing ('1.' or 'a)') at the start.
+
+    :param sentence: Sentence to split
+    :return: (start, rest), where start is the listing or None if there is no listing, and
+             rest is the remainder of the sentence or the original sentence if there was no listing
+    """
+    first_word = sentence.split()
+    if first_word is None or len(first_word) == 0:
+        first_word = ''
+    else:
+        first_word = first_word[0]
+    rest = sentence[len(first_word) + 1:]
+    # could be a name like M. Leicht
+    if (first_word.endswith('.') or first_word.endswith(')')) and len(rest.split()) > 1 and first_word != 'Art.':
+        # enumeration!
+        return first_word, rest
+    else:
+        return None, sentence
+
+
+def split_into_sentences(input_text):
+    """
+    Splits text into sentences. Uses spaCy sentences, but fixes sentences broken at newlines
+    or at abbreviations.
+
+    :param input_text: Text to split into sentences
+    :return: A list of the sentences that were split
+    """
+    paragraphs = input_text.split('\n')
+    sentences = list()
+    sentence_var = ''
+    # roughly split the original text into sentences
+    for paragraph in paragraphs:
+        nlp_paragraph = settings.nlp(paragraph)
+        sentences_paragraph = []
+        for sent in nlp_paragraph.sents:
+            sent = sent.text.strip()
+            # some leading listings aren't detected
+            a, b = split_leading_listing(sent)
+            if a is not None:
+                sentences_paragraph.append(a)
+            sentences_paragraph.append(b)
+        for i in range(0, len(sentences_paragraph)):
+            # add a space before the next token if it isn't a sentence mark
+            if not (sentences_paragraph[i].startswith('.') or sentences_paragraph[i].startswith(':')
+                    or sentences_paragraph[i].startswith('?') or sentences_paragraph[i].startswith('!')):
+                sentence_var += ' '
+            sentence_var += sentences_paragraph[i]
+            if (sentences_paragraph[i].endswith('.') or sentences_paragraph[i].endswith(':')
+                    or sentences_paragraph[i].endswith('?') or sentences_paragraph[i].endswith('!')) \
+                    and not abbreviation_ending(sentence_var) \
+                    and not sentence_var.strip() == '':
+                # the sentence is most likely really finished
+                sentences.append(sentence_var.strip())
+                sentence_var = ''
+        if not sentence_var.strip() == '':
+            # a sentence must also end at the end of a paragraph
+            sentences.append(sentence_var.strip())
+            sentence_var = ''
+    # end of the whole text
+    if sentence_var.strip() != '':
+        sentences.append(sentence_var.strip())
+    return sentences
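+
+
+# Illustrative sketch (editor's addition): the exact split depends on the spaCy model
+# configured in settings.nlp, but on a made-up Leitsatz fragment the intended result is:
+#
+#   split_into_sentences('Die Klage ist zulässig. Sie ist aber unbegründet.')
+#   # -> ['Die Klage ist zulässig.', 'Sie ist aber unbegründet.']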
+
+
+def preprocess_text(text, options):
+    """
+    Allows simple preprocessing such as lemmatization on strings.
+
+    :param text: Text to preprocess
+    :param options: Options specifying which preprocessing steps to apply; if None, the text is returned unchanged
+    :return: the preprocessed text; if text is None, the result is ''
+    """
+    if text is None:
+        return ''
+    if options is None:
+        return text
+    text_spacy = settings.nlp(text)
+    result_text = ''
+    for token in text_spacy:
+        # stopword removal: keep the token if it is no stopword, if stopwords shouldn't be
+        # removed, or if it is on the list of stopwords to keep
+        if not token.is_stop or pp_option_stopwords not in options or token.lemma_ in no_stopword_list:
+            # lemmatization if wanted
+            if pp_option_lemmatize in options and token.text not in sentence_marks:
+                to_append = token.lemma_
+            else:
+                to_append = token.text
+            if pp_option_remove_qout_marks_sing in options and to_append[0] == '"' and to_append[-1] == '"':
+                to_append = to_append.replace('"', '')
+            result_text += to_append + ' '
+    result_text = result_text.strip()
+    # case normalization: put everything to lower
+    if pp_option_case_normalize in options:
+        return result_text.lower()
+    else:
+        return result_text
+
+
+def create_dir(current_path, directory_name, delete=True):
+    """
+    Creates a directory if it doesn't exist.
+
+    :param current_path: path of the calling file
+    :param directory_name: name / path to create
+    :param delete: if True, an old directory with the same name is deleted first
+    """
+    if delete and file_exists(current_path=current_path, path=directory_name):
+        shutil.rmtree(server_path(current_path=current_path, path=directory_name))
+    if not file_exists(current_path=current_path, path=directory_name):
+        os.makedirs(server_path(current_path=current_path, path=directory_name))
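+
+
+# Minimal end-to-end sketch (editor's addition; names and paths are made up, and it
+# assumes settings.server is False so that paths stay local): create a directory,
+# pickle a small dataframe through open_file, and read it back via df_from_pickle.
+#
+#   def _demo_roundtrip():
+#       create_dir(current_path='.', directory_name='tmp_demo', delete=True)
+#       df = pd.DataFrame({aktenzeichen_str: ['I ZR 1/23']})
+#       with open_file(current_path='.', path='tmp_demo/demo.pkl', modes='wb') as f:
+#           df.to_pickle(f)
+#       return df_from_pickle(current_path='.', path='tmp_demo/demo.pkl')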