You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
47 lines
1.5 KiB
47 lines
1.5 KiB
import os
|
|
|
|
import pandas as pd
|
|
|
|
|
|
import utils
|
|
|
|
# Labels used when reading the annotation spreadsheets. The German strings are
# runtime values (column labels / questionnaire texts) and must stay verbatim.

# Column label for the sentence number within the press release (pm).
pm_sent_no: str = 'pm_sent_number'
# Column label for the sentence text; German for "sentences of the press release".
pm_sent: str = 'Sätze der Pressemitteilung'

# NOTE(review): the labels below are not referenced in the visible part of this
# file; presumably they name further columns of the annotation sheets - confirm.
judgement_sent_no: str = 'judgement_sent_number'
# German for "matching sentences of the judgment".
judgement_sent: str = 'Dazu passende Sätze des Urteils'
# German for "keywords".
keywords: str = 'Schlagworte'
# German for "remark".
comments: str = 'Anmerkung'
# German for "How long did you need to work through this judgment?".
duration: str = 'Wie lange hast Du für die Bearbeitung dieses Urteils gebraucht?'
# German for "Is this press release a bad representation / summary of the judgment?".
bad_pm: str = 'Ist diese Pressemitteilung eine schlechte Darstellung / Zusammenfassung des Urteils?'

# Passed to utils.server_path as ``current_path`` when resolving data directories.
current_dir: str = 'pm_summary/'
|
|
|
|
|
|
def prepare_file(path):
    """
    Read one spreadsheet and convert it into a uniform dictionary layout.

    The sheet is loaded with ``header=None`` (no header row), and the column
    labels ``pm_sent_no`` and ``pm_sent`` are supplied via ``names``.

    :param path: path to the file.
    :return: dictionary holding the results. Every sentence of the press
        release gets its own sub-dictionary, keyed by its sentence number.
    """
    sheet = pd.read_excel(path, names=[pm_sent_no, pm_sent], header=None)
    # Rows sharing a sentence number overwrite each other; the last one wins.
    return {
        line[pm_sent_no]: {pm_sent_no: line[pm_sent_no], pm_sent: line[pm_sent]}
        for _, line in sheet.iterrows()
    }
|
|
|
|
|
|
def get_all_pm_files():
    """
    Return all annotated pm-files together with their parsed content.

    The directory is obtained from ``utils.server_path`` using ``current_dir``
    and the relative path to the extractive judgments. Every entry whose name
    ends with ``.xlsx`` is parsed with ``prepare_file``.

    :return: [(pm_filename, file_data)*], ordered by filename
    """
    file_path_base = utils.server_path(current_path=current_dir,
                                       path='../rouge_evalauation/evaluated_data/extractive_judgments')
    res = []
    # os.listdir yields names in arbitrary order; sorting keeps the result
    # reproducible across runs and machines.
    for judgment in sorted(os.listdir(file_path_base)):
        # Test the suffix, not a substring: "'.xlsx' in name" would also accept
        # names such as 'a.xlsx.bak' and hand them to the Excel reader.
        if judgment.endswith('.xlsx'):
            filename = os.path.join(file_path_base, judgment)
            res.append((judgment, prepare_file(filename)))
    return res
|