From e0f15cb3516a6003e6068558a16f9fdac556d461 Mon Sep 17 00:00:00 2001 From: Bianca Steffes Date: Fri, 22 Apr 2022 13:57:11 +0200 Subject: [PATCH] Parallel Version implemented still very slow.... --- fix_dates.py | 62 ++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/fix_dates.py b/fix_dates.py index 50c3750..4a539e9 100644 --- a/fix_dates.py +++ b/fix_dates.py @@ -1,6 +1,7 @@ import argparse import datetime import filecmp +import multiprocessing import os import tempfile import time @@ -30,7 +31,7 @@ def propfind(path, auth): """ # do not descend further into subdirectories\ # TODO: we could probably be faster if we did - headers = {"Depth": "1"} + headers = {"Depth": "infinity"} #"1"} # This body returns only the timelastmodified and the fileid variable requested_data = \ """ @@ -116,7 +117,7 @@ def content_equal(original_entry, fixed_version, auth): :param original_entry: Filename of the entry with wrong timestamp. :param fixed_version: Filename of the version for comparison with the original. :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) - :return: True, if they are equal in Metadata anc content. False otherwise. + :return: True, if they are equal in Metadata and content. False otherwise. """ original = download_file(original_entry, auth) fixed = download_file(fixed_version, auth) @@ -132,7 +133,23 @@ def content_equal(original_entry, fixed_version, auth): return deep and shallow -#TODO: parallelisieren +def restore_file(packed): + """ + Handles one file. Searches for the latest older version with intact timestamp and compares them. + :param packed: data needed for one file: (entry, arguments, auth). 
entry represents the original file, arguments + are the runtime arguments and auth is for the http authentication + """ + entry, arguments, auth = packed + fixed_version = find_valid_version(propfind( + arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]), + auth)) + if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server + + fixed_version['path'], auth): + print("Restore from {}".format(fixed_version)) + else: + print("Touch file.") + + if __name__ == "__main__": # get all necessary data from the command line argparser = argparse.ArgumentParser(description="Fix broken dates in Nextcloud folders.") @@ -155,39 +172,22 @@ if __name__ == "__main__": # List of all entries with wrong time wrongtime = [] - restore_coumt = 0 - fixed_count = 0 - touch_count = 0 - # Iterate through all folders and check for wrong timestamps while folders: url = arguments.server + folders.pop(0) - print("+", end="", flush=True) + # print("+", end="", flush=True) for entry in propfind(url, auth): - print(".", end="", flush=True) + # print(".", end="", flush=True) # put directories in search list - if "resource_type" in entry and "{DAV:}collection" in entry["resource_type"]: - folders.append(entry["path"]) + # if "resource_type" in entry and "{DAV:}collection" in entry["resource_type"]: + # folders.append(entry["path"]) # put files with wrong date in wrong date list (we don't know what to do if a directory has an invalid date) - elif "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD: + if "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD: wrongtime.append(entry) - # Iterate through all fileids with wrong timestamps and check for versions with intact timestamp + print() - # NOTE: you can indent this into the loop above to fix things on-the-fly instead of all at once - # TODO: indented for quicker access to examples - for entry in wrongtime: - # 
print(urllib.parse.unquote(entry["path"][len(FILES_PATH_PREFIX):])) - fixed_version = find_valid_version(propfind( - arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]), - auth)) - if fixed_version: - fixed_count+=1 - if fixed_version and content_equal(arguments.server + entry['path'], arguments.server + fixed_version['path'], auth): -# print("Restore from {}".format(fixed_version)) - restore_coumt +=1 - else: - touch_count +=1 -# print("Touch file.") - - print(restore_coumt) - print(touch_count) \ No newline at end of file + # Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch + # done parallel + data = [(entry, arguments, auth) for entry in wrongtime] + pool_obj = multiprocessing.Pool(processes=4) + results = pool_obj.map(restore_file, data)