From bef6f4e13e5c244a1475fea3a61f92be0f25cef4 Mon Sep 17 00:00:00 2001 From: Bianca Steffes Date: Thu, 5 May 2022 16:36:10 +0200 Subject: [PATCH] Files in cloud restore are named with a UUID and all file versions are downloaded for the potential restoring (starting with the latest version. If that version is already matching, the others won't be downloaded) --- fix_dates.py | 54 ++++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/fix_dates.py b/fix_dates.py index 93bced3..6e6a4bf 100644 --- a/fix_dates.py +++ b/fix_dates.py @@ -7,6 +7,7 @@ import tempfile import time import urllib.parse import sys +import uuid import xml.etree.ElementTree as ET from pathlib import Path @@ -80,14 +81,13 @@ def find_valid_version(versions): :param versions: An iterator as returned by propfind() :return: The entry of the iterator which has the most recent date or None if none exists """ - # mock entry for comparison - most_recent = {"last_modified": DATE_THRESHOLD} + all_versions = {} for version in versions: - if "last_modified" in version and most_recent["last_modified"] < version["last_modified"]: - most_recent = version - if most_recent["last_modified"] == DATE_THRESHOLD: + if "last_modified" in version and DATE_THRESHOLD < version["last_modified"]: + all_versions[version["last_modified"]] = version + if len(all_versions) == 0: return None - return most_recent + return all_versions def download_file(path, auth): @@ -129,16 +129,15 @@ def upload_file(local_path, remote_path, auth): return False -def content_equal(original_entry, fixed_version, auth): +def content_equal(original, fixed_version, auth): """ Compares the two file versions for replacement. - :param original_entry: Filename of the entry with wrong timestamp. + :param original: local filename of the entry with wrong timestamp. :param fixed_version: Filename of the version for comparison with the original. :param auth: Auth data for the HTTP request (e.g. 
a requests.auth.HTTPBasicAuth object) :return: True, if they are equal in Metadata and content. False otherwise. Also if one or both files couldn't be downloaded """ - original = download_file(original_entry, auth) fixed = download_file(fixed_version, auth) if original == '' or fixed == '': # TODO: sth better if the downloading failed? @@ -147,9 +146,6 @@ def content_equal(original_entry, fixed_version, auth): shallow = filecmp.cmp(original, fixed) # deep comparison deep = filecmp.cmp(original, fixed, shallow=False) - if deep != shallow: - print(deep) - os.remove(original) os.remove(fixed) return deep and shallow @@ -161,31 +157,39 @@ def restore_file(packed): are the runtime arguments and auth is for the http authentification """ entry, arguments, auth = packed - fixed_version = find_valid_version(propfind( + fixed_versions = find_valid_version(propfind( arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]), auth)) - if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server + - fixed_version['path'], auth): - filename = os.path.basename(os.path.normpath(fixed_version['path'])) - # print("Restore from {}".format(fixed_version)) - restored = restore_by_version(arguments.server + fixed_version['path'], filename, auth, arguments) + original = download_file(arguments.server + entry['path'], auth) + restored = False + if fixed_versions is not None and len(fixed_versions) > 0: + keys = sorted(fixed_versions, reverse=True ) # sort dates descending to start with latest version + + for i in range(0, len(keys)): + if content_equal(original, arguments.server + fixed_versions[keys[i]]['path'], auth): + # found latest matching version; fixed_versions is keyed by last_modified, so look the entry up via keys[i] + # print("Restore from {}".format(fixed_versions)) + restored = restore_by_version(arguments.server + fixed_versions[keys[i]]['path'], auth, arguments) + break # stop looking any further else: # print("Touch file.") - restored = restore_by_touch(arguments.server + 
entry['path'], auth) + restored = restore_by_touch(arguments.server + entry['path'], original, auth) if not restored: print('File couldn\'t be restored: ' + entry['path']) + os.remove(original) + -def restore_by_version(path_version, temp_id, auth, args): +def restore_by_version(path_version, auth, args): """ Restores the given old version of a file :param path_version: cloud path to the version to be restored - :param temp_id: temporary filename in the restore folder (can't be static because of parallelism) :param auth: Auth data for the HTTP request :param args: Runtime arguments :return: True if the version was successfully restored, false otherwise """ - headers = {"Destination": args.server + VERSIONS_PATH_PREFIX + arguments.username + "/restore/" + temp_id} + # random temporary name in the restore folder; uuid4() returns a UUID object, so it must be converted with str() + headers = {"Destination": args.server + VERSIONS_PATH_PREFIX + args.username + "/restore/" + str(uuid.uuid4())} r = requests.request( method='move', url=path_version, @@ -198,14 +202,14 @@ def restore_by_version(path_version, temp_id, auth, args): return False -def restore_by_touch(path, auth): +def restore_by_touch(path, local_path, auth): """ Restores a file by touch: Touch on cloud isn't possible so the file is simply downloaded and uploaded again :param path: The cloud path to the file in question + :param local_path: The local path to the file in question :param auth: Auth data for the HTTP request :return: True if the restoring was successful, False otherwise. """ - local_path = download_file(path, auth) if local_path != '': return upload_file(local_path, path, auth) else: @@ -242,5 +246,5 @@ if __name__ == "__main__": # Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch # done parallel data = [(entry, arguments, auth) for entry in wrongtime] - pool_obj = multiprocessing.Pool(processes=8) + pool_obj = multiprocessing.Pool(processes=1) results = pool_obj.map(restore_file, data)