diff --git a/fix_dates.py b/fix_dates.py index 4a539e9..93bced3 100644 --- a/fix_dates.py +++ b/fix_dates.py @@ -8,6 +8,7 @@ import time import urllib.parse import sys import xml.etree.ElementTree as ET +from pathlib import Path import requests @@ -20,7 +21,6 @@ DATE_THRESHOLD = datetime.datetime(1990, 1, 1) SESSION = requests.Session() - def propfind(path, auth): """ Get a file's Last Modified timestamp and FileID via a PROPFIND request @@ -31,7 +31,7 @@ def propfind(path, auth): """ # do not descend further into subdirectories\ # TODO: we could probably be faster if we did - headers = {"Depth": "infinity"} #"1"} + headers = {"Depth": "infinity"} # "1"} # This body returns only the timelastmodified and the fileid variable requested_data = \ """ @@ -83,7 +83,7 @@ def find_valid_version(versions): # mock entry for comparison most_recent = {"last_modified": DATE_THRESHOLD} for version in versions: - if "last_modified" in version and version["last_modified"] > most_recent["last_modified"]: + if "last_modified" in version and most_recent["last_modified"] < version["last_modified"]: most_recent = version if most_recent["last_modified"] == DATE_THRESHOLD: return None @@ -95,7 +95,7 @@ def download_file(path, auth): This function downloads one file and saves it on the local device. :param path: The path to the file in question :param auth: Auth data for the HTTP request (e.g. 
def upload_file(local_path, remote_path, auth):
    """
    Uploads a local file to the cloud with an HTTP PUT request.
    :param local_path: File path of the file to be uploaded
    :param remote_path: URL the file should be uploaded to on the cloud
    :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
    :return: True if the server answered with a 2xx status code, False otherwise
    :raises OSError: if the local file cannot be opened or read
    """
    # Read inside a context manager so the file handle is closed right away
    # instead of being left open until garbage collection.
    with open(local_path, 'rb') as local_file:
        payload = local_file.read()
    response = requests.put(
        url=remote_path,
        auth=auth,
        data=payload
    )
    # Any 2xx answer counts as a successful upload.
    return 200 <= response.status_code < 300
def restore_by_version(path_version, temp_id, auth, args):
    """
    Restores the given old version of a file by sending a MOVE request that
    targets the user's restore folder.
    :param path_version: cloud path to the version to be restored
    :param temp_id: temporary filename in the restore folder (can't be static because of parallelism)
    :param auth: Auth data for the HTTP request
    :param args: Runtime arguments; the server and username attributes are read
    :return: True if the server answered with a 2xx status code, False otherwise
    """
    # Build the destination from the args parameter only. The previous code
    # mixed in the global name `arguments`, which raises a NameError wherever
    # that global is not bound (it appears to be set only by the __main__
    # script section, e.g. not in worker processes that re-import the module).
    destination = args.server + VERSIONS_PATH_PREFIX + args.username + "/restore/" + temp_id
    headers = {"Destination": destination}
    response = requests.request(
        method='move',
        url=path_version,
        auth=auth,
        headers=headers
    )
    # Any 2xx answer counts as a successful restore.
    return 200 <= response.status_code < 300
def restore_by_touch(path, auth):
    """
    Restores a file by "touching" it. Touch on cloud isn't possible, so the
    file is fetched with download_file and the local copy is pushed back to
    the same cloud path with upload_file.
    :param path: The cloud path to the file in question
    :param auth: Auth data forwarded to both HTTP requests
    :return: False if the download produced no local file (empty path),
             otherwise the result of upload_file
    """
    downloaded = download_file(path, auth)
    if downloaded == '':
        # Nothing was fetched, so there is nothing to upload again.
        return False
    return upload_file(downloaded, path, auth)