From fe770b0d0a22701fb7dc4c1cb6a1298e5f65aafb Mon Sep 17 00:00:00 2001 From: Bianca Steffes Date: Wed, 13 Apr 2022 11:26:52 +0200 Subject: [PATCH] Added first version for file comparing (much too slow, try parallelism next) --- fix_dates.py | 73 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 6 deletions(-) diff --git a/fix_dates.py b/fix_dates.py index ab40782..50c3750 100644 --- a/fix_dates.py +++ b/fix_dates.py @@ -1,5 +1,8 @@ import argparse import datetime +import filecmp +import os +import tempfile import time import urllib.parse import sys @@ -7,7 +10,6 @@ import xml.etree.ElementTree as ET import requests - # some constants FILES_PATH_PREFIX = "/remote.php/dav/files/" VERSIONS_PATH_PREFIX = "/remote.php/dav/versions/" @@ -17,6 +19,7 @@ DATE_THRESHOLD = datetime.datetime(1990, 1, 1) SESSION = requests.Session() + def propfind(path, auth): """ Get a file's Last Modified timestamp and FileID via a PROPFIND request @@ -86,6 +89,50 @@ def find_valid_version(versions): return most_recent +def download_file(path, auth): + """ + Downloads one file with an HTTP GET request and saves it in the current working directory under the last component of path. + :param path: The URL of the file in question (passed to the request as url) + :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) + :return: The name of the created file, or '' if the response status is not 200. NOTE(review): an existing local file of the same name is overwritten. + """ + r = requests.request( + method='get', + url=path, + auth=auth + ) + + _, filename = os.path.split(path) + if r.status_code == 200: + with open(filename, 'wb') as file: + file.write(r.content) + return filename + return '' + + +def content_equal(original_entry, fixed_version, auth): + """ + Downloads both file versions and compares the local copies with filecmp; both copies are removed afterwards. + :param original_entry: URL of the entry with wrong timestamp (handed to download_file). + :param fixed_version: URL of the version for comparison with the original (handed to download_file). + :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) + :return: True, if the shallow and the deep filecmp comparison of the downloaded copies both succeed. False otherwise. NOTE(review): a failed download yields '' which reaches filecmp.cmp and os.remove unchecked. 
+ """ + original = download_file(original_entry, auth) + fixed = download_file(fixed_version, auth) + + # shallow comparison (default shallow=True: files with identical os.stat() signatures are taken to be equal) + shallow = filecmp.cmp(original, fixed) + # deep comparison (shallow=False compares the file contents) + deep = filecmp.cmp(original, fixed, shallow=False) + if deep != shallow: + print(deep) + os.remove(original) + os.remove(fixed) + return deep and shallow + + +# TODO: parallelize if __name__ == "__main__": # get all necessary data from the command line argparser = argparse.ArgumentParser(description="Fix broken dates in Nextcloud folders.") @@ -108,6 +155,10 @@ if __name__ == "__main__": # List of all entries with wrong time wrongtime = [] + restore_coumt = 0 + fixed_count = 0 + touch_count = 0 + # Iterate through all folders and check for wrong timestamps while folders: url = arguments.server + folders.pop(0) @@ -120,13 +171,23 @@ if __name__ == "__main__": # put files with wrong date in wrong date list (we don't know what to do if a directory has an invalid date) elif "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD: wrongtime.append(entry) - # Iterate through all fileids with wrong timestamps and check for versions with intact timestamp + # Iterate through all fileids with wrong timestamps and check for versions with intact timestamp print() # NOTE: you can indent this into the loop above to fix things on-the-fly instead of all at once + # TODO: indented for quicker access to examples for entry in wrongtime: - print(urllib.parse.unquote(entry["path"][len(FILES_PATH_PREFIX):])) - fixed_version = find_valid_version(propfind(arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]), auth)) + # print(urllib.parse.unquote(entry["path"][len(FILES_PATH_PREFIX):])) + fixed_version = find_valid_version(propfind( + arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]), + auth)) if fixed_version: - print("Restore from {}".format(fixed_version)) + fixed_count+=1 + if fixed_version 
and content_equal(arguments.server + entry['path'], arguments.server + fixed_version['path'], auth): +# print("Restore from {}".format(fixed_version)) + restore_coumt +=1 else: - print("Touch file.") \ No newline at end of file + touch_count +=1 +# print("Touch file.") + + print(restore_coumt) + print(touch_count) \ No newline at end of file