Browse Source

Files in the cloud restore folder are named with a UUID, and all file versions are downloaded for potential restoring (starting with the latest version; if that version already matches, the others won't be downloaded).

main
Bianca Steffes 2 years ago
parent
commit
bef6f4e13e
  1. 54
      fix_dates.py

54
fix_dates.py

@ -7,6 +7,7 @@ import tempfile
import time
import urllib.parse
import sys
import uuid
import xml.etree.ElementTree as ET
from pathlib import Path
@ -80,14 +81,13 @@ def find_valid_version(versions):
:param versions: An iterator as returned by propfind()
:return: The entry of the iterator which has the most recent date or None if none exists
"""
# mock entry for comparison
most_recent = {"last_modified": DATE_THRESHOLD}
all_versions = {}
for version in versions:
if "last_modified" in version and most_recent["last_modified"] < version["last_modified"]:
most_recent = version
if most_recent["last_modified"] == DATE_THRESHOLD:
if "last_modified" in version and DATE_THRESHOLD < version["last_modified"]:
all_versions[version["last_modified"]] = version
if len(all_versions) == 0:
return None
return most_recent
return all_versions
def download_file(path, auth):
@ -129,16 +129,15 @@ def upload_file(local_path, remote_path, auth):
return False
def content_equal(original_entry, fixed_version, auth):
def content_equal(original, fixed_version, auth):
"""
Compares the two file versions for replacement.
:param original_entry: Filename of the entry with wrong timestamp.
:param original: local filename of the entry with wrong timestamp.
:param fixed_version: Filename of the version for comparison with the original.
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
:return: True, if they are equal in Metadata and content. False otherwise. Also if one or both files couldn't
be downloaded
"""
original = download_file(original_entry, auth)
fixed = download_file(fixed_version, auth)
if original == '' or fixed == '':
# TODO: sth better if the downloading failed?
@ -147,9 +146,6 @@ def content_equal(original_entry, fixed_version, auth):
shallow = filecmp.cmp(original, fixed)
# deep comparison
deep = filecmp.cmp(original, fixed, shallow=False)
if deep != shallow:
print(deep)
os.remove(original)
os.remove(fixed)
return deep and shallow
@ -161,31 +157,39 @@ def restore_file(packed):
are the runtime arguments and auth is for the HTTP authentication
"""
entry, arguments, auth = packed
fixed_version = find_valid_version(propfind(
fixed_versions = find_valid_version(propfind(
arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]),
auth))
if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server +
fixed_version['path'], auth):
filename = os.path.basename(os.path.normpath(fixed_version['path']))
# print("Restore from {}".format(fixed_version))
restored = restore_by_version(arguments.server + fixed_version['path'], filename, auth, arguments)
original = download_file(arguments.server + entry['path'], auth)
restored = False
if fixed_versions is not None and len(fixed_versions) > 0:
keys = sorted(fixed_versions, reverse=True ) # sort dates descending to start with latest version
for i in range(0, len(keys)):
if content_equal(original, arguments.server + fixed_versions[keys[i]]['path'], auth):
# found latest matching version
# print("Restore from {}".format(fixed_versions))
restored = restore_by_version(arguments.server + fixed_versions['path'], auth, arguments)
break # stop looking any further
else:
# print("Touch file.")
restored = restore_by_touch(arguments.server + entry['path'], auth)
restored = restore_by_touch(arguments.server + entry['path'], original, auth)
if not restored:
print('File couldn\'t be restored: ' + entry['path'])
os.remove(original)
def restore_by_version(path_version, temp_id, auth, args):
def restore_by_version(path_version, auth, args):
"""
Restores the given old version of a file
:param path_version: cloud path to the version to be restored
:param temp_id: temporary filename in the restore folder (can't be static because of parallelism)
:param auth: Auth data for the HTTP request
:param args: Runtime arguments
:return: True if the version was successfully restored, false otherwise
"""
headers = {"Destination": args.server + VERSIONS_PATH_PREFIX + arguments.username + "/restore/" + temp_id}
# uuid4 should create a random uuid
headers = {"Destination": args.server + VERSIONS_PATH_PREFIX + arguments.username + "/restore/" + uuid.uuid4()}
r = requests.request(
method='move',
url=path_version,
@ -198,14 +202,14 @@ def restore_by_version(path_version, temp_id, auth, args):
return False
def restore_by_touch(path, auth):
def restore_by_touch(path, local_path, auth):
"""
Restores a file by touch: Touch on cloud isn't possible so the file is simply downloaded and uploaded again
:param path: The cloud path to the file in question
:param local_path: The local path to the file in question
:param auth: Auth data for the HTTP request
:return: True if the restoring was successful, False otherwise.
"""
local_path = download_file(path, auth)
if local_path != '':
return upload_file(local_path, path, auth)
else:
@ -242,5 +246,5 @@ if __name__ == "__main__":
# Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch
# done parallel
data = [(entry, arguments, auth) for entry in wrongtime]
pool_obj = multiprocessing.Pool(processes=8)
pool_obj = multiprocessing.Pool(processes=1)
results = pool_obj.map(restore_file, data)
Loading…
Cancel
Save