Browse Source

Files in the cloud restore folder are named with a UUID, and all file versions are downloaded for potential restoring (starting with the latest version; if that version already matches, the others won't be downloaded).

main
Bianca Steffes 3 years ago
parent
commit
bef6f4e13e
  1. 54
      fix_dates.py

54
fix_dates.py

@ -7,6 +7,7 @@ import tempfile
import time import time
import urllib.parse import urllib.parse
import sys import sys
import uuid
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from pathlib import Path from pathlib import Path
@ -80,14 +81,13 @@ def find_valid_version(versions):
:param versions: An iterator as returned by propfind() :param versions: An iterator as returned by propfind()
:return: The entry of the iterator which has the most recent date or None if none exists :return: The entry of the iterator which has the most recent date or None if none exists
""" """
# mock entry for comparison
most_recent = {"last_modified": DATE_THRESHOLD}
all_versions = {}
for version in versions: for version in versions:
if "last_modified" in version and most_recent["last_modified"] < version["last_modified"]:
most_recent = version
if most_recent["last_modified"] == DATE_THRESHOLD:
if "last_modified" in version and DATE_THRESHOLD < version["last_modified"]:
all_versions[version["last_modified"]] = version
if len(all_versions) == 0:
return None return None
return most_recent
return all_versions
def download_file(path, auth): def download_file(path, auth):
@ -129,16 +129,15 @@ def upload_file(local_path, remote_path, auth):
return False return False
def content_equal(original_entry, fixed_version, auth):
def content_equal(original, fixed_version, auth):
""" """
Compares the two file versions for replacement. Compares the two file versions for replacement.
:param original_entry: Filename of the entry with wrong timestamp.
:param original: local filename of the entry with wrong timestamp.
:param fixed_version: Filename of the version for comparison with the original. :param fixed_version: Filename of the version for comparison with the original.
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
:return: True, if they are equal in Metadata and content. False otherwise. Also if one or both files couldn't :return: True, if they are equal in Metadata and content. False otherwise. Also if one or both files couldn't
be downloaded be downloaded
""" """
original = download_file(original_entry, auth)
fixed = download_file(fixed_version, auth) fixed = download_file(fixed_version, auth)
if original == '' or fixed == '': if original == '' or fixed == '':
# TODO: sth better if the downloading failed? # TODO: sth better if the downloading failed?
@ -147,9 +146,6 @@ def content_equal(original_entry, fixed_version, auth):
shallow = filecmp.cmp(original, fixed) shallow = filecmp.cmp(original, fixed)
# deep comparison # deep comparison
deep = filecmp.cmp(original, fixed, shallow=False) deep = filecmp.cmp(original, fixed, shallow=False)
if deep != shallow:
print(deep)
os.remove(original)
os.remove(fixed) os.remove(fixed)
return deep and shallow return deep and shallow
@ -161,31 +157,39 @@ def restore_file(packed):
are the runtime arguments and auth is for the http authentification are the runtime arguments and auth is for the http authentification
""" """
entry, arguments, auth = packed entry, arguments, auth = packed
fixed_version = find_valid_version(propfind(
fixed_versions = find_valid_version(propfind(
arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]), arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]),
auth)) auth))
if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server +
fixed_version['path'], auth):
filename = os.path.basename(os.path.normpath(fixed_version['path']))
# print("Restore from {}".format(fixed_version))
restored = restore_by_version(arguments.server + fixed_version['path'], filename, auth, arguments)
original = download_file(arguments.server + entry['path'], auth)
restored = False
if fixed_versions is not None and len(fixed_versions) > 0:
keys = sorted(fixed_versions, reverse=True ) # sort dates descending to start with latest version
for i in range(0, len(keys)):
if content_equal(original, arguments.server + fixed_versions[keys[i]]['path'], auth):
# found latest matching version
# print("Restore from {}".format(fixed_versions))
restored = restore_by_version(arguments.server + fixed_versions['path'], auth, arguments)
break # stop looking any further
else: else:
# print("Touch file.") # print("Touch file.")
restored = restore_by_touch(arguments.server + entry['path'], auth)
restored = restore_by_touch(arguments.server + entry['path'], original, auth)
if not restored: if not restored:
print('File couldn\'t be restored: ' + entry['path']) print('File couldn\'t be restored: ' + entry['path'])
os.remove(original)
def restore_by_version(path_version, temp_id, auth, args):
def restore_by_version(path_version, auth, args):
""" """
Restores the given old version of a file Restores the given old version of a file
:param path_version: cloud path to the version to be restored :param path_version: cloud path to the version to be restored
:param temp_id: temporary filename in the restore folder (can't be static because of parallelism)
:param auth: Auth data for the HTTP request :param auth: Auth data for the HTTP request
:param args: Runtime arguments :param args: Runtime arguments
:return: True if the version was successfully restored, false otherwise :return: True if the version was successfully restored, false otherwise
""" """
headers = {"Destination": args.server + VERSIONS_PATH_PREFIX + arguments.username + "/restore/" + temp_id}
# uuid4 should create a random uuid
headers = {"Destination": args.server + VERSIONS_PATH_PREFIX + arguments.username + "/restore/" + uuid.uuid4()}
r = requests.request( r = requests.request(
method='move', method='move',
url=path_version, url=path_version,
@ -198,14 +202,14 @@ def restore_by_version(path_version, temp_id, auth, args):
return False return False
def restore_by_touch(path, auth):
def restore_by_touch(path, local_path, auth):
""" """
Restores a file by touch: Touch on cloud isn't possible so the file is simply downloaded and uploaded again Restores a file by touch: Touch on cloud isn't possible so the file is simply downloaded and uploaded again
:param path: The cloud path to the file in question :param path: The cloud path to the file in question
:param local_path: The local path to the file in question
:param auth: Auth data for the HTTP request :param auth: Auth data for the HTTP request
:return: True if the restoring was successful, False otherwise. :return: True if the restoring was successful, False otherwise.
""" """
local_path = download_file(path, auth)
if local_path != '': if local_path != '':
return upload_file(local_path, path, auth) return upload_file(local_path, path, auth)
else: else:
@ -242,5 +246,5 @@ if __name__ == "__main__":
# Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch # Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch
# done parallel # done parallel
data = [(entry, arguments, auth) for entry in wrongtime] data = [(entry, arguments, auth) for entry in wrongtime]
pool_obj = multiprocessing.Pool(processes=8)
pool_obj = multiprocessing.Pool(processes=1)
results = pool_obj.map(restore_file, data) results = pool_obj.map(restore_file, data)
Loading…
Cancel
Save