|
@ -8,6 +8,7 @@ import time |
|
|
import urllib.parse |
|
|
import urllib.parse |
|
|
import sys |
|
|
import sys |
|
|
import xml.etree.ElementTree as ET |
|
|
import xml.etree.ElementTree as ET |
|
|
|
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
import requests |
|
|
import requests |
|
|
|
|
|
|
|
@ -20,7 +21,6 @@ DATE_THRESHOLD = datetime.datetime(1990, 1, 1) |
|
|
SESSION = requests.Session() |
|
|
SESSION = requests.Session() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def propfind(path, auth): |
|
|
def propfind(path, auth): |
|
|
""" |
|
|
""" |
|
|
Get a file's Last Modified timestamp and FileID via a PROPFIND request |
|
|
Get a file's Last Modified timestamp and FileID via a PROPFIND request |
|
@ -31,7 +31,7 @@ def propfind(path, auth): |
|
|
""" |
|
|
""" |
|
|
# do not descend further into subdirectories\ |
|
|
# do not descend further into subdirectories\ |
|
|
# TODO: we could probably be faster if we did |
|
|
# TODO: we could probably be faster if we did |
|
|
headers = {"Depth": "infinity"} #"1"} |
|
|
|
|
|
|
|
|
headers = {"Depth": "infinity"} # "1"} |
|
|
# This body returns only the timelastmodified and the fileid variable |
|
|
# This body returns only the timelastmodified and the fileid variable |
|
|
requested_data = \ |
|
|
requested_data = \ |
|
|
""" |
|
|
""" |
|
@ -83,7 +83,7 @@ def find_valid_version(versions): |
|
|
# mock entry for comparison |
|
|
# mock entry for comparison |
|
|
most_recent = {"last_modified": DATE_THRESHOLD} |
|
|
most_recent = {"last_modified": DATE_THRESHOLD} |
|
|
for version in versions: |
|
|
for version in versions: |
|
|
if "last_modified" in version and version["last_modified"] > most_recent["last_modified"]: |
|
|
|
|
|
|
|
|
if "last_modified" in version and most_recent["last_modified"] < version["last_modified"]: |
|
|
most_recent = version |
|
|
most_recent = version |
|
|
if most_recent["last_modified"] == DATE_THRESHOLD: |
|
|
if most_recent["last_modified"] == DATE_THRESHOLD: |
|
|
return None |
|
|
return None |
|
@ -95,7 +95,7 @@ def download_file(path, auth): |
|
|
This function downloads one file and saves it on the local device. |
|
|
This function downloads one file and saves it on the local device. |
|
|
:param path: The path to the file in question |
|
|
:param path: The path to the file in question |
|
|
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) |
|
|
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) |
|
|
:return: The path of the created file |
|
|
|
|
|
|
|
|
:return: The path of the created file or '' if no file could be downloaded |
|
|
""" |
|
|
""" |
|
|
r = requests.request( |
|
|
r = requests.request( |
|
|
method='get', |
|
|
method='get', |
|
@ -111,17 +111,38 @@ def download_file(path, auth): |
|
|
return '' |
|
|
return '' |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def upload_file(local_path, remote_path, auth):
    """
    Uploads a file to the cloud via an HTTP PUT request.
    :param local_path: File path of the file to be uploaded
    :param remote_path: Path (full URL) where it should be uploaded on the cloud
    :param auth: Auth data for the HTTP request
    :return: True if the server answered with a 2xx status code, False otherwise
    """
    # Read the payload inside a context manager so the file handle is always
    # closed, even if reading fails; the previous bare open(...).read() left
    # the handle to be closed by the garbage collector.
    with open(local_path, 'rb') as upload:
        payload = upload.read()
    r = requests.put(
        url=remote_path,
        auth=auth,
        data=payload
    )
    return 200 <= r.status_code < 300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def content_equal(original_entry, fixed_version, auth): |
|
|
def content_equal(original_entry, fixed_version, auth): |
|
|
""" |
|
|
""" |
|
|
Compares the two file versions for replacement. |
|
|
Compares the two file versions for replacement. |
|
|
:param original_entry: Filename of the entry with wrong timestamp. |
|
|
:param original_entry: Filename of the entry with wrong timestamp. |
|
|
:param fixed_version: Filename of the version for comparison with the original. |
|
|
:param fixed_version: Filename of the version for comparison with the original. |
|
|
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) |
|
|
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) |
|
|
:return: True, if they are equal in Metadata and content. False otherwise. |
|
|
|
|
|
|
|
|
:return: True, if they are equal in Metadata and content. False otherwise. Also if one or both files couldn't |
|
|
|
|
|
be downloaded |
|
|
""" |
|
|
""" |
|
|
original = download_file(original_entry, auth) |
|
|
original = download_file(original_entry, auth) |
|
|
fixed = download_file(fixed_version, auth) |
|
|
fixed = download_file(fixed_version, auth) |
|
|
|
|
|
|
|
|
|
|
|
if original == '' or fixed == '': |
|
|
|
|
|
# TODO: sth better if the downloading failed? |
|
|
|
|
|
return False |
|
|
# shallow comparison |
|
|
# shallow comparison |
|
|
shallow = filecmp.cmp(original, fixed) |
|
|
shallow = filecmp.cmp(original, fixed) |
|
|
# deep comparison |
|
|
# deep comparison |
|
@ -137,17 +158,58 @@ def restore_file(packed): |
|
|
""" |
|
|
""" |
|
|
Handles one file. Searches for the latest older version with intact timestamp and compares them. |
|
|
Handles one file. Searches for the latest older version with intact timestamp and compares them. |
|
|
:param packed: data needed for one file: (entry, arguments, auth). entry represents the original file, arguments |
|
|
:param packed: data needed for one file: (entry, arguments, auth). entry represents the original file, arguments |
|
|
are the runtime arguments and euth is for the http authentification |
|
|
|
|
|
|
|
|
are the runtime arguments and auth is for the http authentification |
|
|
""" |
|
|
""" |
|
|
entry, arguments, auth = packed |
|
|
entry, arguments, auth = packed |
|
|
fixed_version = find_valid_version(propfind( |
|
|
fixed_version = find_valid_version(propfind( |
|
|
arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]), |
|
|
arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]), |
|
|
auth)) |
|
|
auth)) |
|
|
if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server + |
|
|
if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server + |
|
|
fixed_version['path'], auth): |
|
|
|
|
|
print("Restore from {}".format(fixed_version)) |
|
|
|
|
|
|
|
|
fixed_version['path'], auth): |
|
|
|
|
|
filename = os.path.basename(os.path.normpath(fixed_version['path'])) |
|
|
|
|
|
# print("Restore from {}".format(fixed_version)) |
|
|
|
|
|
restored = restore_by_version(arguments.server + fixed_version['path'], filename, auth, arguments) |
|
|
|
|
|
else: |
|
|
|
|
|
# print("Touch file.") |
|
|
|
|
|
restored = restore_by_touch(arguments.server + entry['path'], auth) |
|
|
|
|
|
if not restored: |
|
|
|
|
|
print('File couldn\'t be restored: ' + entry['path']) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def restore_by_version(path_version, temp_id, auth, args):
    """
    Restores the given old version of a file by sending a MOVE request for the
    version to the user's restore folder.
    :param path_version: cloud path to the version to be restored
    :param temp_id: temporary filename in the restore folder (can't be static because of parallelism)
    :param auth: Auth data for the HTTP request
    :param args: Runtime arguments (``server`` and ``username`` are read)
    :return: True if the server answered with a 2xx status code, False otherwise
    """
    # Use the ``args`` parameter for the username. The module-level
    # ``arguments`` name only exists once the ``__main__`` section has run,
    # which is not guaranteed inside a multiprocessing worker.
    destination = args.server + VERSIONS_PATH_PREFIX + args.username + "/restore/" + temp_id
    r = requests.request(
        method='move',
        url=path_version,
        auth=auth,
        headers={"Destination": destination}
    )
    return 200 <= r.status_code < 300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def restore_by_touch(path, auth):
    """
    Restores a file by "touching" it. A touch on the cloud isn't possible, so
    the file is downloaded and then uploaded again to the same path.
    :param path: The cloud path to the file in question
    :param auth: Auth data for the HTTP request
    :return: True if the restoring was successful, False otherwise.
    """
    downloaded = download_file(path, auth)
    # download_file signals failure with an empty string
    if downloaded == '':
        return False
    return upload_file(downloaded, path, auth)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
if __name__ == "__main__": |
|
@ -167,27 +229,18 @@ if __name__ == "__main__": |
|
|
auth = requests.auth.HTTPBasicAuth(arguments.username, arguments.password) |
|
|
auth = requests.auth.HTTPBasicAuth(arguments.username, arguments.password) |
|
|
# Prepare the path we want to use |
|
|
# Prepare the path we want to use |
|
|
mainpath = FILES_PATH_PREFIX + arguments.username + arguments.search_path |
|
|
mainpath = FILES_PATH_PREFIX + arguments.username + arguments.search_path |
|
|
# List of all folders we need to enter |
|
|
|
|
|
folders = [mainpath] |
|
|
|
|
|
# List of all entries with wrong time |
|
|
# List of all entries with wrong time |
|
|
wrongtime = [] |
|
|
wrongtime = [] |
|
|
|
|
|
|
|
|
# Iterate through all folders and check for wrong timestamps |
|
|
# Iterate through all folders and check for wrong timestamps |
|
|
while folders: |
|
|
|
|
|
url = arguments.server + folders.pop(0) |
|
|
|
|
|
# print("+", end="", flush=True) |
|
|
|
|
|
for entry in propfind(url, auth): |
|
|
|
|
|
# print(".", end="", flush=True) |
|
|
|
|
|
# put directories in search list |
|
|
|
|
|
# if "resource_type" in entry and "{DAV:}collection" in entry["resource_type"]: |
|
|
|
|
|
# folders.append(entry["path"]) |
|
|
|
|
|
# put files with wrong date in wrong date list (we don't know what to do if a directory has an invalid date) |
|
|
|
|
|
if "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD: |
|
|
|
|
|
wrongtime.append(entry) |
|
|
|
|
|
|
|
|
url = arguments.server + mainpath |
|
|
|
|
|
for entry in propfind(url, auth): |
|
|
|
|
|
if "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD: |
|
|
|
|
|
wrongtime.append(entry) |
|
|
|
|
|
|
|
|
print() |
|
|
print() |
|
|
# Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch |
|
|
# Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch |
|
|
# done parallel |
|
|
# done parallel |
|
|
data = [(entry, arguments, auth) for entry in wrongtime] |
|
|
data = [(entry, arguments, auth) for entry in wrongtime] |
|
|
pool_obj = multiprocessing.Pool(processes=4) |
|
|
|
|
|
|
|
|
pool_obj = multiprocessing.Pool(processes=8) |
|
|
results = pool_obj.map(restore_file, data) |
|
|
results = pool_obj.map(restore_file, data) |