Browse Source

Finished Code

main
Bianca Steffes 3 years ago
parent
commit
79a3781493
  1. 101
      fix_dates.py

101
fix_dates.py

@ -8,6 +8,7 @@ import time
import urllib.parse
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
import requests
@ -20,7 +21,6 @@ DATE_THRESHOLD = datetime.datetime(1990, 1, 1)
SESSION = requests.Session()
def propfind(path, auth):
"""
Get a file's Last Modified timestamp and FileID via a PROPFIND request
@ -31,7 +31,7 @@ def propfind(path, auth):
"""
# do not descend further into subdirectories\
# TODO: we could probably be faster if we did
headers = {"Depth": "infinity"} #"1"}
headers = {"Depth": "infinity"} # "1"}
# This body returns only the timelastmodified and the fileid variable
requested_data = \
"""
@ -83,7 +83,7 @@ def find_valid_version(versions):
# mock entry for comparison
most_recent = {"last_modified": DATE_THRESHOLD}
for version in versions:
if "last_modified" in version and version["last_modified"] > most_recent["last_modified"]:
if "last_modified" in version and most_recent["last_modified"] < version["last_modified"]:
most_recent = version
if most_recent["last_modified"] == DATE_THRESHOLD:
return None
@ -95,7 +95,7 @@ def download_file(path, auth):
This function downloads one file and saves it on the local device.
:param path: The path to the file in question
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
:return: The path of the created file
:return: The path of the created file or '' if no file could be downloaded
"""
r = requests.request(
method='get',
@ -111,17 +111,38 @@ def download_file(path, auth):
return ''
def upload_file(local_path, remote_path, auth):
    """
    Uploads a file to the cloud with an HTTP PUT request.
    :param local_path: File path of the file to be uploaded
    :param remote_path: URL where it should be uploaded on the cloud
    :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
    :return: True if the server answered with a 2xx status code, False otherwise
    """
    # The context manager guarantees the file handle is closed again, even if the request raises.
    with open(local_path, 'rb') as local_file:
        r = requests.put(
            url=remote_path,
            auth=auth,
            data=local_file.read()
        )
    return 200 <= r.status_code < 300
def content_equal(original_entry, fixed_version, auth):
"""
Compares the two file versions for replacement.
:param original_entry: Filename of the entry with wrong timestamp.
:param fixed_version: Filename of the version for comparison with the original.
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
:return: True, if they are equal in Metadata and content. False otherwise.
:return: True if they are equal in metadata and content. False otherwise, including when one or both files
couldn't be downloaded
"""
original = download_file(original_entry, auth)
fixed = download_file(fixed_version, auth)
if original == '' or fixed == '':
# TODO: sth better if the downloading failed?
return False
# shallow comparison
shallow = filecmp.cmp(original, fixed)
# deep comparison
@ -137,17 +158,58 @@ def restore_file(packed):
"""
Handles one file. Searches for the latest older version with intact timestamp and compares them.
:param packed: data needed for one file: (entry, arguments, auth). entry represents the original file, arguments
are the runtime arguments and euth is for the http authentification
are the runtime arguments and auth is for the HTTP authentication
"""
entry, arguments, auth = packed
fixed_version = find_valid_version(propfind(
arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]),
auth))
if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server +
fixed_version['path'], auth):
print("Restore from {}".format(fixed_version))
fixed_version['path'], auth):
filename = os.path.basename(os.path.normpath(fixed_version['path']))
# print("Restore from {}".format(fixed_version))
restored = restore_by_version(arguments.server + fixed_version['path'], filename, auth, arguments)
else:
# print("Touch file.")
restored = restore_by_touch(arguments.server + entry['path'], auth)
if not restored:
print('File couldn\'t be restored: ' + entry['path'])
def restore_by_version(path_version, temp_id, auth, args):
    """
    Restores the given old version of a file by moving it into the restore folder with an HTTP MOVE request.
    :param path_version: cloud path to the version to be restored
    :param temp_id: temporary filename in the restore folder (can't be static because of parallelism)
    :param auth: Auth data for the HTTP request
    :param args: Runtime arguments; the attributes server and username are read
    :return: True if the version was successfully restored (2xx status code), False otherwise
    """
    # Build the destination from the args parameter only. The global `arguments` is set up in the
    # __main__ section and is not guaranteed to exist inside a worker process or when imported.
    destination = args.server + VERSIONS_PATH_PREFIX + args.username + "/restore/" + temp_id
    r = requests.request(
        method='move',
        url=path_version,
        auth=auth,
        headers={"Destination": destination}
    )
    return 200 <= r.status_code < 300
def restore_by_touch(path, auth):
    """
    Restores a file by touch: Touch on cloud isn't possible so the file is simply downloaded and uploaded again
    :param path: The cloud path to the file in question
    :param auth: Auth data for the HTTP request
    :return: True if the restoring was successful, False otherwise.
    """
    local_path = download_file(path, auth)
    if local_path == '':
        # download_file returns '' when nothing could be downloaded, so there is nothing to upload.
        # No message is printed here; a "Touch file." message would wrongly suggest the touch happened.
        return False
    return upload_file(local_path, path, auth)
if __name__ == "__main__":
@ -167,27 +229,18 @@ if __name__ == "__main__":
auth = requests.auth.HTTPBasicAuth(arguments.username, arguments.password)
# Prepare the path we want to use
mainpath = FILES_PATH_PREFIX + arguments.username + arguments.search_path
# List of all folders we need to enter
folders = [mainpath]
# List of all entries with wrong time
wrongtime = []
# Iterate through all folders and check for wrong timestamps
while folders:
url = arguments.server + folders.pop(0)
# print("+", end="", flush=True)
for entry in propfind(url, auth):
# print(".", end="", flush=True)
# put directories in search list
# if "resource_type" in entry and "{DAV:}collection" in entry["resource_type"]:
# folders.append(entry["path"])
# put files with wrong date in wrong date list (we don't know what to do if a directory has an invalid date)
if "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD:
wrongtime.append(entry)
url = arguments.server + mainpath
for entry in propfind(url, auth):
if "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD:
wrongtime.append(entry)
print()
# Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch
# done parallel
data = [(entry, arguments, auth) for entry in wrongtime]
pool_obj = multiprocessing.Pool(processes=4)
pool_obj = multiprocessing.Pool(processes=8)
results = pool_obj.map(restore_file, data)
Loading…
Cancel
Save