Browse Source

Finished Code

main
Bianca Steffes 3 years ago
parent
commit
79a3781493
  1. 93
      fix_dates.py

93
fix_dates.py

@ -8,6 +8,7 @@ import time
import urllib.parse import urllib.parse
import sys import sys
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from pathlib import Path
import requests import requests
@ -20,7 +21,6 @@ DATE_THRESHOLD = datetime.datetime(1990, 1, 1)
SESSION = requests.Session() SESSION = requests.Session()
def propfind(path, auth): def propfind(path, auth):
""" """
Get a file's Last Modified timestamp and FileID via a PROPFIND request Get a file's Last Modified timestamp and FileID via a PROPFIND request
@ -31,7 +31,7 @@ def propfind(path, auth):
""" """
# do not descend further into subdirectories\ # do not descend further into subdirectories\
# TODO: we could probably be faster if we did # TODO: we could probably be faster if we did
headers = {"Depth": "infinity"} #"1"}
headers = {"Depth": "infinity"} # "1"}
# This body returns only the timelastmodified and the fileid variable # This body returns only the timelastmodified and the fileid variable
requested_data = \ requested_data = \
""" """
@ -83,7 +83,7 @@ def find_valid_version(versions):
# mock entry for comparison # mock entry for comparison
most_recent = {"last_modified": DATE_THRESHOLD} most_recent = {"last_modified": DATE_THRESHOLD}
for version in versions: for version in versions:
if "last_modified" in version and version["last_modified"] > most_recent["last_modified"]:
if "last_modified" in version and most_recent["last_modified"] < version["last_modified"]:
most_recent = version most_recent = version
if most_recent["last_modified"] == DATE_THRESHOLD: if most_recent["last_modified"] == DATE_THRESHOLD:
return None return None
@ -95,7 +95,7 @@ def download_file(path, auth):
This function downloads one file and saves it on the local device. This function downloads one file and saves it on the local device.
:param path: The path to the file in question :param path: The path to the file in question
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
:return: The path of the created file
:return: The path of the created file or '' if no file could be downloaded
""" """
r = requests.request( r = requests.request(
method='get', method='get',
@ -111,17 +111,38 @@ def download_file(path, auth):
return '' return ''
def upload_file(local_path, remote_path, auth):
    """
    Uploads a file to the cloud via an HTTP PUT request
    :param local_path: File path of the file to be uploaded
    :param remote_path: URL the file should be uploaded to on the cloud
    :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
    :return: True if the server answered with a 2xx status code, False otherwise
    """
    # Read the payload inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(local_path, 'rb') as local_file:
        payload = local_file.read()
    r = requests.put(
        url=remote_path,
        auth=auth,
        data=payload
    )
    # Any 2xx status counts as a successful upload.
    return 200 <= r.status_code < 300
def content_equal(original_entry, fixed_version, auth): def content_equal(original_entry, fixed_version, auth):
""" """
Compares the two file versions for replacement. Compares the two file versions for replacement.
:param original_entry: Filename of the entry with wrong timestamp. :param original_entry: Filename of the entry with wrong timestamp.
:param fixed_version: Filename of the version for comparison with the original. :param fixed_version: Filename of the version for comparison with the original.
:param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object) :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
:return: True, if they are equal in Metadata and content. False otherwise.
:return: True if they are equal in metadata and content. False otherwise, including when one or both files
couldn't be downloaded.
""" """
original = download_file(original_entry, auth) original = download_file(original_entry, auth)
fixed = download_file(fixed_version, auth) fixed = download_file(fixed_version, auth)
if original == '' or fixed == '':
# TODO: handle a failed download more explicitly than by reporting the files as unequal
return False
# shallow comparison # shallow comparison
shallow = filecmp.cmp(original, fixed) shallow = filecmp.cmp(original, fixed)
# deep comparison # deep comparison
@ -137,7 +158,7 @@ def restore_file(packed):
""" """
Handles one file. Searches for the latest older version with intact timestamp and compares them. Handles one file. Searches for the latest older version with intact timestamp and compares them.
:param packed: data needed for one file: (entry, arguments, auth). entry represents the original file, arguments :param packed: data needed for one file: (entry, arguments, auth). entry represents the original file, arguments
are the runtime arguments and euth is for the http authentification
are the runtime arguments and auth is for the http authentication
""" """
entry, arguments, auth = packed entry, arguments, auth = packed
fixed_version = find_valid_version(propfind( fixed_version = find_valid_version(propfind(
@ -145,9 +166,50 @@ def restore_file(packed):
auth)) auth))
if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server + if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server +
fixed_version['path'], auth): fixed_version['path'], auth):
print("Restore from {}".format(fixed_version))
filename = os.path.basename(os.path.normpath(fixed_version['path']))
# print("Restore from {}".format(fixed_version))
restored = restore_by_version(arguments.server + fixed_version['path'], filename, auth, arguments)
else:
# print("Touch file.")
restored = restore_by_touch(arguments.server + entry['path'], auth)
if not restored:
print('File couldn\'t be restored: ' + entry['path'])
def restore_by_version(path_version, temp_id, auth, args):
    """
    Restores the given old version of a file by sending a MOVE request that moves the version
    into the user's restore folder
    :param path_version: cloud path to the version to be restored
    :param temp_id: temporary filename in the restore folder (can't be static because of parallelism)
    :param auth: Auth data for the HTTP request
    :param args: Runtime arguments (server and username are read)
    :return: True if the version was successfully restored (2xx status code), False otherwise
    """
    # Build the destination from the args parameter, not from the module-level
    # `arguments` name: that global appears to be created only in the __main__
    # block, so it is not guaranteed to exist inside a pool worker process.
    destination = args.server + VERSIONS_PATH_PREFIX + args.username + "/restore/" + temp_id
    headers = {"Destination": destination}
    r = requests.request(
        method='move',
        url=path_version,
        auth=auth,
        headers=headers
    )
    # Any 2xx status counts as a successful restore.
    return 200 <= r.status_code < 300
def restore_by_touch(path, auth):
    """
    Restores a file by "touching" it. A touch isn't possible on the cloud, so the file is
    downloaded and then uploaded again to the same path.
    :param path: The cloud path to the file in question
    :param auth: Auth data for the HTTP request
    :return: True if the restoring was successful, False otherwise.
    """
    downloaded = download_file(path, auth)
    # download_file signals a failed download with an empty string
    if downloaded == '':
        return False
    return upload_file(downloaded, path, auth)
if __name__ == "__main__": if __name__ == "__main__":
@ -167,21 +229,12 @@ if __name__ == "__main__":
auth = requests.auth.HTTPBasicAuth(arguments.username, arguments.password) auth = requests.auth.HTTPBasicAuth(arguments.username, arguments.password)
# Prepare the path we want to use # Prepare the path we want to use
mainpath = FILES_PATH_PREFIX + arguments.username + arguments.search_path mainpath = FILES_PATH_PREFIX + arguments.username + arguments.search_path
# List of all folders we need to enter
folders = [mainpath]
# List of all entries with wrong time # List of all entries with wrong time
wrongtime = [] wrongtime = []
# Iterate through all folders and check for wrong timestamps # Iterate through all folders and check for wrong timestamps
while folders:
url = arguments.server + folders.pop(0)
# print("+", end="", flush=True)
url = arguments.server + mainpath
for entry in propfind(url, auth): for entry in propfind(url, auth):
# print(".", end="", flush=True)
# put directories in search list
# if "resource_type" in entry and "{DAV:}collection" in entry["resource_type"]:
# folders.append(entry["path"])
# put files with wrong date in wrong date list (we don't know what to do if a directory has an invalid date)
if "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD: if "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD:
wrongtime.append(entry) wrongtime.append(entry)
@ -189,5 +242,5 @@ if __name__ == "__main__":
# Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch # Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch
# done parallel # done parallel
data = [(entry, arguments, auth) for entry in wrongtime] data = [(entry, arguments, auth) for entry in wrongtime]
pool_obj = multiprocessing.Pool(processes=4)
pool_obj = multiprocessing.Pool(processes=8)
results = pool_obj.map(restore_file, data) results = pool_obj.map(restore_file, data)
Loading…
Cancel
Save