
Parallel Version implemented

still very slow....
main
Bianca Steffes, 3 years ago
commit e0f15cb351
1 changed file: fix_dates.py (62 changed lines)

fix_dates.py

@@ -1,6 +1,7 @@
 import argparse
 import datetime
 import filecmp
+import multiprocessing
 import os
 import tempfile
 import time
@@ -30,7 +31,7 @@ def propfind(path, auth):
     """
     # do not descend further into subdirectories\
     # TODO: we could probably be faster if we did
-    headers = {"Depth": "1"}
+    headers = {"Depth": "infinity"} #"1"}
     # This body returns only the timelastmodified and the fileid variable
     requested_data = \
     """
@@ -116,7 +117,7 @@ def content_equal(original_entry, fixed_version, auth):
     :param original_entry: Filename of the entry with wrong timestamp.
     :param fixed_version: Filename of the version for comparison with the original.
     :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
-    :return: True, if they are equal in Metadata anc content. False otherwise.
+    :return: True, if they are equal in Metadata and content. False otherwise.
     """
     original = download_file(original_entry, auth)
     fixed = download_file(fixed_version, auth)
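
The deep-and-shallow return value a few lines below matches the filecmp import at the top of the file. A minimal sketch of that comparison pattern, assuming download_file stores each version in a local temporary file (the paths here are hypothetical):

import filecmp

# Hypothetical local copies of the two downloaded versions.
original = "/tmp/original_version"
fixed = "/tmp/fixed_version"

# shallow=True treats files with identical os.stat() signatures
# (file type, size, mtime) as equal without reading them;
# shallow=False always compares the actual contents.
shallow = filecmp.cmp(original, fixed, shallow=True)
deep = filecmp.cmp(original, fixed, shallow=False)
print(deep and shallow)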
@@ -132,7 +133,23 @@ def content_equal(original_entry, fixed_version, auth):
     return deep and shallow
 
 
-#TODO: parallelize
+def restore_file(packed):
+    """
+    Handles one file. Searches for the latest older version with an intact timestamp and compares them.
+    :param packed: data needed for one file: (entry, arguments, auth). entry represents the original file, arguments
+    are the runtime arguments and auth is for the HTTP authentication
+    """
+    entry, arguments, auth = packed
+    fixed_version = find_valid_version(propfind(
+        arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]),
+        auth))
+    if fixed_version is not None and content_equal(arguments.server + entry['path'], arguments.server +
+                                                   fixed_version['path'], auth):
+        print("Restore from {}".format(fixed_version))
+    else:
+        print("Touch file.")
+
+
 if __name__ == "__main__":
     # get all necessary data from the command line
     argparser = argparse.ArgumentParser(description="Fix broken dates in Nextcloud folders.")
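
restore_file takes one packed tuple because multiprocessing.Pool.map passes exactly one object to the worker per work item. A minimal, self-contained sketch of that pack/unpack pattern (worker and data are hypothetical):

import multiprocessing

def worker(packed):
    # Pool.map hands the worker a single object, so several arguments
    # are packed into a tuple by the caller and unpacked here.
    name, count = packed
    return "{}:{}".format(name, count * 2)

if __name__ == "__main__":
    data = [("a", 1), ("b", 2), ("c", 3)]
    with multiprocessing.Pool(processes=4) as pool:
        print(pool.map(worker, data))  # ['a:2', 'b:4', 'c:6']

Pool.starmap would unpack the tuples automatically, but the packed-tuple form keeps the worker compatible with plain map.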
@@ -155,39 +172,22 @@ if __name__ == "__main__":
     # List of all entries with wrong time
     wrongtime = []
-    restore_coumt = 0
-    fixed_count = 0
-    touch_count = 0
 
     # Iterate through all folders and check for wrong timestamps
     while folders:
         url = arguments.server + folders.pop(0)
-        print("+", end="", flush=True)
+        # print("+", end="", flush=True)
         for entry in propfind(url, auth):
-            print(".", end="", flush=True)
+            # print(".", end="", flush=True)
             # put directories in search list
-            if "resource_type" in entry and "{DAV:}collection" in entry["resource_type"]:
-                folders.append(entry["path"])
+            # if "resource_type" in entry and "{DAV:}collection" in entry["resource_type"]:
+            #     folders.append(entry["path"])
             # put files with wrong date in wrong date list (we don't know what to do if a directory has an invalid date)
-            elif "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD:
+            if "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD:
                 wrongtime.append(entry)
 
-    # Iterate through all fileids with wrong timestamps and check for versions with intact timestamp
-    print()
-    # NOTE: you can indent this into the loop above to fix things on-the-fly instead of all at once
-    # TODO: indented for quicker access to examples
-    for entry in wrongtime:
-        # print(urllib.parse.unquote(entry["path"][len(FILES_PATH_PREFIX):]))
-        fixed_version = find_valid_version(propfind(
-            arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]),
-            auth))
-        if fixed_version:
-            fixed_count+=1
-        if fixed_version and content_equal(arguments.server + entry['path'], arguments.server + fixed_version['path'], auth):
-            # print("Restore from {}".format(fixed_version))
-            restore_coumt +=1
-        else:
-            touch_count +=1
-            # print("Touch file.")
-    print(restore_coumt)
-    print(touch_count)
+    # Iterate through all fileids with wrong timestamps and replace them with versions with intact timestamps, or touch
+    # done in parallel
+    data = [(entry, arguments, auth) for entry in wrongtime]
+    pool_obj = multiprocessing.Pool(processes=4)
+    results = pool_obj.map(restore_file, data)
 
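
As committed, the pool is neither closed nor joined, and results goes unused. A hedged sketch of an equivalent ending, not what the commit does: a context manager cleans up the worker processes, and imap_unordered restores a progress display as the HTTP-bound jobs finish:

    data = [(entry, arguments, auth) for entry in wrongtime]
    with multiprocessing.Pool(processes=4) as pool:
        # imap_unordered yields each result as soon as a worker
        # finishes, so one dot prints per handled file.
        for _ in pool.imap_unordered(restore_file, data):
            print(".", end="", flush=True)
    print()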