Browse Source

Added first version for file comparing (much too slow, try parallelism next)

main
Bianca Steffes 2 years ago
parent
commit
fe770b0d0a
  1. 73
      fix_dates.py

73
fix_dates.py

@ -1,5 +1,8 @@
import argparse
import datetime
import filecmp
import os
import tempfile
import time
import urllib.parse
import sys
@ -7,7 +10,6 @@ import xml.etree.ElementTree as ET
import requests
# some constants
FILES_PATH_PREFIX = "/remote.php/dav/files/"
VERSIONS_PATH_PREFIX = "/remote.php/dav/versions/"
@ -17,6 +19,7 @@ DATE_THRESHOLD = datetime.datetime(1990, 1, 1)
SESSION = requests.Session()
def propfind(path, auth):
"""
Get a file's Last Modified timestamp and FileID via a PROPFIND request
@ -86,6 +89,50 @@ def find_valid_version(versions):
return most_recent
def download_file(path, auth):
    """
    Download one file and save it in a new temporary file on the local device.

    The content is written to a uniquely named temporary file rather than to a file named
    after the last URL segment in the current working directory. That way a download can
    neither overwrite an unrelated local file nor collide with another download whose URL
    ends in the same segment. The caller is responsible for deleting the returned file.

    :param path: The full URL of the file in question
    :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
    :return: The path of the created file, or '' if the server did not answer with status 200
    """
    # Go through the module-wide session instead of a one-off request.
    r = SESSION.get(path, auth=auth)
    if r.status_code != 200:
        return ''
    # delete=False keeps the file on disk after the handle is closed so the caller can read it.
    with tempfile.NamedTemporaryFile(mode='wb', delete=False) as file:
        file.write(r.content)
    return file.name
def content_equal(original_entry, fixed_version, auth):
    """
    Download two file versions and check whether their contents are identical.

    Only a byte-by-byte comparison is performed. A shallow (os.stat based) comparison of the
    two local copies would only look at metadata of the freshly written downloads, so it is
    not used here.

    :param original_entry: URL of the entry with wrong timestamp.
    :param fixed_version: URL of the version for comparison with the original.
    :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
    :return: True, if both files could be downloaded and are equal in content. False otherwise.
    """
    downloaded = []
    try:
        for url in (original_entry, fixed_version):
            local_path = download_file(url, auth)
            if not local_path:
                # download_file signals a failed download with an empty string;
                # without both files the versions cannot be considered equal.
                return False
            downloaded.append(local_path)
        original, fixed = downloaded
        return filecmp.cmp(original, fixed, shallow=False)
    finally:
        # Always remove the local copies, also if a download or the comparison failed.
        for local_path in downloaded:
            try:
                os.remove(local_path)
            except FileNotFoundError:
                # Both downloads may have resolved to the same local path; already removed.
                pass
# TODO: parallelize
if __name__ == "__main__":
# get all necessary data from the command line
argparser = argparse.ArgumentParser(description="Fix broken dates in Nextcloud folders.")
@ -108,6 +155,10 @@ if __name__ == "__main__":
# List of all entries with wrong time
wrongtime = []
restore_coumt = 0
fixed_count = 0
touch_count = 0
# Iterate through all folders and check for wrong timestamps
while folders:
url = arguments.server + folders.pop(0)
@ -120,13 +171,23 @@ if __name__ == "__main__":
# put files with wrong date in wrong date list (we don't know what to do if a directory has an invalid date)
elif "last_modified" in entry and entry["last_modified"] < DATE_THRESHOLD:
wrongtime.append(entry)
# Iterate through all fileids with wrong timestamps and check for versions with intact timestamp
# Iterate through all fileids with wrong timestamps and check for versions with intact timestamp
print()
# NOTE: you can indent this into the loop above to fix things on-the-fly instead of all at once
# TODO: indented for quicker access to examples
for entry in wrongtime:
print(urllib.parse.unquote(entry["path"][len(FILES_PATH_PREFIX):]))
fixed_version = find_valid_version(propfind(arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]), auth))
# print(urllib.parse.unquote(entry["path"][len(FILES_PATH_PREFIX):]))
fixed_version = find_valid_version(propfind(
arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]),
auth))
if fixed_version:
print("Restore from {}".format(fixed_version))
fixed_count+=1
if fixed_version and content_equal(arguments.server + entry['path'], arguments.server + fixed_version['path'], auth):
# print("Restore from {}".format(fixed_version))
restore_coumt +=1
else:
print("Touch file.")
touch_count +=1
# print("Touch file.")
print(restore_coumt)
print(touch_count)
Loading…
Cancel
Save