Fix broken modification dates in Nextcloud folders
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

161 lines
7.2 KiB

import argparse
import datetime
import time
import xml.etree.ElementTree as ET
import requests
# WebDAV endpoint prefixes; the user name and any further path components are appended to these
FILES_PATH_PREFIX = "/remote.php/dav/files/"
VERSIONS_PATH_PREFIX = "/remote.php/dav/versions/"
# One module-level HTTP session, reused by propfind() for every request of the script
session = requests.Session()
def propfind(path, auth):
    """
    Send a depth-1 PROPFIND request and hand back the raw response body.

    Only the Last Modified timestamp and the FileID are requested for each entry.
    The response body is also echoed to stdout before it is returned.

    :param path: The full URL of the file or folder in question
    :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
    :return: The properties in XML format (the response body as text)
    """
    # Request body asking only for getlastmodified and fileid; the leading and
    # trailing empty entries reproduce the newlines around the XML document.
    body_lines = (
        "",
        '<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns" xmlns:nc="http://nextcloud.org/ns">',
        "<d:prop>",
        "<d:getlastmodified />",
        "<oc:fileid />",
        "</d:prop>",
        "</d:propfind>",
        "",
    )
    prepared = requests.Request(
        "PROPFIND",
        path,
        headers={"Depth": "1"},
        auth=auth,
        data="\n".join(body_lines),
    ).prepare()
    # The request is sent through the shared module-level session
    response_text = session.send(prepared).text
    print(response_text)
    return response_text
def search_folder(requestreturn):
    """
    Iterate through a folder's properties XML and find entries with invalid timestamps.

    A timestamp counts as invalid when it lies before 01.01.1990 00:00:00 UTC. The
    Last Modified values are evaluated in UTC, so the result does not depend on the
    time zone of the machine running the script, and dates before 1970 do not hit
    platform limits of time.mktime().

    The first folder entry of the listing is the queried folder itself. It is neither
    returned as a subfolder (that would loop forever) nor checked for its timestamp.
    All other folders are returned as subfolders and checked like regular entries.

    :param requestreturn: The XML returned by propfind()
    :return: A tuple of two lists. The first list contains the hrefs of all subfolders, the second contains all
    FileIDs of entries with an invalid timestamp.
    :raises xml.etree.ElementTree.ParseError: If requestreturn is not well-formed XML
    :raises ValueError: If a non-empty Last Modified value does not match the expected HTTP date format
    """
    date_format = "%a, %d %b %Y %H:%M:%S GMT"
    utc = datetime.timezone.utc
    # We know there is no file older than this in our nextcloud
    oldest_plausible = datetime.datetime(1990, 1, 1, tzinfo=utc)

    # List to collect the paths of the folders stored in the queried folder
    innerfolders = []
    # List to collect the FileIDs of entries with a wrong timestamp
    linkswrongtime = []
    # Becomes False once the entry of the queried folder itself has been seen
    firstfolder = True

    root = ET.fromstring(requestreturn)
    for resp in root.findall('.//{DAV:}response'):
        # Look the href up by tag instead of relying on element text being None,
        # so whitespace-formatted XML is handled as well.
        href = (resp.findtext('{DAV:}href') or "").strip()
        if href.endswith('/'):
            if firstfolder:
                firstfolder = False
                continue
            innerfolders.append(href)

        for propstat in resp.findall('{DAV:}propstat'):
            for stamp in propstat.findall('.//{DAV:}getlastmodified'):
                stamp_text = (stamp.text or "").strip()
                if not stamp_text:
                    # Property was requested but has no value for this entry
                    continue
                modified = datetime.datetime.strptime(stamp_text, date_format).replace(tzinfo=utc)
                if modified < oldest_plausible:
                    # Only keep FileIDs that actually carry a value
                    linkswrongtime.extend(
                        fileid.text.strip()
                        for fileid in propstat.findall('.//{http://owncloud.org/ns}fileid')
                        if fileid.text and fileid.text.strip()
                    )
    return (innerfolders, linkswrongtime)
def version_check(xmlfile):
    """
    Return the id of the version with the most current valid timestamp.

    The id is the last path component of the version's href. A timestamp is valid
    when it is younger than 01.01.1990 00:00:00 UTC. The Last Modified values are
    compared in UTC, so the result does not depend on the time zone of the machine
    running the script, and dates before 1970 do not hit platform limits of
    time.mktime(). Entries without a Last Modified value and entries whose href has
    no last path component (e.g. the version collection itself) are ignored.

    :param xmlfile: An XML file with Last Modified timestamps and FileIDs as returned by propfind()
    :return: The FileID of the most recent version or None if no valid version exists
    :raises xml.etree.ElementTree.ParseError: If xmlfile is not well-formed XML
    :raises ValueError: If a non-empty Last Modified value does not match the expected HTTP date format
    """
    date_format = "%a, %d %b %Y %H:%M:%S GMT"
    utc = datetime.timezone.utc
    # Highest timestamp seen so far; starts at the cutoff so older versions never win
    most_current_timestamp = datetime.datetime(1990, 1, 1, tzinfo=utc)
    most_current_timestamp_fileid = None

    root = ET.fromstring(xmlfile)
    for resp in root.findall('.//{DAV:}response'):
        # Look the href up by tag instead of relying on element text being None,
        # so whitespace-formatted XML is handled as well.
        href = (resp.findtext('{DAV:}href') or "").strip()
        version_id = href.split('/')[-1]
        if not version_id:
            # A trailing slash or missing href leaves no usable version id
            continue

        for propstat in resp.findall('{DAV:}propstat'):
            for stamp in propstat.findall('.//{DAV:}getlastmodified'):
                stamp_text = (stamp.text or "").strip()
                if not stamp_text:
                    continue
                modified = datetime.datetime.strptime(stamp_text, date_format).replace(tzinfo=utc)
                if modified > most_current_timestamp:
                    most_current_timestamp = modified
                    most_current_timestamp_fileid = version_id
    return most_current_timestamp_fileid
if __name__ == "__main__":
# get all necessary data from the command line
argparser = argparse.ArgumentParser(description="Fix broken dates in Nextcloud folders.")
argparser.add_argument("server", help="The base URL of the Nextcloud server.")
argparser.add_argument("username", help="The user to log in as.")
argparser.add_argument("password", help="The password for accessing Nextcloud. Hint: Use an App Token!")
argparser.add_argument(
"-p", "--path",
default="/",
help="The path to search, relative to the user's root. Default: /",
dest="search_path"
)
arguments = argparser.parse_args()
# Prepare HTTP Basic Authentication
auth = requests.auth.HTTPBasicAuth(arguments.username, arguments.password)
# Prepare the path we want to use
mainpath = FILES_PATH_PREFIX + arguments.username + arguments.search_path
# List of all folders we need to enter
folders = [mainpath]
# List of all fileids with wrong time
wrongtime = []
# Iterate through all folders and check for wrong timestamps
while folders:
path_suffix = folders.pop(0)
path = arguments.server + str(path_suffix)
r = propfind(path, auth)
new_folders, new_wrongtime = search_folder(r)
# Append all found folders and files with wrong timestamps to global list
folders += new_folders
wrongtime += new_wrongtime
# Iterate through all fileids with wrong timestamps and check for versions with intact timestamp
while wrongtime:
fileid = wrongtime.pop(0)
version_suffix = VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + fileid
version_path = arguments.server + version_suffix
versions = propfind(version_path, auth)
mrv = version_check(versions)
print(fileid, end=": ")
print(mrv)