"""Find Nextcloud files with implausibly old timestamps and look up intact versions.

The script walks a Nextcloud folder tree over WebDAV (PROPFIND, Depth 1),
collects the file ids of all files whose last-modified date lies before
01.01.1990 00:00:00 UTC, and then prints, for each such file, the id of the
newest stored version that still carries a plausible timestamp (or ``None``
when there is no such version).

Usage:
    python trying_to_fix_nextcloud.py <user> <password>

NOTE(review): the password is taken from the command line, so it is visible in
the process list and the shell history. Kept for backward compatibility.
"""
import calendar
import datetime
import sys
import time
import xml.etree.ElementTree as ET
from collections import deque

# Server and start folder that are scanned.
BASE_URL = "https://kingsx.cs.uni-saarland.de"
FILES_PATH_TEMPLATE = "/remote.php/dav/files/{user}/Testrequests/"
VERSIONS_PATH_TEMPLATE = "/remote.php/dav/versions/{user}/versions/{fileid}"

# XML names used in the multistatus answers.
DAV_RESPONSE = "{DAV:}response"
DAV_HREF = "{DAV:}href"
DAV_LAST_MODIFIED = "{DAV:}getlastmodified"
OC_FILEID = "{http://owncloud.org/ns}fileid"

# Format of the getlastmodified property (HTTP date, always expressed in GMT).
HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"

# Unix timestamp of 01.01.1990 00:00:00 UTC. No file in the system is expected
# to be older than this, so anything below it counts as a broken timestamp.
MIN_VALID_TIMESTAMP = int(
    datetime.datetime(1990, 1, 1, tzinfo=datetime.timezone.utc).timestamp()
)

# Request body asking only for the last-modified date and the file id.
# oc:fileid is not a default WebDAV property and has to be requested
# explicitly, otherwise the server does not return it.
# NOTE(review): reconstructed from the Nextcloud WebDAV documentation; confirm
# against the server in use.
PROPFIND_BODY = (
    '<?xml version="1.0" encoding="UTF-8"?>'
    '<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">'
    "<d:prop>"
    "<d:getlastmodified/>"
    "<oc:fileid/>"
    "</d:prop>"
    "</d:propfind>"
)


def _parse_http_date(text):
    """Convert a GMT HTTP date string to a Unix timestamp.

    ``calendar.timegm`` interprets the parsed time as UTC, so the result does
    not depend on the time zone of the machine running the script.
    Raises ``ValueError`` when *text* does not match ``HTTP_DATE_FORMAT``.
    """
    return calendar.timegm(time.strptime(text.strip(), HTTP_DATE_FORMAT))


def _href_of(response):
    """Return the stripped href of a ``response`` element ('' when missing)."""
    return (response.findtext(DAV_HREF) or "").strip()


def _timestamps_of(response):
    """Yield the Unix timestamp of every non-empty getlastmodified element."""
    for node in response.iter(DAV_LAST_MODIFIED):
        # An empty element (e.g. inside a 404 propstat) carries no date.
        if node.text and node.text.strip():
            yield _parse_http_date(node.text)


def propfind(path, auth):
    """Send a Depth-1 PROPFIND request to *path* and return the answer text.

    *auth* is passed to ``requests`` unchanged (an auth object or a
    ``(user, password)`` tuple). The HTTP status is deliberately not checked:
    callers parse whatever XML comes back, and an error document simply
    contains no ``response`` elements.
    """
    # Imported here so the XML helpers of this module stay importable (and
    # testable) on machines without the HTTP dependency installed.
    import requests

    headers = {"Depth": "1", "Content-Type": "application/xml; charset=utf-8"}
    request = requests.Request(
        "PROPFIND", path, headers=headers, auth=auth, data=PROPFIND_BODY
    )
    # The context manager closes the session's connections when done.
    with requests.Session() as session:
        return session.send(request.prepare()).text


def search_folder(requestreturn):
    """Scan one PROPFIND answer for subfolders and files with broken dates.

    *requestreturn* is the multistatus XML text of a Depth-1 PROPFIND.
    Returns ``[innerfolders, linkswrongtime]``: the hrefs of the contained
    folders and the file ids of all files last modified before
    ``MIN_VALID_TIMESTAMP``. Folders themselves are never date-checked.
    Raises ``xml.etree.ElementTree.ParseError`` on malformed XML.
    """
    innerfolders = []
    linkswrongtime = []
    # The first folder listed is the folder that was requested. It must not be
    # queued again, otherwise the scan would loop forever.
    firstfolder = True
    root = ET.fromstring(requestreturn)
    for response in root.findall(".//" + DAV_RESPONSE):
        href = _href_of(response)
        if href.endswith("/"):
            if firstfolder:
                firstfolder = False
            else:
                innerfolders.append(href)
            continue
        if any(stamp < MIN_VALID_TIMESTAMP for stamp in _timestamps_of(response)):
            linkswrongtime.extend(
                node.text.strip()
                for node in response.iter(OC_FILEID)
                if node.text and node.text.strip()
            )
    return [innerfolders, linkswrongtime]


def version_check(xmlfile):
    """Return the id of the newest version with a plausible timestamp.

    *xmlfile* is the multistatus XML text listing the versions of one file.
    The id is the last path component of the version's href. Returns ``None``
    when no version is newer than ``MIN_VALID_TIMESTAMP``.
    Raises ``xml.etree.ElementTree.ParseError`` on malformed XML.
    """
    newest_timestamp = MIN_VALID_TIMESTAMP
    newest_version = None
    root = ET.fromstring(xmlfile)
    for response in root.findall(".//" + DAV_RESPONSE):
        version_id = _href_of(response).rsplit("/", 1)[-1]
        # An href ending in '/' is the version collection itself, not a version.
        if not version_id:
            continue
        for stamp in _timestamps_of(response):
            if stamp > newest_timestamp:
                newest_timestamp = stamp
                newest_version = version_id
    return newest_version


def main():
    """Scan the configured folder tree and print one result per broken file."""
    if len(sys.argv) < 3:
        sys.exit("usage: {} <user> <password>".format(sys.argv[0]))
    user = sys.argv[1]
    passw = sys.argv[2]
    # requests turns a (user, password) tuple into HTTP Basic authentication.
    auth = (user, passw)

    # Folders still to visit (breadth first) and ids of files with wrong time.
    folders = deque([FILES_PATH_TEMPLATE.format(user=user)])
    wrongtime = []
    while folders:
        folder = folders.popleft()
        subfolders, flagged = search_folder(propfind(BASE_URL + str(folder), auth))
        folders.extend(subfolders)
        wrongtime.extend(flagged)

    # Check every flagged file for a version with an intact timestamp.
    for fileid in wrongtime:
        version_path = BASE_URL + VERSIONS_PATH_TEMPLATE.format(user=user, fileid=fileid)
        print(version_check(propfind(version_path, auth)))


if __name__ == "__main__":
    main()