Fix broken modification dates in Nextcloud folders
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

249 lines
9.3 KiB

2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
2 years ago
  1. import argparse
  2. import datetime
  3. import filecmp
  4. import multiprocessing
  5. import os
  6. import tempfile
  7. import time
  8. import urllib.parse
  9. import sys
  10. import uuid
  11. import xml.etree.ElementTree as ET
  12. from pathlib import Path
  13. import requests
# some constants
# WebDAV path prefixes; the server base URL is put in front and the user name is appended when URLs are built
FILES_PATH_PREFIX = "/remote.php/dav/files/"
VERSIONS_PATH_PREFIX = "/remote.php/dav/versions/"
# the threshold for file timestamps (dates older than this are considered invalid)
# this is a naive datetime; it is compared against the naive timestamps parsed from PROPFIND responses
DATE_THRESHOLD = datetime.datetime(1990, 1, 1)
# we only need one session for the whole script
SESSION = requests.Session()
  21. def propfind(path, auth):
  22. """
  23. Get a file's Last Modified timestamp and FileID via a PROPFIND request
  24. :param path: The path of the file in question
  25. :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
  26. :return: An iterator of dictionaries, one for every directory entry. Entry properties are taken from the PROPFIND
  27. response
  28. """
  29. # do not descend further into subdirectories\
  30. # TODO: we could probably be faster if we did
  31. headers = {"Depth": "infinity"} # "1"}
  32. # This body returns only the timelastmodified and the fileid variable
  33. requested_data = \
  34. """
  35. <d:propfind xmlns:d=\"DAV:\" xmlns:oc=\"http://owncloud.org/ns\" xmlns:nc=\"http://nextcloud.org/ns\">
  36. <d:prop>
  37. <d:getlastmodified />
  38. <d:resourcetype />
  39. <oc:fileid />
  40. </d:prop>
  41. </d:propfind>
  42. """
  43. req = requests.Request("PROPFIND", path, headers=headers, auth=auth, data=requested_data)
  44. resp = SESSION.send(req.prepare())
  45. et = ET.fromstring(resp.text)
  46. for dav_response in et.findall('{DAV:}response'):
  47. entry = {}
  48. entry["path"] = dav_response.find("{DAV:}href").text
  49. # skip this entry itself
  50. if path.endswith(entry["path"]):
  51. continue
  52. props = dav_response.find("{DAV:}propstat").find("{DAV:}prop")
  53. try:
  54. entry["last_modified"] = datetime.datetime.strptime(
  55. props.find("{DAV:}getlastmodified").text,
  56. "%a, %d %b %Y %H:%M:%S GMT"
  57. )
  58. except (AttributeError, TypeError):
  59. pass
  60. entry["resource_type"] = []
  61. try:
  62. for resourcetype in props.find("{DAV:}resourcetype"):
  63. entry["resource_type"].append(resourcetype.tag)
  64. except (AttributeError, TypeError):
  65. pass
  66. try:
  67. entry["file_id"] = int(props.find("{http://owncloud.org/ns}fileid").text)
  68. except (AttributeError, TypeError):
  69. pass
  70. yield entry
  71. def find_valid_version(versions):
  72. """
  73. This function returns the fileid of the version of a given fileid with the most current timestamp or None if
  74. there are no versions with a timestamp younger than the threshold
  75. :param versions: An iterator as returned by propfind()
  76. :return: The entry of the iterator which has the most recent date or None if none exists
  77. """
  78. all_versions = {}
  79. for version in versions:
  80. if "last_modified" in version and DATE_THRESHOLD < version["last_modified"]:
  81. all_versions[version["last_modified"]] = version
  82. if len(all_versions) == 0:
  83. return None
  84. return all_versions
  85. def download_file(path, auth):
  86. """
  87. This function downloads one file and saves it on the local device.
  88. :param path: The path to the file in question
  89. :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
  90. :return: The path of the created file or '' if no file could be downloaded
  91. """
  92. r = requests.request(
  93. method='get',
  94. url=path,
  95. auth=auth
  96. )
  97. _, filename = os.path.split(path)
  98. if r.status_code == 200:
  99. with open(filename, 'wb') as file:
  100. file.write(r.content)
  101. return filename
  102. return ''
  103. def upload_file(local_path, remote_path, auth):
  104. """
  105. Uploads a file to the cloud
  106. :param local_path: File path of the file to be uploaded
  107. :param remote_path: Path where it should be uploaded on the cloud
  108. :param auth: Auth data for the HTTP request
  109. :return: True if the file was successfully uploaded, False otherwise
  110. """
  111. r = requests.put(
  112. url=remote_path,
  113. auth=auth,
  114. data=open(local_path, 'rb').read()
  115. )
  116. if 200 <= r.status_code < 300:
  117. return True
  118. return False
  119. def content_equal(original, fixed_version, auth):
  120. """
  121. Compares the two file versions for replacement.
  122. :param original: local filename of the entry with wrong timestamp.
  123. :param fixed_version: Filename of the version for comparison with the original.
  124. :param auth: Auth data for the HTTP request (e.g. a requests.auth.HTTPBasicAuth object)
  125. :return: True, if they are equal in Metadata and content. False otherwise. Also if one or both files couldn't
  126. be downloaded
  127. """
  128. fixed = download_file(fixed_version, auth)
  129. if original == '' or fixed == '':
  130. # TODO: sth better if the downloading failed?
  131. return False
  132. # shallow comparison
  133. shallow = filecmp.cmp(original, fixed)
  134. # deep comparison
  135. deep = filecmp.cmp(original, fixed, shallow=False)
  136. os.remove(fixed)
  137. return deep and shallow
  138. def restore_file(packed):
  139. """
  140. Handles one file. Searches for the latest older version with intact timestamp and compares them.
  141. :param packed: data needed for one file: (entry, arguments, auth). entry represents the original file, arguments
  142. are the runtime arguments and auth is for the http authentification
  143. """
  144. entry, arguments, auth = packed
  145. fixed_versions = find_valid_version(propfind(
  146. arguments.server + VERSIONS_PATH_PREFIX + arguments.username + "/versions/" + str(entry["file_id"]),
  147. auth))
  148. original = download_file(arguments.server + entry['path'], auth)
  149. restored = False
  150. if fixed_versions is not None and len(fixed_versions) > 0:
  151. keys = sorted(fixed_versions, reverse=True ) # sort dates descending to start with latest version
  152. for i in range(0, len(keys)):
  153. if content_equal(original, arguments.server + fixed_versions[keys[i]]['path'], auth):
  154. # found latest matching version
  155. # print("Restore from {}".format(fixed_versions[keys[i]]))
  156. restored = restore_by_version(arguments.server + fixed_versions[keys[i]]['path'], auth, arguments)
  157. break # stop looking any further
  158. if not restored:
  159. # print("Touch file.")
  160. restored = restore_by_touch(arguments.server + entry['path'], original, auth)
  161. if not restored:
  162. print('File couldn\'t be restored: ' + entry['path'])
  163. os.remove(original)
  164. def restore_by_version(path_version, auth, args):
  165. """
  166. Restores the given old version of a file
  167. :param path_version: cloud path to the version to be restored
  168. :param auth: Auth data for the HTTP request
  169. :param args: Runtime arguments
  170. :return: True if the version was successfully restored, false otherwise
  171. """
  172. # uuid4 should create a random uuid
  173. headers = {"Destination": args.server + VERSIONS_PATH_PREFIX + args.username + "/restore/" + str(uuid.uuid4())}
  174. r = requests.request(
  175. method='move',
  176. url=path_version,
  177. auth=auth,
  178. headers=headers
  179. )
  180. if 200 <= r.status_code < 300:
  181. return True
  182. return False
  183. def restore_by_touch(path, local_path, auth):
  184. """
  185. Restores a file by touch: Touch on cloud isn't possible so the file is simply downloaded and uploaded again
  186. :param path: The cloud path to the file in question
  187. :param local_path: The local path to the file in question
  188. :param auth: Auth data for the HTTP request
  189. :return: True if the restoring was successful, False otherwise.
  190. """
  191. if local_path != '':
  192. return upload_file(local_path, path, auth)
  193. else:
  194. return False
  195. if __name__ == "__main__":
  196. # get all necessary data from the command line
  197. argparser = argparse.ArgumentParser(description="Fix broken dates in Nextcloud folders.")
  198. argparser.add_argument("server", help="The base URL of the Nextcloud server.")
  199. argparser.add_argument("username", help="The user to log in as.")
  200. argparser.add_argument("password", help="The password for accessing Nextcloud. Hint: Use an App Token!")
  201. argparser.add_argument(
  202. "-p", "--path",
  203. default="/",
  204. help="The path to search, relative to the user's root. Default: /",
  205. dest="search_path"
  206. )
  207. arguments = argparser.parse_args()
  208. # Prepare HTTP Basic Authentication
  209. auth = requests.auth.HTTPBasicAuth(arguments.username, arguments.password)
  210. # Prepare the path we want to use
  211. mainpath = FILES_PATH_PREFIX + arguments.username + arguments.search_path
  212. # List of all entries with wrong time
  213. wrongtime = []
  214. # Iterate through all folders and check for wrong timestamps
  215. url = arguments.server + mainpath
  216. for entry in propfind(url, auth):
  217. if "last_modified" not in entry or entry["last_modified"] < DATE_THRESHOLD:
  218. wrongtime.append(entry)
  219. print()
  220. # Iterate through all fileids with wrong timestamps and replace with versions with intact timestamp or touch
  221. # done parallel
  222. data = [(entry, arguments, auth) for entry in wrongtime]
  223. pool_obj = multiprocessing.Pool(processes=1)
  224. results = pool_obj.map(restore_file, data)