img_utils.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. #!/usr/bin/env python
  2. # Licensed to the Apache Software Foundation (ASF) under one
  3. # or more contributor license agreements. See the NOTICE file
  4. # distributed with this work for additional information
  5. # regarding copyright ownership. The ASF licenses this file
  6. # to you under the Apache License, Version 2.0 (the
  7. # "License"); you may not use this file except in compliance
  8. # with the License. You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing,
  13. # software distributed under the License is distributed on an
  14. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. # KIND, either express or implied. See the License for the
  16. # specific language governing permissions and limitations
  17. # under the License.
  18. """Utils for documentation's images."""
  19. import argparse
  20. import logging
  21. import re
  22. from pathlib import Path
  23. from typing import Set, Tuple
  24. log = logging.getLogger(__file__)
  25. log.addHandler(logging.StreamHandler())
  26. root_dir: Path = Path(__file__).parent
  27. img_dir: Path = root_dir.joinpath("img")
  28. doc_dir: Path = root_dir.joinpath("docs")
  29. expect_img_types: Set = {
  30. "jpg",
  31. "png",
  32. }
  33. def build_pattern() -> re.Pattern:
  34. """Build current document image regexp pattern."""
  35. return re.compile(f"(/img.*?\\.({'|'.join(expect_img_types)}))")
  36. def get_files_recurse(path: Path) -> Set:
  37. """Get all files recursively from given :param:`path`."""
  38. res = set()
  39. for p in path.rglob("*"):
  40. if p.is_dir():
  41. continue
  42. res.add(p)
  43. return res
  44. def get_paths_uniq_suffix(paths: Set[Path]) -> Set:
  45. """Get file suffix without dot in given :param:`paths`."""
  46. res = set()
  47. for path in paths:
  48. if path.suffix == "":
  49. log.warning("There is a path %s without suffix.", path)
  50. res.add(path.suffix[1:])
  51. return res
  52. def get_paths_rel_path(paths: Set[Path], rel: Path) -> Set:
  53. """Get files relative path to :param:`rel` with ``/`` prefix from given :param:`paths`."""
  54. return {f"/{path.relative_to(rel)}" for path in paths}
  55. def get_docs_img_path(paths: Set[Path], pattern: re.Pattern) -> Set:
  56. """Get all img syntax from given :param:`paths` using the regexp from :param:`pattern`."""
  57. res = set()
  58. for path in paths:
  59. content = path.read_text()
  60. find = pattern.findall(content)
  61. if find:
  62. res |= {item[0] for item in find}
  63. return res
  64. def del_rel_path(paths: Set[str]) -> None:
  65. """Delete all relative :param:`paths` from current root/docs directory."""
  66. for path in paths:
  67. log.debug("Deleting file in the path %s", path)
  68. root_dir.joinpath(path.lstrip("/")).unlink()
  69. def del_empty_dir_recurse(path: Path) -> None:
  70. """Delete all empty directory recursively from given :param:`paths`."""
  71. for p in path.rglob("*"):
  72. if p.is_dir() and not any(p.iterdir()):
  73. log.debug("Deleting directory in the path %s", p)
  74. p.rmdir()
  75. def diff_two_set(first: Set, second: Set) -> Tuple[set, set]:
  76. """Get two set difference tuple.
  77. :return: Tuple[(first - second), (second - first)]
  78. """
  79. return first.difference(second), second.difference(first)
  80. def check_diff_img_type() -> Tuple[set, set]:
  81. """Check images difference type.
  82. :return: Tuple[(actual - expect), (expect - actual)]
  83. """
  84. img = get_files_recurse(img_dir)
  85. img_suffix = get_paths_uniq_suffix(img)
  86. return diff_two_set(img_suffix, expect_img_types)
  87. def check_diff_img() -> Tuple[set, set]:
  88. """Check images difference files.
  89. :return: Tuple[(in_docs - in_img_dir), (in_img_dir - in_docs)]
  90. """
  91. img = get_files_recurse(img_dir)
  92. docs = get_files_recurse(doc_dir)
  93. img_rel_path = get_paths_rel_path(img, root_dir)
  94. pat = build_pattern()
  95. docs_rel_path = get_docs_img_path(docs, pat)
  96. return diff_two_set(docs_rel_path, img_rel_path)
  97. def check() -> None:
  98. """Runner for `check` sub command."""
  99. img_type_act, img_type_exp = check_diff_img_type()
  100. assert not img_type_act and not img_type_exp, (
  101. f"Images type assert failed: \n"
  102. f"* difference actual types to expect is: {img_type_act if img_type_act else 'None'}\n"
  103. f"* difference expect types to actual is: {img_type_exp if img_type_exp else 'None'}\n"
  104. )
  105. img_docs, img_img = check_diff_img()
  106. assert not img_docs and not img_img, (
  107. f"Images assert failed: \n"
  108. f"* difference `docs` imgs to `img` is: {img_docs if img_docs else 'None'}\n"
  109. f"* difference `img` imgs to `docs` is: {img_img if img_img else 'None'}\n"
  110. )
  111. def prune() -> None:
  112. """Runner for `prune` sub command."""
  113. _, img_img = check_diff_img()
  114. del_rel_path(img_img)
  115. del_empty_dir_recurse(img_dir)
  116. def build_argparse() -> argparse.ArgumentParser:
  117. """Build argparse.ArgumentParser with specific configuration."""
  118. parser = argparse.ArgumentParser(prog="img_utils")
  119. parser.add_argument(
  120. "-v",
  121. "--verbose",
  122. dest="log_level",
  123. action="store_const",
  124. const=logging.DEBUG,
  125. default=logging.INFO,
  126. help="Show verbose or not.",
  127. )
  128. subparsers = parser.add_subparsers(
  129. title="subcommands",
  130. dest="subcommand",
  131. help="Choose one of the subcommand you want to run.",
  132. )
  133. parser_check = subparsers.add_parser(
  134. "check", help="Check whether invalid or missing img exists."
  135. )
  136. parser_check.set_defaults(func=check)
  137. parser_prune = subparsers.add_parser(
  138. "prune", help="Remove img in directory `img` but not use in directory `docs`."
  139. )
  140. parser_prune.set_defaults(func=prune)
  141. # TODO Add subcommand `reorder`
  142. return parser
  143. if __name__ == "__main__":
  144. arg_parser = build_argparse()
  145. args = arg_parser.parse_args()
  146. # args = arg_parser.parse_args(["check"])
  147. log.setLevel(args.log_level)
  148. if args.log_level <= logging.DEBUG:
  149. print("All args is:", args)
  150. args.func()