Spaces:

malvika2003
/

openvino_notebooks

Runtime error

App Files Files Community

openvino_notebooks / .ci /table_of_content.py

malvika2003

Upload folder using huggingface_hub

db5855f verified over 1 year ago

raw

history blame contribute delete

5.62 kB

	import json
	import pathlib
	import argparse
	import re

	# Regex for the markdown heading that opens a table of contents: one or more
	# "#", whitespace, the words "Table of content" and an optional ":".
	# It is applied with re.match (a prefix match), so the generated heading
	# "#### Table of contents:" is recognised as well.
	TABLE_OF_CONTENT = r"#+\s+Table of content:?"


	def find_tc_in_cell(cell):
	    """Locate the "Table of content" heading inside a notebook cell.

	    Args:
	        cell: Notebook cell mapping; only ``cell["source"]`` (an iterable of
	            text lines) is read.

	    Returns:
	        ``(cell, line_index)`` for the first source line that matches
	        ``TABLE_OF_CONTENT``, or ``(None, None)`` when no line matches.
	    """
	    for line_number, line in enumerate(cell["source"]):
	        if re.match(TABLE_OF_CONTENT, line):
	            return cell, line_number

	    return None, None


	def create_title_for_tc(title):
	    """Turn a markdown heading line into the text shown in the table of contents.

	    The leading ``#`` markers and following whitespace are dropped, square
	    brackets and newlines are removed, and the ``(http...)`` target of any
	    inline markdown link is stripped so only the link text remains.

	    Args:
	        title: Heading line, e.g. ``"## [Docs](https://example.com) intro\\n"``.

	    Returns:
	        The cleaned title text, e.g. ``"Docs intro"``.
	    """
	    title_for_tc = title.lstrip("#").lstrip()
	    title_for_tc = re.sub(r"[\[\]\n]", "", title_for_tc)
	    # Parentheses must be escaped to match the literal "(http...)" link target.
	    # "[^)]*" stops at the first ")" so text between two links is preserved.
	    title_for_tc = re.sub(r"\(http[^)]*\)", "", title_for_tc)

	    return title_for_tc


	def create_link_for_tc(title):
	    """Build the anchor fragment used to link to a title.

	    Backticks, ``$`` and ``^`` characters are removed and every space is
	    replaced with ``-``.

	    Args:
	        title: Title text as produced for the table of contents.

	    Returns:
	        The anchor string (without the leading ``#``).
	    """
	    link = re.sub(r"[`$^]", "", title)
	    return link.replace(" ", "-")


	def remove_old_tc(cell, idx):
	    """Strip a previously generated table of contents from a cell, in place.

	    Starting at ``idx``, every source line that looks like a table-of-contents
	    entry (``- [title](#link)``, optionally indented) or like the table
	    heading itself is dropped. Lines before ``idx`` are never touched.

	    Args:
	        cell: Notebook cell mapping with a ``"source"`` list, or ``None``.
	        idx: Index of the first source line to inspect.

	    Returns:
	        The same ``cell`` object (``None`` if ``None`` was passed).
	    """
	    if cell is None:
	        return None

	    source = cell["source"]
	    # Slice assignment keeps the original list object and, unlike
	    # list.remove(), cannot delete an equal line located before idx.
	    source[idx:] = [
	        line
	        for line in source[idx:]
	        if not (re.match(r"\s*-\s*\[.*\]\(#.*\).*", line) or re.match(TABLE_OF_CONTENT, line))
	    ]
	    return cell


	def get_tc_line(title, title_for_tc, link, tc_list, titles_list):
	    """Format one table-of-contents entry with an indent matching the heading level.

	    The raw indent is ``(position of the first space in title - 2) * 4``, so
	    ``##`` headings sit at column 0 and each extra ``#`` adds four spaces. The
	    indent is then corrected against the previous entry so the markdown list
	    stays well-formed.

	    Args:
	        title: Stripped heading line including its ``#`` markers.
	        title_for_tc: Text to show inside the link.
	        link: Anchor the entry points to (without ``#``).
	        tc_list: Entries generated so far; the last one provides the previous indent.
	        titles_list: Stripped heading lines generated so far, parallel to ``tc_list``.

	    Returns:
	        The entry line ``"<indent>- [title_for_tc](#link)\\n"``.
	    """
	    # str.find returns -1 where str.index would raise, so headings written
	    # without a space after the "#" markers (e.g. "#Title") are handled
	    # without a try/except.
	    space_pos = title.find(" ")
	    indents_num = (space_pos - 2) * 4 if space_pos >= 0 else -1

	    if not tc_list or indents_num < 0:
	        # when first list item have more than 1 indents the alignment would be broken
	        indents_num = 0
	    else:
	        previous_indent = tc_list[-1].index("-")
	        if indents_num - previous_indent > 4:
	            # when previous list item have n indents and current have n+4+1 it broke the alignment
	            indents_num = previous_indent + 4
	        elif indents_num != previous_indent and space_pos == titles_list[-1].find(" "):
	            # when we have several titles with same wrong alignments
	            indents_num = previous_indent

	    indents = " " * indents_num + "-" + " "
	    return f"{indents}[{title_for_tc}](#{link})\n"


	def is_ref_to_top_exists(cell, idx):
	    """Check whether a "back to top" link directly follows the line at ``idx``.

	    Only blank lines may separate the line at ``idx`` from the link; the scan
	    stops at the first non-blank line.

	    Args:
	        cell: Sequence of source lines (the caller in this file passes a
	            cell's ``"source"`` list, not the cell mapping itself).
	        idx: Index of the title line to look after.

	    Returns:
	        ``True`` if the first non-blank line after ``idx`` contains the
	        back-to-top link, ``False`` otherwise.
	    """
	    for row in cell[idx + 1 :]:
	        row = row.strip()
	        if "[back to top ⬆️](#Table-of-content" in row:
	            return True
	        if row:
	            # content of block started
	            return False
	    return False


	def is_markdown(cell):
	    """Return ``True`` when the notebook cell's ``"cell_type"`` is ``"markdown"``."""
	    return cell["cell_type"] == "markdown"


	def is_title(line):
	    """Tell whether a source line is a markdown heading with actual text.

	    A heading is a line whose stripped form starts with ``#`` and still has
	    non-whitespace text after the ``#`` markers are removed.

	    Args:
	        line: One line of a cell's source.

	    Returns:
	        ``True`` for a non-empty heading, ``False`` otherwise.
	    """
	    stripped = line.strip()
	    # bool() keeps the predicate from leaking the remaining title text.
	    return stripped.startswith("#") and bool(stripped.lstrip("#").lstrip())


	def _add_ref_to_top(source, idx):
	    """Insert a "back to top" link right after the title at ``source[idx]`` if it is missing."""
	    if is_ref_to_top_exists(source, idx):
	        return
	    if not source[idx].endswith("\n"):
	        # Terminate the title in place; inserting a fixed copy instead would
	        # leave the unterminated original behind as a duplicate title.
	        source[idx] = source[idx] + "\n"
	    source.insert(idx + 1, "[back to top ⬆️](#Table-of-contents:)\n")
	    source.insert(idx + 2, "")


	def generate_table_of_content(notebook_path: pathlib.Path):
	    """Regenerate the table of contents of a notebook file in place.

	    The notebook JSON is loaded, every heading found in the markdown cells
	    after the first cell gets a "back to top" link (if it has none) and an
	    entry in the table of contents. The table is written into the cell that
	    already holds a "Table of content" heading (after removing the old
	    entries), or appended to the first cell when no such heading exists. The
	    file is then overwritten. A notebook without cells is left untouched.

	    Args:
	        notebook_path: Path of the ``.ipynb`` file to update.
	    """
	    with open(notebook_path, "r", encoding="utf-8") as notebook_file:
	        notebook_json = json.load(notebook_file)

	    cells = notebook_json["cells"]
	    if not cells:
	        return

	    table_of_content_cell, table_of_content_cell_idx = find_tc_in_cell(cells[0])

	    table_of_content = []
	    all_titles = []
	    seen_links = set()
	    for cell in filter(is_markdown, cells[1:]):
	        if table_of_content_cell is None:
	            table_of_content_cell, table_of_content_cell_idx = find_tc_in_cell(cell)
	            if table_of_content_cell is not None:
	                # The cell that holds the table itself is not scanned for titles.
	                continue

	        source = cell["source"]
	        # Walk by position (not list.index) so repeated identical title lines
	        # are each handled at their own location; the lines inserted by
	        # _add_ref_to_top are not titles, so they are simply stepped over.
	        position = 0
	        while position < len(source):
	            if not is_title(source[position]):
	                position += 1
	                continue

	            _add_ref_to_top(source, position)

	            title = source[position].strip()
	            title_for_tc = create_title_for_tc(title)
	            link_for_tc = create_link_for_tc(title_for_tc)
	            new_line = get_tc_line(title, title_for_tc, link_for_tc, table_of_content, all_titles)

	            # Warn as soon as a link repeats (checked before recording it).
	            if link_for_tc in seen_links:
	                print(
	                    f'WARINING: the title "{title_for_tc}" has already used in titles.\n'
	                    + "Navigation will work inccorect, the link will only point to "
	                    + "the first encountered title"
	                )
	            seen_links.add(link_for_tc)

	            table_of_content.append(new_line)
	            all_titles.append(title)
	            position += 1

	    table_of_content = ["\n", "#### Table of contents:\n\n"] + table_of_content + ["\n"]

	    if table_of_content_cell is not None:
	        target_cell = remove_old_tc(table_of_content_cell, table_of_content_cell_idx)
	    else:
	        # No existing table: fall back to the first cell of the notebook.
	        target_cell = cells[0]
	    target_cell["source"].extend(table_of_content)

	    with open(notebook_path, "w", encoding="utf-8") as in_f:
	        json.dump(notebook_json, in_f, ensure_ascii=False, indent=1)


	if __name__ == "__main__":
	    # Command-line entry point: update one notebook, or every notebook found
	    # under a directory.
	    parser = argparse.ArgumentParser()

	    parser.add_argument(
	        "-s",
	        "--source",
	        # Adjacent literals instead of a backslash continuation, so source
	        # indentation does not end up inside the help text.
	        help=(
	            "Please, specify notebook or folder with notebooks. "
	            "Table of content will be added or modified in each."
	        ),
	        required=True,
	    )

	    args = parser.parse_args()
	    path_to_source = pathlib.Path(args.source)
	    if not path_to_source.exists():
	        print(f"Incorrect path to notebook(s) {path_to_source}")
	        # Same exit status as the previous exit() call, without relying on
	        # the helper injected by the site module.
	        raise SystemExit
	    elif path_to_source.is_file():
	        generate_table_of_content(path_to_source)
	    elif path_to_source.is_dir():
	        # "**/*.ipynb" walks the directory recursively and matches every
	        # file with the .ipynb suffix.
	        for notebook in path_to_source.glob("**/*.ipynb"):
	            generate_table_of_content(notebook)