# reposync/scripts/http/getFetch.py
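"""Recursively crawl an HTTP directory index and record every file URL.

Prints a tree of the directories (D) and files (F) it visits, then dumps
the collected URLs, grouped by directory depth, into a <hostname>.fetch
JSON file.
"""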

import json
import sys

import requests
from bs4 import BeautifulSoup as bs4

def tree(idx: int, flag=False) -> str:
    """Return the tree-drawing prefix for an entry `idx` levels deep."""
    idx += 1
    if idx == 1:
        if flag:
            return "├────"
        return "├──"
    elif idx == 2:
        if flag:
            return "│ ├────"
        return "│ ├──"
    else:
        # one "│ " of padding per extra level of nesting
        if flag:
            return "│ " * (idx - 2) + "│ ├────"
        return "│ " * (idx - 2) + "│ ├──"
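# Example renderings (a quick sketch of the prefixes produced above):
#   tree(0)    -> "├──"
#   tree(1)    -> "│ ├──"
#   tree(2)    -> "│ │ ├──"
#   tree(0, 1) -> "├────"   # the longer branch marks file entries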

def parseIndex(url, base="", idx=0, dirs=None, ret=None):
    """Recursively walk the directory index at `base + url`.

    Returns `ret`, a list with one dict per directory depth; each dict maps
    the accumulated directory path to a list of {filename: full URL} entries.
    """
    if dirs is None:
        dirs = []
    if ret is None:
        ret = []
    print(f"D {(tree(idx) + url):<40} {''.join(dirs):>80}")
    html = bs4(requests.get(base + url).text, features="html.parser")
    hrefs = [a["href"] for a in html.find_all('a')]
    hrefs = [i for i in hrefs if i[0] != '?']  # drop sort-order links ("?C=N;O=D", ...)
    fls = []
    for href in hrefs:
        if href[-1] == "/":  # directory: recurse, but skip absolute and parent links
            if href[0] != "/" and href[0] != ".":
                parseIndex(href, base + url, idx + 1, dirs + [href], ret)
        else:  # file: normalise a leading "./" away and record it
            if href[0:2] == "./" and "/" not in href[2:]:
                href = href[2:]
            assert "/" not in href
            print(f"F {(tree(idx, 1) + href):<80}")
            fls.append(href)
    while len(ret) <= idx:
        ret.append({})
    if ''.join(dirs) not in ret[idx]:
        ret[idx][''.join(dirs)] = []
    for fl in fls:
        ret[idx][''.join(dirs)].append({fl: base + url + fl})
    return ret
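# Shape of the returned list, sketched for a hypothetical host "http://host/"
# crawled with base path "/srv/" and a single subdirectory "sub/":
# [
#     {"/srv/":     [{"a.txt": "http://host/a.txt"}]},          # depth 0
#     {"/srv/sub/": [{"b.txt": "http://host/sub/b.txt"}]},      # depth 1
# ]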

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: getFetch.py [URL] [Path]")
        sys.exit()
    # urls = {"archlinuxarm": "http://jp.mirror.archlinuxarm.org/", "asahilinux": "https://cdn.asahilinux.org/", "linux-surface": "https://pkg.surfacelinux.com/arch/"}
    if sys.argv[1].startswith("http"):
        if input(f"[*] Download from {sys.argv[1]}? ") in ("Y", "y"):
            url = sys.argv[1]
        else:
            sys.exit()
    else:
        print("[*] Not supported")
        sys.exit()
    bpath = sys.argv[2]
    assert bpath[-1] == '/', "base path must end with '/'"
    print()
    print(f"[*] Downloading file list from {url} with base path {bpath}")
    files = parseIndex('', base=url, dirs=[bpath])
    filename = url.split('/')[2] + '.fetch'
    with open(filename, 'w') as f:
        f.write(json.dumps(files, indent=4))
    print()
    print(f"[*] Saved to {filename}.")
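
# Example session (hypothetical local path; the mirror is from the list above):
#   $ python getFetch.py http://jp.mirror.archlinuxarm.org/ /srv/mirror/
#   [*] Download from http://jp.mirror.archlinuxarm.org/? y
#   ...
#   [*] Saved to jp.mirror.archlinuxarm.org.fetch.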