# reposync/scripts/http/getFetch.py
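"""Recursively crawl an HTTP directory index and record every file URL.

Prints a tree of the directories (D) and files (F) it visits, then dumps
the collected URLs, grouped by directory depth, into a <hostname>.fetch
JSON file.
"""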

import json
import sys

import requests
from bs4 import BeautifulSoup as bs4

def tree(idx: int, flag=False) -> str:
    """Return the tree-drawing prefix for an entry `idx` levels deep."""
    idx += 1
    if idx == 1:
        if flag:
            return "├────"
        return "├──"
    elif idx == 2:
        if flag:
            return "│ ├────"
        return "│ ├──"
    else:
        # one "│ " of padding per extra level of nesting
        if flag:
            return "│ " * (idx - 2) + "│ ├────"
        return "│ " * (idx - 2) + "│ ├──"
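# Example renderings (a quick sketch of the prefixes produced above):
#   tree(0)    -> "├──"
#   tree(1)    -> "│ ├──"
#   tree(2)    -> "│ │ ├──"
#   tree(0, 1) -> "├────"   # the longer branch marks file entries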

def parseIndex(url, base="", idx=0, dirs=None, ret=None):
    """Recursively walk the directory index at `base + url`.

    Returns `ret`, a list with one dict per directory depth; each dict maps
    the accumulated directory path to a list of {filename: full URL} entries.
    """
    if dirs is None:
        dirs = []
    if ret is None:
        ret = []
    print(f"D {(tree(idx) + url):<40} {''.join(dirs):>80}")
    html = bs4(requests.get(base + url).text, features="html.parser")
    hrefs = [a["href"] for a in html.find_all('a')]
    hrefs = [i for i in hrefs if i[0] != '?']  # drop sort-order links ("?C=N;O=D", ...)
    fls = []
    for href in hrefs:
        if href[-1] == "/":  # directory: recurse, but skip absolute and parent links
            if href[0] != "/" and href[0] != ".":
                parseIndex(href, base + url, idx + 1, dirs + [href], ret)
        else:  # file: normalise a leading "./" away and record it
            if href[0:2] == "./" and "/" not in href[2:]:
                href = href[2:]
            assert "/" not in href
            print(f"F {(tree(idx, 1) + href):<80}")
            fls.append(href)
    while len(ret) <= idx:
        ret.append({})
    if ''.join(dirs) not in ret[idx]:
        ret[idx][''.join(dirs)] = []
    for fl in fls:
        ret[idx][''.join(dirs)].append({fl: base + url + fl})
    return ret
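# Shape of the returned list, sketched for a hypothetical host "http://host/"
# crawled with base path "/srv/" and a single subdirectory "sub/":
# [
#     {"/srv/":     [{"a.txt": "http://host/a.txt"}]},          # depth 0
#     {"/srv/sub/": [{"b.txt": "http://host/sub/b.txt"}]},      # depth 1
# ]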

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: getFetch.py [URL] [Path]")
        sys.exit()
    # urls = {"archlinuxarm": "http://jp.mirror.archlinuxarm.org/", "asahilinux": "https://cdn.asahilinux.org/", "linux-surface": "https://pkg.surfacelinux.com/arch/"}
    if sys.argv[1].startswith("http"):
        if input(f"[*] Download from {sys.argv[1]}? ") in ("Y", "y"):
            url = sys.argv[1]
        else:
            sys.exit()
    else:
        print("[*] Not supported")
        sys.exit()
    bpath = sys.argv[2]
    assert bpath[-1] == '/', "base path must end with '/'"
    print()
    print(f"[*] Downloading file list from {url} with base path {bpath}")
    files = parseIndex('', base=url, dirs=[bpath])
    filename = url.split('/')[2] + '.fetch'
    with open(filename, 'w') as f:
        f.write(json.dumps(files, indent=4))
    print()
    print(f"[*] Saved to {filename}.")
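
# Example session (hypothetical local path; the mirror is from the list above):
#   $ python getFetch.py http://jp.mirror.archlinuxarm.org/ /srv/mirror/
#   [*] Download from http://jp.mirror.archlinuxarm.org/? y
#   ...
#   [*] Saved to jp.mirror.archlinuxarm.org.fetch.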