231 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			231 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
#!/usr/bin/python3
 | 
						|
 | 
						|
import re, math, os, sys, datetime
 | 
						|
 | 
						|
# Top-level mirror paths that are included in the per-path statistics.
# Entries are matched against the result of get_path_parent(), which for a
# request path such as "/archlinux/iso/x" is "/archlinux" -- so every entry
# must carry its leading slash or it can never match.
path_list = [
    "/archlinux", "/archlinuxarm", "/asahilinux",
    "/cd-image", "/debian", "/debian-cd", "/fedora",
    "/gnu", "/index.html", "/kali", "/kali-images",
    "/linux", "/manjaro", "/raspbian", "/static",
    "/ubuntu", "/ubuntu-cd", "/ubuntu-old", "/",
]
 | 
						|
 | 
						|
def byte_human(size_bytes):
    """Format a byte count as a short human-readable string (1024-based).

    Args:
        size_bytes: Number of bytes. Negative values are formatted by
            magnitude with a leading "-".

    Returns:
        The value scaled to the largest fitting unit, rounded to two
        decimals, followed by the unit name, e.g. ``"1.5KB"``. Zero yields
        ``"0B"``. Values beyond the largest unit stay expressed in ``YB``.
    """
    if size_bytes == 0:
        return "0B"
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    sign = "-" if size_bytes < 0 else ""
    value = abs(size_bytes)
    i = 0
    # Repeated division instead of math.log(): the index can never run past
    # the unit table, and there is no float-log rounding or domain error.
    while value >= 1024 and i < len(size_name) - 1:
        value /= 1024
        i += 1
    s = round(float(value), 2)
    return "%s%s%s" % (sign, s, size_name[i])
 | 
						|
 | 
						|
 | 
						|
def parse_req_line(http_entry):
    """Extract the method and path from an HTTP request line.

    Args:
        http_entry: The request line as logged, e.g.
            ``"GET /debian/dists HTTP/1.1"``.

    Returns:
        A dict that is empty when the first token is not a recognised
        method. Otherwise it holds ``"method"``, plus ``"path"`` when a
        second token exists and starts with ``"/"``.
    """
    # "OPTION" is kept for backward compatibility; the actual HTTP method
    # name is "OPTIONS", which the original list never matched.
    known_methods = ("HEAD", "POST", "GET", "OPTION", "OPTIONS")

    req = {}
    tokens = http_entry.split()
    if not tokens or tokens[0] not in known_methods:
        return req

    req["method"] = tokens[0]
    # A request line may consist of the method alone; do not index past it.
    if len(tokens) > 1 and tokens[1].startswith("/"):
        req["path"] = tokens[1]
    return req
 | 
						|
 | 
						|
 | 
						|
def get_path_parent(path):
    """Reduce a request path to its first path component.

    Everything from the first ``?`` (and then the first ``&``) onwards is
    discarded before the path is split on ``/``.

    Args:
        path: Request path, e.g. ``"/debian/dists/stable?x=1"``.

    Returns:
        The first two ``/``-separated pieces joined back together (for
        ``"/debian/dists"`` that is ``"/debian"``), or ``"/" + path`` when
        the path contains no slash at all.
    """
    bare = path.partition("?")[0].partition("&")[0]
    segments = bare.split("/")
    if len(segments) > 1:
        return "/".join(segments[:2])
    return "/" + segments[0]
 | 
						|
 | 
						|
def get_date_range(all_log):
    """Print the earliest and latest timestamp found in *all_log*.

    Args:
        all_log: Iterable of entries whose element ``[2]`` is a timestamp
            string in ``"%d/%b/%Y:%H:%M:%S %z"`` format.
            NOTE(review): parse_log_entry() in this file builds dicts keyed
            by name, not by position -- confirm which entry shape callers
            of this function actually pass.

    Prints ``Date: <first> ~ <last>`` using ``"%m/%d %H:%M:%S"``. Nothing is
    printed when *all_log* is empty.
    """
    # Parse first, then order: sorting the raw strings would compare them
    # lexicographically ("01/Feb" < "31/Jan") and report a wrong range.
    stamps = [
        datetime.datetime.strptime(i[2], "%d/%b/%Y:%H:%M:%S %z") for i in all_log
    ]
    if not stamps:
        return
    first = min(stamps).strftime("%m/%d %H:%M:%S")
    last = max(stamps).strftime("%m/%d %H:%M:%S")
    print(f"Date: {first} ~ {last}")
 | 
						|
 | 
						|
 | 
						|
def parse_log_entry(entry):
    """Parse one line of the extended-format access log into a dict.

    Args:
        entry: A single log line.

    Returns:
        A dict with the keys ``client``, ``user``, ``time`` (a timezone-aware
        ``datetime``), ``req``, ``status``, ``bytes``, ``referer``, ``ua``,
        ``forward``, ``host``, ``server`` and ``reqtime``; every value except
        ``time`` is the raw matched string. ``None`` is returned when the
        line does not match the expected layout, when the request field does
        not contain ``"HTTP"``, or when the timestamp cannot be parsed.
    """
    pattern = r'([\d\.]+) - (\S+) \[(.*?)\] "(.*?)" (\d+) (\d+) "(.*?)" "(.*?)" "(.*?)" "(.*?)" sn="(.*?)" rt=([\d\.]+) [^\n]+'

    match = re.match(pattern, entry)
    if not match:
        return None

    # Unpacking doubles as the "exactly 12 groups" check, and unlike an
    # assert it is not stripped when Python runs with -O.
    (client, user, time_raw, request, status, size,
     referer, agent, forward, host, server, reqtime) = match.groups()

    if "HTTP" not in request:
        return None

    try:
        timestamp = datetime.datetime.strptime(time_raw, "%d/%b/%Y:%H:%M:%S %z")
    except ValueError:
        # One corrupt timestamp should skip the line, not abort the run.
        return None

    return {
        "client": client,
        "user": user,
        "time": timestamp,
        "req": request,
        "status": status,
        "bytes": size,
        "referer": referer,
        "ua": agent,
        "forward": forward,
        "host": host,
        "server": server,
        "reqtime": reqtime,
    }
 | 
						|
 | 
						|
 | 
						|
def get_all_log_entry(log_file):
    """Read *log_file* and return every line that parses as a log entry.

    Args:
        log_file: Path of the access log to read.

    Returns:
        A list of the truthy results of ``parse_log_entry`` in file order;
        lines that it rejects are skipped.
    """
    # Stream line by line: this keeps the final entry even when the file has
    # no trailing newline (slicing off the last split element dropped it)
    # and avoids holding the raw file in memory next to the parsed entries.
    # errors="replace" keeps one undecodable byte from aborting the run.
    with open(log_file, "r", errors="replace") as f:
        return [
            parsed
            for line in f
            if (parsed := parse_log_entry(line.rstrip("\n")))
        ]
 | 
						|
 | 
						|
 | 
						|
def main(log_file, logs=None):
    """Print, per calendar day, request count and bytes for each known path.

    Args:
        log_file: Path of the access log; read only when *logs* is empty.
        logs: Optional list of already parsed entries (dicts as produced by
            ``parse_log_entry``). When falsy, *log_file* is parsed instead.

    Entries are grouped by the ``%Y-%m-%d`` date of their ``time`` value, in
    order of first appearance. Within a day, only requests whose first path
    component is listed in ``path_list`` are counted.
    """
    all_logs = logs if logs else get_all_log_entry(log_file)

    log_by_date = {}
    for entry in all_logs:
        date_day = entry["time"].strftime("%Y-%m-%d")
        log_by_date.setdefault(date_day, []).append(entry)

    for day, entries in log_by_date.items():
        # path -> [total bytes, request count]
        paths = {}
        for entry in entries:
            path = parse_req_line(entry["req"]).get("path")
            if not path:
                continue
            path_parent = get_path_parent(path)
            if path_parent not in path_list:
                continue
            totals = paths.setdefault(path_parent, [0, 0])
            totals[0] += int(entry["bytes"])
            totals[1] += 1

        print(day)
        print('-'*36)
        print(f'{"Path":<14} {"Count":<10} Transfer')
        print('-'*36)
        for path in sorted(paths):
            print(f"{path:<14} {paths[path][1]:<10} {byte_human(paths[path][0])}")
        print()
 | 
						|
    
 | 
						|
 | 
						|
def main_geo(log_file, logs=None):
    """Print total transferred bytes per client country, largest first.

    Args:
        log_file: Path of the access log; read only when *logs* is empty.
        logs: Optional list of already parsed entries (dicts as produced by
            ``parse_log_entry``). When falsy, *log_file* is parsed instead.

    Each client address is looked up in ``GeoLite2-Country.mmdb`` (relative
    to the working directory); failed lookups are counted under ``"XX"``.
    A progress line per entry goes to stderr, the summary to stdout.
    """
    import geoip2.database

    geolocstat = {}
    # The context manager closes the database even if processing fails.
    with geoip2.database.Reader('GeoLite2-Country.mmdb') as reader:
        all_logs = logs if logs else get_all_log_entry(log_file)
        c = len(all_logs)

        for n, log in enumerate(all_logs):
            path = parse_req_line(log.get("req")).get("path")
            # Recomputed for every entry: without a per-entry fallback the
            # name was unbound for a first pathless request and otherwise
            # carried over from the previous entry.
            path_parent = get_path_parent(path) if path else "-"

            try:
                geoloc = reader.country(log['client']).country.iso_code
            except Exception:
                # Best effort: an unknown or invalid address is reported as
                # "XX". Ctrl-C / SystemExit are no longer swallowed.
                geoloc = "XX"

            print(f"[{int(100*n/c):>3}%] {geoloc} {log['client']:>15}  {byte_human(int(log['bytes'])):>8}  {path_parent}", file=sys.stderr)
            geolocstat[geoloc] = geolocstat.get(geoloc, 0) + int(log['bytes'])
    print(file=sys.stderr)

    ranked = sorted(geolocstat.items(), key=lambda item: item[1], reverse=True)
    for geo, total in ranked:
        print(geo, byte_human(total))
 | 
						|
 | 
						|
def main_date(log_file, logs=None):
    """Print the time span, entry count and total bytes of a log.

    Args:
        log_file: Path of the access log; also shown in the header line.
            Read only when *logs* is empty.
        logs: Optional list of already parsed entries (dicts as produced by
            ``parse_log_entry``). When falsy, *log_file* is parsed instead.

    When there are no entries, the date range is printed as ``n/a ~ n/a``
    rather than failing on an empty list.
    """
    all_logs = logs if logs else get_all_log_entry(log_file)

    if all_logs:
        # min/max give the same endpoints as a full sort, in one pass each.
        times = [x['time'] for x in all_logs]
        st = min(times).strftime("%Y-%m-%d %H:%M:%S")
        ed = max(times).strftime("%Y-%m-%d %H:%M:%S")
    else:
        st = ed = "n/a"

    total_bytes = sum(int(x['bytes']) for x in all_logs)
    print(f"------- Log {log_file} -------\n  Date: {st} ~ {ed}\n Entry count: {len(all_logs)}\n Total bytes: {byte_human(total_bytes)}")
 | 
						|
 | 
						|
def main_html(log_file):
    """Print a combined plain-text report for *log_file*.

    The report has three sections, all written to stdout:

    1. Log info: time span, entry count and total bytes.
    2. By path: request count and bytes for each first path component that
       is listed in ``path_list``.
    3. By country: the seven countries with the most transferred bytes,
       looked up in ``/srv/mirror/scripts/GeoLite2-Country.mmdb``. Failed
       lookups are counted under ``"XX"``; results without a country code
       are omitted.

    Args:
        log_file: Path of the access log to parse.
    """
    all_logs = get_all_log_entry(log_file)

    # --- Log info ---
    if all_logs:
        times = [x['time'] for x in all_logs]
        st = min(times).strftime("%Y-%m-%d %H:%M:%S")
        ed = max(times).strftime("%Y-%m-%d %H:%M:%S")
    else:
        # An empty or fully unparseable log still yields a report.
        st = ed = "n/a"
    total_bytes = sum(int(x['bytes']) for x in all_logs)
    print(f"-------- Log Info --------\nDate: {st} ~ {ed}\nEntry count: {len(all_logs)}\nTotal bytes: {byte_human(total_bytes)}\n")

    # --- By path ---
    print("-------- By Path ---------")

    # path -> [total bytes, request count]
    paths = {}
    for entry in all_logs:
        path = parse_req_line(entry.get("req")).get("path")
        if not path:
            continue
        path_parent = get_path_parent(path)
        if path_parent not in path_list:
            continue
        totals = paths.setdefault(path_parent, [0, 0])
        totals[0] += int(entry.get("bytes"))
        totals[1] += 1

    for path in sorted(paths):
        print(f"{path:<14} {paths[path][1]:<10} {byte_human(paths[path][0])}")
    print()

    # --- By country ---
    print("------- By Country -------")
    import geoip2.database

    geolocstat = {}
    # The context manager closes the database even if a lookup loop fails.
    with geoip2.database.Reader('/srv/mirror/scripts/GeoLite2-Country.mmdb') as reader:
        for log in all_logs:
            try:
                geoloc = reader.country(log['client']).country.iso_code
            except Exception:
                # Best effort: unknown or invalid addresses become "XX".
                geoloc = "XX"
            geolocstat[geoloc] = geolocstat.get(geoloc, 0) + int(log['bytes'])

    ranked = sorted(geolocstat.items(), key=lambda item: item[1], reverse=True)
    geos = [f"{geo}   {byte_human(total)}" for geo, total in ranked if geo]
    print('\n'.join(geos[:7]))
    print("--------------------------")
 | 
						|
    
 | 
						|
if __name__ == "__main__":
    # Command-line entry point: ngparse <command> <log file>
    usage = "Error: ngparse {stat,geo,date,html} [ log file ]\n* log file must have extended format."
    commands = {
        "stat": main,
        "geo": main_geo,
        "date": main_date,
        "html": main_html,
    }

    if len(sys.argv) != 3:
        print(usage)
        sys.exit(1)

    command, logfile = sys.argv[1], sys.argv[2]

    if not os.path.exists(logfile):
        print("Error: File doesnt exists.")
        # The original had a bare `exit` here (never called), so execution
        # continued and failed later with a NameError on `logfile`.
        sys.exit(1)

    handler = commands.get(command)
    if handler is None:
        # Unknown commands used to be ignored silently.
        print(usage)
        sys.exit(1)

    handler(logfile)
 | 
						|
    
 |