import csv
import json
import os

import geoip2.database
import geoip2.errors

from graph import Graph

# Number of ASNs (ranked by accumulated size) that get the detailed
# per-network dictionary in users.json; the rest get a compact list.
_DETAILED_ASN_LIMIT = 10


class Users(Graph):
    """Aggregate request statistics per autonomous-system organisation.

    ``self.data`` maps the organisation name returned by the ASN database
    to a record of the form::

        {
            'count': int,     # number of processed requests
            'size': float,    # sum of the ``size`` values passed to process()
            'ips': {
                <network or ip string>: {
                    'count': int,
                    'size': float,
                    'base': [ip strings],  # distinct addresses seen in that network
                    'loc': location,       # object passed to process(), may be None
                },
            },
        }
    """

    def __init__(self):
        """Register the graph under the name 'users' and open the ASN database."""
        super().__init__('users')
        self.data = {}

        # The database path is resolved relative to this source file.
        relative = os.path.abspath(os.path.dirname(__file__))
        self.asn = geoip2.database.Reader(
            os.path.join(relative, '../MaxMind/GeoLite2-ASN.mmdb'))

    def process(self, ip, time, request, size, location, log):
        """Account one request to the ASN organisation that owns ``ip``.

        Args:
            ip: Client address as a string.
            time: Unused here.
            request: Unused here.
            size: Amount added to the running totals.
            location: Object exposing ``traits.network`` (and, in draw(),
                ``country``/``city``/``continent``/``subdivisions``), or a
                falsy value. Presumably a geoip2 City response -- confirm
                against the caller.
            log: Unused here.

        Returns:
            Always an empty list.
        """
        try:
            identifier = self.asn.asn(ip).autonomous_system_organization
        except (geoip2.errors.AddressNotFoundError, ValueError):
            # ASN is unknown (address not in the database or not a valid
            # address string): skip the request.
            return []

        # NOTE(review): self.validate is inherited from Graph; from its use
        # here it appears to insert the default when the key is missing.
        self.validate(self.data, identifier, {'size': 0, 'count': 0, 'ips': {}})
        entry = self.data[identifier]
        entry['count'] += 1
        entry['size'] += size

        # When the location carries a network, requests are grouped under
        # that network and the concrete address is remembered in 'base'.
        # Otherwise the raw address is the key and 'base' stays empty.
        base_ip = None
        if location and location.traits.network:
            base_ip = ip
            ip = location.traits.network.with_prefixlen

        self.validate(entry['ips'], ip, {
            'count': 0,
            'size': 0,
            'loc': location,
            'base': [],
        })
        bucket = entry['ips'][ip]
        bucket['count'] += 1
        bucket['size'] += size
        if not bucket['loc']:
            # Fill in a location that was missing when the bucket was created.
            bucket['loc'] = location
        if base_ip and base_ip not in bucket['base']:
            bucket['base'].append(base_ip)

        return []

    def draw(self, path):
        """Write ``users.json`` and ``users.csv`` into the directory ``path``.

        ASNs are ranked by accumulated size, largest first, in both files.
        """
        print('Writing user statistics...')
        ranked = sorted(self.data.items(),
                        key=lambda item: item[1]['size'],
                        reverse=True)
        self._write_json(ranked, os.path.join(path, 'users.json'))
        self._write_csv(ranked, os.path.join(path, 'users.csv'))

    def _write_json(self, ranked, target):
        """Dump per-ASN statistics; the top ASNs get per-network detail."""
        out = {}
        for rank, (asn, val) in enumerate(ranked):
            networks = val['ips']
            if rank < _DETAILED_ASN_LIMIT:
                ips = {ip: self._network_detail(info)
                       for ip, info in networks.items()}
            else:
                ips = [self._network_summary(ip, info)
                       for ip, info in networks.items()]
            out[asn] = {
                'count': val['count'],
                'size': val['size'],
                'num_ips': sum(len(info['base']) for info in networks.values()),
                'ips': ips,
            }

        with open(target, 'w') as f:
            json.dump(out, f)

    @staticmethod
    def _network_detail(info):
        """Build the detailed JSON record for one network bucket."""
        detail = {
            'count': info['count'],
            'size': info['size'],
            'base': info['base'],
            'country': None,
            'state': None,
            'city': None,
            'continent': None,
            'subdivisions': [],
        }
        loc = info['loc']
        if loc:
            detail['country'] = loc.country.name
            detail['city'] = loc.city.name
            detail['continent'] = loc.continent.name
            detail['subdivisions'] = [sub.name for sub in loc.subdivisions]
            # 'state' is only filled in for United States locations.
            if loc.country.name == 'United States' and len(loc.subdivisions) > 0:
                detail['state'] = loc.subdivisions[0].name
        return detail

    @staticmethod
    def _network_summary(ip, info):
        """Build the compact ``[network, first ip, country, city]`` JSON row."""
        loc = info['loc']
        country = loc.country.name if loc else None
        city = loc.city.name if loc else None
        base = info['base'][0] if len(info['base']) > 0 else None
        return [ip, base, country, city]

    @staticmethod
    def _location_label(loc):
        """Return the CSV grouping label for a location ('Unknown' if absent)."""
        if not loc:
            return 'Unknown'
        country = loc.country.name
        if country == 'United States' and len(loc.subdivisions) > 0:
            state = loc.subdivisions[0].name
            if state:
                return f'{state}, United States'
        # The database may not carry a country name; never emit None.
        return country or 'Unknown'

    def _write_csv(self, ranked, target):
        """Write one SUM row per ASN followed by its per-location rows."""
        table = [['ip', 'asn', 'country', 'count', 'size']]
        for asn, val in ranked:
            per_location = {}
            for ip, info in val['ips'].items():
                label = self._location_label(info['loc'])
                # 'base' is empty when the bucket is keyed by the raw
                # address, so the key itself is the representative ip.
                first_ip = info['base'][0] if info['base'] else ip
                self.validate(per_location, label, {
                    'ip': first_ip,
                    'asn': str(asn),
                    'country': label,
                    'count': 0,
                    'size': 0,
                })
                per_location[label]['count'] += info['count']
                per_location[label]['size'] += info['size']

            # Sort on the numeric size; the formatted string would compare
            # lexicographically ("9.0 KiB" > "10.0 GiB").
            rows = sorted(per_location.values(),
                          key=lambda row: row['size'],
                          reverse=True)
            table.append(['SUM', str(asn), '', str(val['count']),
                          self.sizeof_fmt(val['size'])])
            table.extend(
                [row['ip'], row['asn'], row['country'], str(row['count']),
                 self.sizeof_fmt(row['size'])]
                for row in rows)

        # csv.writer quotes and escapes fields containing commas or quotes.
        # The file stays in default text mode with a '\n' terminator so line
        # endings match what the previous hand-written output produced.
        with open(target, 'w') as f:
            csv.writer(f, lineterminator='\n').writerows(table)

    def close(self):
        """Close the ASN database reader opened in __init__."""
        self.asn.close()

    # https://stackoverflow.com/questions/1094841/get-a-human-readable-version-of-a-file-size
    def sizeof_fmt(self, num, suffix="B"):
        """Format ``num`` as a human-readable binary size string.

        ``num`` is first multiplied by 1024**3 (presumably because sizes are
        tracked in GiB -- confirm against the caller) and then scaled down to
        the largest unit that keeps the value below 1024.
        """
        num *= 1024 * 1024 * 1024
        for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"):
            if abs(num) < 1024.0:
                return f"{num:3.1f} {unit}{suffix}"
            num /= 1024.0
        return f"{num:.1f} Yi{suffix}"