from log import Log, Line
from typing import List
import re
import dateutil.parser
import datetime

class Apache(Log):
    """Log reader for Apache access logs.

    Only lines that start with a dotted-quad IPv4 address, carry a bracketed
    timestamp, a quoted ``GET`` request and numeric status/size fields match
    the pattern registered here; everything else is left to the ``Log`` base
    class to deal with.
    """

    def __init__(self, paths: List[str]):
        """Register the Apache line pattern and the log paths with ``Log``.

        Args:
            paths: Log file paths, handed to the ``Log`` base class unchanged.
        """
        super().__init__(
            'Apache',
            # Capture groups, in order: client IPv4 address, bracketed
            # timestamp, GET request target, status code, response size.
            re.compile(r'^(\d+\.\d+\.\d+\.\d+)(?:.*)\[(.+)\] "GET (.+) HTTP.*" (\d+) (\d+)'),
            paths
        )

    def _readLine(self, line, parts, maxmind):
        """Turn one matched access-log line into a ``Line`` record.

        Args:
            line: The raw log line; a single trailing newline, if present, is
                removed before the text is stored on the record.
            parts: Five strings, unpacked as (ip, time, request, status,
                size) -- presumably the capture groups of the pattern
                registered in ``__init__``; confirm against ``Log``.
            maxmind: Object exposing ``city(ip)``, used for the geo lookup
                (presumably a GeoIP2 database reader; verify against caller).

        Returns:
            ``[None, 1]`` when the status is not ``'200'`` or
            ``self.isValidRequest`` rejects the request, otherwise
            ``[Line(...), 0]``. The second element looks like a
            "skipped" counter for the caller -- TODO confirm in ``Log``.
        """
        ip, timestamp, request, status, size = parts

        if status != '200' or not self.isValidRequest(request):
            return [None, 1]

        # The Apache timestamp has a constant layout ("dd/Mon/yyyy:HH:MM:SS
        # +zzzz"), so replacing the colon at index 11 with a space yields a
        # "date time offset" string that dateutil can parse.
        parsed = dateutil.parser.parse(timestamp[:11] + ' ' + timestamp[12:])
        # Normalise to UTC first, then keep only the date (daily time steps).
        day = parsed.astimezone(tz=datetime.timezone.utc).date()

        gib = float(size) / 1073741824.0  # bytes to GiB (1024 ** 3)

        try:
            location = maxmind.city(ip)
        except Exception:
            # Best-effort lookup: any lookup failure just means "no
            # location". Unlike a bare ``except:``, this does not swallow
            # KeyboardInterrupt or SystemExit.
            location = None

        # Strip the trailing newline only when there is one, so a final line
        # without a newline does not lose its last character.
        raw = line[:-1] if line.endswith('\n') else line

        return [Line(ip, day, request, gib, location, raw), 0]
