from typing import Tuple, List, Optional
from datetime import datetime
from geoip2.models import City
from glob import glob
import re

# processed line data
class Line:
    """One parsed log entry.

    Attributes:
        ip: client address as given to the constructor.
        time: timestamp of the entry.
        request: requested path.
        size: size value associated with the entry.
        location: geo lookup result, or None when there is none.
        base: stored on the instance but not part of pack().
    """

    def __init__(self, ip: str, time: datetime, request: str, size: float, location: Optional[City], base: str):
        # who / when / what
        self.ip, self.time, self.request = ip, time, request
        # size and where it came from
        self.size, self.location = size, location
        self.base = base

    def pack(self):
        """Return [ip, time, request, size, location] as a new list (base is not included)."""
        packed = [self.ip, self.time, self.request]
        packed += [self.size, self.location]
        return packed

class Log:
    """Base reader for a group of log files selected by glob patterns.

    Each raw line is matched against `regex`; lines that match are handed to
    `_readLine`, which subclasses implement to build the processed line.

    Attributes:
        name: label used in error messages.
        regex: pattern passed to re.match for every raw line.
        files: paths produced by expanding `paths` with glob.
        handles: open text handles, one per file; getNext() removes a handle
            once its end of file is reached.
        totalLines / totalRead / progress: counters intended for a progress
            bar; progress is { file: { 'cur': int, 'max': int } }.
    """

    def __init__(self, name: str, regex, paths: List[str]):
        """Expand `paths` with glob and remember the resulting files.

        Raises:
            FileNotFoundError: if no pattern matched any file.
        """
        self.name = name
        self.regex = regex
        self.files = []
        self.handles = []  # note that handles are removed as we reach their eof (if using getNext)

        # for progress bar
        self.totalLines = 0
        self.totalRead = 0
        self.progress = {}  # { file: { cur: int, max: int }}

        self.EXT_REGEX = re.compile(r'.*(\.nc|\.tar\.gz)$')
        self.PATH_REGEX = re.compile(r'^\/(star_gnssro|starro|task_icvs_gpsro)\/.*')

        for p in paths:
            self.files += glob(p)

        if len(self.files) == 0:
            raise FileNotFoundError(f'No files found for {self.name} in {paths}')

    def open(self):
        """Count the lines of every file and open one text handle per file.

        Calling this again starts over: previously opened handles are closed
        and the progress counters are rebuilt, so nothing leaks and
        totalLines is not counted twice.
        """
        self.close()
        self.totalLines = 0
        self.totalRead = 0
        self.progress.clear()

        for file in self.files:
            # https://stackoverflow.com/questions/845058/how-to-get-the-line-count-of-a-large-file-cheaply-in-python
            with open(file, 'rb') as f:
                num = sum(1 for _ in f)
            self.totalLines += num
            self.progress[file] = {'cur': 0, 'max': num}

            self.handles.append(open(file, 'r'))

    def close(self):
        """Close every handle that is still open and forget them."""
        for handle in self.handles:
            handle.close()
        self.handles = []

    def reset(self):
        """Rewind the handles that are still open and zero the read counters.

        Handles already removed by getNext() are not reopened.
        """
        for handle in self.handles:
            handle.seek(0)

        for key in self.progress.keys():
            self.progress[key]['cur'] = 0
        self.totalRead = 0

    # get the next valid line in ascending time order
    # assumes self.files is sorted (.sort(...))
    def getNext(self, maxmind) -> Optional[Line]:
        """Return the next successfully processed line, or None when every file is exhausted.

        Reads from the first handle in the queue; unparsable lines are
        skipped, and a handle is closed and dropped once it hits end of file.
        """
        while self.handles:
            parsed, status = self.readLine(self.handles[0], maxmind)
            if status == 2:  # eof, remove file from queue
                self.handles.pop(0).close()
            elif status == 0:  # valid line
                return parsed
            # status 1: could not parse, try the next line
        return None

    # returns processed line, success code
    # 0 = success
    # 1 = could not parse
    # 2 = eof
    def readLine(self, handle, maxmind) -> Tuple[Optional[Line], int]:
        """Read one raw line from `handle` and process it.

        Returns a two-element list [line, code]: [None, 2] at end of file,
        [None, 1] when the raw line does not match self.regex, otherwise
        whatever `_readLine` returns. The progress counters advance for every
        raw line read (not for the end-of-file probe).
        """
        line = handle.readline()
        if not line:
            return [None, 2]

        self.progress[handle.name]['cur'] += 1
        self.totalRead += 1

        parts = re.match(self.regex, line)

        if not parts:
            return [None, 1]

        return self._readLine(line, parts.groups(), maxmind)

    # to be implemented by child classes
    # is given the log line, the processed line (re.match(self.regex, line).groups()), maxmind database
    def _readLine(self, line: str, parts: List[str], maxmind):
        """Build the processed line from the regex groups; must return [line, code] like readLine."""
        raise NotImplementedError()

    # give self.files, sort them by ascending time order. We check time by reading each file until we find a valid time
    # assumes that the file time ranges are disjoint and in ascending time order
    def sort(self, maxmind):
        """Order self.files and self.handles by the time of each file's first valid line.

        Files without a single valid line are dropped and their handles are
        closed. Afterwards every remaining handle is rewound and the read
        counters are zeroed via reset().

        Raises:
            FileNotFoundError: if no open file contains a valid line (state is
                left untouched in that case).
        """
        dated = []     # (first valid time, handle), kept together so they cannot get out of step
        unusable = []  # handles whose file has no valid line
        for handle in self.handles:
            first_time = None
            while True:
                line, status = self.readLine(handle, maxmind)
                if status == 1:  # invalid line
                    continue
                if status == 2:  # eof
                    break
                first_time = line.time
                break

            if first_time is not None:
                dated.append((first_time, handle))
            else:
                unusable.append(handle)

        if len(dated) == 0:
            raise FileNotFoundError(f'All {self.name} files are invalid (do not contain valid log line)')

        for handle in unusable:
            handle.close()

        # key on the time only: list.sort is stable and handles are not comparable
        dated.sort(key=lambda pair: pair[0])
        self.handles = [handle for _, handle in dated]
        self.files = [handle.name for handle in self.handles]

        self.reset()

    def isValidRequest(self, request: str):
        """Tell whether `request` is a data file under one of the known top-level directories.

        A request is valid when it starts with /star_gnssro/, /starro/ or
        /task_icvs_gpsro/ and ends in .nc or .tar.gz. The result is a truthy
        match object when valid and a falsy value (None) otherwise.

        NOTE(review): the pattern accepts `task_icvs_gpsro`, not
        `task_icvs_gnssro` - confirm which directory name is intended.
        """
        return re.match(self.PATH_REGEX, request) and re.match(self.EXT_REGEX, request)
