import os
import pickle
import subprocess
from datetime import datetime, timedelta
from glob import glob
from re import sub
from struct import calcsize, unpack

import numpy as np

from tools import to_datetime, get_sat_freq

# Map LEO satellite names to numeric IDs, then add the reverse mapping so the
# dict can be used in both directions.
lsat_names = dict(GN04=4, GN05=5, YM08=8)
for key in list(lsat_names.keys()):
    lsat_names[lsat_names[key]] = key

logfile = f"progress_rt_{datetime.now().strftime('%Y%m%d')}.txt"


def log_message(msg):
    with open(logfile, 'a') as fid:
        fid.write(f"{datetime.now().strftime('%H:%M:%S')} {msg}\n")


def read_opnGns(occFileName):
    # Progress helper: print a timestamped note for the file being handled.
    print(f'{datetime.now():%H:%M:%S} - {occFileName}')


def gather_data():
    # Legacy implementation, kept for reference: dump each opnGns binary to
    # text with dump_opnGns_external.pl, then slice profiles out of the dump
    # with grep/sed. The live code below parses the binaries directly.
    #
    # log_message('- Start gathering RO profiles.')
    # gnsfiles = glob('./L1_extract/opnGns/*_bin')
    # log_message(f"-- {len(gnsfiles)} files to process")
    # T0 = datetime(1980, 1, 6)
    # os.makedirs('/dev/shm/tcliu', exist_ok=True)
    # tStart_min = datetime.now() + timedelta(days=1)
    # tEnd_max = T0
    # for gnsfile in gnsfiles:
    #     tmpfile = os.path.join('/dev/shm/tcliu', os.path.basename(gnsfile) + '_txt')
    #     dtstr = sub(r'.*_(\d{4}\.\d{3})\.\d{3}\.\d\d\.\d\d_.*', r'\1', tmpfile)
    #     leo = lsat_names[int(sub(r'.*_\d{4}\.\d{3}\.(\d{3})\.\d\d\.\d\d_.*', r'\1', tmpfile))]
    #     ant = int(sub(r'.*_\d{4}\.\d{3}\.\d{3}\.\d\d\.(\d\d)_.*', r'\1', tmpfile))
    #     subprocess.run(f'./dump_opnGns_external.pl {gnsfile} > {tmpfile}', shell=True, check=True)
    #     grep_cmd = f'grep -E "^[0-9]+ 0" {tmpfile} -n -C 1'
    #     cmdout = subprocess.getoutput(grep_cmd).strip().split('\n')
    #     lineid = [int(sub(r':.*', '', cmdout[i])) - 1 for i in range(1, len(cmdout), 4)]
    #     PRNs = [sub(r'.*\s([A-Z][0-9][0-9])\s.*', r'\1', cmdout[i]) for i in range(1, len(cmdout), 4)]
    #     hrvars = [sub(r'.*frac ', '', cmdout[i]) for i in range(2, len(cmdout), 4)]
    #     hrvars = [[hrvars[j][k:k + 20] for k in range(0, len(hrvars[j]), 20)] for j in range(len(hrvars))]
    #     total_lines = int(subprocess.getoutput(f'wc -l < {tmpfile}').strip())
    #     lineid_all = np.vstack([lineid, np.append([x - 1 for x in lineid[1:]], total_lines)]).astype('int')
    #     nRO = lineid_all.shape[1]
    #     log_message(f"-- Processing {tmpfile}: {nRO} profiles")
    #     timestamp = datetime.now().strftime('%H%M%S%f')
    #     for ll in range(nRO):
    #         start, end = lineid_all[:, ll]
    #         tempfile_all = f'/dev/shm/tcliu/tempfile_all_{timestamp}.txt'
    #         subprocess.run(f'head {tmpfile} -n {end} | tail -n +{start} > {tempfile_all}', shell=True, check=True)
    #         grep_nan = subprocess.getoutput(f'grep -E "NaN" -m 1 -in {tempfile_all}')
    #         if grep_nan:
    #             trimtil = int(sub(r':.*', '', grep_nan)) - 4
    #             subprocess.run(f'head {tempfile_all} -n {trimtil} > {tempfile_all}_new', shell=True, check=True)
    #             subprocess.run(f'mv {tempfile_all}_new {tempfile_all}', shell=True, check=True)
    #         tempfile_lr = f'/dev/shm/tcliu/tempfile_lr_{timestamp}.txt'
    #         tempfile_hr = f'/dev/shm/tcliu/tempfile_hr_{timestamp}.txt'
    #         subprocess.run(f'grep -E "^[0-9]+" {tempfile_all} | sed s/[A-Z]// > {tempfile_lr}', shell=True, check=True)
    #         subprocess.run(f'grep -E "^\\s+[0-9]" {tempfile_all} > {tempfile_hr}', shell=True, check=True)
    #         lrdata = np.loadtxt(tempfile_lr)
    #         hrdata = np.loadtxt(tempfile_hr)
    #         idx = np.cumsum([1 if i > 0 and hrdata[i, 0] < hrdata[i - 1, 0] else 0 for i in range(hrdata.shape[0])])
    #         hrtime = hrdata[:, 0] + (lrdata[idx, 0] - lrdata[0, 0])
    #         tStart, tEnd = [T0 + timedelta(seconds=lrdata[0, 0] + hrtime[x]) for x in [0, -1]]
    #         if tStart < tStart_min:
    #             tStart_min = tStart
    #         if tEnd > tEnd_max:
    #             tEnd_max = tEnd
    #         occFileName = f"t01_RO/occTab_{leo}.{dtstr}.{tStart:%Y%m%d%H%M%S}.{tEnd:%Y%m%d%H%M%S}.A{ant:02d}.{PRNs[ll]}_txt.npz"
    #         data = np.column_stack([np.full(hrtime.shape, lrdata[0, 0]), hrtime, hrdata[:, 1:]])
    #         data[:, 2] = data[:, 2] + data[:, 3]
    #         data[:, 4] = data[:, 4] + data[:, 5]
    #         header = ["GPS_seconds", "time_offset"] + [v.strip() for v in hrvars[ll]]
    #         np.savez(occFileName, data=data, header=header)
    #         for tfile in glob(f'/dev/shm/tcliu/tempfile_*_{timestamp}.txt'):
    #             os.remove(tfile)
    #     os.remove(tmpfile)
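    # ----------------------------------------------------------------------
    # Illustrative sketch (never called): how one 14-byte record header of an
    # opnGns binary file is unpacked. The field names are assumptions
    # inferred from the unpack calls below, not a documented spec.
    def _example_unpack_header(raw14: bytes) -> dict:
        gsec, status, ant, gtype, svn, prn, rate, nlr, nhr = unpack('>IBBsHBHBB', raw14)
        return dict(gsec=gsec, status=status, ant=ant, gtype=gtype.decode(),
                    svn=svn, prn=prn, rate=rate, nlr=nlr, nhr=nhr)
    # e.g. _example_unpack_header(b'\x00' * 14) yields all-zero fields.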
    # Live implementation: parse the opnGns binary files directly.
    log_message('- Start gathering RO profiles.')
    obs_type = 'CLDSXXXX'
    T0 = datetime(1980, 1, 6)  # GPS epoch
    gnsfiles = glob('./L1_extract/opnGns/*_bin')
    log_message(f"-- {len(gnsfiles)} files to process")
    tStart_min = datetime.now() + timedelta(days=1)
    tEnd_max = T0
    for gnsfile in gnsfiles:
        dtstr = sub(r'.*_(\d{4}\.\d{3})\.\d{3}\.\d\d\.\d\d_.*', r'\1', gnsfile)
        leo = lsat_names[int(sub(r'.*_\d{4}\.\d{3}\.(\d{3})\.\d\d\.\d\d_.*', r'\1', gnsfile))]
        ant = int(sub(r'.*_\d{4}\.\d{3}\.\d{3}\.\d\d\.(\d\d)_.*', r'\1', gnsfile))
        nRO = 0
        with open(gnsfile, 'rb') as fid:
            # File trailer: version byte, constellation letters, and per-sat
            # tables of high-rate time offsets and profile start offsets.
            fid.seek(-9, 2)
            ver = unpack('>B', fid.read(1))[0]
            sats = unpack('>8s', fid.read(8))[0].strip().decode()
            if ver == 2:
                sat_preset = 32
            elif ver == 3:
                sat_preset = 64
            else:
                raise ValueError(f'Unsupported opnGns version {ver} in {gnsfile}')
            hr_offset = dict()
            end_byte = fid.seek(-(sat_preset * 8 + 4) * len(sats) - 9, 2)
            start_byte = {end_byte: 'EOF'}
            for sat in sats:
                hr_offset[sat] = unpack('>i', fid.read(4))[0] / 1E9
                token = unpack(f'>{sat_preset}q', fid.read(sat_preset * 8))
                for ii in range(len(token)):
                    if token[ii] != -1:
                        # not strictly needed: each GNSS sat has one or more profiles
                        start_byte[token[ii]] = f'{sat}{ii+1:02d}'
            fid.seek(0)
            while fid.tell() < end_byte:
                gsec, status, ant, gtype, svn, prn, rate, nlr, nhr = unpack('>IBBsHBHBB', fid.read(14))
                fid.seek(nlr * 2, 1)  # low-rate header (skip)
                if status == 0:  # beginning of a new profile in planetiq
                    header = ["GPS_seconds", "time_offset"]
                    hr_dtype = '>'
                    for _ in range(nhr):
                        bits = unpack('>H', fid.read(2))[0]
                        header.append(f"{obs_type[bits>>13]}{(bits>>10)%8+1}{64+(bits>>5)%32:c}"
                                      + (f" ({64+bits%32:c})" if bits % 32 else ""))
                        hr_dtype += fid.read(1).decode()
                    hr_dtype = sub('S', 'H', sub('s', 'h', hr_dtype))  # 2-byte int
                    hr_dlen = calcsize(hr_dtype)
                    data = []
                    gsec_0 = gsec
                    gsat = f'{gtype.decode()}{prn:02d}'
                else:
                    fid.seek(nhr * 3, 1)
                gsec_diff = gsec - gsec_0 + hr_offset[gtype.decode()]
                fid.seek(nlr * 8, 1)  # low-rate data (skip)
                for _ in range(rate):
                    data.append((gsec_0, int.from_bytes(fid.read(3), 'big') / 1E7 + gsec_diff)
                                + unpack(hr_dtype, fid.read(hr_dlen)))
                gsec_n, status_n, _, gtype_n, _, prn_n, _, _, _ = unpack('>IBBsHBHBB', fid.read(14))
                fid.seek(-14, 1)  # peek at the next record header, then rewind
                # if fid.tell() in start_byte or gsec_n - gsec < 1 or gsec_n - gsec > 5 or gtype_n != gtype or prn_n != prn or status_n == 0:
                if fid.tell() == end_byte or status_n == 0:  # for planetiq, status == 0 means new profile
                    data = np.array(data)
                    tStart, tEnd = [T0 + timedelta(seconds=gsec_0 + data[x, 1]) for x in [0, -1]]
                    if tStart < tStart_min:
                        tStart_min = tStart
                    if tEnd > tEnd_max:
                        tEnd_max = tEnd
                    data[:, 2] = data[:, 2] + data[:, 3]  # planetiq, L1 + L1M
                    data[:, 4] = data[:, 4] + data[:, 5]  # planetiq, L2 + L2M
                    occFileName = f"t01_RO/occTab_{leo}.{dtstr}.{tStart:%Y%m%d%H%M%S}.{tEnd:%Y%m%d%H%M%S}.A{ant:02d}.{gsat}_txt.npz"
                    np.savez(occFileName, data=data, header=header)
                    nRO += 1
        log_message(f"-- Processed {gnsfile}: {nRO} profiles")
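    # ----------------------------------------------------------------------
    # Illustrative sketch (never called): decoding of the 16-bit
    # observation-code word read in the RO loop above. The bit-field layout
    # (3-bit type, 3-bit band, two 5-bit letter offsets from 0x40) is an
    # assumption read off the f-string above; the trailing 'X's in obs_type
    # appear to be placeholders.
    def _example_decode_obs_code(bits, obs_type='CLDSXXXX'):
        name = f"{obs_type[bits >> 13]}{(bits >> 10) % 8 + 1}{64 + (bits >> 5) % 32:c}"
        if bits % 32:
            name += f" ({64 + bits % 32:c})"
        return name
    # e.g. _example_decode_obs_code(0b0010010001100000) == 'L2C'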
    # REF profiles: merge the podRx3 RINEX observation files into per-sat
    # time series, cache them as a pickle, and write continuous arcs.
    log_message('- Start gathering REF profiles.')
    header = ['GPS_seconds', 'time_offset', 'L1C', 'L2X', 'S1C', 'S2X']
    crxfiles = glob('./L1_extract/podRx3/*crx')
    dtstr = sub(r'.*_(\d{4}\.\d{3})\.\d{3}\.\d\d\.\d\d_.*', r'\1', crxfiles[-1])
    dt = datetime.strptime(dtstr, '%Y.%j')
    leo = lsat_names[int(sub(r'.*_\d{4}\.\d{3}\.(\d{3})\.\d\d\.\d\d_.*', r'\1', crxfiles[-1]))]
    ant = int(sub(r'.*_\d{4}\.\d{3}\.\d{3}\.\d\d\.(\d\d)_.*', r'\1', crxfiles[-1]))
    crxfileout = f's01_podRx3/podRx3_{leo}.{dtstr}.{ant:02d}.pkl'
    crxfileprev = f's01_podRx3/podRx3_{leo}.{dt - timedelta(days=1):%Y.%j}.{ant:02d}.pkl'
    if os.path.exists(crxfileout):
        log_message(f'-- Combining {crxfileout}')
        with open(crxfileout, 'rb') as fid:
            pkldata = pickle.load(fid)
        dsat = pkldata['dsat']
        dtime = pkldata['dtime']
        dataall = pkldata['dataall']
    elif os.path.exists(crxfileprev):
        log_message(f'-- Retrieving {crxfileprev}')
        with open(crxfileprev, 'rb') as fid:
            pkldata = pickle.load(fid)
        dsat = pkldata['dsat']
        dtime = pkldata['dtime']
        dataall = pkldata['dataall']
        # Keep only arcs that may still overlap the new RO time window.
        for dnum in range(len(dsat)):
            tsidx = np.where(np.diff(np.hstack([[T0], dtime[dnum]])) > timedelta(seconds=3))[0].astype('int')
            teidx = np.hstack([tsidx[1:], tsidx[:1]]) - 1
            tidx = tsidx[dtime[dnum][teidx] > min(tStart_min, dt - timedelta(hours=1))]
            if tidx.size > 0:
                dtime[dnum] = dtime[dnum][tidx[0]:]
                dataall[dnum] = dataall[dnum][tidx[0]:, :]
            else:
                # assign an empty array with .size == 0 so it is removed below
                dtime[dnum] = dtime[dnum][:0]
                dataall[dnum] = dataall[dnum][:0]
        didx = np.where([tt.size > 0 for tt in dtime])[0]
        dsat = [dsat[x] for x in didx]
        dtime = [np.array(dtime[x]) for x in didx]
        dataall = [np.array(dataall[x]) for x in didx]
    else:
        log_message(f'-- Creating {crxfileout}')
        dsat = []
        dtime = []
        dataall = []
    for crxfile in crxfiles:
        is_head = True
        for line in open(crxfile, 'r'):
            if is_head:
                if 'END OF HEADER' in line:
                    is_head = False
                continue
            if line.startswith(">"):
                dt_now = to_datetime(line[1:].split())
            elif line[0] in ('G', 'E'):
                gtmp = [line[ii:ii + 14] for ii in range(3, len(line) - 1, 16)]
                gtmp = [float(ss) if ss.strip() else np.nan for ss in gtmp]
                gsat = line[:3]
                if gsat not in dsat:
                    dsat.append(gsat)
                    dtime.append(np.array([dt_now]))
                    dataall.append(np.array([gtmp]))
                else:
                    gnum = dsat.index(gsat)
                    if dt_now > dtime[gnum][-1]:
                        dtime[gnum] = np.hstack([dtime[gnum], [dt_now]])
                        dataall[gnum] = np.vstack([dataall[gnum], [gtmp]])
                    elif dt_now in dtime[gnum]:
                        # duplicate epoch: fill gaps in the stored record
                        tidx = np.searchsorted(dtime[gnum], dt_now)
                        dataall[gnum][tidx] = [pp if np.isfinite(pp) else qq
                                               for pp, qq in zip(dataall[gnum][tidx], gtmp)]
                    else:
                        # out-of-order epoch: insert at the sorted position
                        tidx = np.searchsorted(dtime[gnum], dt_now)
                        dtime[gnum] = np.hstack([dtime[gnum][:tidx], [dt_now], dtime[gnum][tidx:]])
                        dataall[gnum] = np.vstack([dataall[gnum][:tidx], [gtmp], dataall[gnum][tidx:]])
    didx = np.argsort(dsat)
    dsat = [dsat[x] for x in didx]
    dtime = [np.array(dtime[x]) for x in didx]
    dataall = [np.array(dataall[x]) for x in didx]
    pkldata = {'dsat': dsat, 'dtime': dtime, 'dataall': dataall}
    with open(crxfileout, 'wb') as fid:
        pickle.dump(pkldata, fid)
    c0 = 299792458  # speed of light, m/s
    for dnum in range(len(dsat)):
        f1, f2, _ = get_sat_freq(dsat[dnum])
        idx = np.all(np.isfinite(dataall[dnum]), axis=1)
        dtime[dnum] = dtime[dnum][idx]
        dataall[dnum] = dataall[dnum][idx]
        # split into arcs at gaps longer than 3 s
        idx = [0] + list(np.where(np.diff(dtime[dnum]) > timedelta(seconds=3))[0] + 1) + [len(dtime[dnum])]
        for ss in range(len(idx) - 1):
            if (idx[ss + 1] - idx[ss] < 60 or dtime[dnum][idx[ss + 1] - 1] < tStart_min
                    or dtime[dnum][idx[ss]] > tEnd_max):
                continue
            gps_sec = (dtime[dnum][idx[ss]] - T0) / timedelta(seconds=1)
            tdiff = (dtime[dnum][idx[ss]:idx[ss + 1]] - dtime[dnum][idx[ss]]) / timedelta(seconds=1)
            data = np.column_stack((
                gps_sec + np.zeros_like(tdiff),
                tdiff,
                dataall[dnum][idx[ss]:idx[ss + 1], [1, 4, 2, 5]] * np.array([c0 / f1, c0 / f2, 1, 1])
            )).astype('float64')
            tStart, tEnd = [T0 + timedelta(seconds=x[0] + x[1]) for x in data[[0, -1], :2]]
            occFileName = f't01_REF/occTab_{leo}.{dtstr}.{tStart:%Y%m%d%H%M%S}.{tEnd:%Y%m%d%H%M%S}.A{ant:02d}.{dsat[dnum]}_txt.npz'
            np.savez(occFileName, data=data, header=header)
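    # ----------------------------------------------------------------------
    # Illustrative sketch (never called): the REF table above stores carrier
    # phase in metres by scaling cycles with the wavelength c0/f. The
    # frequencies here are the standard GPS L1/L2 values, used only for this
    # example; the real code takes them from get_sat_freq().
    def _example_phase_to_metres(l1_cycles, l2_cycles):
        c0 = 299792458.0               # speed of light, m/s
        f1, f2 = 1575.42e6, 1227.60e6  # GPS L1/L2 carrier frequencies, Hz
        return l1_cycles * c0 / f1, l2_cycles * c0 / f2
    # e.g. one L1 cycle is ~0.190 m and one L2 cycle is ~0.244 m.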
    log_message('- Start gathering leoAtt profiles.')
    attfiles = glob('./L1_extract/leoAtt/*txt')
    leo_sats = np.unique([lsat_names[int(sub(r'.*_\d{4}\.\d{3}\.(\d{3})\.\d\d_.*', r'\1', x))] for x in attfiles])
    vars_list = ['att_x', 'att_y', 'att_z', 'att_w', 'ang_x', 'ang_y', 'ang_z',
                 'pe_x', 'pe_y', 'pe_z', 've_x', 've_y', 've_z',
                 'pi_x', 'pi_y', 'pi_z', 'vi_x', 'vi_y', 'vi_z',
                 'sad_a', 'sad_b1', 'sad_b2']
    for ff in leo_sats:
        log_message(f"-- Processing {ff}...")
        attfiles = glob(f'./L1_extract/leoAtt/*.*.{lsat_names[ff]:03d}.*_txt')
        dt = datetime.strptime(sub(r'.*_(\d{4}\.\d{3})\.\d{3}\.\d\d_.*', r'\1', attfiles[0]), '%Y.%j')
        attfileout = f'./s01_leoAtt/leoAtt_{ff}.{dt.strftime("%Y.%j")}.npz'
        dts0 = []
        data0 = []
        tmp0 = np.full((len(vars_list),), np.nan)
        for attfile in attfiles:
            for line in open(attfile, 'r'):
                if line.startswith("tim"):
                    if dts0:
                        data0.append(tmp0.copy())
                    tmp0[:] = np.nan
                    dts0.append(to_datetime(line[3:].split()))
                elif line.startswith("att"):
                    tmp0[:4] = np.fromstring(line[3:], sep=" ")[1:-1]
                elif line.startswith("ang"):
                    tmp0[4:7] = np.fromstring(line[3:], sep=" ")
                elif line.startswith("pve"):
                    tmp0[7:13] = np.fromstring(line[3:], sep=" ")
                elif line.startswith("pvi"):
                    tmp0[13:19] = np.fromstring(line[3:], sep=" ")
                elif line.startswith("sad"):
                    tmp = np.fromstring(line[3:], sep=" ")
                    tmp0[19:19 + len(tmp)] = tmp
        data0.append(tmp0.copy())
        dts0 = np.array(dts0)
        data0 = np.array(data0)
        if os.path.isfile(attfileout):
            with np.load(attfileout, allow_pickle=True) as fid:
                data = fid['data']
                dts = fid['dts']
            idx = ~np.isin(dts, dts0)
            dts = np.hstack([dts[idx], dts0])
            data = np.vstack([data[idx], data0])
            idx = np.argsort(dts)
            dts = dts[idx]
            data = data[idx]
        else:
            dts = dts0
            data = data0
        np.savez(attfileout, data=data, dts=dts, vars=vars_list)

    log_message('- Start gathering leoOrb profiles.')
    orbfiles = glob('./L1_extract/rcvOrb/*sp3')
    leo_sats = np.unique([sub(r'.*_\d{4}\.\d{3}\.(....)\.\d\d_.*', r'\1', x) for x in orbfiles])
    for ff in leo_sats:
        log_message(f"-- Processing {ff}...")
        orbfiles = glob(f'./L1_extract/rcvOrb/*.{ff}.*_sp3')
        dt = datetime.strptime(sub(r'.*_(\d{4}\.\d{3})\.....\.\d\d_.*', r'\1', orbfiles[0]), '%Y.%j')
        orbfileout = f'./s01_leoOrb/leoOrb_{ff}.{dt.strftime("%Y.%j")}.npz'
        dts0 = []
        data0 = []
        tmp0 = np.full((8,), np.nan)
        for orbfile in orbfiles:
            for line in open(orbfile, 'r'):
                if line.startswith("*"):
                    if dts0:
                        data0.append(tmp0.copy())
                    tmp0[:] = np.nan
                    dts0.append(to_datetime(line[1:].split()))
                elif line.startswith("P"):
                    tmp0[:4] = np.array([line[x:x + 14] for x in range(4, 60, 14)]).astype('float')
                elif line.startswith("V"):
                    tmp0[4:] = np.array([line[x:x + 14] for x in range(4, 60, 14)]).astype('float')
        data0.append(tmp0.copy())
        dts0, idx = np.unique(np.array(dts0), return_index=True)
        data0 = np.array(data0)[idx]
        if os.path.isfile(orbfileout):
            with np.load(orbfileout, allow_pickle=True) as fid:
                data = fid['data']
                dts = fid['dts']
            idx = ~np.isin(dts, dts0)
            dts = np.hstack([dts[idx], dts0])
            data = np.vstack([data[idx], data0])
            idx = np.argsort(dts)
            dts = dts[idx]
            data = data[idx]
        else:
            dts = dts0
            data = data0
        np.savez(orbfileout, data=data, dts=dts)

    log_message('- Completed gathering profiles.')
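

# The original script does not show how gather_data() is invoked; this guard
# is an assumption so the module can be run directly as well as imported.
if __name__ == '__main__':
    gather_data()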