#!/usr/bin/env python3
"""
Script to stitch multiple rendered output images into a full-sky HDR
(lat-lon / equirectangular) image.

Scroll down to the bottom for the main routine.
"""
import os
import sys
import math

import numpy as np
import cv2
import matplotlib
matplotlib.use('tkagg')  # CV2 causes some gear grinding with matplotlib's version of Qt. This fixes it via magic bullshit.
import matplotlib.pyplot as plt
import scipy
import scipy.interpolate as interp
import threading  # NOTE: useless for parallel CPU execution (GIL) -- multiprocessing is used instead
import multiprocessing as mp
from multiprocessing import shared_memory

# Short aliases for the numpy math functions used throughout.
cos = np.cos
sin = np.sin
tan = np.tan
atan = np.arctan
pi = np.pi
sqrt = np.sqrt
exp = np.exp


def load_img(fname):
    """
    Load an image file via cv2 and normalize away OpenCV's quirks
    (BGR channel order, row-major axes).

    fname - path to the image file

    Returns an RGBA image of shape (Nx,Ny,4), float32, scaled 0..1,
    with x as the first axis; returns None if the file cannot be read.
    """
    img = cv2.imread(fname, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread signals failure by returning None, not by raising.
        print("load_img: error: could not open {} as an image with cv2".format(fname))
        return None
    Nx = img.shape[0]
    Ny = img.shape[1]
    imgout = np.zeros((Nx, Ny, 4), dtype=np.float32)
    if len(img.shape) <= 2:  # grayscale: replicate into RGB, opaque alpha
        imgout[:, :, 0] = img[:, :]
        imgout[:, :, 1] = img[:, :]
        imgout[:, :, 2] = img[:, :]
        imgout[:, :, 3] = 255.0
    elif img.shape[2] == 3:  # BGR image -> RGB, opaque alpha
        imgout[:, :, 0] = img[:, :, 2]
        imgout[:, :, 1] = img[:, :, 1]
        imgout[:, :, 2] = img[:, :, 0]
        imgout[:, :, 3] = img[:, :, 0] * 0 + 255.0
    elif img.shape[2] >= 4:  # BGRA image -> RGBA
        imgout[:, :, 0] = img[:, :, 2]
        imgout[:, :, 1] = img[:, :, 1]
        imgout[:, :, 2] = img[:, :, 0]
        imgout[:, :, 3] = img[:, :, 3]
    # Common normalization: 8-bit scale to 0..1, clip, and transpose so
    # the x (column) axis comes first.
    imgout = imgout / 255.0
    imgout = np.clip(imgout, 0.0, 1.0)
    imgout = np.transpose(imgout, (1, 0, 2))
    return imgout


def save_img(fname, img):
    """
    Save a floating point (0,1) normalized image to fname via cv2.

    img may be greyscale (Nx,Ny), RGB (Nx,Ny,3), or RGBA (Nx,Ny,4);
    it is converted to the BGRA uint8 layout that cv2 expects.
    """
    Ny = img.shape[0]
    Nx = img.shape[1]
    if len(img.shape) == 2:
        Ncp = 1
        img = img.copy()
        img = img.transpose()
    else:
        Ncp = img.shape[2]
        img = img.copy()
        img = img.transpose(1, 0, 2)
    im2 = np.zeros((Nx, Ny, 4), dtype=np.float32)
    if Ncp == 1:
        im2[:, :, 0] = img[:, :]
        im2[:, :, 1] = img[:, :]
        im2[:, :, 2] = img[:, :]
        im2[:, :, 3] = 1.0
    elif Ncp == 3:
        # RGB -> BGR with opaque alpha
        im2[:, :, 0] = img[:, :, 2]
        im2[:, :, 1] = img[:, :, 1]
        im2[:, :, 2] = img[:, :, 0]
        im2[:, :, 3] = 1.0
    elif Ncp >= 4:
        # RGBA -> BGRA
        im2[:, :, 0] = img[:, :, 2]
        im2[:, :, 1] = img[:, :, 1]
        im2[:, :, 2] = img[:, :, 0]
        im2[:, :, 3] = img[:, :, 3]
    else:
        print("save_img: no way to handle Ncp={}".format(Ncp))
    im2 = im2 * 255.0
    im2 = np.clip(im2, 0.0, 255.0)
    im3 = np.uint8(im2)
    try:
        cv2.imwrite(fname, im3)
    except Exception:
        # was: "could not write to file {} fname" (stray token in message)
        print("save_img: error: could not write to file {}".format(fname))
    return


def test_img_loadsave():
    """Round-trip sanity test for load_img/save_img (needs ./testimg.png)."""
    img = load_img("./testimg.png")
    print(img.shape)
    plt.imshow(img.transpose(1, 0, 2))
    plt.show()
    save_img("./testout.png", img)
    img = np.random.rand(255, 128)
    save_img("./testout2.png", img)
    img = np.random.rand(255, 128, 3)
    save_img("./testout3.png", img)


###
### Some vector math
###

def hodge_dual(v):
    """
    Hodge dual of a vector.

    The hodge dual is an operation in differential forms that (in 3d)
    returns a bivector for a vector, the bivector being related to the
    generator of a rotation matrix (the cross-product matrix [v]x).
    """
    v = np.array(v)
    M = np.zeros((3, 3))
    M[0, 1] = v[2]
    M[1, 2] = v[0]
    M[2, 0] = v[1]
    M[1, 0] = -v[2]
    M[2, 1] = -v[0]
    M[0, 2] = -v[1]
    M = -M
    return M


def rotmat_axisangle(axis, angle):
    """
    rotmat_axisangle(axis,angle)
    axis - the axis of rotation
    angle - the angle [rad] to rotate about the axis
    returns: 3x3 rotation matrix (Rodrigues' formula)
    """
    axis = np.array(axis)
    E = np.eye(3)
    H = hodge_dual(axis)
    H2 = np.matmul(H, H)
    R = E + H * sin(angle) + H2 * (1.0 - cos(angle))
    return R


def rotx(angle):
    """3x3 rotation about the x axis by angle [rad]."""
    R = np.array([[1, 0, 0],
                  [0, cos(angle), -sin(angle)],
                  [0, sin(angle), cos(angle)]])
    return R


def roty(angle):
    """3x3 rotation about the y axis by angle [rad]."""
    R = np.array([[cos(angle), 0, sin(angle)],
                  [0, 1, 0],
                  [-sin(angle), 0, cos(angle)]])
    return R


def rotz(angle):
    """3x3 rotation about the z axis by angle [rad]."""
    R = np.array([[cos(angle), -sin(angle), 0],
                  [sin(angle), cos(angle), 0],
                  [0, 0, 1]])
    return R


def rot_eulerxyz(x, y, z):
    """Euler rotation applied in x, then y, then z order: R = Rz*Ry*Rx."""
    Rx = rotx(x)
    Ry = roty(y)
    Rz = rotz(z)
    R = np.matmul(Rz, np.matmul(Ry, Rx))
    return R


def rot_cameraxyz(x, y, z):
    """
    Camera orientation matrix: a -90 deg x pre-rotation (to point the
    camera z axis outward) followed by the euler-xyz rotation.
    """
    R1 = rotx(-90 * pi / 180)
    R2 = rot_eulerxyz(x, y, z)
    R = np.matmul(R2, R1)
    return R


def rot_eulerzyx(x, y, z):
    """Euler rotation applied in z, then y, then x order: R = Rx*Ry*Rz."""
    Rx = rotx(x)
    Ry = roty(y)
    Rz = rotz(z)
    R = np.matmul(Rx, np.matmul(Ry, Rz))
    return R


def extrinsic(R, t):
    """
    Build the 4x4 extrinsic (modelview) matrix for a camera with
    orientation R and position t (world coordinates).
    """
    R = np.array(R).copy()
    t = np.array(t).copy()
    t = np.reshape(t, [3, 1])
    Rinv = np.linalg.inv(R)
    c = -np.matmul(Rinv, t)  # camera-frame translation
    Ex = np.zeros([4, 4])
    Ex[0:3, 0:3] = Rinv[:, :]
    Ex[0:3, 3] = c.flatten()
    Ex[3, 3] = 1.0
    return Ex


def intrinsic_eqasp(fov):
    """
    Equal aspect ratio intrinsic (projection) matrix for a camera with
    full field-of-view angle fov [rad].
    """
    In = np.zeros([4, 4])
    alpha = fov / 2.0
    fi = np.tan(alpha)
    In[0, 0] = 1.0
    In[1, 1] = -1.0  # invert y
    In[2, 2] = fi
    In[3, 3] = 1.0  # to allow inversion
    return In


def ray_to_normscreen(Ex, In, ray):
    """
    ray_to_normscreen(Ex,In,ray)
    Ex - 4x4 extrinsic (modelview) matrix
    In - 4x4 projection matrix
    ray - 3x1 ray vector

    Project a ray to the normalized (-1,1) x (-1,1) screen coordinates of
    a camera with orientation/position/fov described by Ex and In.
    Returns [sx,sy]; rays behind the camera map to (-2,-2) (offscreen).
    """
    ray = ray.copy()
    rayp = np.zeros([4, 1])  # projective (homogeneous) ray
    rayp[0:3, 0] = np.squeeze(ray)
    rayp[3, 0] = 1.0
    r2 = np.matmul(Ex, rayp)
    r3 = np.matmul(In, r2)
    if r3[2, 0] > 0.0:
        sx = r3[0, 0] / r3[2, 0]
        sy = r3[1, 0] / r3[2, 0]
    else:
        sx = -2.0
        sy = -2.0  # offscreen
    return [sx, sy]


def vnorm(vect):
    """Euclidean norm of a vector."""
    vect = np.array(vect)
    vect = vect.copy()
    snrm = np.sqrt(np.sum(vect * vect))
    return snrm


def vnormalize(vect):
    """Return vect scaled to unit length (zero vector for zero input)."""
    vect = np.array(vect)
    vect = vect.copy()
    snrm = vnorm(vect)
    if snrm > 0.0:
        vect = vect / snrm
    else:
        vect = np.zeros(vect.shape)
    return vect


def normscreen_to_ray(Ex, In, sx, sy):
    """
    Inverse of ray_to_normscreen: map normalized screen coordinates
    (sx,sy) back to a unit world-space ray for the camera (Ex,In).
    """
    Ininv = np.linalg.inv(In)
    Exinv = np.linalg.inv(Ex)
    sp = np.zeros([4, 1])
    sp[0] = sx
    sp[1] = sy
    sp[2] = 1.0
    sp[3] = 1.0
    rp1 = np.matmul(Ininv, sp)
    rp2 = np.squeeze(rp1[0:3, 0])
    rp3 = vnormalize(rp2)
    rp4 = np.zeros([4, 1])
    rp4[0:3, 0] = rp3[:]
    rp5 = np.matmul(Exinv, rp4)
    ray = np.squeeze(rp5[0:3, 0])
    return ray

###
### End vector math
###


def _local_makeint(istr):
    """Parse istr as an int, returning 0 on failure."""
    try:
        i = int(istr)
    except (ValueError, TypeError):
        i = 0
    return i


def imfilefilter_preload(scenedir, flst):
    """
    Filter a directory listing for render images named like
    "prefix_<a1>_<a2>.png" and extract the encoded camera angles.

    Returns [Nimgs, angles (Nimgs x 3 float32, [a1, 0, a2]), fnames].
    """
    fnames = []
    Nimgs = 0
    angles = []
    for f in flst:
        f2 = os.path.join(scenedir, f)
        fb = os.path.splitext(f)[0]
        fext = os.path.splitext(f)[1]
        if fext.lower() == ".png":
            fsplit = fb.split("_")
            if len(fsplit) == 3:
                i1 = _local_makeint(fsplit[1])
                i2 = _local_makeint(fsplit[2])
                fnames.append(f2)
                angles.append([i1, 0, i2])
                Nimgs += 1
    angles = np.float32(angles)
    return [Nimgs, angles, fnames]


def load_all_images(scenedir, verbose=True):
    """
    Load every render image in scenedir into one (Nx,Ny,4,Nimgs) float32
    array (Fortran order so each image is contiguous).

    Returns [angles, imgs], or None if the images disagree in size.
    """
    flst = os.listdir(scenedir)
    [Nimgs, angles, fnames] = imfilefilter_preload(scenedir, flst)
    imgs = None
    Nx = 0
    Ny = 0
    for I in range(0, Nimgs):
        if verbose == True:
            print("Loading {} of {}".format(I, Nimgs))
        img = load_img(fnames[I])
        if I == 0:
            Nx = img.shape[0]
            Ny = img.shape[1]
        else:
            if img.shape[0] != Nx or img.shape[1] != Ny:
                print("Error: Image {}, {} has size {},{}, not {},{}".format(
                    I, fnames[I], img.shape[0], img.shape[1], Nx, Ny))
                return None
        if I == 0:
            imgs = np.zeros((Nx, Ny, 4, Nimgs), dtype=np.float32, order='F')
        imgs[:, :, :, I] = img[:, :, :]
    return [angles, imgs]


def iswithin_img(ray, Ex, In):
    """
    iswithin_img(ray,Ex,In)

    Is a given ray within the normalized screen coordinates of a camera
    described by Ex/In matrices?

    Returns [byesno, sx, sy].

    NOTE(fix): this was declared as "def ray_to_normscreen(Ex,In,ray)" --
    missing colon (syntax error), shadowing the real ray_to_normscreen,
    and recursing into itself. The caller (process_pixel) invokes it as
    iswithin_img(ray, Ex, In), which is the signature restored here.
    """
    [sx, sy] = ray_to_normscreen(Ex, In, ray)
    byesno = False
    if sx >= -1.0 and sx <= 1.0 and sy >= -1.0 and sy <= 1.0:
        byesno = True
    return [byesno, sx, sy]


def check_cameraz(angles):
    """
    Debug script to check camera orientations.

    Camera z vectors should form a set covering the full spherical field
    of view if all the rotations are set up right.
    """
    Nimgs = angles.shape[0]
    zvs = np.zeros([3, Nimgs])
    for I in range(0, Nimgs):
        R = rot_cameraxyz(angles[I, 0] * pi / 180, angles[I, 1] * pi / 180, angles[I, 2] * pi / 180)
        print("{} {} {}".format(angles[I, 0], angles[I, 1], angles[I, 2]))
        print(R)
        zv = np.squeeze(R[0:3, 2])
        print(zv)
        zvs[:, I] = zv[:]
    # plt.figure()
    # plt.plot(angles[:,0],angles[:,2],'kd')
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    plt.plot(zvs[0, :], zvs[1, :], zvs[2, :], 'kd')
    ax.set_xlim([-1, 1])
    ax.set_ylim([-1, 1])
    ax.set_zlim([-1, 1])
    plt.show()
    return


def process_pixel(px, py, angles, imgs, fov, Nxout, Nyout):
    """
    Compute the RGBA value of one output pixel.

    Eventually, figure out a way to vectorize this, or rewrite in C.

    Takes output pixel px,py in [0,Nxout) x [0,Nyout), calculates the ray
    corresponding to the pixel for the lat-lon sky image, and searches
    the source images for which ones the ray lies within the field of
    view of. Pulls rgba from the first available source image (the
    accumulator would average instead if the break were removed).
    """
    Nx = imgs.shape[0]
    Ny = imgs.shape[1]
    Nimgs = imgs.shape[3]
    Nxc = np.arange(0, Nx)  # sample grids for interpn
    Nyc = np.arange(0, Ny)
    rgba = np.float32([0, 0, 0, 0])
    rgba_N = 0
    # Longitude wraps the full 2*pi; latitude runs +pi/2 (top row) to -pi/2.
    ax = px / (Nxout) * 2.0 * pi
    ay = -2.0 * (py / (Nyout - 1) - 0.5) * pi / 2
    ray = np.float32([cos(ax) * cos(ay), sin(ax) * cos(ay), sin(ay)])
    # rgba = np.float32([ray[0],ray[1],ray[2],1]) #debug
    # rgba = 0.5*rgba+0.5
    for I in range(0, Nimgs):
        R = rot_cameraxyz(angles[I, 0] * pi / 180, angles[I, 1] * pi / 180, angles[I, 2] * pi / 180)
        Ex = extrinsic(R, [0, 0, 0])
        In = intrinsic_eqasp(fov)
        [byesno, sx, sy] = iswithin_img(ray, Ex, In)
        # debug
        # rgba = np.float32([ray[0]*0.5+0.5,ray[1]*0.5+0.5,0.5*ray[2]+0.5,1])
        if byesno:
            # interpolate each channel of the source image at the
            # (subpixel) screen location of the ray
            ra = imgs[:, :, 0, I]
            ga = imgs[:, :, 1, I]
            ba = imgs[:, :, 2, I]
            aa = imgs[:, :, 3, I]
            ppx = (sx + 1.0) / 2.0 * (Nx - 1.0)
            ppy = (-sy + 1.0) / 2.0 * (Ny - 1.0)
            ri = interp.interpn((Nxc, Nyc), ra, (ppx, ppy), bounds_error=False, fill_value=0.0)
            gi = interp.interpn((Nxc, Nyc), ga, (ppx, ppy), bounds_error=False, fill_value=0.0)
            bi = interp.interpn((Nxc, Nyc), ba, (ppx, ppy), bounds_error=False, fill_value=0.0)
            ai = interp.interpn((Nxc, Nyc), aa, (ppx, ppy), bounds_error=False, fill_value=0.0)
            rgba1 = np.squeeze(np.float32([ri, gi, bi, ai]))
            rgba = rgba + rgba1
            rgba_N += 1
            # debug
            # rgba = np.float32([0.5*sx+0.5,0.5*sy+0.5,0,1])
            # rgba = np.float32([angles[I,0]/360,0,0,1])
            break  # first available image wins; remove to average overlaps
    if rgba_N > 0:
        rgba = rgba / rgba_N
    return rgba


def proc_pixelrange(angles, sm_imgs, imgs_shape, fov, sm_imgout, imgout_shape, threadnum, nthreads):
    """
    Thread function for processing a range of pixels.

    angles - the list of camera angles
    sm_imgs - the shared memory buffer for the source images
    imgs_shape - the shape of the source images
    fov - the field of view angle for the camera source images [rad]
    sm_imgout - the shared memory buffer for the output images
    imgout_shape - the shape of the output image buffer
    threadnum - which thread is this?
    nthreads - total number of threads
    """
    Nxout = imgout_shape[0]
    Nyout = imgout_shape[1]
    # Thread indexing
    # Is - stride index - how many elements should each thread process?
    # I0,I1 - elements processed by this thread
    # N - total number of pixels
    N = Nxout * Nyout
    Is = int(np.floor(N / nthreads)) + 1
    I0 = threadnum * Is
    I1 = (threadnum + 1) * Is
    if I0 > N:
        I0 = N
    if I1 > N:
        I1 = N
    # Unpack shared memory arrays
    s_imgs = np.ndarray(imgs_shape, dtype=np.float32, buffer=sm_imgs.buf)
    s_imgout = np.ndarray(imgout_shape, dtype=np.float32, buffer=sm_imgout.buf)
    for I in range(I0, I1):
        py = int(np.floor(I / Nxout))
        px = I % Nxout
        rgba = process_pixel(px, py, angles, s_imgs, fov, Nxout, Nyout)
        s_imgout[px, py, :] = rgba.flatten()[:]  # save rgba to shared-memory output array
    return


def threaded_proc(angles, imgs, fov, imgout):
    """
    Thread manager for parallel processing.

    The actual threading library doesn't work for this purpose because of
    GIL; multiprocessing does allow parallel execution, so "threads" are
    "multiprocessing" Processes.

    Responsibility for painting in pixels is split among a number of
    threads. While this is faster than single threaded, it is still
    pretty slow by compiled code or GPU code standards, and I'll probably
    write a compiled submodule for this in the future. For portability's
    sake, though, here is the native python version of this.
    """
    # os.cpu_count() can return None on exotic platforms; default to 1 worker
    nthreads = (os.cpu_count() or 1) - 2
    if nthreads <= 0:
        nthreads = 1
    threads = []
    # create shared memory arrays visible to all worker processes
    sm_imgs = shared_memory.SharedMemory(create=True, size=imgs.nbytes)
    s_imgs = np.ndarray(imgs.shape, dtype=np.float32, buffer=sm_imgs.buf)
    s_imgs[:] = imgs[:]
    sm_imgout = shared_memory.SharedMemory(create=True, size=imgout.nbytes)
    s_imgout = np.ndarray(imgout.shape, dtype=np.float32, buffer=sm_imgout.buf)
    for I in range(0, nthreads):
        t = mp.Process(target=proc_pixelrange,
                       args=(angles, sm_imgs, s_imgs.shape, fov, sm_imgout, s_imgout.shape, I, nthreads))
        threads.append(t)
    for I in range(0, nthreads):
        threads[I].start()
    for I in range(0, nthreads):
        threads[I].join()
        # was "{} %% complete" -- %% is a printf escape, not a str.format one
        print("{} % complete".format((I + 1) / nthreads * 100))
    imgout[:] = s_imgout[:]
    sm_imgs.close()
    sm_imgs.unlink()
    sm_imgout.close()
    sm_imgout.unlink()
    return imgout


def main():
    """Load the renders, stitch them into a lat-lon sky image, save it."""
    [angles, imgs] = load_all_images("./scenerenders")
    fnout = "./scenehdr2.png"
    Nxout = 200  # Dimensions of the output scene image
    Nyout = int(np.floor(Nxout / 2.0))
    fov = 50.0 * pi / 180.0  # field of view angle of the renders in [radians]
    imgout = np.zeros((Nxout, Nyout, 4), dtype=np.float32)
    # single threaded implementation
    # for I in range(0,Nxout):
    #     print("processing {:1.2f} % complete.".format(I/Nxout*100.0))
    #     for J in range(0,Nyout):
    #         rgba = process_pixel(I,J,angles,imgs,fov,Nxout,Nyout)
    #         imgout[I,J,:] = rgba[:]
    imgout = threaded_proc(angles, imgs, fov, imgout)
    save_img(fnout, imgout)
    return


if __name__ == "__main__":
    main()
    # test_eulermatrices()
    exit(0)