import numpy as np

class RBF:

    def __init__(self , aw , av , au , asig , gamma ,h = 3 ):

        #  Initialize all parameters

        self.X = np.zeros((3,1))
        self.h = h
        self.mu = np.zeros( (3,h) )
        self.sigma = np.ones( (1,h) )

        self.K =np.zeros( (3,1) )
        self.Vprev = 0
        self.V = 0

        self.w = np.zeros( (3, h) )
        self.v = np.zeros( (1, h) )
        self.output = np.zeros( (h,1) )
        
        # Learning Rates

        self.aw = aw
        self.av = av
        self.au = au 
        self.asig = asig
        self.gamma = gamma

        

    def HiddenLayer(self):

        # Description : Takes in the state vector at a given time step and computes the output vector for the next layer

        output = np.zeros( (self.h,1) )

        
        for i in range(self.h):
            phi_j = np.exp( - np.linalg.norm( self.X - self.mu[:,i]  )**2 /( 2*self.sigma[0][i]**2 )  )
            output[i] =  phi_j

        self.output = output
    
    def OutputLayer(self):

        # Description : Takes in output from Hiddenlayer and computes Ki,Kp and Kd values

        self.K = self.w.dot(self.output)

        # print(self.K)
        self.Vprev = self.V
        self.V = self.v.dot(self.output)
    
    def Update(self,y_ref,yt_0,yt_1,yt_2,yt_3):

        # Update Params for next episode

        del_TD = 0.5 * ( y_ref - yt_0 )**2 + self.gamma*self.V - self.Vprev

        # Update w matrix 
        self.w[0] = self.w[0] - self.aw *  del_TD*(yt_1 - yt_2)*self.output.T  
        self.w[1] = self.w[1] + self.aw *  del_TD*self.X[0,0]*self.output.T 
        self.w[2] = self.w[2] + self.aw *  del_TD*(yt_1 - 2*yt_2 + yt_3)*self.output.T 

        # Updating the v value
        v_prev = self.v
        self.v = self.v + self.av * del_TD * self.output.T

        # Updating the centers and widths of hidden layers

        # print("Printing Shapes of Stuff")

        # print("Shape of self.au :", v_prev)

        for i in range(self.h):
            self.mu[:,i] = self.mu[:,i] + self.au*del_TD*v_prev[0][i]*self.output[i]*(self.X- self.mu[:,i])[:,0]/self.sigma[0][i]**2

        for i in range(self.h):
            self.sigma[0][i] = self.sigma[0][i] + self.asig*del_TD*del_TD*v_prev[0][i]*self.output[i]*( np.linalg.norm(self.X- self.sigma[0][i]) )/self.sigma[0][i]**3
        

        # print(self.K)