import numpy as np

class Critic:

    def __init__(self , aw , av , au  , gamma ,h = 3 ):

        #  Initialize all parameters

        self.X = np.zeros((3,1))
        self.h = h
        self.wh = np.zeros( (h,3) )


        # critic
        self.Vprev = 0
        self.V = 0


        # critic
        self.v = np.zeros( (1, h) )


        self.output = np.zeros( (h,1) )
        
        # Learning Rates


        # critic

        self.av = av

        # both
        self.au = au 

        # critic 
        self.gamma = gamma

        
    def HiddenLayer(self):

        # Description : Takes in the state vector at a given time step and computes the output vector for the next layer

        output = 1/(1 + np.exp(self.wh.dot(self.X)) )

        
        self.output = output
    
    def OutputLayer(self):

        # Description : Takes in output from Hiddenlayer and computes Ki,Kp and Kd values


        # critic
        self.Vprev = self.V
        self.V = self.v.dot(self.output)
    
    def Update(self,y_ref,yt_0,yt_1,yt_2,yt_3):

        # Update Params for next episode

        del_TD = 0.5 * ( y_ref - yt_0 )**2 + self.gamma*self.V - self.Vprev


        # critic

        # Updating the v value
        v_prev = self.v
        self.v = self.v + self.av * del_TD * self.output.T

        
        for i in range(self.h):
            self.wh[i,0] = self.wh[i,0] + self.au*del_TD*v_prev[0][i]*self.output[i]*( 1 - self.output[i] )*self.X[0]
            self.wh[i,1] = self.wh[i,1] + self.au*del_TD*v_prev[0][i]*self.output[i]*( 1 - self.output[i] )*self.X[1]
            self.wh[i,2] = self.wh[i,2] + self.au*del_TD*v_prev[0][i]*self.output[i]*( 1 - self.output[i] )*self.X[2]

        return v_prev