(HCSP-OMEGA VERIFIED DEEP RL CONTROLLER)
(* ================================================================ *)
(* HCSP-OMEGA VERIFIED DEEP RL CONTROLLER *)
(* Corrected Coq Version *)
(* ================================================================ *)
Require Import Reals.
Require Import Psatz.
Require Import Lra.
Open Scope R_scope.
(* ================================================================ *)
(* STATE SPACE *)
(* ================================================================ *)
Record State := mkState {
psi : R;
s : R;
r : R;
n : R
}.
Definition bounded (x:R) : Prop :=
0 <= x <= 1.
Definition Valid (st:State) : Prop :=
bounded (psi st) /\
bounded (s st) /\
bounded (r st) /\
bounded (n st).
(* ================================================================ *)
(* PARAMETERS *)
(* ================================================================ *)
Parameter alpha gamma eta dt : R.
Hypothesis alpha_range : 0 < alpha <= 1.
Hypothesis gamma_pos : gamma > 0.
Hypothesis eta_pos : eta > 0.
Hypothesis dt_small : 0 < dt <= 0.01.
(* ================================================================ *)
(* ACTIVATION *)
(* ================================================================ *)
Definition activation (x:R) : R :=
Rmin 1 (Rmax 0 x).
Lemma activation_bound :
forall x, 0 <= activation x <= 1.
Proof.
intro x.
unfold activation.
split.
- apply Rmin_glb.
+ lra.
+ apply Rmax_l.
- apply Rmin_l.
Qed.
(* ================================================================ *)
(* POLICY *)
(* ================================================================ *)
Definition hidden (st:State) : R :=
eta * (s st * r st) - gamma * psi st.
Definition deep_policy (st:State) : R :=
activation (hidden st - n st).
Lemma policy_bound :
forall st,
0 <= deep_policy st <= 1.
Proof.
intro st.
apply activation_bound.
Qed.
(* ================================================================ *)
(* LEARNING STEP *)
(* ================================================================ *)
Definition step (st:State) : R :=
psi st + alpha * deep_policy st * dt.
Definition Next (st:State) : State :=
mkState
(activation (step st))
(s st)
(r st)
(n st).
(* ================================================================ *)
(* SAFETY PROPERTIES *)
(* ================================================================ *)
Theorem Safety :
forall st,
Valid st ->
Valid (Next st).
Proof.
intros st H.
destruct H as [Hpsi [Hs [Hr Hn]]].
unfold Valid, Next.
simpl.
repeat split.
- apply activation_bound.
- exact Hs.
- exact Hr.
- exact Hn.
Qed.
(* ================================================================ *)
(* DETERMINISM *)
(* ================================================================ *)
Theorem Deterministic :
forall st,
exists st', st' = Next st.
Proof.
intro st.
exists (Next st).
reflexivity.
Qed.
(* ================================================================ *)
(* STEP MONOTONICITY *)
(* ================================================================ *)
Theorem step_nonnegative_increment :
forall st,
Valid st ->
psi st <= step st.
Proof.
intros st H.
unfold step.
destruct H as [Hpsi [Hs [Hr Hn]]].
destruct alpha_range as [Ha _].
destruct dt_small as [Hd _].
destruct (policy_bound st) as [Hp _].
assert (Hprod : 0 <= alpha * deep_policy st * dt).
{
apply Rmult_le_pos.
- apply Rmult_le_pos.
+ lra.
+ exact Hp.
- lra.
}
lra.
Qed.
(* ================================================================ *)
(* LYAPUNOV FUNCTION *)
(* ================================================================ *)
Definition V (st:State) : R :=
(psi st - 1/2)^2 + n st.
Lemma V_nonnegative :
forall st,
Valid st ->
0 <= V st.
Proof.
intros st H.
unfold V.
destruct H as [_ [_ [_ Hn]]].
destruct Hn as [Hn0 _].
assert (0 <= (psi st - 1/2)^2) by apply Rle_0_sqr.
lra.
Qed.
(* ================================================================ *)
(* ENERGY BOUNDED *)
(* ================================================================ *)
Theorem V_bounded :
forall st,
Valid st ->
0 <= V st <= 5/4.
Proof.
intros st H.
unfold V.
destruct H as [Hpsi [_ [_ Hn]]].
destruct Hpsi as [H0 H1].
destruct Hn as [Hn0 Hn1].
split.
- apply Rplus_le_le_0_compat.
+ apply Rle_0_sqr.
+ lra.
- assert (Habs : Rabs (psi st - 1/2) <= 1/2).
{
assert (-1/2 <= psi st - 1/2) by lra.
assert (psi st - 1/2 <= 1/2) by lra.
apply Rabs_le; split; assumption.
}
assert ((psi st - 1/2)^2 <= (1/2)^2).
{
apply Rle_trans with (r2 := (Rabs (psi st - 1/2))^2).
- apply Rle_0_sqr.
- rewrite <- Rabs_sqr.
apply Rmult_le_compat; try lra.
+ apply Rabs_pos.
+ exact Habs.
}
replace ((1/2)^2) with (1/4) by field.
lra.
Qed.
(* ================================================================ *)
(* NEXT STATE PSI BOUND *)
(* ================================================================ *)
Theorem Next_valid_psi :
forall st,
0 <= psi (Next st) <= 1.
Proof.
intro st.
unfold Next.
simpl.
apply activation_bound.
Qed.
(* ================================================================ *)
(* END *)
(* ================================================================ *)
تعليقات
إرسال تعليق