Theory Tree_Automata_Pumping (Isabelle2019: June 2019)

theory Tree_Automata_Pumping
  imports TA.Tree_Automata
begin

(* We need to deal with non deterministic tree automatas,
   to show the pumping lemma we need to find cycles on the derivation
   of terms with depth greater number of states.

  assumes "card (ta_states A) < depth t" and "finite (ta_states A)"
      and "q ∈ ta_res A t" and "ground t"
    shows "∃ s v p. t ⊵ s ∧ s ⊳ v ∧ p ∈ ta_res A v ∧ p ∈ ta_res A s"

  we only get t ⟶* q, v ⟶ p, s ⟶ p, but we have no chance to conclude
  that the state p appears in the derivation t ⟶* q, because our derivation is
  not deterministic and there could be a cycle in the derivation of t which does not
  end in state q.
 *)

abbreviation "derivation_ctxt ts Cs ≡ Suc (length Cs) = length ts ∧
  (∀ i < length Cs. (Cs ! i) ⟨ts ! i⟩ = ts ! Suc i)"

abbreviation "derivation_ctxt_st A ts Cs qs ≡ length qs = length ts ∧ Suc (length Cs) = length ts ∧
  (∀ i < length Cs. qs ! Suc i ∈ ta_res A (Cs ! i)⟨Var (qs ! i)⟩)"

abbreviation "derivation_sound A ts qs ≡ length qs = length ts ∧
  (∀ i < length qs. qs ! i ∈ ta_res A (ts ! i))"
 
definition "derivation A ts Cs qs ⟷ derivation_ctxt ts Cs ∧
  derivation_ctxt_st A ts Cs qs ∧ derivation_sound A ts qs"


(* Context compositions from left *)
lemma ctxt_comp_lhs_not_hole:
  assumes "C ≠ □"
  shows "C ∘⇩_c D ≠ □"
  using assms by (cases C; cases D) auto

lemma ctxt_comp_rhs_not_hole:
  assumes "D ≠ □"
  shows "C ∘⇩_c D ≠ □"
  using assms by (cases C; cases D) auto

lemma fold_ctxt_comp_nt_empty_acc:
  assumes "D ≠ □"
  shows "fold (∘⇩_c) Cs D ≠ □"
  using assms by (induct Cs arbitrary: D) (auto simp add: ctxt_comp_rhs_not_hole)

lemma fold_ctxt_comp_nt_empty:
  assumes "C ∈ set Cs" and "C ≠ □"
  shows "fold (∘⇩_c) Cs D ≠ □" using assms
  by (induct Cs arbitrary: D) (auto simp: ctxt_comp_lhs_not_hole fold_ctxt_comp_nt_empty_acc)

(* Rep of context *)

lemma empty_ctxt_power [simp]:
  "□ ^ n = □"
  by (induct n) auto

lemma ctxt_comp_n_pres_ground [intro]:
  assumes "ground_ctxt C"
  shows "ground_ctxt (C^n)"
  using assms by (induct n arbitrary: C) auto

lemma ctxt_comp_n_funas [simp]:
  "(f, v) ∈ funas_ctxt (C^n) ⟹ (f, v) ∈ funas_ctxt C"
  by (induct n arbitrary: C) auto

lemma ctxt_comp_n_pres_funas [intro]:
  assumes "funas_ctxt C ⊆ ℱ"
  shows "funas_ctxt (C^n) ⊆ ℱ"
  using assms by (induct n arbitrary: C) auto

lemma ctxt_comp_not_hole:
  assumes "C ≠ □" and "n ≠ 0"
  shows "C^n ≠ □"
  using assms by (induct n arbitrary: C) (auto elim!: ctxt_compose.elims)

lemma ctxt_comp_n_suc [simp]:
  shows "(C^(Suc n))⟨t⟩ = (C^n)⟨C⟨t⟩⟩"
  by (induct n arbitrary: C) auto

lemma ctxt_comp_reach:
  assumes "p ∈ ta_res A C⟨Var p⟩"
  shows "p ∈ ta_res A (C^n)⟨Var p⟩"
  using assms by (induct n arbitrary: C) (auto intro: ta_res_ctxt)


(* Connecting position to term depth and trivial depth lemmas *)

lemma args_depth_less [simp]:
  assumes "u ∈ set ss"
  shows "depth u < depth (Fun f ss)"
  using assms by (induct ss) (auto simp: less_Suc_eq_le)

lemma subterm_depth_less:
  assumes "s ⊳ t"
  shows "depth t < depth s"
  using assms by (induct s t rule: supt.induct) (auto intro: less_trans)

lemma poss_length_depth:
  shows "∃ p ∈ poss t. length p = depth t"
proof (induct t)
  case (Fun f ts)
  then show ?case
  proof (cases ts)
    case [simp]: (Cons a list)
    have "ts ≠ [] ⟹ ∃ s. f s = Max (f ` set ts) ∧ s ∈ set ts" for ts f
    using Max_in[of "f ` set ts"] by (auto simp: image_iff)
    from this[of ts depth] obtain s where s: "depth s = Max (depth ` set ts) ∧ s ∈ set ts"
      by auto
    then show ?thesis using Fun[of s] in_set_idx[OF conjunct2[OF s]] by force
  qed auto
qed auto

lemma poss_length_bounded_by_depth:
  assumes "p ∈ poss t"
  shows "length p ≤ depth t" using assms
  by (induct t arbitrary: p) (auto intro!: Suc_leI, meson args_depth_less dual_order.strict_trans2 nth_mem)

(* Connecting depth to ctxt repetition *)

lemma depth_ctxt_nt_hole_inc:
  assumes "C ≠ □"
  shows "depth t < depth C⟨t⟩" using assms
  using subterm_depth_less[of t "C⟨t⟩"]
  by (simp add: nectxt_imp_supt_ctxt subterm_depth_less) 

lemma depth_ctxt_less_eq:
  "depth t ≤ depth C⟨t⟩" using depth_ctxt_nt_hole_inc less_imp_le
  by (cases C, simp) blast  

lemma ctxt_comp_n_not_hole_depth_inc:
  assumes "C ≠ □"
  shows "depth (C^n)⟨t⟩ < depth (C^(Suc n))⟨t⟩"
  using assms by (induct n arbitrary: C t) (auto simp: ctxt_comp_not_hole depth_ctxt_nt_hole_inc)

lemma ctxt_comp_n_lower_bound:
  assumes "C ≠ □"
  shows "n < depth (C^(Suc n))⟨t⟩"
  using assms
proof (induct n arbitrary: C)
  case 0 then show ?case using ctxt_comp_not_hole depth_ctxt_nt_hole_inc gr_implies_not_zero by blast
next
  case (Suc n) then show ?case using ctxt_comp_n_not_hole_depth_inc less_trans_Suc by blast  
qed

lemma ta_res_ctxt_n_loop:
  assumes "q ∈ ta_res 𝒜 t" "q ∈ ta_res 𝒜 C⟨Var q⟩"
  shows " q ∈ ta_res 𝒜 (C^n)⟨t⟩"
  using assms by (induct n) (auto simp: ta_res_ctxt)

lemma ctxt_compose_funs_ctxt [simp]:
  "funs_ctxt (C ∘⇩_c D) = funs_ctxt C ∪ funs_ctxt D"
  by (induct C arbitrary: D) auto

lemma ctxt_compose_vars_ctxt [simp]:
  "vars_ctxt (C ∘⇩_c D) = vars_ctxt C ∪ vars_ctxt D"
  by (induct C arbitrary: D) auto

lemma ctxt_power_funs_vars_0 [simp]:
  assumes "n = 0"
  shows "funs_ctxt (C^n) = {}" "vars_ctxt (C^n) = {}" 
  using assms by auto

lemma ctxt_power_funs_vars_n [simp]:
  assumes "n ≠ 0"
  shows "funs_ctxt (C^n) = funs_ctxt C" "vars_ctxt (C^n) = vars_ctxt C" 
  using assms by (induct n arbitrary: C, auto) fastforce+ 


(* Collect all terms in a path via positions *)

fun terms_pos where
  "terms_pos s [] = [s]"
| "terms_pos s (p # ps) = terms_pos (s |_ [p]) ps @ [s]"

lemma subt_at_poss [simp]:
  assumes "a # p ∈ poss s"
  shows "p ∈ poss (s |_ [a])"
  using assms by (metis append_Cons append_self_conv2 poss_append_poss)

lemma terms_pos_length [simp]:
  shows "length (terms_pos t p) = Suc (length p)"
  by (induct p arbitrary: t) auto

lemma terms_pos_last [simp]:
  assumes "i = length p"
  shows "terms_pos t p ! i = t" using assms
  by (induct p arbitrary: t) (auto simp add: append_Cons_nth_middle)

lemma terms_pos_subterm:
  assumes "p ∈ poss t" and "s ∈ set (terms_pos t p)"
  shows "t ⊵ s" using assms
  using assms
proof (induct p arbitrary: t s)
  case (Cons a p)
  from Cons(1)[of "t |_ [a]"] Cons(2-) show ?case apply auto
    using empty_pos_in_poss poss_Cons_poss subt_at_imp_supteq supteq_trans by blast 
qed auto

lemma terms_pos_differ_subterm:
  assumes "p ∈ poss t" and "i < length (terms_pos t p)"
     and "j < length (terms_pos t p)" and "i < j"
   shows "terms_pos t p ! i ⊲ terms_pos t p ! j"
  using assms
proof (induct p arbitrary: t i j)
  case (Cons a p)
  from Cons(1)[of "t |_ [a]" i j] Cons(2-) show ?case
    apply (cases "j = length (a # p)") apply (auto simp add: append_Cons_nth_middle append_Cons_nth_left)
    by (metis empty_pos_in_poss nth_mem poss_Cons_poss subt_at_nepos_imp_supt subt_at_poss
      subterm.order.strict_trans1 terms_pos_length terms_pos_subterm)  
qed auto

lemma distinct_terms_pos:
  assumes "p ∈ poss t"
  shows "distinct (terms_pos t p)" using assms
proof (induct p arbitrary: t)
  case (Cons a p)
  have "⋀i. i < Suc (length p) ⟹ t ⊳ (terms_pos (t |_ [a]) p) ! i"
    using terms_pos_differ_subterm[OF Cons(2), of _  "Suc (length p)"] by (auto simp: nth_append) 
  then show ?case using  Cons(1)[of "t |_ [a]"] Cons(2-)
    by (auto simp: in_set_conv_nth) (metis supt_not_sym)
qed auto


lemma term_chain_depth:
  assumes "depth t = n"
  shows "∃ p ∈ poss t. length (terms_pos t p) = (n + 1)"
proof -
  obtain p where p: "p ∈ poss t" "length p = depth t"
    using poss_length_depth[of t] by blast
  from terms_pos_length[of t p] this show ?thesis using assms
    by auto
qed

lemma ta_res_derivation_chain_terms_pos_exist:
  assumes "p ∈ poss t" and "q ∈ ta_res A t"
  shows "∃ Cs qs. derivation A (terms_pos t p) Cs qs ∧ last qs = q"
  using assms         
proof (induct p arbitrary: t q)
  case Nil
  then show ?case by (auto simp: derivation_def intro!: exI[of _ "[q]"])
next
  case (Cons a p)
  from Cons(2) have poss: "p ∈ poss (t |_ [a])" by auto
  from Cons(2) obtain C where C: "C⟨t |_ [a]⟩ = t"
    by (meson ctxt_supt_id empty_pos_in_poss poss_Cons_poss)
  from C ta_res_ctxt_decompose Cons(3) obtain q' where
    res: "q' ∈ ta_res A (t |_ [a])" "q ∈ ta_res A C⟨Var q'⟩"
    by force
  from Cons(1)[OF _ res(1)] Cons(2-) C obtain Cs qs where
    der: "derivation A (terms_pos (t |_ [a]) p) Cs qs ∧ last qs = q'"
    by (auto simp del: terms_pos.simps)
  {fix i assume "i < Suc (length Cs)"
    then have "derivation_ctxt (terms_pos (t |_ [a]) p @ [t]) (Cs @ [C])" using C[symmetric] poss der
      by (cases "i = length Cs") (auto simp: derivation_def nth_append)}
  note der_ctxt = this
  {fix i assume "i < Suc (length Cs)"
    then have "derivation_ctxt_st A (terms_pos (t |_ [a]) p @ [t]) (Cs @ [C]) (qs @ [q])"
      using der poss C res(2) last_conv_nth[of qs]
      by (cases "i = length Cs", auto simp add: derivation_def nth_append not_less less_Suc_eq) fastforce+}
  then show ?case using C poss res(1) der_ctxt der
    by (auto simp: derivation_def intro!: exI[of _ "Cs @ [C]"] exI[of _ "qs @ [q]"])
      (simp add: Cons.prems(2) nth_append_Cons)
qed

lemma derivation_ctxt_terms_pos_nt_empty:
  assumes "p ∈ poss t" and "derivation_ctxt (terms_pos t p) Cs" and "C ∈ set Cs"
  shows "C ≠ □"
  using assms by (auto simp: in_set_conv_nth)
    (metis Suc_mono assms(2) ctxt_apply_term.simps(1) distinct_terms_pos lessI less_SucI less_irrefl_nat nth_eq_iff_index_eq)

lemma derivation_ctxt_terms_pos_sub_list_nt_empty:
  assumes "p ∈ poss t" and "derivation_ctxt (terms_pos t p) Cs"
    and "i < length Cs" and "j ≤ length Cs" and "i < j"
  shows "fold (∘⇩_c) (take (j - i) (drop i Cs)) □ ≠ □"
proof -
  have "∃ C. C ∈ set (take (j - i) (drop i Cs))"
    using assms(3-) not_le by fastforce
  then obtain C where w: "C ∈ set (take (j - i) (drop i Cs))" by blast
  then have "C ≠ □"
    by auto (meson assms(1, 2) derivation_ctxt_terms_pos_nt_empty in_set_dropD in_set_takeD) 
  then show ?thesis by (auto simp: fold_ctxt_comp_nt_empty[OF w])
qed

lemma derivation_ctxt_comp_term:
  assumes "derivation_ctxt ts Cs"
    and "i < length Cs" and "j ≤ length Cs" and "i < j"
  shows "(fold (∘⇩_c) (take (j - i) (drop i Cs)) □)⟨ts ! i⟩ = ts ! j"
  using assms
proof (induct "j - i" arbitrary: j i)
  case (Suc x)
  then obtain n where j [simp]: "j = Suc n" by (meson lessE)
  then have r: "x = n - i" "Suc n - i = 1 + (n - i)" using Suc(2, 6) by linarith+
  then show ?case using Suc(1)[OF r(1)] Suc(2-) unfolding j r(2) take_add[of "n - i" 1]
    by (cases "i = n") (auto simp: take_Suc_conv_app_nth)
qed auto

lemma derivation_ctxt_comp_states:
  assumes "derivation_ctxt_st A ts Cs qs"
    and "i < length Cs" and "j ≤ length Cs" and "i < j"
  shows "qs ! j ∈ ta_res A (fold (∘⇩_c) (take (j - i) (drop i Cs)) □)⟨Var (qs ! i)⟩"
  using assms
proof (induct "j - i" arbitrary: j i)
  case (Suc x)
  then obtain n where j [simp]: "j = Suc n" by (meson lessE)
  then have r: "x = n - i" "Suc n - i = 1 + (n - i)" using Suc(2, 6) by linarith+  
  then show ?case using Suc(1)[OF r(1)] Suc(2-) unfolding j r(2) take_add[of "n - i" 1]
    by (cases "i = n") (auto simp: take_Suc_conv_app_nth ta_res_ctxt)
qed auto

lemma terms_pos_ground:
  assumes "ground t" and "p ∈ poss t"
  shows "∀ s ∈ set (terms_pos t p). ground s"
  using assms by (induct p arbitrary: t, auto)
    (metis ctxt_supt_id empty_pos_in_poss ground_ctxt_apply poss_Cons_poss subt_at_poss)


(* TODO replace ta_eps_ta_states in Tree_Automata, as this version is more generall
   and the other version follows trivially *)
lemma ta_eps_ta_states:
  assumes "(q, q') ∈ (ta_eps TA)^*"
  shows "q' ∈ {q} ∪ ta_states TA" 
  using assms by (induct)(auto simp: ta_states_def)

lemma list_card_smaller_contains_eq_elemens:
  assumes "length qs = n" and "card (set qs) < n"
  shows "∃ i < length qs. ∃ j < length qs. i < j ∧ qs ! i = qs ! j"
  using assms by auto (metis distinct_card distinct_conv_nth linorder_neqE_nat) 

(* Main lemma *)

lemma pigeonhole_tree_automata:
  assumes "card (ta_states A) < depth t" and "finite (ta_states A)"
      and "q ∈ ta_res A t" and "ground t"
    shows "∃ C C2 v p. C2 ≠ □ ∧ C⟨C2⟨v⟩⟩ = t ∧ p ∈ ta_res A v ∧ p ∈ ta_res A C2⟨Var p⟩ ∧ q ∈ ta_res A C⟨Var p⟩"
proof -
  obtain p n where p: "p ∈ poss t"  "depth t = n" and
    card: "card (ta_states A) < n" "length (terms_pos t p) = (n + 1)"
    using assms(1) term_chain_depth by blast
  from ta_res_derivation_chain_terms_pos_exist[OF p(1) assms(3)] obtain Cs qs where
    derivation: "derivation A (terms_pos t p) Cs qs ∧ last qs = q" by blast
  then have d_ctxt: "derivation_ctxt_st A (terms_pos t p) Cs qs" "derivation_ctxt (terms_pos t p) Cs"
    by (auto simp: derivation_def)
  then have l: "length Cs = length qs - 1" by (auto simp: derivation_def)
  from derivation have sub: "set qs ⊆ ta_states A" "length qs = length (terms_pos t p)" unfolding derivation_def
    using ta_res_states[of "t |_ i" A for i] terms_pos_ground[OF assms(4) p(1)]
    by (auto) (metis in_mono in_set_conv_nth ta_res_states terms_pos_length)
  then have "∃ i < length (butlast qs). ∃ j < length (butlast qs). i < j ∧ (butlast qs) ! i = (butlast qs) ! j"
    using card(1, 2) assms(2) card_mono[OF assms(2) sub(1)]
    using list_card_smaller_contains_eq_elemens[of "butlast qs" n] unfolding length_tl
    by auto (meson card_mono dual_order.strict_trans2 dual_order.trans in_set_butlastD subsetI)
  then obtain i j where len: "i < length Cs" "j < length Cs" and less: "i < j" and st: "qs ! i = qs ! j"
    unfolding l length_butlast by (auto simp: nth_butlast)
  then have gt_0: "0 < length Cs" and gt_j: "0 < j" using len less less_trans by auto
  have "fold (∘⇩_c) (take (j - i) (drop i Cs)) □ ≠ □"
    using derivation_ctxt_terms_pos_sub_list_nt_empty[OF p(1) d_ctxt(2) len(1) order.strict_implies_order[OF len(2)] less] .
  moreover have "(fold (∘⇩_c) (take (length Cs - j) (drop j Cs)) □)⟨terms_pos t p ! j⟩ = terms_pos t p ! length Cs"
    using derivation_ctxt_comp_term[OF d_ctxt(2) len(2) _ len(2)] len(2) by auto
  moreover have "(fold (∘⇩_c) (take (j - i) (drop i Cs)) □)⟨terms_pos t p ! i⟩ = terms_pos t p ! j"
    using derivation_ctxt_comp_term[OF d_ctxt(2) len(1) _ less] len(2) by auto
  moreover have "qs ! j ∈ ta_res A (terms_pos t p ! i)" using derivation len
    by (auto simp: derivation_def st[symmetric])
  moreover have "qs ! j ∈ ta_res A (fold (∘⇩_c) (take (j - i) (drop i Cs)) □)⟨Var (qs ! i)⟩"
    using derivation_ctxt_comp_states[OF d_ctxt(1) len(1) _ less] len(2) st by simp
  moreover have "q ∈ ta_res A (fold (∘⇩_c) (take (length Cs - j) (drop j Cs)) □)⟨Var (qs ! j)⟩"
    using derivation_ctxt_comp_states[OF d_ctxt(1) len(2) _ len(2)] conjunct2[OF derivation]
    by (auto simp: l sub(2)) (metis Suc_inject Zero_not_Suc d_ctxt(1) l last_conv_nth list.size(3) terms_pos_length)
  ultimately show ?thesis using st d_ctxt(1) by (metis Suc_inject terms_pos_last terms_pos_length)
qed

end