Catalog/VC/ShatterFunctionLemma/Full.lean
1import Catalog.VC.ShatterFunction.Full
2import Catalog.VC.VCDimension.Full
3import Mathlib.Algebra.Order.BigOperators.Group.Finset
4
5namespace Catalog.VC.ShatterFunctionLemma
6
7/-- The binomial sum `Φ_d(m) = ∑_{k=0}^{d} C(m, k)`. Upper bound for the
8 shatter function of a family with VC-dimension at most `d`. -/
9def binomialSum (d m : ℕ) : ℕ :=
10 (Finset.range (d + 1)).sum (fun k => m.choose k)
11
12section Helpers
13
14variable {α : Type*} [DecidableEq α] [Fintype α]
15
16/-- Restrict a set family to a subtype determined by `Y`. -/
17noncomputable def restrictFamily (𝒜 : Finset (Finset α)) (Y : Finset α) :
18 Finset (Finset { x // x ∈ Y }) :=
19 𝒜.image (fun S => S.subtype (· ∈ Y))
20
21omit [Fintype α] in
22private theorem map_inter_subtype {p : α → Prop} [DecidablePred p]
23 (T : Finset { x // p x }) (S : Finset α) :
24 (T.map (Function.Embedding.subtype p)) ∩ S =
25 (T ∩ S.subtype p).map (Function.Embedding.subtype p) := by
26 ext x
27 simp only [Finset.mem_inter, Finset.mem_map, Finset.mem_subtype,
28 Function.Embedding.subtype_apply]
29 constructor
30 · rintro ⟨⟨a, ha, rfl⟩, hxS⟩
31 exact ⟨a, ⟨ha, hxS⟩, rfl⟩
32 · rintro ⟨a, ⟨ha, haS⟩, rfl⟩
33 exact ⟨⟨a, ha, rfl⟩, haS⟩
34
35omit [Fintype α] in
36private theorem card_restrictFamily_eq_card_trace (𝒜 : Finset (Finset α)) (Y : Finset α) :
37 (restrictFamily 𝒜 Y).card = (Catalog.VC.ShatterFunction.trace 𝒜 Y).card := by
38 suffices h : (restrictFamily 𝒜 Y).image (Finset.map (Function.Embedding.subtype (· ∈ Y)))
39 = Catalog.VC.ShatterFunction.trace 𝒜 Y by
40 rw [← h, Finset.card_image_of_injective _ (Finset.map_injective _)]
41 simp only [restrictFamily, Catalog.VC.ShatterFunction.trace, Finset.image_image]
42 congr 1; funext S
43 simp only [Function.comp_apply, Finset.subtype_map]
44 ext x; simp [Finset.mem_filter, Finset.mem_inter]
45
46omit [Fintype α] in
47private theorem shatters_map_of_shatters_restrictFamily
48 (𝒜 : Finset (Finset α)) (Y : Finset α)
49 (T : Finset { x // x ∈ Y }) (hT : (restrictFamily 𝒜 Y).Shatters T) :
50 𝒜.Shatters (T.map (Function.Embedding.subtype (· ∈ Y))) := by
51 intro V hV
52 -- Construct the preimage U ⊆ T
53 set U := T.filter (fun (a : { x // x ∈ Y }) => a.val ∈ V) with hU_def
54 have hUT : U ⊆ T := Finset.filter_subset _ _
55 -- Show U.map embed = V
56 have hUV : U.map (Function.Embedding.subtype (· ∈ Y)) = V := by
57 ext x
58 simp only [hU_def, Finset.mem_map, Finset.mem_filter, Function.Embedding.subtype_apply]
59 constructor
60 · rintro ⟨a, ⟨_, hav⟩, rfl⟩; exact hav
61 · intro hx
62 obtain ⟨a, haT, rfl⟩ := Finset.mem_map.mp (hV hx)
63 exact ⟨a, ⟨haT, hx⟩, rfl⟩
64 -- Apply shattering hypothesis to get A' ∈ restrictFamily with T ∩ A' = U
65 obtain ⟨A', hA'mem, hA'eq⟩ := hT hUT
66 -- Unfold restrictFamily: A' = S.subtype p for some S ∈ 𝒜
67 simp only [restrictFamily, Finset.mem_image] at hA'mem
68 obtain ⟨S, hS𝒜, rfl⟩ := hA'mem
69 -- S is our witness
70 exact ⟨S, hS𝒜, by rw [map_inter_subtype, hA'eq, hUV]⟩
71
72omit [Fintype α] in
73private theorem vcDim_restrictFamily_le (𝒜 : Finset (Finset α)) (Y : Finset α) :
74 (restrictFamily 𝒜 Y).vcDim ≤ 𝒜.vcDim := by
75 simp only [Finset.vcDim]
76 apply Finset.sup_le
77 intro T hT
78 rw [Finset.mem_shatterer] at hT
79 rw [← Finset.card_map (Function.Embedding.subtype (· ∈ Y))]
80 exact (shatters_map_of_shatters_restrictFamily 𝒜 Y T hT).card_le_vcDim
81
82private theorem binomialSum_eq_Iic_sum (d m : ℕ) :
83 binomialSum d m = (Finset.Iic d).sum (fun k => m.choose k) := by
84 unfold binomialSum; rw [Nat.range_succ_eq_Iic]
85
86private theorem Iic_sum_choose_mono (m : ℕ) {a b : ℕ} (h : a ≤ b) :
87 (Finset.Iic a).sum (fun k => m.choose k) ≤ (Finset.Iic b).sum (fun k => m.choose k) :=
88 Finset.sum_le_sum_of_subset_of_nonneg (Finset.Iic_subset_Iic.mpr h)
89 (fun _ _ _ => Nat.zero_le _)
90
91end Helpers
92
93/-- The shatter function of a family with VC-dimension at most `d` is bounded
94 by the binomial sum: `π_𝒜(m) ≤ Φ_d(m)` for all `m`. (Lemma 5.9) -/
95theorem shatterFunctionLemma {α : Type*} [DecidableEq α] [Fintype α]
96 (𝒜 : Finset (Finset α)) {d : ℕ} (hd : 𝒜.vcDim ≤ d) (m : ℕ) :
97 Catalog.VC.ShatterFunction.shatterFun 𝒜 m ≤ binomialSum d m := by
98 unfold Catalog.VC.ShatterFunction.shatterFun
99 apply Finset.sup_le
100 intro Y hY
101 rw [Finset.mem_filter] at hY
102 obtain ⟨_, hYm⟩ := hY
103 calc (Catalog.VC.ShatterFunction.trace 𝒜 Y).card
104 = (restrictFamily 𝒜 Y).card := (card_restrictFamily_eq_card_trace 𝒜 Y).symm
105 _ ≤ (restrictFamily 𝒜 Y).shatterer.card := Finset.card_le_card_shatterer _
106 _ ≤ (Finset.Iic (restrictFamily 𝒜 Y).vcDim).sum
107 (fun k => (Fintype.card { x // x ∈ Y }).choose k) :=
108 Finset.card_shatterer_le_sum_vcDim
109 _ = (Finset.Iic (restrictFamily 𝒜 Y).vcDim).sum (fun k => m.choose k) := by
110 congr 1; rw [Fintype.card_coe, hYm]
111 _ ≤ (Finset.Iic d).sum (fun k => m.choose k) :=
112 Iic_sum_choose_mono m (le_trans (vcDim_restrictFamily_le 𝒜 Y) hd)
113 _ = binomialSum d m := (binomialSum_eq_Iic_sum d m).symm
114
115end Catalog.VC.ShatterFunctionLemma
116