Catalog/VC/ShatterFunctionLemma/Full.lean
| 1 | import Catalog.VC.ShatterFunction.Full |
| 2 | import Catalog.VC.VCDimension.Full |
| 3 | import Mathlib.Algebra.Order.BigOperators.Group.Finset |
| 4 | |
| 5 | namespace Catalog.VC.ShatterFunctionLemma |
| 6 | |
| 7 | /-- The binomial sum `Φ_d(m) = ∑_{k=0}^{d} C(m, k)`. Upper bound for the |
| 8 | shatter function of a family with VC-dimension at most `d`. -/ |
| 9 | def binomialSum (d m : ℕ) : ℕ := |
| 10 | (Finset.range (d + 1)).sum (fun k => m.choose k) |
| 11 | |
| 12 | section Helpers |
| 13 | |
| 14 | variable {α : Type*} [DecidableEq α] [Fintype α] |
| 15 | |
| 16 | /-- Restrict a set family to a subtype determined by `Y`. -/ |
| 17 | noncomputable def restrictFamily (𝒜 : Finset (Finset α)) (Y : Finset α) : |
| 18 | Finset (Finset { x // x ∈ Y }) := |
| 19 | 𝒜.image (fun S => S.subtype (· ∈ Y)) |
| 20 | |
| 21 | omit [Fintype α] in |
| 22 | private theorem map_inter_subtype {p : α → Prop} [DecidablePred p] |
| 23 | (T : Finset { x // p x }) (S : Finset α) : |
| 24 | (T.map (Function.Embedding.subtype p)) ∩ S = |
| 25 | (T ∩ S.subtype p).map (Function.Embedding.subtype p) := by |
| 26 | ext x |
| 27 | simp only [Finset.mem_inter, Finset.mem_map, Finset.mem_subtype, |
| 28 | Function.Embedding.subtype_apply] |
| 29 | constructor |
| 30 | · rintro ⟨⟨a, ha, rfl⟩, hxS⟩ |
| 31 | exact ⟨a, ⟨ha, hxS⟩, rfl⟩ |
| 32 | · rintro ⟨a, ⟨ha, haS⟩, rfl⟩ |
| 33 | exact ⟨⟨a, ha, rfl⟩, haS⟩ |
| 34 | |
| 35 | omit [Fintype α] in |
| 36 | private theorem card_restrictFamily_eq_card_trace (𝒜 : Finset (Finset α)) (Y : Finset α) : |
| 37 | (restrictFamily 𝒜 Y).card = (Catalog.VC.ShatterFunction.trace 𝒜 Y).card := by |
| 38 | suffices h : (restrictFamily 𝒜 Y).image (Finset.map (Function.Embedding.subtype (· ∈ Y))) |
| 39 | = Catalog.VC.ShatterFunction.trace 𝒜 Y by |
| 40 | rw [← h, Finset.card_image_of_injective _ (Finset.map_injective _)] |
| 41 | simp only [restrictFamily, Catalog.VC.ShatterFunction.trace, Finset.image_image] |
| 42 | congr 1; funext S |
| 43 | simp only [Function.comp_apply, Finset.subtype_map] |
| 44 | ext x; simp [Finset.mem_filter, Finset.mem_inter] |
| 45 | |
| 46 | omit [Fintype α] in |
| 47 | private theorem shatters_map_of_shatters_restrictFamily |
| 48 | (𝒜 : Finset (Finset α)) (Y : Finset α) |
| 49 | (T : Finset { x // x ∈ Y }) (hT : (restrictFamily 𝒜 Y).Shatters T) : |
| 50 | 𝒜.Shatters (T.map (Function.Embedding.subtype (· ∈ Y))) := by |
| 51 | intro V hV |
| 52 | -- Construct the preimage U ⊆ T |
| 53 | set U := T.filter (fun (a : { x // x ∈ Y }) => a.val ∈ V) with hU_def |
| 54 | have hUT : U ⊆ T := Finset.filter_subset _ _ |
| 55 | -- Show U.map embed = V |
| 56 | have hUV : U.map (Function.Embedding.subtype (· ∈ Y)) = V := by |
| 57 | ext x |
| 58 | simp only [hU_def, Finset.mem_map, Finset.mem_filter, Function.Embedding.subtype_apply] |
| 59 | constructor |
| 60 | · rintro ⟨a, ⟨_, hav⟩, rfl⟩; exact hav |
| 61 | · intro hx |
| 62 | obtain ⟨a, haT, rfl⟩ := Finset.mem_map.mp (hV hx) |
| 63 | exact ⟨a, ⟨haT, hx⟩, rfl⟩ |
| 64 | -- Apply shattering hypothesis to get A' ∈ restrictFamily with T ∩ A' = U |
| 65 | obtain ⟨A', hA'mem, hA'eq⟩ := hT hUT |
| 66 | -- Unfold restrictFamily: A' = S.subtype p for some S ∈ 𝒜 |
| 67 | simp only [restrictFamily, Finset.mem_image] at hA'mem |
| 68 | obtain ⟨S, hS𝒜, rfl⟩ := hA'mem |
| 69 | -- S is our witness |
| 70 | exact ⟨S, hS𝒜, by rw [map_inter_subtype, hA'eq, hUV]⟩ |
| 71 | |
| 72 | omit [Fintype α] in |
| 73 | private theorem vcDim_restrictFamily_le (𝒜 : Finset (Finset α)) (Y : Finset α) : |
| 74 | (restrictFamily 𝒜 Y).vcDim ≤ 𝒜.vcDim := by |
| 75 | simp only [Finset.vcDim] |
| 76 | apply Finset.sup_le |
| 77 | intro T hT |
| 78 | rw [Finset.mem_shatterer] at hT |
| 79 | rw [← Finset.card_map (Function.Embedding.subtype (· ∈ Y))] |
| 80 | exact (shatters_map_of_shatters_restrictFamily 𝒜 Y T hT).card_le_vcDim |
| 81 | |
| 82 | private theorem binomialSum_eq_Iic_sum (d m : ℕ) : |
| 83 | binomialSum d m = (Finset.Iic d).sum (fun k => m.choose k) := by |
| 84 | unfold binomialSum; rw [Nat.range_succ_eq_Iic] |
| 85 | |
| 86 | private theorem Iic_sum_choose_mono (m : ℕ) {a b : ℕ} (h : a ≤ b) : |
| 87 | (Finset.Iic a).sum (fun k => m.choose k) ≤ (Finset.Iic b).sum (fun k => m.choose k) := |
| 88 | Finset.sum_le_sum_of_subset_of_nonneg (Finset.Iic_subset_Iic.mpr h) |
| 89 | (fun _ _ _ => Nat.zero_le _) |
| 90 | |
| 91 | end Helpers |
| 92 | |
| 93 | /-- The shatter function of a family with VC-dimension at most `d` is bounded |
| 94 | by the binomial sum: `π_𝒜(m) ≤ Φ_d(m)` for all `m`. (Lemma 5.9) -/ |
| 95 | theorem shatterFunctionLemma {α : Type*} [DecidableEq α] [Fintype α] |
| 96 | (𝒜 : Finset (Finset α)) {d : ℕ} (hd : 𝒜.vcDim ≤ d) (m : ℕ) : |
| 97 | Catalog.VC.ShatterFunction.shatterFun 𝒜 m ≤ binomialSum d m := by |
| 98 | unfold Catalog.VC.ShatterFunction.shatterFun |
| 99 | apply Finset.sup_le |
| 100 | intro Y hY |
| 101 | rw [Finset.mem_filter] at hY |
| 102 | obtain ⟨_, hYm⟩ := hY |
| 103 | calc (Catalog.VC.ShatterFunction.trace 𝒜 Y).card |
| 104 | = (restrictFamily 𝒜 Y).card := (card_restrictFamily_eq_card_trace 𝒜 Y).symm |
| 105 | _ ≤ (restrictFamily 𝒜 Y).shatterer.card := Finset.card_le_card_shatterer _ |
| 106 | _ ≤ (Finset.Iic (restrictFamily 𝒜 Y).vcDim).sum |
| 107 | (fun k => (Fintype.card { x // x ∈ Y }).choose k) := |
| 108 | Finset.card_shatterer_le_sum_vcDim |
| 109 | _ = (Finset.Iic (restrictFamily 𝒜 Y).vcDim).sum (fun k => m.choose k) := by |
| 110 | congr 1; rw [Fintype.card_coe, hYm] |
| 111 | _ ≤ (Finset.Iic d).sum (fun k => m.choose k) := |
| 112 | Iic_sum_choose_mono m (le_trans (vcDim_restrictFamily_le 𝒜 Y) hd) |
| 113 | _ = binomialSum d m := (binomialSum_eq_Iic_sum d m).symm |
| 114 | |
| 115 | end Catalog.VC.ShatterFunctionLemma |
| 116 |