Catalog/Sparsity/ChernoffBound/Full.lean
| 1 | import Mathlib.Probability.Independence.Basic |
| 2 | import Mathlib.Probability.Moments.Basic |
| 3 | import Mathlib.MeasureTheory.Integral.Bochner.Basic |
| 4 | import Mathlib.Analysis.SpecialFunctions.Log.Deriv |
| 5 | import Mathlib.Analysis.SpecialFunctions.Log.NegMulLog |
| 6 | import Mathlib.Analysis.Calculus.Deriv.MeanValue |
| 7 | |
| 8 | namespace Catalog.Sparsity.ChernoffBound |
| 9 | |
| 10 | open MeasureTheory ProbabilityTheory Real Finset |
| 11 | open scoped ENNReal |
| 12 | |
| 13 | /-! ## Bernoulli MGF bound |
| 14 | |
| 15 | For X ∈ {0,1} a.s. with E[X] = p, the MGF satisfies |
| 16 | mgf X μ t ≤ exp(p·(eᵗ − 1)). |
| 17 | |
| 18 | Proof idea: mgf X μ t = E[eᵗˣ] = (1−p)·1 + p·eᵗ = 1 + p·(eᵗ−1) ≤ exp(p·(eᵗ−1)) |
| 19 | using `Real.add_one_le_exp`. -/ |
| 20 | |
| 21 | lemma bernoulli_mgf_le |
| 22 | {Ω : Type*} [MeasurableSpace Ω] {μ : Measure Ω} [IsProbabilityMeasure μ] |
| 23 | {X : Ω → ℝ} {p : ℝ} (t : ℝ) |
| 24 | (_hp : p ∈ Set.Icc 0 1) |
| 25 | (hX_meas : Measurable X) |
| 26 | (hX_bern : ∀ᵐ ω ∂μ, X ω = 0 ∨ X ω = 1) |
| 27 | (hX_mean : ∫ ω, X ω ∂μ = p) : |
| 28 | mgf X μ t ≤ exp (p * (exp t - 1)) := by |
| 29 | simp only [mgf] |
| 30 | -- Rewrite exp(t*X ω) = 1 + X ω * (exp t - 1) a.e. using Bernoulli property |
| 31 | have h_ae : (fun x => rexp (t * X x)) =ᵐ[μ] (fun ω => 1 + X ω * (rexp t - 1)) := by |
| 32 | filter_upwards [hX_bern] with ω hω |
| 33 | rcases hω with h | h <;> simp [h] |
| 34 | rw [integral_congr_ae h_ae] |
| 35 | -- X is integrable (bounded in {0,1} a.e., hence dominated by constant 1) |
| 36 | have hX_int : Integrable X μ := by |
| 37 | apply Integrable.mono (integrable_const (1 : ℝ)) hX_meas.aestronglyMeasurable |
| 38 | filter_upwards [hX_bern] with ω hω |
| 39 | rcases hω with h | h <;> simp [h] |
| 40 | -- Split: ∫(1 + X*(exp t − 1)) = 1 + p*(exp t − 1) |
| 41 | rw [integral_add (integrable_const _) (hX_int.mul_const _)] |
| 42 | simp [integral_mul_const, hX_mean] |
| 43 | -- Apply 1 + x ≤ exp x |
| 44 | linarith [Real.add_one_le_exp (p * (rexp t - 1))] |
| 45 | |
| 46 | /-! ## Real-analysis inequalities for tail optimization -/ |
| 47 | |
| 48 | /-- Upper tail inequality: (1+δ)·log(1+δ) − δ ≥ δ²/3 for δ ∈ (0,1). |
| 49 | |
| 50 | Proof: Use 2δ/(δ+2) ≤ log(1+δ) from `le_log_one_add_of_nonneg`, multiply by (1+δ), |
| 51 | compute (1+δ)·2δ/(δ+2) − δ = δ²/(δ+2) ≥ δ²/3 since δ+2 ≤ 3. -/ |
| 52 | lemma upper_log_inequality {δ : ℝ} (hδ : δ ∈ Set.Ioo 0 1) : |
| 53 | δ ^ 2 / 3 ≤ (1 + δ) * log (1 + δ) - δ := by |
| 54 | have hδ1 : 0 ≤ δ := hδ.1.le |
| 55 | have h_denom : (0 : ℝ) < δ + 2 := by linarith |
| 56 | have h_log : 2 * δ / (δ + 2) ≤ log (1 + δ) := Real.le_log_one_add_of_nonneg hδ1 |
| 57 | have h_mul : (1 + δ) * (2 * δ / (δ + 2)) ≤ (1 + δ) * log (1 + δ) := |
| 58 | mul_le_mul_of_nonneg_left h_log (by linarith) |
| 59 | have h_simp : (1 + δ) * (2 * δ / (δ + 2)) - δ = δ ^ 2 / (δ + 2) := by |
| 60 | field_simp; ring |
| 61 | -- δ²/(δ+2) ≥ δ²/3 since δ+2 ≤ 3 |
| 62 | have h_bound : δ ^ 2 / 3 ≤ δ ^ 2 / (δ + 2) := |
| 63 | div_le_div_of_nonneg_left (sq_nonneg δ) h_denom (by linarith [hδ.2]) |
| 64 | linarith |
| 65 | |
| 66 | /-- Lower tail inequality: δ + (1−δ)·log(1−δ) ≥ δ²/3 for δ ∈ (0,1). |
| 67 | |
| 68 | Proof: Let f(x) = x + (1−x)·log(1−x) − x²/3. Show f(0) = 0 and f' ≥ 0 on (0,1) |
| 69 | via −log(1−x) ≥ 2x/(2−x) ≥ 2x/3 (using `le_log_one_add_of_nonneg`), so f is |
| 70 | monotone increasing, hence f(δ) ≥ f(0) = 0. -/ |
| 71 | lemma lower_log_inequality {δ : ℝ} (hδ : δ ∈ Set.Ioo 0 1) : |
| 72 | δ ^ 2 / 3 ≤ δ + (1 - δ) * log (1 - δ) := by |
| 73 | suffices h : 0 ≤ δ + (1 - δ) * log (1 - δ) - δ ^ 2 / 3 by linarith |
| 74 | -- Define f and show it is monotone increasing from f(0) = 0 |
| 75 | let f : ℝ → ℝ := fun x => x + (1 - x) * Real.log (1 - x) - x ^ 2 / 3 |
| 76 | -- HasDerivAt for f on (0,1): derivative = −log(1−x) − 2x/3 |
| 77 | have hd_at : ∀ x : ℝ, x ∈ Set.Ioo (0:ℝ) 1 → |
| 78 | HasDerivAt f (-Real.log (1 - x) - 2 * x / 3) x := by |
| 79 | intro x hx |
| 80 | have h1mx_ne : (1 : ℝ) - x ≠ 0 := by linarith [hx.2] |
| 81 | have hd_1mx : HasDerivAt (fun x => 1 - x) (-1) x := by |
| 82 | simpa using (hasDerivAt_const x (1:ℝ)).sub (hasDerivAt_id x) |
| 83 | -- (1−x)·log(1−x) = −negMulLog(1−x), derivative = −log(1−x) − 1 |
| 84 | have hd_nml : HasDerivAt (fun x => (1 - x) * Real.log (1 - x)) |
| 85 | (-Real.log (1 - x) - 1) x := by |
| 86 | have key : (fun x : ℝ => (1 - x) * Real.log (1 - x)) = |
| 87 | fun x => -Real.negMulLog (1 - x) := by |
| 88 | ext y; simp [Real.negMulLog_def] |
| 89 | rw [key] |
| 90 | have h := ((Real.hasDerivAt_negMulLog h1mx_ne).comp x hd_1mx).neg |
| 91 | convert h using 1; ring |
| 92 | have hd_sq : HasDerivAt (fun x => x ^ 2 / 3) (2 * x / 3) x := by |
| 93 | convert (hasDerivAt_pow 2 x).div_const (3 : ℝ) using 1 |
| 94 | push_cast; ring |
| 95 | have hd_sum : HasDerivAt (fun x => x + (1 - x) * Real.log (1 - x)) |
| 96 | (1 + (-Real.log (1 - x) - 1)) x := (hasDerivAt_id x).add hd_nml |
| 97 | have hd_f : HasDerivAt f (1 + (-Real.log (1 - x) - 1) - 2 * x / 3) x := |
| 98 | hd_sum.sub hd_sq |
| 99 | convert hd_f using 1; ring |
| 100 | -- f is monotone on [0,1] |
| 101 | have hf_mono : MonotoneOn f (Set.Icc 0 1) := by |
| 102 | apply monotoneOn_of_deriv_nonneg (convex_Icc 0 1) |
| 103 | · -- Continuity: f = x − negMulLog(1−x) − x²/3, all terms continuous |
| 104 | have : f = fun x => x - Real.negMulLog (1 - x) - x ^ 2 / 3 := by |
| 105 | ext x; simp [f, Real.negMulLog_def] |
| 106 | rw [this]; fun_prop |
| 107 | · -- Differentiability on interior (0,1) |
| 108 | rw [interior_Icc] |
| 109 | exact fun x hx => (hd_at x hx).differentiableAt.differentiableWithinAt |
| 110 | · -- Derivative ≥ 0: −log(1−x) − 2x/3 ≥ 0 |
| 111 | rw [interior_Icc] |
| 112 | intro x hx |
| 113 | have h1mx_pos : 0 < 1 - x := by linarith [hx.2] |
| 114 | have h2mx_pos : 0 < 2 - x := by linarith [hx.2] |
| 115 | rw [(hd_at x hx).deriv] |
| 116 | -- −log(1−x) > x ≥ 2x/3, using log(y) < y − 1 for y ≠ 1 |
| 117 | have h_log_strict : Real.log (1 - x) < -x := by |
| 118 | have h1mx_ne1 : (1 : ℝ) - x ≠ 1 := by linarith [hx.1] |
| 119 | linarith [Real.log_lt_sub_one_of_pos h1mx_pos h1mx_ne1] |
| 120 | linarith [hx.1.le] |
| 121 | -- f(0) = 0, so f(δ) ≥ 0 |
| 122 | have hf0 : f 0 = 0 := by simp [f, Real.log_one] |
| 123 | have hfδ : f 0 ≤ f δ := |
| 124 | hf_mono (Set.left_mem_Icc.mpr (by linarith [hδ.2])) ⟨hδ.1.le, hδ.2.le⟩ hδ.1.le |
| 125 | exact hf0 ▸ hfδ |
| 126 | |
| 127 | /-! ## Tail bounds -/ |
| 128 | |
| 129 | /-- Upper tail: P(S ≥ (1+δ)μ) ≤ exp(−δ²μ/3). |
| 130 | |
| 131 | Proof outline: |
| 132 | 1. Apply `measure_ge_le_exp_mul_mgf` with t = log(1+δ) > 0 |
| 133 | 2. Factor mgf via `iIndepFun.mgf_sum` |
| 134 | 3. Bound each factor via `bernoulli_mgf_le` |
| 135 | 4. Simplify with t = log(1+δ) so exp(t)−1 = δ |
| 136 | 5. Apply `upper_log_inequality` -/ |
| 137 | lemma chernoff_upper_tail |
| 138 | {Ω : Type*} [MeasurableSpace Ω] {μ : Measure Ω} [IsProbabilityMeasure μ] |
| 139 | {n : ℕ} {X : Fin n → Ω → ℝ} {p : ℝ} |
| 140 | (hp : p ∈ Set.Icc 0 1) |
| 141 | (hX_meas : ∀ i, Measurable (X i)) |
| 142 | (hX_indep : iIndepFun X μ) |
| 143 | (hX_bern : ∀ i, ∀ᵐ ω ∂μ, X i ω = 0 ∨ X i ω = 1) |
| 144 | (hX_mean : ∀ i, ∫ ω, X i ω ∂μ = p) |
| 145 | {δ : ℝ} (hδ : δ ∈ Set.Ioo 0 1) : |
| 146 | μ.real {ω | (1 + δ) * (↑n * p) ≤ ∑ i : Fin n, X i ω} ≤ |
| 147 | exp (-(δ ^ 2 * (↑n * p) / 3)) := by |
| 148 | have hδ1_pos : (0 : ℝ) < 1 + δ := by linarith [hδ.1] |
| 149 | set t := Real.log (1 + δ) with ht_def |
| 150 | have ht_pos : 0 < t := Real.log_pos (by linarith [hδ.1]) |
| 151 | have hexp_t : Real.exp t = 1 + δ := Real.exp_log hδ1_pos |
| 152 | have hnp_nn : (0 : ℝ) ≤ ↑n * p := mul_nonneg (Nat.cast_nonneg _) hp.1 |
| 153 | -- Combine a.e. Bernoulli conditions into one |
| 154 | have h_bern_ae : ∀ᵐ ω ∂μ, ∀ i : Fin n, X i ω = 0 ∨ X i ω = 1 := |
| 155 | Filter.eventually_all.mpr hX_bern |
| 156 | -- Measurability of the sum S ω = ∑ Xi ω |
| 157 | have h_meas_S : Measurable (fun ω => ∑ i : Fin n, X i ω) := |
| 158 | Finset.measurable_sum Finset.univ (fun i _ => hX_meas i) |
| 159 | -- Integrability: S ω ≤ n a.e., so exp(t*S) ≤ exp(t*n) |
| 160 | have h_int : Integrable (fun ω => Real.exp (t * ∑ i : Fin n, X i ω)) μ := by |
| 161 | apply Integrable.mono' (integrable_const (Real.exp (t * ↑n))) |
| 162 | · exact (measurable_const.mul h_meas_S).exp.aestronglyMeasurable |
| 163 | · filter_upwards [h_bern_ae] with ω hω |
| 164 | simp only [Real.norm_of_nonneg (Real.exp_nonneg _)] |
| 165 | apply Real.exp_le_exp.mpr (mul_le_mul_of_nonneg_left _ ht_pos.le) |
| 166 | calc ∑ i : Fin n, X i ω |
| 167 | ≤ ∑ _i : Fin n, (1 : ℝ) := |
| 168 | Finset.sum_le_sum (fun i _ => by rcases hω i with h | h <;> simp [h]) |
| 169 | _ = ↑n := by simp |
| 170 | -- Chernoff-Markov step: μ.real {S ≥ ε} ≤ exp(-t·ε) · mgf(S) |
| 171 | -- h_chernoff uses mgf (fun ω => ∑ Xi ω) μ t; bridge to mgf (∑ Xi) μ t via Finset.sum_apply |
| 172 | have h_chernoff := ProbabilityTheory.measure_ge_le_exp_mul_mgf |
| 173 | ((1 + δ) * (↑n * p)) ht_pos.le h_int |
| 174 | have h_sum_fun_eq : (fun ω : Ω => ∑ i : Fin n, X i ω) = ∑ i : Fin n, X i := by |
| 175 | ext ω; simp [Finset.sum_apply] |
| 176 | rw [h_sum_fun_eq] at h_chernoff |
| 177 | -- Factor: mgf(∑ Xi) = ∏ mgf(Xi) |
| 178 | have h_mgf : mgf (∑ i : Fin n, X i) μ t = ∏ i : Fin n, mgf (X i) μ t := |
| 179 | hX_indep.mgf_sum (fun i => hX_meas i) Finset.univ |
| 180 | -- Each factor ≤ exp(p * δ) [since exp(t) - 1 = δ] |
| 181 | have h_fac : ∀ i : Fin n, mgf (X i) μ t ≤ Real.exp (p * δ) := fun i => |
| 182 | calc mgf (X i) μ t |
| 183 | ≤ Real.exp (p * (Real.exp t - 1)) := |
| 184 | bernoulli_mgf_le t hp (hX_meas i) (hX_bern i) (hX_mean i) |
| 185 | _ = Real.exp (p * δ) := by rw [hexp_t]; congr 1; ring |
| 186 | -- Product bound: ∏ mgf(Xi) ≤ exp(n*p*δ) |
| 187 | have h_prod : ∏ i : Fin n, mgf (X i) μ t ≤ Real.exp (↑n * p * δ) := |
| 188 | calc ∏ i : Fin n, mgf (X i) μ t |
| 189 | ≤ ∏ _i : Fin n, Real.exp (p * δ) := |
| 190 | Finset.prod_le_prod (fun i _ => mgf_nonneg) (fun i _ => h_fac i) |
| 191 | _ = Real.exp (↑n * p * δ) := by |
| 192 | simp only [Finset.prod_const, Finset.card_univ, Fintype.card_fin] |
| 193 | rw [← Real.exp_nat_mul]; congr 1; ring |
| 194 | -- Final estimate: combine and apply upper_log_inequality |
| 195 | calc μ.real {ω | (1 + δ) * (↑n * p) ≤ ∑ i : Fin n, X i ω} |
| 196 | ≤ Real.exp (-t * ((1 + δ) * (↑n * p))) * mgf (∑ i : Fin n, X i) μ t := |
| 197 | h_chernoff |
| 198 | _ = Real.exp (-t * ((1 + δ) * (↑n * p))) * (∏ i : Fin n, mgf (X i) μ t) := by |
| 199 | rw [h_mgf] |
| 200 | _ ≤ Real.exp (-t * ((1 + δ) * (↑n * p))) * Real.exp (↑n * p * δ) := |
| 201 | mul_le_mul_of_nonneg_left h_prod (Real.exp_nonneg _) |
| 202 | _ ≤ Real.exp (-(δ ^ 2 * (↑n * p) / 3)) := by |
| 203 | rw [← Real.exp_add] |
| 204 | apply Real.exp_le_exp.mpr |
| 205 | have h_ineq : δ ^ 2 / 3 ≤ (1 + δ) * t - δ := upper_log_inequality hδ |
| 206 | nlinarith [mul_nonneg hnp_nn (by linarith : (0 : ℝ) ≤ (1 + δ) * t - δ - δ ^ 2 / 3)] |
| 207 | |
| 208 | /-- Lower tail: P(S ≤ (1−δ)μ) ≤ exp(−δ²μ/3). |
| 209 | |
| 210 | Proof outline: |
| 211 | 1. Apply `measure_le_le_exp_mul_mgf` with t = log(1−δ) < 0 |
| 212 | 2. Factor mgf via `iIndepFun.mgf_sum` |
| 213 | 3. Bound each factor via `bernoulli_mgf_le` |
| 214 | 4. Simplify with t = log(1−δ) so exp(t)−1 = −δ |
| 215 | 5. Apply `lower_log_inequality` -/ |
| 216 | lemma chernoff_lower_tail |
| 217 | {Ω : Type*} [MeasurableSpace Ω] {μ : Measure Ω} [IsProbabilityMeasure μ] |
| 218 | {n : ℕ} {X : Fin n → Ω → ℝ} {p : ℝ} |
| 219 | (hp : p ∈ Set.Icc 0 1) |
| 220 | (hX_meas : ∀ i, Measurable (X i)) |
| 221 | (hX_indep : iIndepFun X μ) |
| 222 | (hX_bern : ∀ i, ∀ᵐ ω ∂μ, X i ω = 0 ∨ X i ω = 1) |
| 223 | (hX_mean : ∀ i, ∫ ω, X i ω ∂μ = p) |
| 224 | {δ : ℝ} (hδ : δ ∈ Set.Ioo 0 1) : |
| 225 | μ.real {ω | ∑ i : Fin n, X i ω ≤ (1 - δ) * (↑n * p)} ≤ |
| 226 | exp (-(δ ^ 2 * (↑n * p) / 3)) := by |
| 227 | have h1mδ_pos : (0 : ℝ) < 1 - δ := by linarith [hδ.2] |
| 228 | set t := Real.log (1 - δ) with ht_def |
| 229 | have ht_neg : t < 0 := Real.log_neg h1mδ_pos (by linarith [hδ.1]) |
| 230 | have hexp_t : Real.exp t = 1 - δ := Real.exp_log h1mδ_pos |
| 231 | have hnp_nn : (0 : ℝ) ≤ ↑n * p := mul_nonneg (Nat.cast_nonneg _) hp.1 |
| 232 | have h_bern_ae : ∀ᵐ ω ∂μ, ∀ i : Fin n, X i ω = 0 ∨ X i ω = 1 := |
| 233 | Filter.eventually_all.mpr hX_bern |
| 234 | have h_meas_S : Measurable (fun ω => ∑ i : Fin n, X i ω) := |
| 235 | Finset.measurable_sum Finset.univ (fun i _ => hX_meas i) |
| 236 | -- Integrability: t < 0 and S ≥ 0 a.e., so t*S ≤ 0, exp(t*S) ≤ 1 |
| 237 | have h_int : Integrable (fun ω => Real.exp (t * ∑ i : Fin n, X i ω)) μ := by |
| 238 | apply Integrable.mono' (integrable_const 1) |
| 239 | · exact (measurable_const.mul h_meas_S).exp.aestronglyMeasurable |
| 240 | · filter_upwards [h_bern_ae] with ω hω |
| 241 | simp only [Real.norm_of_nonneg (Real.exp_nonneg _)] |
| 242 | rw [Real.exp_le_one_iff] |
| 243 | apply mul_nonpos_of_nonpos_of_nonneg ht_neg.le |
| 244 | exact Finset.sum_nonneg (fun i _ => by rcases hω i with h | h <;> simp [h]) |
| 245 | have h_chernoff := ProbabilityTheory.measure_le_le_exp_mul_mgf |
| 246 | ((1 - δ) * (↑n * p)) ht_neg.le h_int |
| 247 | have h_sum_fun_eq : (fun ω : Ω => ∑ i : Fin n, X i ω) = ∑ i : Fin n, X i := by |
| 248 | ext ω; simp [Finset.sum_apply] |
| 249 | rw [h_sum_fun_eq] at h_chernoff |
| 250 | have h_mgf : mgf (∑ i : Fin n, X i) μ t = ∏ i : Fin n, mgf (X i) μ t := |
| 251 | hX_indep.mgf_sum (fun i => hX_meas i) Finset.univ |
| 252 | -- Each factor ≤ exp(-p*δ) [since exp(t) - 1 = -δ] |
| 253 | have h_fac : ∀ i : Fin n, mgf (X i) μ t ≤ Real.exp (-(p * δ)) := fun i => |
| 254 | calc mgf (X i) μ t |
| 255 | ≤ Real.exp (p * (Real.exp t - 1)) := |
| 256 | bernoulli_mgf_le t hp (hX_meas i) (hX_bern i) (hX_mean i) |
| 257 | _ = Real.exp (-(p * δ)) := by rw [hexp_t]; congr 1; ring |
| 258 | have h_prod : ∏ i : Fin n, mgf (X i) μ t ≤ Real.exp (-(↑n * p * δ)) := |
| 259 | calc ∏ i : Fin n, mgf (X i) μ t |
| 260 | ≤ ∏ _i : Fin n, Real.exp (-(p * δ)) := |
| 261 | Finset.prod_le_prod (fun i _ => mgf_nonneg) (fun i _ => h_fac i) |
| 262 | _ = Real.exp (-(↑n * p * δ)) := by |
| 263 | simp only [Finset.prod_const, Finset.card_univ, Fintype.card_fin] |
| 264 | rw [← Real.exp_nat_mul]; congr 1; ring |
| 265 | calc μ.real {ω | ∑ i : Fin n, X i ω ≤ (1 - δ) * (↑n * p)} |
| 266 | ≤ Real.exp (-t * ((1 - δ) * (↑n * p))) * mgf (∑ i : Fin n, X i) μ t := |
| 267 | h_chernoff |
| 268 | _ = Real.exp (-t * ((1 - δ) * (↑n * p))) * (∏ i : Fin n, mgf (X i) μ t) := by |
| 269 | rw [h_mgf] |
| 270 | _ ≤ Real.exp (-t * ((1 - δ) * (↑n * p))) * Real.exp (-(↑n * p * δ)) := |
| 271 | mul_le_mul_of_nonneg_left h_prod (Real.exp_nonneg _) |
| 272 | _ ≤ Real.exp (-(δ ^ 2 * (↑n * p) / 3)) := by |
| 273 | rw [← Real.exp_add] |
| 274 | apply Real.exp_le_exp.mpr |
| 275 | have h_ineq : δ ^ 2 / 3 ≤ δ + (1 - δ) * t := lower_log_inequality hδ |
| 276 | nlinarith [mul_nonneg hnp_nn (by linarith : (0 : ℝ) ≤ δ + (1 - δ) * t - δ ^ 2 / 3)] |
| 277 | |
| 278 | /-! ## Main theorem -/ |
| 279 | |
| 280 | /-- Theorem 3.3 (Chernoff's bound). For i.i.d. Bernoulli(p) random variables |
| 281 | X₁, ..., Xₙ with sum S and mean μ = np, for every δ ∈ (0, 1): |
| 282 | P(|S - μ| ≥ δμ) ≤ 2 exp(-δ²μ/3). |
| 283 | |
| 284 | Proof: split absolute value into upper/lower tails, union bound, apply |
| 285 | `chernoff_upper_tail` and `chernoff_lower_tail`. -/ |
| 286 | theorem chernoffBound |
| 287 | {Ω : Type*} [MeasurableSpace Ω] {μ : MeasureTheory.Measure Ω} |
| 288 | [MeasureTheory.IsProbabilityMeasure μ] |
| 289 | {n : ℕ} {X : Fin n → Ω → ℝ} {p : ℝ} |
| 290 | (hp : p ∈ Set.Icc 0 1) |
| 291 | (hX_meas : ∀ i, Measurable (X i)) |
| 292 | (hX_indep : ProbabilityTheory.iIndepFun X μ) |
| 293 | (hX_bern : ∀ i, ∀ᵐ ω ∂μ, X i ω = 0 ∨ X i ω = 1) |
| 294 | (hX_mean : ∀ i, ∫ ω, X i ω ∂μ = p) |
| 295 | {δ : ℝ} (hδ : δ ∈ Set.Ioo 0 1) : |
| 296 | μ.real {ω | δ * (↑n * p) ≤ |(∑ i : Fin n, X i ω) - ↑n * p|} ≤ |
| 297 | 2 * Real.exp (-(δ ^ 2 * (↑n * p) / 3)) := by |
| 298 | -- Shorthand sets |
| 299 | let A := {ω : Ω | (1 + δ) * (↑n * p) ≤ ∑ i : Fin n, X i ω} |
| 300 | let B := {ω : Ω | ∑ i : Fin n, X i ω ≤ (1 - δ) * (↑n * p)} |
| 301 | -- {|S − np| ≥ δnp} ⊆ A ∪ B |
| 302 | have h_subset : {ω : Ω | δ * (↑n * p) ≤ |(∑ i : Fin n, X i ω) - ↑n * p|} ⊆ A ∪ B := by |
| 303 | intro ω hω |
| 304 | simp only [A, B, Set.mem_union, Set.mem_setOf_eq] |
| 305 | rcases lt_or_ge ((∑ i : Fin n, X i ω) - ↑n * p) 0 with h | h |
| 306 | · right |
| 307 | have : δ * (↑n * p) ≤ -((∑ i : Fin n, X i ω) - ↑n * p) := |
| 308 | (abs_of_neg h) ▸ hω |
| 309 | linarith |
| 310 | · left |
| 311 | have : δ * (↑n * p) ≤ (∑ i : Fin n, X i ω) - ↑n * p := |
| 312 | (abs_of_nonneg h) ▸ hω |
| 313 | linarith |
| 314 | -- Tail bounds |
| 315 | have h_upper : μ.real A ≤ Real.exp (-(δ ^ 2 * (↑n * p) / 3)) := |
| 316 | chernoff_upper_tail hp hX_meas hX_indep hX_bern hX_mean hδ |
| 317 | have h_lower : μ.real B ≤ Real.exp (-(δ ^ 2 * (↑n * p) / 3)) := |
| 318 | chernoff_lower_tail hp hX_meas hX_indep hX_bern hX_mean hδ |
| 319 | -- Combine via union bound |
| 320 | calc μ.real {ω | δ * (↑n * p) ≤ |(∑ i : Fin n, X i ω) - ↑n * p|} |
| 321 | ≤ μ.real (A ∪ B) := measureReal_mono h_subset (measure_ne_top μ _) |
| 322 | _ ≤ μ.real A + μ.real B := measureReal_union_le A B |
| 323 | _ ≤ Real.exp (-(δ ^ 2 * (↑n * p) / 3)) + Real.exp (-(δ ^ 2 * (↑n * p) / 3)) := |
| 324 | add_le_add h_upper h_lower |
| 325 | _ = 2 * Real.exp (-(δ ^ 2 * (↑n * p) / 3)) := by ring |
| 326 | |
| 327 | end Catalog.Sparsity.ChernoffBound |
| 328 |