FM Stats Short Half
This commit is contained in:
parent
e131589053
commit
1d98734702
|
@ -0,0 +1,348 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%matplotlib widget\n",
|
||||||
|
"from collections.abc import Iterable\n",
|
||||||
|
"\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from scipy.stats import chi2\n",
|
||||||
|
"import math\n",
|
||||||
|
"from fractions import Fraction as F"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Main\n",
|
||||||
|
"This contains automations for A-level Further Maths ordered in the same way as they are on [integral maths](https://my.integralmaths.org/)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def std_deviation(variance):\n",
"    \"\"\"Return the standard deviation: the square root of `variance`.\"\"\"\n",
"    sigma = math.sqrt(variance)\n",
"    return sigma\n",
|
||||||
|
"\n",
|
||||||
|
"def NOT(p):\n",
"    \"\"\"Return the probability that an event with probability `p` does not occur.\"\"\"\n",
"    complement = 1 - p\n",
"    return complement\n",
|
||||||
|
"\n",
|
||||||
|
"# Independent events\n",
|
||||||
|
"def AND(*ps):\n",
"    \"\"\"Return the product of the probabilities `ps` (1 when none are given).\"\"\"\n",
"    joint = 1\n",
"    for p in ps:\n",
"        joint = joint * p\n",
"    return joint\n",
|
||||||
|
"\n",
|
||||||
|
"def OR(*ps):\n",
"    \"\"\"Return the probability that at least one of the independent events occurs.\n",
"\n",
"    Computed as 1 - P(none occur); `ps` are the individual event probabilities.\n",
"    \"\"\"\n",
"    # Unpack the complements: AND takes separate arguments, not one iterable.\n",
"    return 1 - AND(*map(NOT, ps))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Permutations and Combinations\n",
|
||||||
|
"These just use the `math.perm` and `math.comb` functions throughout, which are defined as follows.\n",
|
||||||
|
"\n",
|
||||||
|
"- Permutations (pick) are ordered\n",
|
||||||
|
"- Combinations (choose) are unordered"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def permutations(n: int, take: int) -> int:\n",
"    \"\"\"Return the number of ordered selections of `take` items from `n`.\n",
"\n",
"    Uses exact integer arithmetic (`math.perm`), so large results are not\n",
"    rounded through a float. Returns 0 when `take > n`; raises ValueError\n",
"    for negative arguments.\n",
"    \"\"\"\n",
"    return math.perm(n, take)\n",
|
||||||
|
"\n",
|
||||||
|
"def combinations(n: int, take: int) -> int:\n",
"    \"\"\"Return the number of unordered selections of `take` items from `n`.\n",
"\n",
"    Uses exact integer arithmetic (`math.comb`), so large results are not\n",
"    rounded through a float. Returns 0 when `take > n`; raises ValueError\n",
"    for negative arguments.\n",
"    \"\"\"\n",
"    return math.comb(n, take)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Discrete Random Variables (DRVs)\n",
|
||||||
|
"\n",
|
||||||
|
"This section includes basic stats operations on DRVs.\n",
|
||||||
|
"\n",
|
||||||
|
"Note:\n",
|
||||||
|
"- Expected value (expectation) = mean\n",
|
||||||
|
"- Standard deviation = sqrt(variance)\n",
|
||||||
|
"- $E(aX + b) = aE(X) + b$\n",
|
||||||
|
"- $Var(aX + b) = a^2 Var(X)$"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"class DiscreteRandomVariable:\n",
"    \"\"\"A discrete random variable held as parallel lists of values and probabilities.\"\"\"\n",
"\n",
"    values: list[F]\n",
"    probabilities: list[F]\n",
"    size: int\n",
"\n",
"    def __init__(self, items: list[tuple[F, F]]):\n",
"        \"\"\"Build the variable from (value, probability) pairs.\n",
"\n",
"        The probabilities are assumed, not checked, to sum to 1.\n",
"        \"\"\"\n",
"        self.values = [pair[0] for pair in items]\n",
"        self.probabilities = [pair[1] for pair in items]\n",
"        self.size = len(items)\n",
"\n",
"    def copy(self):\n",
"        \"\"\"Return a new variable with its own copies of both lists.\"\"\"\n",
"        clone = DiscreteRandomVariable([])\n",
"        clone.values = self.values[:]\n",
"        clone.probabilities = self.probabilities[:]\n",
"        clone.size = self.size\n",
"        return clone\n",
"\n",
"    def expectation(self):\n",
"        \"\"\"Return E(X): the sum of value * probability over all outcomes.\"\"\"\n",
"        pairs = zip(self.values, self.probabilities)\n",
"        return sum(math.prod(pair) for pair in pairs)\n",
"\n",
"    def variance(self):\n",
"        \"\"\"Return Var(X) computed as E(X^2) - E(X)^2.\"\"\"\n",
"        squared = self.copy()\n",
"        squared.values = [value**2 for value in squared.values]\n",
"        return squared.expectation() - self.expectation()**2\n",
"\n",
"    def variance_alt(self):\n",
"        \"\"\"Return Var(X) computed as E((X - E(X))^2).\"\"\"\n",
"        mean = self.expectation()\n",
"\n",
"        centred = self.copy()\n",
"        centred.values = [(value - mean) ** 2 for value in centred.values]\n",
"\n",
"        return centred.expectation()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Discrete Distributions\n",
|
||||||
|
"### Binomial\n",
|
||||||
|
"- n independent trials\n",
|
||||||
|
"- all trials have a probability p of success\n",
|
||||||
|
"- $ X \\sim B(n, p) $\n",
|
||||||
|
"\n",
|
||||||
|
"### Poisson\n",
|
||||||
|
"- infinite independent trials\n",
|
||||||
|
"- ... at a uniform mean rate\n",
|
||||||
|
"- these are defined by their mean (or expected value), λ\n",
|
||||||
|
"- mean = variance\n",
|
||||||
|
"- given 2 PDs, X and Y with respective means x and y, X + Y has mean x + y. assumes independent X and Y\n",
|
||||||
|
"- $ X \\sim P(λ) $\n",
|
||||||
|
"\n",
|
||||||
|
"### Geometric\n",
|
||||||
|
"- number of trials up to and including the first success\n",
|
||||||
|
"- all trials have a probability p of success\n",
|
||||||
|
"- $ X \\sim Geo(p) $\n",
|
||||||
|
"\n",
|
||||||
|
"### Discrete Uniform\n",
|
||||||
|
"- Specific case of a DRV"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"class BinomialDistribution:\n",
"    \"\"\"Binomial distribution X ~ B(n, p): n trials, each succeeding with probability p.\"\"\"\n",
"\n",
"    n: int\n",
"    p: F\n",
"\n",
"    def __init__(self, n: int, p: F):\n",
"        self.n = n\n",
"        self.p = p\n",
"\n",
"    def expectation(self):\n",
"        \"\"\"Return the mean, n * p.\"\"\"\n",
"        mean = self.n * self.p\n",
"        return mean\n",
"\n",
"    def variance(self):\n",
"        \"\"\"Return the variance, n * p * (1 - p).\"\"\"\n",
"        q = NOT(self.p)\n",
"        return self.n * self.p * q\n",
"\n",
"    def P(self, x: int):\n",
"        \"\"\"Return P(X = x), the probability of exactly x successes.\"\"\"\n",
"        ways = combinations(self.n, x)\n",
"        q = NOT(self.p)\n",
"        return ways * self.p**x * q**(self.n - x)\n",
|
||||||
|
"\n",
|
||||||
|
"class PoissonDistribution:\n",
"    \"\"\"Poisson distribution with mean `u`; the mean and variance are equal.\"\"\"\n",
"\n",
"    u: F\n",
"\n",
"    def __init__(self, u: F):\n",
"        self.u = u\n",
"\n",
"    def expectation(self):\n",
"        \"\"\"Return the mean, u.\"\"\"\n",
"        mean = self.u\n",
"        return mean\n",
"\n",
"    def variance(self):\n",
"        \"\"\"Return the variance, which is also u.\"\"\"\n",
"        spread = self.u\n",
"        return spread\n",
"\n",
"    def P(self, x: int):\n",
"        \"\"\"Return P(X = x) = e^(-u) * u^x / x!.\"\"\"\n",
"        decay = math.e**-self.u\n",
"        return decay * self.u**x / math.factorial(x)\n",
|
||||||
|
"\n",
|
||||||
|
"class GeometricDistribution:\n",
"    \"\"\"Geometric distribution X ~ Geo(p), where each trial succeeds with probability p.\"\"\"\n",
"\n",
"    p: F\n",
"\n",
"    def __init__(self, p: F):\n",
"        self.p = p\n",
"\n",
"    def expectation(self):\n",
"        \"\"\"Return the mean, 1 / p.\"\"\"\n",
"        mean = 1 / self.p\n",
"        return mean\n",
"\n",
"    def variance(self):\n",
"        \"\"\"Return the variance, (1 - p) / p^2.\"\"\"\n",
"        failure = 1 - self.p\n",
"        return failure / self.p**2\n",
"\n",
"    def P(self, x: int):\n",
"        \"\"\"Return P(X = x) = p * (1 - p)^(x - 1).\"\"\"\n",
"        q = NOT(self.p)\n",
"        return self.p * q**(x - 1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Chi-squared Tests\n",
|
||||||
|
"\n",
|
||||||
|
"Chi squared stat:\n",
|
||||||
|
"\n",
|
||||||
|
"$ \\sum \\frac{(observed - expected)^2}{expected} $\n",
|
||||||
|
"\n",
|
||||||
|
"### Distribution Test\n",
|
||||||
|
"Expected values are calculated by distribution.\n",
|
||||||
|
"\n",
|
||||||
|
"### Independence Test\n",
|
||||||
|
"Expected values are calculated assuming independence using row and column totals.\n",
|
||||||
|
"\n",
|
||||||
|
"= $ \\frac{rowTotal \\times columnTotal}{total} $"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def chi2_stat(observed: list[float], expected: list[float]) -> float:\n",
"    \"\"\"Return the chi-squared statistic: the sum of (obs - exp)^2 / exp.\n",
"\n",
"    `observed` and `expected` are flat, equal-length sequences of frequencies\n",
"    (apply `flatten` to a contingency table first). Expected frequencies are\n",
"    usually non-integers, so the result is generally a float.\n",
"\n",
"    Raises ValueError if the two sequences differ in length, because pairing\n",
"    them up would otherwise silently drop the unmatched frequencies.\n",
"    \"\"\"\n",
"    if len(observed) != len(expected):\n",
"        raise ValueError(\"observed and expected must have the same length\")\n",
"    return sum(\n",
"        (obs - exp)**2 / exp\n",
"        for obs, exp in zip(observed, expected)\n",
"    )\n",
|
||||||
|
"\n",
|
||||||
|
"def independent_expected(observed: list[list[int]]) -> list[list[int]]:\n",
"    \"\"\"Return the table of expected frequencies under independence.\n",
"\n",
"    Each cell is row total * column total / grand total for the matching\n",
"    row and column of the `observed` contingency table.\n",
"    \"\"\"\n",
"    row_totals = list(map(sum, observed))\n",
"    col_totals = list(map(sum, zip(*observed)))\n",
"    total = sum(row_totals)\n",
"\n",
"    expected = []\n",
"    for row_total in row_totals:\n",
"        expected.append([\n",
"            row_total * col_totals[col] / total\n",
"            for col in range(len(observed[0]))\n",
"        ])\n",
"    return expected\n",
|
||||||
|
"\n",
|
||||||
|
"def flatten(l: list[any]) -> list[any]:\n",
"    \"\"\"Return every element of the (possibly nested) list `l` as one flat list.\"\"\"\n",
"    array = np.array(l)\n",
"    return list(array.flatten())\n",
|
||||||
|
"\n",
|
||||||
|
"def chi2_critical_value(significance_level: float, degrees_of_freedom: int) -> float:\n",
"    \"\"\"Return the chi-squared value with upper-tail probability `significance_level`.\"\"\"\n",
"    cumulative = 1 - significance_level\n",
"    return chi2.ppf(cumulative, df=degrees_of_freedom)\n",
|
||||||
|
"\n",
|
||||||
|
"def degrees_of_freedom(values: list[list[int]]) -> int:\n",
"    \"\"\"Return (rows - 1) * (columns - 1) for the table `values`.\"\"\"\n",
"    rows = len(values)\n",
"    columns = len(values[0])\n",
"    return (rows - 1) * (columns - 1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Bivariate Data\n",
|
||||||
|
"\n",
|
||||||
|
"### Product Moment Correlation\n",
|
||||||
|
"$ r = \\frac{\\sum{(x_i - \\bar{x})(y_i - \\bar{y})}}{\\sqrt{\\sum{(x_i - \\bar{x})^2} \\times \\sum{(y_i - \\bar{y})^2}}} $\n",
|
||||||
|
"- $ -1 \\le r \\le 1 $\n",
|
||||||
|
"- positive $ r $: positive correlation\n",
|
||||||
|
"- negative $ r $: negative correlation\n",
|
||||||
|
"- $ r = 0 $: no linear correlation\n",
|
||||||
|
"\n",
|
||||||
|
"### Spearman's Rank Correlation\n",
|
||||||
|
"$ r_s = 1 - \\frac{6\\sum{(x_i - y_i)^2}}{n(n^2 - 1)} $\n",
|
||||||
|
"- used when:\n",
|
||||||
|
" - data is given in a ranked form\n",
|
||||||
|
" - data is not from a bivariate normal distribution (is not linear)\n",
|
||||||
|
"- $ -1 \\le r_s \\le 1 $\n",
|
||||||
|
"- positive $ r_s $: positive correlation (not necessarily linear)\n",
|
||||||
|
"- negative $ r_s $: negative correlation (not necessarily linear)\n",
|
||||||
|
"- $ r_s = 0 $: no correlation\n",
|
||||||
|
"\n",
|
||||||
|
"### Linear Regression\n",
|
||||||
|
"$ y = \\bar{y} - b\\bar{x} + bx $ where $ b = \\frac{\\sum{(x_i - \\bar{x})(y_i - \\bar{y})}}{\\sum{(x_i - \\bar{x})^2}} $"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def product_moment_cc(x: list[float], y: list[float]) -> float:\n",
"    \"\"\"Return the product moment correlation coefficient r of paired data.\n",
"\n",
"    `x` and `y` are equal-length sequences of numbers (ints, floats or\n",
"    Fractions). The result is a float because of the square root.\n",
"    Raises ZeroDivisionError when the data is empty or either variable has\n",
"    no spread.\n",
"    \"\"\"\n",
"    # F(total) / n rather than F(total, n): the two-argument form rejects floats.\n",
"    x_avg = F(sum(x)) / len(x)\n",
"    y_avg = F(sum(y)) / len(y)\n",
"    s_xy = sum((x_i - x_avg) * (y_i - y_avg) for x_i, y_i in zip(x, y))\n",
"    s_xx = sum((x_i - x_avg)**2 for x_i in x)\n",
"    s_yy = sum((y_i - y_avg)**2 for y_i in y)\n",
"    return s_xy / math.sqrt(s_xx * s_yy)\n",
|
||||||
|
"\n",
|
||||||
|
"def spearman_rank_cc(x: list[int], y: list[int]) -> F:\n",
"    \"\"\"Return Spearman's rank correlation coefficient for paired ranks `x` and `y`.\n",
"\n",
"    Uses 1 - 6 * sum(d^2) / (n * (n^2 - 1)), where d is the difference\n",
"    between paired ranks and n is the number of ranks in `x`.\n",
"    \"\"\"\n",
"    n = len(x)\n",
"    d_squared_total = sum((x_i - y_i)**2 for x_i, y_i in zip(x, y))\n",
"    denominator = n * (n**2 - 1)\n",
"    return 1 - F(6 * d_squared_total, denominator)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.2"
|
||||||
|
},
|
||||||
|
"orig_nbformat": 4,
|
||||||
|
"vscode": {
|
||||||
|
"interpreter": {
|
||||||
|
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
Loading…
Reference in New Issue