From 1d98734702655a2892d1a13eadeb78c737cc9273 Mon Sep 17 00:00:00 2001 From: Gleb Date: Wed, 21 Dec 2022 21:11:18 +0000 Subject: [PATCH] FM Stats Short Half --- main.ipynb | 348 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 348 insertions(+) create mode 100644 main.ipynb diff --git a/main.ipynb b/main.ipynb new file mode 100644 index 0000000..d70d134 --- /dev/null +++ b/main.ipynb @@ -0,0 +1,348 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib widget\n", + "from collections.abc import Iterable\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from scipy.stats import chi2\n", + "import math\n", + "from fractions import Fraction as F" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Main\n", + "This contains automations for A-level Further Maths ordered in the same way as they are on [integral maths](https://my.integralmaths.org/)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def std_deviation(variance):\n", + " return math.sqrt(variance)\n", + "\n", + "def NOT(p):\n", + " return 1 - p\n", + "\n", + "# Independent events\n", + "def AND(*ps):\n", + " return math.prod(ps)\n", + "\n", + "def OR(*ps):\n", + " return 1 - AND(map(NOT, ps))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Permutations and Combinations\n", + "These just use the `math.perm` and `math.comb` functions throughout, which are defined as follows.\n", + "\n", + "- Permutations (pick) are ordered\n", + "- Combinations (choose) are unordered" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def permutations(n: int, take: int) -> int:\n", + " return int(math.factorial(n) / math.factorial(n - take))\n", + "\n", + "def combinations(n: int, take: int) -> int:\n", + " return int(permutations(n, take) / math.factorial(take))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Discrete Random Variables (DRVs)\n", + "\n", + "This section includes basic stats operations on DRVs.\n", + "\n", + "Note:\n", + "- Expected value (expectation) = mean\n", + "- Standard deviation = sqrt(variance)\n", + "- $E(aX + b) = aE(X) + b$\n", + "- $Var(aX + b) = a^2 Var(X)$" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class DiscreteRandomVariable:\n", + " values: list[F]\n", + " probabilities: list[F]\n", + " size: int\n", + "\n", + " # items are in the form of (value, probability)\n", + " # assumed that sum(probabilities) = 1\n", + " def __init__(self, items: list[tuple[F, F]]):\n", + " self.values = []\n", + " self.probabilities = []\n", + " for item in items:\n", + " self.values.append(item[0])\n", + " self.probabilities.append(item[1])\n", + " self.size = len(items)\n", + "\n", + " def 
copy(self):\n", + " c = DiscreteRandomVariable([])\n", + " c.values = self.values.copy()\n", + " c.probabilities = self.probabilities.copy()\n", + " c.size = self.size\n", + " return c\n", + "\n", + " def expectation(self):\n", + " return sum(map(math.prod, zip(self.values, self.probabilities)))\n", + "\n", + " def variance(self):\n", + " X2 = self.copy()\n", + " X2.values = map(lambda x : x**2, X2.values)\n", + " return X2.expectation() - self.expectation()**2\n", + "\n", + " def variance_alt(self):\n", + " u = self.expectation()\n", + "\n", + " X_u = self.copy()\n", + " X_u.values = map(lambda x : (x - u) ** 2, X_u.values)\n", + "\n", + " return X_u.expectation()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Discrete Distributions\n", + "### Binomial\n", + "- n independent trials\n", + "- all trials have a probability p of success\n", + "- $ X \\sim B(n, p) $\n", + "\n", + "### Poisson\n", + "- infinite independent trials\n", + "- ... at a uniform mean rate\n", + "- these are defined by their mean (or expected value), λ\n", + "- mean = variance\n", + "- given 2 PDs, X and Y with respective means x and y, X + Y has mean x + y. 
# ---------------------------------------------------------------------------
# Discrete Distributions (notes, continued)
#
# Poisson
# - the mean of X + Y being x + y assumes independent X and Y
# - X ~ P(λ)
#
# Geometric
# - trials until success
# - all trials have a probability p of success
# - X ~ Geo(p)
#
# Discrete Uniform
# - Specific case of a DRV
# ---------------------------------------------------------------------------

class BinomialDistribution:
    """Binomial distribution X ~ B(n, p): n trials, each succeeding with probability p."""

    n: int
    p: F

    def __init__(self, n: int, p: F):
        self.n, self.p = n, p

    def expectation(self):
        """Return E(X) = n * p."""
        return self.n * self.p

    def variance(self):
        """Return Var(X) = n * p * (1 - p)."""
        return self.n * self.p * (1 - self.p)

    def P(self, x: int):
        """Return P(X = x) = C(n, x) * p^x * (1 - p)^(n - x).

        Values of *x* outside 0..n have probability 0.
        """
        if not 0 <= x <= self.n:
            return 0
        # math.comb is exact integer arithmetic, so the coefficient does not
        # pick up float rounding for large n.
        return math.comb(self.n, x) * self.p**x * (1 - self.p)**(self.n - x)


class PoissonDistribution:
    """Poisson distribution X ~ P(u), defined by its mean u (mean = variance)."""

    u: F

    def __init__(self, u: F):
        self.u = u

    def expectation(self):
        """Return E(X) = u."""
        return self.u

    def variance(self):
        """Return Var(X) = u."""
        return self.u

    def P(self, x: int):
        """Return P(X = x) = e^(-u) * u^x / x! as a float.

        Negative *x* has probability 0.
        """
        if x < 0:
            return 0
        return math.exp(-self.u) * self.u**x / math.factorial(x)


class GeometricDistribution:
    """Geometric distribution X ~ Geo(p): number of trials until the first success."""

    p: F

    def __init__(self, p: F):
        self.p = p

    def expectation(self):
        """Return E(X) = 1 / p."""
        return 1 / self.p

    def variance(self):
        """Return Var(X) = (1 - p) / p^2."""
        return (1 - self.p) / self.p**2

    def P(self, x: int):
        """Return P(X = x) = p * (1 - p)^(x - 1).

        At least one trial is needed, so *x* < 1 has probability 0 (without
        this guard the negative exponent would yield a meaningless value).
        """
        if x < 1:
            return 0
        return self.p * (1 - self.p)**(x - 1)


# ---------------------------------------------------------------------------
# Chi-squared Tests
#
# Chi squared stat:
#
#   sum over all cells of (observed - expected)^2 / expected
#
# Distribution Test
#   Expected values are calculated by distribution.
#
# Independence Test
#   Expected values are calculated assuming independence using row and
#   column totals:
#
#   expected = rowTotal * columnTotal / total
# ---------------------------------------------------------------------------

def chi2_stat(observed: list[int], expected: list[float]) -> float:
    """Return the chi-squared statistic for paired observed/expected counts.

    Args:
        observed: observed frequencies.
        expected: expected frequencies, in the same order as *observed*.

    Raises:
        ValueError: if the two sequences differ in length.
        ZeroDivisionError: if an expected frequency is 0.
    """
    observed = list(observed)
    expected = list(expected)
    # zip would silently drop the unmatched tail and under-report the statistic.
    if len(observed) != len(expected):
        raise ValueError("observed and expected must have the same length")
    return sum(
        (obs - exp)**2 / exp
        for obs, exp in zip(observed, expected)
    )


def independent_expected(observed: list[list[int]]) -> list[list[float]]:
    """Return the expected contingency table under independence.

    Each cell is rowTotal * columnTotal / total, using the totals of the
    *observed* table (a list of equal-length rows).

    Raises:
        ValueError: if the rows are not all the same length.
        ZeroDivisionError: if the grand total is 0.
    """
    if any(len(row) != len(observed[0]) for row in observed):
        raise ValueError("all rows of observed must have the same length")

    row_totals = [sum(row) for row in observed]
    col_totals = [sum(col) for col in zip(*observed)]
    total = sum(row_totals)

    return [
        [row_total * col_total / total for col_total in col_totals]
        for row_total in row_totals
    ]


def flatten(l: list) -> list:
    """Return the elements of the nested list *l* as one flat list.

    The flattening goes through a NumPy array, so the returned elements are
    NumPy scalars.
    """
    return list(np.array(l).flatten())


def chi2_critical_value(significance_level: float, degrees_of_freedom: int) -> float:
    """Return the chi-squared critical value for the given significance level.

    This is the point of the chi-squared distribution with
    *degrees_of_freedom* degrees of freedom below which a proportion
    1 - *significance_level* of the distribution lies.
    """
    return chi2.ppf(1 - significance_level, df=degrees_of_freedom)


def degrees_of_freedom(values: list[list[int]]) -> int:
    """Return (rows - 1) * (columns - 1) for the contingency table *values*."""
    return (len(values) - 1) * (len(values[0]) - 1)


# ---------------------------------------------------------------------------
# Bivariate Data
#
# Product Moment Correlation
#   r = sum((x_i - x̄)(y_i - ȳ)) / sqrt(sum((x_i - x̄)^2) * sum((y_i - ȳ)^2))
# - -1 <= r <= 1
# - positive r: positive correlation
# - negative r: negative correlation
# - r = 0: no linear correlation
#
# Spearman's Rank Correlation
#   r_s = 1 - 6 * sum((x_i - y_i)^2) / (n(n^2 - 1))
# - used when:
#   - data is given in a ranked form
#   - data is not from a bivariate normal distribution (is not linear)
# - -1 <= r_s <= 1
# - positive r_s: positive correlation (not necessarily linear)
# - negative r_s: negative correlation (not necessarily linear)
# - r_s = 0: no correlation
#
# Linear Regression
#   y = ȳ - b*x̄ + b*x
#   where b = sum((x_i - x̄)(y_i - ȳ)) / sum((x_i - x̄)^2)
# ---------------------------------------------------------------------------
# Linear regression gradient:
#   b = sum((x_i - x̄)(y_i - ȳ)) / sum((x_i - x̄)^2)

def product_moment_cc(x: list[int], y: list[int]) -> float:
    """Return the product moment correlation coefficient r of paired data.

    r = sum((x_i - x̄)(y_i - ȳ)) / sqrt(sum((x_i - x̄)^2) * sum((y_i - ȳ)^2))

    The means and sums are accumulated exactly as Fractions; the square root
    makes the final result a float.

    Args:
        x: integer x-values.
        y: integer y-values, paired by position with *x*.

    Raises:
        ValueError: if *x* and *y* differ in length.
        ZeroDivisionError: if the data is empty, or if all x-values (or all
            y-values) are equal so that the denominator is 0.
    """
    # zip would pair only the common prefix while the means and sums of
    # squares use the full lists, giving a silently wrong coefficient.
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")

    x_avg = F(sum(x), len(x))
    y_avg = F(sum(y), len(y))

    x_dev = [x_i - x_avg for x_i in x]
    y_dev = [y_i - y_avg for y_i in y]

    s_xy = sum(dx * dy for dx, dy in zip(x_dev, y_dev))
    s_xx = sum(dx**2 for dx in x_dev)
    s_yy = sum(dy**2 for dy in y_dev)

    return s_xy / math.sqrt(s_xx * s_yy)


def spearman_rank_cc(x: list[int], y: list[int]) -> F:
    """Return Spearman's rank correlation coefficient as an exact Fraction.

    r_s = 1 - 6 * sum((x_i - y_i)^2) / (n(n^2 - 1))

    The inputs are used directly as ranks; this function does not rank raw
    data itself.

    Args:
        x: ranks of the first variable.
        y: ranks of the second variable, paired by position with *x*.

    Raises:
        ValueError: if *x* and *y* differ in length.
        ZeroDivisionError: if fewer than two pairs are given (n(n^2 - 1) = 0).
    """
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")

    n = len(x)
    squared_differences = sum((x_i - y_i)**2 for x_i, y_i in zip(x, y))
    return 1 - F(6 * squared_differences, n * (n**2 - 1))