In [1]:
from sympy import *
init_printing()
In [2]:
from sympy.utilities.iterables import partitions, multiset, multiset_permutations
import math
In [3]:
n = Symbol('n')  # sample size
mu = IndexedBase("mu")  # central moments

Suppose $X_i$ are i.i.d. with mean $\mu$ and central moments $\mu_k = E(X-\mu)^k$.

The following function computes $E\left[\left(\sum_{i=1}^n (X_i-\mu)^2\right)^p \left(\sum_{i=1}^n (X_i-\mu)\right)^q \right]$ in terms of $\mu_k$, it will be used later to compute moments of sample variance.

In [4]:
# The follow algorithm is modified from Algorithm 1 of 
# Vegas-Sánchez-Ferrero, Gonzalo, et al. "A direct calculation of moments of the sample variance." 
# Mathematics and Computers in Simulation 82.5 (2012): 790-804
# in order to express in terms of central moments.

def ESX2pSXq(n, p, q, mu = IndexedBase("mu")):
    total = 0
    M = p + q
    for s in range(1, M + 1):
        nCs = binomial(n, s)
        parts = partitions(M, s, size = True)
        for size, part in parts:
            if size != s:
                continue
            part = [k for k, v in part.items() for j in range(v)]
            sigma = int(math.factorial(len(part)) / prod([math.factorial(x) for x in multiset(part).values()]))
            perms = multiset_permutations([i + 1 for i, v in enumerate(part) for j in range(v)])
            for perm in perms:
                c = [0]*M
                for j in range(1, M+1):
                    for i, x in enumerate(perm):
                        if x == j:
                            if i < p:
                                c[j - 1] += 2
                            else:
                                c[j - 1] += 1
                if 1 in c:
                    continue
                e = 1
                for j in range(0, M):
                    if c[j] > 0:
                        e *= mu[c[j]]
                total += nCs * sigma * e
    return total

Example 1: $E\left(\sum_{i=1}^n (X_i-\mu)^2\right) = n Var(X_i)$

In [5]:
ESX2pSXq(n, 1, 0)
Out[5]:
$\displaystyle n {\mu}_{2}$

Example 2: $E\left[\sum_{i=1}^n (X_i-\mu)\right]^2 = n^2 Var(\bar X)$

In [6]:
ESX2pSXq(n, 0, 2)
Out[6]:
$\displaystyle n {\mu}_{2}$

We are ready to compute the moments of sample variance $V_n=S_n^2$ because

$$ \begin{align*} E(V_n^k) &= E\left[\left( \frac{1}{n-1}\sum_{i=1}^n (X_i-\bar X)^2 \right)^k\right] \\ & =E\left[\left( \frac{1}{n-1} \left( \sum_{i=1}^n (X_i-\mu)^2 - \sum_{i=1}^n(\bar X - \mu)^2 \right) \right)^k\right] \\ &= \left( \frac{1}{n-1} \right)^k \sum_{r=0}^k \binom{n}{r} (-1)^{k-r} E\left[\left(\sum_{i=1}^n (X_i-\mu)^2\right)^r \left(\sum_{i=1}^n (\bar X -\mu)^2\right)^{k-r} \right] \\ &= \left( \frac{1}{n-1} \right)^k \sum_{r=0}^k \binom{n}{r} (-1)^{k-r} n^{r-k} E\left[\left(\sum_{i=1}^n (X_i-\mu)^2\right)^r \left(\sum_{i=1}^n (X_i-\mu)\right)^{2(k-r)} \right] \end{align*} $$

In [7]:
def momentV(n, k, mu = IndexedBase("mu")):
    if k == 0:
        return 1
    total = 0
    for r in range(k + 1):
        total += binomial(k, r) * n**r * (-1)**(k-r) * ESX2pSXq(n, r, 2*(k-r), mu)
    return total / n**k / (n-1)**k
In [8]:
m = [simplify(momentV(n, j)) for j in range(0, 5)]
m
Out[8]:
$\displaystyle \left[ 1, \ {\mu}_{2}, \ \frac{n^{2} {\mu}_{2}^{2} - 2 n {\mu}_{2}^{2} + n {\mu}_{4} + 3 {\mu}_{2}^{2} - {\mu}_{4}}{n \left(n - 1\right)}, \ \frac{n^{4} {\mu}_{2}^{3} - 5 n^{3} {\mu}_{2}^{3} + 3 n^{3} {\mu}_{2} {\mu}_{4} + 15 n^{2} {\mu}_{2}^{3} - 9 n^{2} {\mu}_{2} {\mu}_{4} - 6 n^{2} {\mu}_{3}^{2} + n^{2} {\mu}_{6} - 33 n {\mu}_{2}^{3} + 21 n {\mu}_{2} {\mu}_{4} + 12 n {\mu}_{3}^{2} - 2 n {\mu}_{6} + 30 {\mu}_{2}^{3} - 15 {\mu}_{2} {\mu}_{4} - 10 {\mu}_{3}^{2} + {\mu}_{6}}{n^{2} \left(n - 1\right)^{2}}, \ \frac{n^{6} {\mu}_{2}^{4} - 9 n^{5} {\mu}_{2}^{4} + 6 n^{5} {\mu}_{2}^{2} {\mu}_{4} + 44 n^{4} {\mu}_{2}^{4} - 36 n^{4} {\mu}_{2}^{2} {\mu}_{4} - 24 n^{4} {\mu}_{2} {\mu}_{3}^{2} + 4 n^{4} {\mu}_{2} {\mu}_{6} + 3 n^{4} {\mu}_{4}^{2} - 174 n^{3} {\mu}_{2}^{4} + 144 n^{3} {\mu}_{2}^{2} {\mu}_{4} + 168 n^{3} {\mu}_{2} {\mu}_{3}^{2} - 16 n^{3} {\mu}_{2} {\mu}_{6} - 24 n^{3} {\mu}_{3} {\mu}_{5} - 12 n^{3} {\mu}_{4}^{2} + n^{3} {\mu}_{8} + 513 n^{2} {\mu}_{2}^{4} - 432 n^{2} {\mu}_{2}^{2} {\mu}_{4} - 520 n^{2} {\mu}_{2} {\mu}_{3}^{2} + 48 n^{2} {\mu}_{2} {\mu}_{6} + 72 n^{2} {\mu}_{3} {\mu}_{5} + 42 n^{2} {\mu}_{4}^{2} - 3 n^{2} {\mu}_{8} - 885 n {\mu}_{2}^{4} + 690 n {\mu}_{2}^{2} {\mu}_{4} + 840 n {\mu}_{2} {\mu}_{3}^{2} - 64 n {\mu}_{2} {\mu}_{6} - 104 n {\mu}_{3} {\mu}_{5} - 60 n {\mu}_{4}^{2} + 3 n {\mu}_{8} + 630 {\mu}_{2}^{4} - 420 {\mu}_{2}^{2} {\mu}_{4} - 560 {\mu}_{2} {\mu}_{3}^{2} + 28 {\mu}_{2} {\mu}_{6} + 56 {\mu}_{3} {\mu}_{5} + 35 {\mu}_{4}^{2} - {\mu}_{8}}{n^{3} \left(n - 1\right)^{3}}\right]$

Similar to want we did in the previous post, we consider Taylor’s expanding $g(x) = \sqrt{x}$.

In [9]:
x = Symbol("x")
sigma = Symbol("sigma", positive = True)
lam = IndexedBase("lambda")
In [10]:
g = sqrt(x).series(x, x0=sigma**2, n=5)
g
Out[10]:
$\displaystyle - \frac{5 \left(- \sigma^{2} + x\right)^{4}}{128 \sigma^{7}} + \frac{\left(- \sigma^{2} + x\right)^{3}}{16 \sigma^{5}} - \frac{\left(- \sigma^{2} + x\right)^{2}}{8 \sigma^{3}} + \frac{- \sigma^{2} + x}{2 \sigma} + \sigma + O\left(\left(- \sigma^{2} + x\right)^{5}; x\rightarrow \sigma^{2}\right)$

Now, we are ready to compute $E(S_n) = Eg(S_n^2)$ in terms of the scaled central moments $\lambda_k = \mu_k / \sigma^k$.

Note that $\gamma = \lambda_3$ and $\kappa = \lambda_4$ are usually called skewness and kurtosis.

In [11]:
g_coeffs = list(reversed(g.removeO().as_poly(x).all_coeffs()))
g_coeffs
Out[11]:
$\displaystyle \left[ \frac{35 \sigma}{128}, \ \frac{35}{32 \sigma}, \ - \frac{35}{64 \sigma^{3}}, \ \frac{7}{32 \sigma^{5}}, \ - \frac{5}{128 \sigma^{7}}\right]$
In [12]:
ES = sum([g_coeffs[i] * m[i]  for i in range(0, 5)]).subs({
    mu[2]: sigma**2, 
    mu[3]: lam[3] * sigma**3, 
    mu[4]: lam[4] * sigma**4, 
    mu[6]: lam[6] * sigma**6})
In [13]:
ES.simplify()
Out[13]:
$\displaystyle \frac{128 n^{6} \sigma^{8} - 16 n^{5} \sigma^{8} {\lambda}_{4} - 368 n^{5} \sigma^{8} - 48 n^{4} \sigma^{8} {\lambda}_{3}^{2} - 15 n^{4} \sigma^{8} {\lambda}_{4}^{2} + 54 n^{4} \sigma^{8} {\lambda}_{4} + 8 n^{4} \sigma^{8} {\lambda}_{6} + 305 n^{4} \sigma^{8} - 336 n^{3} \sigma^{8} {\lambda}_{3}^{2} + 60 n^{3} \sigma^{8} {\lambda}_{4}^{2} - 90 n^{3} \sigma^{8} {\lambda}_{4} - 4 n^{3} \sigma^{8} {\lambda}_{6} - 89 n^{3} \sigma^{8} + 120 n^{3} \sigma^{3} {\lambda}_{3} {\mu}_{5} - 5 n^{3} {\mu}_{8} + 1984 n^{2} \sigma^{8} {\lambda}_{3}^{2} - 210 n^{2} \sigma^{8} {\lambda}_{4}^{2} + 1222 n^{2} \sigma^{8} {\lambda}_{4} - 156 n^{2} \sigma^{8} {\lambda}_{6} - 1011 n^{2} \sigma^{8} - 360 n^{2} \sigma^{3} {\lambda}_{3} {\mu}_{5} + 15 n^{2} {\mu}_{8} - 3920 n \sigma^{8} {\lambda}_{3}^{2} + 300 n \sigma^{8} {\lambda}_{4}^{2} - 3030 n \sigma^{8} {\lambda}_{4} + 292 n \sigma^{8} {\lambda}_{6} + 3585 n \sigma^{8} + 520 n \sigma^{3} {\lambda}_{3} {\mu}_{5} - 15 n {\mu}_{8} + 2800 \sigma^{8} {\lambda}_{3}^{2} - 175 \sigma^{8} {\lambda}_{4}^{2} + 2100 \sigma^{8} {\lambda}_{4} - 140 \sigma^{8} {\lambda}_{6} - 3150 \sigma^{8} - 280 \sigma^{3} {\lambda}_{3} {\mu}_{5} + 5 {\mu}_{8}}{128 n^{3} \sigma^{7} \left(n^{3} - 3 n^{2} + 3 n - 1\right)}$

We then expand it in terms of $1/n$

In [14]:
w = Symbol("1/n")
In [15]:
ES.subs(n, 1/w).series(w, 0, 3)
Out[15]:
$\displaystyle \sigma + 1/n \left(- \frac{\sigma {\lambda}_{4}}{8} + \frac{\sigma}{8}\right) + 1/n^{2} \left(- \frac{3 \sigma {\lambda}_{3}^{2}}{8} - \frac{15 \sigma {\lambda}_{4}^{2}}{128} + \frac{3 \sigma {\lambda}_{4}}{64} + \frac{\sigma {\lambda}_{6}}{16} - \frac{31 \sigma}{128}\right) + O\left(1/n^{3}\right)$

Finally, we get

$$ E(S_n) =\sigma - \frac{\sigma}{8n}(\lambda_4 - 1) + \frac{\sigma}{128 n^2}(8\lambda_6 - 15 \lambda_4^2 - 48 \lambda_3^2 + 6\lambda_4- 31) + o(n^{-2}) $$

and $$ MSE(S_n) = 2 \sigma^2 - 2 \sigma E(S_n) $$

In [16]:
MSES = 2*sigma**2 - 2*sigma*ES
MSES.subs(n, 1/w).series(w, 0, 3)
Out[16]:
$\displaystyle 1/n \left(\frac{\sigma^{2} {\lambda}_{4}}{4} - \frac{\sigma^{2}}{4}\right) + 1/n^{2} \left(\frac{3 \sigma^{2} {\lambda}_{3}^{2}}{4} + \frac{15 \sigma^{2} {\lambda}_{4}^{2}}{64} - \frac{3 \sigma^{2} {\lambda}_{4}}{32} - \frac{\sigma^{2} {\lambda}_{6}}{8} + \frac{31 \sigma^{2}}{64}\right) + O\left(1/n^{3}\right)$

Therefore $$ MSE(S_n) = \frac{\sigma^2}{4n} \left(\lambda_4 - 1\right) + \frac{\sigma^2}{64n^2} \left(48 \lambda_3^2 + 15 \lambda_4^2 - 6 \lambda_4 - 8 \lambda_6 + 31\right) + o(n^{-2}). $$

Comparing the MSE of $\hat \sigma_n = \sqrt{\frac{\sum(X_i - \bar X)^2}{n}}$,

$$ MSE(\hat \sigma_n) = E\left[\sqrt{\frac{n-1}{n}} S_n - \sigma\right]^2 = \frac{n-1}{n} E(S_n^2) - 2 \sqrt{\frac{n-1}{n}}\sigma E(S_n) + \sigma^2. $$

In [17]:
MSEsigmahat = (n-1)/n * sigma**2 - 2 * sigma * sqrt((n-1)/n) * ES + sigma**2
In [18]:
MSEsigmahat.subs(n, 1/w).series(w, 0, 3)
Out[18]:
$\displaystyle 1/n \left(\frac{\sigma^{2} {\lambda}_{4}}{4} - \frac{\sigma^{2}}{4}\right) + 1/n^{2} \left(\frac{3 \sigma^{2} {\lambda}_{3}^{2}}{4} + \frac{15 \sigma^{2} {\lambda}_{4}^{2}}{64} - \frac{7 \sigma^{2} {\lambda}_{4}}{32} - \frac{\sigma^{2} {\lambda}_{6}}{8} + \frac{55 \sigma^{2}}{64}\right) + O\left(1/n^{3}\right)$

which gives

$$ MSE(\hat \sigma_n) = \frac{\sigma^2}{4n} \left(\lambda_4 - 1\right) + \frac{\sigma^2}{64n^2} \left(48 \lambda_3^2 + 15 \lambda_4^2 - 14 \lambda_4 - 8 \lambda_6 + 55\right) + o(n^{-2}) $$

In [20]:
(MSES - MSEsigmahat).subs(n, 1/w).series(w, 0, 3).coeff(w**2)
Out[20]:
$\displaystyle \frac{\sigma^{2} {\lambda}_{4}}{8} - \frac{3 \sigma^{2}}{8}$
In [ ]: