Homework1 for STAT6202
作者:
Yang Liu
最近上传:
6 年前
许可:
LaTeX Project Public License 1.3c
摘要:
STAT6202 Homework template
\begin
Discover why 18 million people worldwide trust Overleaf with their work.
\begin
Discover why 18 million people worldwide trust Overleaf with their work.
% --------------------------------------------------------------
% This is all preamble stuff that you don't have to worry about.
% Head down to where it says "Start here"
% --------------------------------------------------------------
\documentclass[a4paper, 11pt]{article}
\usepackage{comment} % enables the use of multi-line comments (\ifx \fi)
\usepackage{lipsum} %This package just generates Lorem Ipsum filler text.
\usepackage{fullpage} % changes the margin
\usepackage[margin=1in]{geometry}
\usepackage{amsmath,amsthm,amssymb,amsfonts}
\usepackage[english]{babel}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amsfonts}
\usepackage[colorinlistoftodos]{todonotes}
\usepackage{enumitem}
\usepackage{stackrel}
\usepackage{mathtools,bm}
\usepackage{mathrsfs}
\usepackage{comment} % enables the use of multi-line comments (\ifx \fi)
\usepackage{lipsum} %This package just generates Lorem Ipsum filler text.
\usepackage{fullpage} % changes the margin
\usepackage[margin=1in]{geometry}
\usepackage{amsmath,amsthm,amssymb,amsfonts}
\usepackage{float}
\usepackage[english]{babel}
\usepackage[utf8]{inputenc}
\usepackage{amsmath,amsfonts}
\usepackage[colorinlistoftodos]{todonotes}
\usepackage{enumitem}
\usepackage{stackrel}
\usepackage{mathtools,bm}
\usepackage{graphicx}
\usepackage{dsfont}
% --------------------------------------------------------------
% Manually numbered theorem-like environments.
% Each environment takes two arguments:
%   #1 (optional) -- the heading word; defaults to the environment's
%                    own name (Theorem, Lemma, Exercise, ...).
%   #2 (required) -- the number/identifier printed after the heading.
% Both are typeset in bold as the label of a one-item trivlist, with a
% period after #2, e.g. \begin{theorem}{1.2} ... \end{theorem} or
% \begin{theorem}[Claim]{3} ... \end{theorem}.
% No counters are involved: the number is whatever is passed as #2.
% --------------------------------------------------------------
\newenvironment{theorem}[2][Theorem]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
% Same layout as theorem; default heading "Lemma".
\newenvironment{lemma}[2][Lemma]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
% Same layout as theorem; default heading "Exercise".
\newenvironment{exercise}[2][Exercise]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
% Same layout as theorem; default heading "Reflection".
\newenvironment{reflection}[2][Reflection]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
% Same layout as theorem; default heading "Proposition".
\newenvironment{proposition}[2][Proposition]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
% Same layout as theorem; default heading "Corollary".
\newenvironment{corollary}[2][Corollary]{\begin{trivlist}
\item[\hskip \labelsep {\bfseries #1}\hskip \labelsep {\bfseries #2.}]}{\end{trivlist}}
\begin{document}
\title{Homework 1 STAT 6202}%replace X with the appropriate number
\author{Yang Liu\\ %replace with your name
Instructor: Professor Tapan K. Nayak } %if necessary, replace with your course title
\maketitle
\hrule
\section*{1.} Let $X_1,\cdots,X_n$ be i.i.d. Bernoulli variables with success probability $\theta$, where $n>2$, and let $T=\sum_{i=1}^{n}X_i$. Derive the conditional distribution of $X_1,\cdots,X_n$ given $T=t$.
\begin{proof}
Since $X_1,\cdots,X_n\stackrel{i.i.d.}{\sim}Bernoulli(\theta)$, we have $T=\sum_{i=1}^nX_i\sim Binomial (n,\theta)$. Hence, for $x_1,\cdots,x_n\in\{0,1\}$ with $\sum_{i=1}^n x_i=t$,
\begin{align*}
P\left(X_1=x_1,\cdots,X_n=x_n\right)& =\prod_{i=1}^{n}\theta^{x_i}(1-\theta)^{1-x_i} \\
P\left(X_1=x_1,\cdots,X_n=x_n, T=\sum_{i=1}^nX_i=t\right)& =\theta^t(1-\theta)^{n-t}\\
P\left(\left.X_{1}=x_{1},\cdots,X_{n}=x_{n}\right|\sum_{i=1}^{n}X_{i}=t\right)& =\frac{\theta^t(1-\theta)^{n-t}}{\left(\begin{array}{c}
n\\
t
\end{array}\right)\theta^{t}\left(1-\theta\right)^{n-t}}\\
& = \frac{1}{\left(\begin{array}{c}
n\\
t
\end{array}\right)}
\end{align*}
\end{proof}
\section*{2.} Suppose $X_1$ and $X_2$ are iid $Poisson(\theta)$ random variables and let $T = X_1 + 2X_2$.
\begin{enumerate}[label=(\alph*)]
\item Find the conditional distribution of $(X_1, X_2)$ given $T = 7$.
\item For $\theta = 1$ and $\theta = 2$, respectively, calculate all probabilities in the above conditional
distribution and present the two conditional distributions numerically.
\end{enumerate}
\begin{proof}
\begin{enumerate}[label=(\alph*)]
\item Since $X_1,X_2\stackrel{i.i.d.}{\sim }Poisson (\theta)$, then we have
\begin{align*}
\left \lbrace X_1+2X_2=7 \right\rbrace= \left\lbrace (X_1=1,X_2=3), (X_1=3,X_2=2),(X_1=5,X_2=1),(X_1=7,X_2=0)\right\rbrace
\end{align*}
and $(X_1=1,X_2=3), (X_1=3,X_2=2),(X_1=5,X_2=1),(X_1=7,X_2=0)$ are mutually exclusive, then
\begin{align*}
P\left(T=7\right)& =P\left(X_1=1,X_2=3\right)+P \left(X_1=3,X_2=2\right)\\&\ \ +P\left(X_1=5,X_2=1\right)+P(X_1=7,X_2=0)\\
& = \frac{\theta}{1}e^{-\theta}\cdot \frac{\theta^3}{3!}e^{-\theta}+ \frac{\theta^3}{3!}e^{-\theta}\cdot \frac{\theta^2}{2!}e^{-\theta}+\frac{\theta^5}{5!}e^{-\theta}\cdot \frac{\theta^1}{1!}e^{-\theta}+ \frac{\theta^7}{7!}e^{-\theta}\cdot e^{-\theta}\\
& = \frac{\theta^4e^{-2\theta}}{6}\left(1+ \frac{\theta}{2}+\frac{\theta^2}{20}+\frac{\theta^3}{840}\right)
\end{align*}
Then the conditional distribution of $(X_1,X_2)$ given $T=7$ is
\begin{align*}
P\left(\left.X_1=1,X_2=3\right|T=7\right)& = \frac{P\left(X_1=1,X_2=3\right)}{P\left(T=7\right)}\\
& = \frac{ \frac{\theta^4e^{-2\theta}}{6}}{ \frac{\theta^4e^{-2\theta}}{6}\left(1+ \frac{\theta}{2}+\frac{\theta^2}{20}+\frac{\theta^3}{840}\right)}\\
& = \frac{840}{840+420\theta+42\theta^2+\theta^3}\\
P\left(\left.X_1=3,X_2=2\right|T=7\right)& =\frac{P\left(X_1=3,X_2=2\right)}{P\left(T=7\right)} \\& =\frac{ \frac{\theta^4e^{-2\theta}}{6}\cdot \frac{\theta}{2}
}{ \frac{\theta^4e^{-2\theta}}{6}\left(1+ \frac{\theta}{2}+\frac{\theta^2}{20}+\frac{\theta^3}{840}\right)}\\
& = \frac{420\theta}{840+420\theta+42\theta^2+\theta^3} \\P\left(\left.X_1=5,X_2=1\right|T=7\right)& =\frac{P\left(X_1=5,X_2=1\right)}{P\left(T=7\right)} \\
& =\frac{ \frac{\theta^4e^{-2\theta}}{6}\cdot \frac{\theta^2}{20}
}{ \frac{\theta^4e^{-2\theta}}{6}\left(1+ \frac{\theta}{2}+\frac{\theta^2}{20}+\frac{\theta^3}{840}\right)}\\
& = \frac{42\theta^2}{840+420\theta+42\theta^2+\theta^3}\\
P\left(\left.X_1=7,X_2=0\right|T=7\right)& =\frac{P\left(X_1=7,X_2=0\right)}{P\left(T=7\right)} \\
& =\frac{ \frac{\theta^4e^{-2\theta}}{6}\cdot \frac{\theta^3}{840}
}{ \frac{\theta^4e^{-2\theta}}{6}\left(1+ \frac{\theta}{2}+\frac{\theta^2}{20}+\frac{\theta^3}{840}\right)}\\
& = \frac{\theta^3}{840+420\theta+42\theta^2+\theta^3}
\end{align*}
\item
The conditional distribution of $(X_1,X_2)|T=7$ is given in table 1.
\begin{table}[H]
\centering
\caption{Conditional distribution of $(X_1,X_2)$ given $T=7$}
\begin{tabular}{c|cccc}
\hline
$P(X_1=x_1,X_2=x_2|T=7)$ & $(x_1=1,x_2=3)$ & $(x_1=3,x_2=2)$ & $(x_1=5,x_2=1)$& $(x_1=7,x_2=0)$ \\
\hline
$\theta=1$ & $\frac{840}{1303}$ & $\frac{420}{1303}$ & $\frac{42}{1303}$& $\frac{1}{1303}$ \\
\hline
$\theta=2$ & $\frac{840}{1856}$& $\frac{840}{1856}$ & $\frac{168}{1856}$ & $\frac{8}{1856}$ \\
\hline
\end{tabular}
\end{table}
\end{enumerate}
\end{proof}
\section*{3.} Let $X_1,\cdots,X_n$ be i.i.d. random variables with mean $\mu$ and variance $\sigma^2$. Let $\bar{X}$ denote the sample mean and $V=\sum_{i=1}^{n}\left(X_i-\bar{X}\right)^2$.
\begin{enumerate}[label=(\alph*)]
\item Derive the expected value of $\bar{X}$ and $V$.
\item Further suppose that $X_1,\cdots,X_n$ are normally distributed. Let $A_{n\times n} = ((a_{ij}))$
be an orthogonal matrix whose first row is $(\frac{1}{\sqrt{n}},\cdots,\frac{1}{\sqrt{n}})$ and let $Y = AX$, where
$Y = (Y_1,\cdots, Y_n)'$ and $X = (X_1,\cdots,X_n)'$ are (column) vectors. (It is not necessary to
know $a_{ij}$ for $i = 2,\cdots, n$, $j = 1,\cdots, n$ for answering the following questions.)
\begin{enumerate}[label=(\roman*)]
\item Find $\sum_{j=1}^n a_{ij}$ for $i=1,\cdots,n$ and show that
$\sum_{i=1}^n Y_i^2 = \sum_{i=1}^nX_i^2$ (Use properties
of orthogonal matrices.)
\item Express $\bar{X}$ and $V$ in terms (or as functions) of $Y_1,\cdots,Y_n$.
\item Use (only) \textit{transformation of variables} approach to find the joint distribution of
$Y_1,\cdots,Y_n$. Are $Y_1,\cdots,Y_n$ independently distributed and what are their marginal distributions?
\item Prove that $\bar{X}$ and $V$ are independent and give their marginal distributions.
\end{enumerate}
\end{enumerate}
\begin{proof}
\begin{enumerate}[label=(\alph*)]
\item Since $E[X_i]=\mu$ and $Var[X_i]=\sigma^2$ for $i=1,\cdots,n$
\begin{align*}
E[\bar{X}]&= E\left[\frac{\sum_{i=1}^n X_i}{n}\right]= \frac{\sum_{i=1}^n E[X_i]}{n}= \frac{n\mu}{n}=\mu
\end{align*}
\begin{align*}
Var\left[\bar{X}\right]&=E\left[\left(\bar{X}-\mu\right)^2\right]= Var\left[\frac{\sum_{i=1}^nX_i}{n}\right]\\&= \frac{\sum_{i=1}^n Var[X_i]}{n^2} = \frac{n\sigma^2}{n^2}=\frac{\sigma^2}{n}
\end{align*}
\begin{align*}
E\left[V\right]& = E\left[\sum_{i=1}^n\left(X_i-\bar{X}\right)^2\right]=E\left[\sum_{i=1}^n\left((X_i-\mu)-(\bar{X}-\mu)\right)^2\right]\\
& = E\left[ \sum_{i=1}^n \left(X_i-\mu\right)^2 \right]+ n E\left[\left(\bar{X}-\mu\right)^2\right]-2E\left[ \sum_{i=1}^n (X_i-\mu)(\bar{X}-\mu)\right]\\& = E\left[ \sum_{i=1}^n \left(X_i-\mu\right)^2 \right]- n E\left[\left(\bar{X}-\mu\right)^2\right]\\
&= n Var[X_i]- nVar[\bar{X}]\\
& = n\sigma^2 - n\cdot\frac{\sigma^2}{n}=(n-1)\sigma^2
\end{align*}
Or since
\begin{align*}
E\left[\bar{X}^2\right]& =Var[\bar{X}]+\left(E[\bar{X}]\right)^2\\
& = \frac{\sigma^2}{n}+\mu^2
\end{align*}
\begin{align*}
E\left[V\right]& = E\left[\sum_{i=1}^n\left(X_i-\bar{X}\right)^2\right]=E\left[ \sum_{i=1}^n X_i^2-2\bar{X}\sum_{i=1}^nX_i+ n\bar{X}^2\right]\\
& = E\left[\sum_{i=1}^nX_i^2-n\bar{X}^2\right]\\
& =n\cdot\left[\sigma^2+\mu^2\right]-n\cdot\left[ \frac{\sigma^2}{n}+\mu^2\right]\\
& = (n-1)\sigma^2
\end{align*}
\item
\begin{enumerate}[label=(\roman*)]
\item Due to the orthogonality of $A$, $A'A=AA'=I_{n\times n}$, where $I_{n\times n}$ is the $n\times n$ identity matrix. Let $A=(a_{1\cdot},\cdots,a_{n\cdot })'$ where $a_{j\cdot}$ is the $j^{th}$ row vector. Then we have for $i,j=1,\cdots,n$ with $i\neq j$
\[a_{i\cdot }a_{i\cdot}'=1 \quad\text{and}\quad a_{i\cdot }a_{j\cdot }'=0.\]
\begin{align*}
a_{1\cdot}a_{1\cdot}'&=\sum_{j=1}^n a_{1j}\cdot \frac{1}{\sqrt{n}}=\frac{\sum_{j=1}^na_{1j}}{\sqrt{n}}=1
\\
a_{i\cdot}a_{1\cdot}'&=\sum_{j=1}^na_{ij}\cdot\frac{1}{\sqrt{n}}=\frac{\sum_{j=1}^na_{ij}}{\sqrt{n}}=0 \quad \text{for } i=2,\cdots,n
\end{align*}
Hence $\sum_{j=1}^n a_{ij}=\sqrt{n}$ for $i=1$ and $\sum_{j=1}^n a_{ij}=0$ for $i=2,\cdots,n$.
\begin{align*}
\sum_{i=1}^n Y_i ^2& =Y'Y = X'A'AX=X'(A'A)X\\
& = X'X=\sum_{i=1}^n X_i^2
\end{align*}
\item
Note that $Y_1= \sum_{i=1}^n\frac{1}{\sqrt{n}}\cdot X_i= \frac{\sum_{i=1}^nX_i}{\sqrt{n}}=\sqrt{n}\cdot\bar{X}$
\begin{align*}
\sum_{i=2}^nY_i^2&= Y'Y-Y_1^2=\sum_{i=1}^n X_i^2-\left(\sqrt{n}\bar{X}\right)^2\\
& = \sum_{i=1}^nX_i^2-n\bar{X}^2 \\
& =\sum_{i=1}^n \left(X_i-\bar{X}\right)^2
\end{align*}
Therefore $\bar{X}=\frac{Y_1}{\sqrt{n}}$ and $\sum_{i=1}^n \left(X_i-\bar{X}\right)^2 = \sum_{i=2}^n Y_i^2$
\item since $X_1,\cdots,X_n\stackrel{i.i.d.}{\sim}N\left(\mu,\sigma^2\right)$
\begin{align*}
f_{X_1,\cdots,X_n}\left(x_1,\cdots,x_n\right)& = \prod_{i=1}^n \frac{1}{\sqrt{2\pi}\sigma}e^{-\frac{(x_i-\mu)^2}{2\sigma^2}}\\
& = \left(2\pi\right)^{-\frac{n}{2}}\sigma^{-n}e^{-\frac{\sum_{i=1}^n\left(x_i-\mu\right)^2}{2\sigma^2}}\\
& = \left(2\pi\right)^{-\frac{n}{2}}\sigma^{-n}e^{-\frac{(X-\mathbf{1}\mu)'(X-\mathbf{1}\mu)}{2\sigma^2}}
\end{align*}
where $\mathbf{1}=(1,\cdots,1)'$. Let $A=(a_{\cdot1},\cdots,a_{\cdot n})$, $A'=(a_{\cdot 1},\cdots,a_{\cdot n})'$ where $a_{\cdot j}$ is the $j^{th}$ column vector,
since $Y=AX$, $X=A'AX=A'Y$, $\frac{d}{dY}X= A'$, $\left|\frac{d}{dY}X\right|=\left|\det(A')\right|=\sqrt{\det(A'A)}=1$, then we have
\begin{align*}
f_{Y_1,\cdots,Y_n}\left(y_1,\cdots,y_n\right)& =\left. f_{X_1,\cdots,X_n}(x_1,\cdots,x_n)\left|\frac{d}{dY}X\right|\right|_{X=A'Y}\\
& =\left. \left(2\pi\right)^{-\frac{n}{2}}\sigma^{-n}e^{-\frac{(X-\mathbf{1}\mu)'(X-\mathbf{1}\mu)}{2\sigma^2}}\right|_{X=A'Y}\\
& = \left(2\pi\right)^{-\frac{n}{2}}\sigma^{-n}e^{-\frac{(A'Y-\mathbf{1}\mu)'(A'Y-\mathbf{1}\mu)}{2\sigma^2}}\\
& = \left(2\pi\right)^{-\frac{n}{2}}\sigma^{-n}e^{-\frac{(Y-A\mathbf{1}\mu)'AA'(Y-A\mathbf{1}\mu)}{2\sigma^2}}\\
& = \left(2\pi\right)^{-\frac{n}{2}}\sigma^{-n}e^{-\frac{(Y-A\mathbf{1}\mu)'(Y-A\mathbf{1}\mu)}{2\sigma^2}}\\
& = \left(2\pi\right)^{-\frac{n}{2}}\sigma^{-n}e^{-\frac{\sum_{i=1}^n(y_i-a_{i\cdot }\mathbf{1}\mu)^2}{2\sigma^2}}\\
& = \left(2\pi\right)^{-\frac{n}{2}}\sigma^{-n}e^{-\frac{\sum_{i=1}^n\left(y_i-\sum_{j=1}^na_{ij}\mu\right)^2}{2\sigma^2}}\\
& = \frac{1}{\sqrt{2\pi}\sigma} e^{-\frac{(y_1-\sqrt{n}\mu)^2}{2\sigma^2}}\cdot\prod_{i=2}^n \frac{1}{\sqrt{2\pi}\sigma} e^{-\frac{y_i^2}{2\sigma^2}}
\end{align*}
The last equation is due to (b) (i). Note that $E[Y]= AE[X]=A\mathbf{1}\mu$ and $Var[Y]=A\,Var[X]A'=AA'\sigma^2=I\cdot\sigma^2$. Since the joint density factorizes, $Y_1,\cdots,Y_n$ are independent, with $Y_1\sim N(\sqrt{n}\mu,\sigma^2 )\perp Y_2,\cdots,Y_n\stackrel{i.i.d.}{\sim}N(0,\sigma^2)$.
Or
\begin{align*}
\sum_{i=1}^n \left(X_i-\mu\right)^2 & = \sum_{i=1}^nX_i^2-2\mu\sum_{i=1}^nX_i+n\mu^2\\
& = \sum_{i=1}^n Y_i^2-2\sqrt{n}\mu Y_1+n\mu^2\\
& = \sum_{i=2}^nY_i^2+(Y_1-\sqrt{n}\mu)^2
\end{align*}
The second equation is due to (b) (i). Hence
\begin{align*}
f_{Y_1,\cdots,Y_n}\left(y_1,\cdots,y_n\right)& =\left. f_{X_1,\cdots,X_n}(x_1,\cdots,x_n)\left|\frac{d}{dY}X\right|\right|_{X=A'Y}\\
& = \frac{1}{\sqrt{2\pi}\sigma} e^{-\frac{(y_1-\sqrt{n}\mu)^2}{2\sigma^2}}\cdot\prod_{i=2}^n \frac{1}{\sqrt{2\pi}\sigma} e^{-\frac{y_i^2}{2\sigma^2}}
\end{align*}
\item Since $Y_1\sim N(\sqrt{n}\mu,\sigma^2 )\perp Y_2,\cdots,Y_n\stackrel{i.i.d.}{\sim}N(0,\sigma^2)$, $\bar{X}=\frac{Y_1}{\sqrt{n}}\sim N\left(\mu,\frac{\sigma^2}{n}\right)\perp Y_2,\cdots,Y_n$ and $\frac{Y_2^2}{\sigma^2},\cdots,\frac{Y_n^2}{\sigma^2}\stackrel{i.i.d.}{\sim}\chi^2_1$, then $\sum_{i=2}^n \frac{Y_i^2}{\sigma^2}\sim\chi^2_{n-1}$. Therefore $\bar{X}\sim N\left(\mu,\frac{\sigma^2}{n}\right)\perp V=\sum_{i=2}^n Y_i^2\sim \sigma^2\cdot\chi^2_{n-1}$.
\end{enumerate}
\end{enumerate}
\end{proof}
\section*{4.} Consider a large population of individuals and let $\theta$ denote the (unknown) proportion of the population belonging to a sensitive group A (e.g. drug users).
Suppose we randomly select $n$ individuals from the population and ask each person to select a card from a deck and answer the question written on the card. Each card in the deck has one of the two questions: $Q_1$: Do you belong to A? and $Q_2$: Do you not belong to A? Also, 85\% of the cards ask $Q_1$ and the remaining 15\% ask $Q_2$.
Assume that each person answers Yes or No truthfully to the selected question. For
$i = 1,\cdots,n$, let $X_i = 1$ if the $i^{th}$ person answers 'Yes' otherwise $X_i = 0$. So, the data are
the observed values of $X_1, \cdots,X_n$.
Give the joint distribution of $X_1,\cdots,X_n$ and the distribution of the total number of
Yes responses.
\begin{proof}
We first calculate the probability that the $i^{th}$ person answers 'Yes':
\begin{align*}
P(X_i=1)&= \ \ \ P(answer \ Q_1)\cdot P\left(\left.'Yes' \ as\ response\right| answer \ Q_1 \right)\\ & \ \ \ \ + P(answer \ Q_2)\cdot P\left(\left.'Yes' \ as \ response\right| answer \ Q_2 \right)\\
& = 0.85\times\theta + 0.15\times(1-\theta)\\
& = 0.15+ 0.7\theta
\end{align*}
Then we have $X_1,\cdots,X_n\stackrel{i.i.d.}{\sim}Bernoulli \left( 0.15+ 0.7\theta\right)$, therefore
\begin{align*}
P\left(X_1=x_1,\cdots,X_n=x_n\right)& = \prod_{i=1}^n \left(0.15+0.7\theta\right)^{x_i}\left(1-\left(0.15+0.7\theta\right)\right)^{1-x_i}\\& = \left(0.15+0.7\theta\right)^{\sum_{i=1}^nx_i}\left(0.85-0.7\theta\right)^{n-\sum_{i=1}^nx_i}
\end{align*}
Let $Y_n =\sum_{i=1}^nX_i$ be the total number of 'Yes' responses; then $Y_n\sim Binomial\left(n, 0.15+0.7\theta\right)$, that is,
\begin{align*}
P\left(Y_n=y\right)= \left(\begin{array}{c}
n\\
y
\end{array}\right)(0.15+0.7\theta)^y(0.85-0.7\theta)^{n-y}\ \ for \ y=0,\cdots,n
\end{align*}
\end{proof}
\end{document}