suzanne_eng <- readLines("suzanne-cohen-eng.txt")
unigrams <- suzanne_eng %>% paste(collapse=" ") %>%
# tokenize by character (strsplit returns a list, so unlist it)
strsplit(split="") %>% unlist %>%
# remove instances of characters you don't care about
str_remove_all("[,.!'\"]") %>%
str_to_lower() %>%
# make a frequency table of the characters
table
letters_space<-c(letters," ")
x<-rep(0,27);names(x)<-letters_space
for (letter in letters_space) x[letter]<-unigrams[letter]
unigrams<-x
barplot(log(unigrams/sum(unigrams,na.rm=TRUE)+1))
suzanne1<-c(" ",suzanne_eng) %>% paste(collapse="") %>%
# tokenize by character (strsplit returns a list, so unlist it)
strsplit(split="") %>% unlist %>%
# remove instances of characters you don't care about
str_remove_all("[,.!'\"]") %>%
str_to_lower()
suzanne2<-c(suzanne_eng," ") %>% paste(collapse="") %>%
# tokenize by character (strsplit returns a list, so unlist it)
strsplit(split="") %>% unlist %>%
# remove instances of characters you don't care about
str_remove_all("[,.!'\"]") %>%
str_to_lower()
bigrams<-table(suzanne2,suzanne1)
X<-matrix(0,27,27)
row.names(X)<-letters_space
colnames(X)<-letters_space
for (letteri in letters_space)
for (letterj in letters_space)
if ((letteri %in% row.names(bigrams))&&(letterj %in% row.names(bigrams))) X[letteri,letterj]<-bigrams[letteri,letterj]
bigrams<-X
A <- matrix(c(0, 1/2,1,1,0,0,0,1/2,0),3,3)
## Solution 1
eigen.res<-eigen(t(A))
pi<-eigen.res$vectors[,1]
t(pi)%*%A
## [,1] [,2] [,3]
## [1,] -0.6666667+0i -0.6666667+0i -0.3333333+0i
## Solution 2
M<-diag(rep(1,3))-A
M[,3]<-rep(1,3)
solve(t(M),b=c(0,0,1))
## [1] 0.4 0.4 0.2
Input
The observation space \(O=\{o_{1},o_{2},\dots ,o_{N}\}\) , the state space \(S=\{s_{1},s_{2},\dots ,s_{K}\}\) , an array of initial probabilities \(\Pi =(\pi _{1},\pi _{2},\dots ,\pi _{K})\) such that \(\pi _{i}\) stores the probability that \(x_{1}=s_{i}\) , a sequence of observations $ X=(x_{1},x_{2},,x_{T})$ such that $ x_{t}=i$ if the observation at time \(t\) is $ o_{i}$ , transition matrix \(A\) of size $ KK$ such that $ A_{ij}$ stores the transition probability of transiting from state {s_{i}} to state {s_{j}} , emission matrix \(B\) of size \(K\times N\) such that \(B_{ij\) stores the probability of observing \(o_{j}\) from state \(s_{i}\) . Output The most likely hidden state sequence \(Z=(z_{1},z_{2},\ldots ,z_{T})\)
A<-matrix(c(0.3,0.7,0,0,0.9,0.1,0.6,0,0.4),3,3,byrow = TRUE)
B<-matrix(c(0.5,0.2,0.3,0,0,0,0,0,0,0.2,0.7,0.1,0,0,0,0,0,0.1,0,0.5,0.4),7,3)
X<-c(1,3,4,6)
M<-diag(rep(1,3))-A
M[,3]<-rep(1,3)
Pi<-solve(t(M),b=c(0,0,1))
SimulationHMM<-function(Pi,A,B,n){
Z<-rep(0,n)
X<-rep(0,n)
K<-length(Pi)
N<-nrow(B)
Z[1]<-sample(1:K,prob = Pi,size = 1,replace=TRUE)
X[1]<-sample(1:N,prob=B[,Z[1]],size=1)
for (i in 2:n){
Z[i]<- sample(1:K,prob=A[Z[i-1],],size=1)
X[i]<- sample(1:N,prob=B[,Z[i-1]],size=1)
}
return(list(X=X,Z=Z))
}
Hmmsimu<-SimulationHMM(Pi,A,B,100)
plot(Hmmsimu$X,col=Hmmsimu$Z)
SimulationHMMgauss<-function(Pi,A,n){
Z<-rep(0,n)
X<-rep(0,n)
K<-length(Pi)
N<-nrow(B)
Z[1]<-sample(1:K,prob = Pi,size = 1,replace=TRUE)
X[1]<-rnorm(1,mean=Z[1])
for (i in 2:n){
Z[i]<- sample(1:K,prob=A[Z[i-1],],size=1)
X[i]<-rnorm(1,mean=2*Z[i])
}
return(list(X=X,Z=Z))
}
Hmmsimu<-SimulationHMMgauss(Pi,A,100)
plot(Hmmsimu$X,col=Hmmsimu$Z)
Viterbi<-function(Pi,A,B,X){
K<-nrow(A)
T<-length(X)
S<-matrix(0,K,T)
logV<-matrix(-Inf,K,T)
Zest<-rep(0,T)
for (k in 1:K){
logV[k,1]<-log(B[X[1],k])+log(Pi[k])
S[k,1]<-0
}
# Forward
for (t in (2:T))
for (k in (1:K)){
logV[k,t]=max(logV[,t-1]+log(A[,k])+log(B[X[t],k]))
S[k,t-1]=which.max(logV[,t-1]+log(A[,k])+log(B[X[t],k]))
}
# Backward
Zest[T]<-which.max(logV[,T])
for (t in (T-1):1)
Zest[t]<-S[Zest[t+1],t]
return(Zest)
}
Viterbi(Pi,A,B,X)
## [1] 1 2 2 3
#Simulation
# Chaine de Markov sur les états caché
A<-matrix(c(0.95,0.1,0.05,0.9),2,2)
# Loi d'emission
B<-matrix(c(rep(1/6,6),rep(1/10,5),5/10),6,2)
dishonestCasino<-function(n,A,B){
K<-nrow(A) # nb d'état caché
L<-nrow(B) # nb d'état possible pour X_t
# Prenons un Pi uniforme
Pi<-rep(1/K,K)
# X obs, Z caché
Z<-rep(0,n); X<-rep(0,n)
# Simulation de l'état caché
Z[1]<-sample(1:K,prob=Pi,size=1)
for (t in 2:n)
Z[t]<-sample(1:K,prob=A[Z[t-1],],size=1)
for (t in 1:n)
X[t]<-sample(1:L,prob=B[,Z[t]],size=1)
return(list(X=X,Z=Z))
}
dishonestCasino(n=100,A,B)->games
plot(games$X,col=c("green","red")[games$Z])
games$Z
## [1] 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 2 2 1 1 1 1 2 2 2 2 2 2 2 2
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
#table(games$Z,res)