- We build a Partition on a virtual sequence of length 10000, in 23
segments, such that each segment is at least 50 positions long.
# Build a random Partition of a virtual sequence of length 10000 into
# nbcl segments, each at least ec=50 positions long.
nbcl=23 # for example
import lcompte
# Load the letter-proportion models (the predictors) from file "lprop1".
lp=lcompte.Lproportion(fic="lprop1")
import partition
p=partition.Partition()
p.build_random(10000,nbcl,ec=50)
- We assign predictor numbers to the Segments of the Partition.
# Assign a predictor number to each Segment of the Partition, drawn at
# random from the numbers declared in the Lproportion, making sure two
# consecutive segments never share the same predictor.
# (Fixed: the loop and while bodies had lost their indentation.)
lnum=lp.num()  # available predictor numbers
import random
numr=random.choice(lnum)
for s in p:
    s.g_num([numr])
    # Draw the number for the next segment, forced to differ from numr.
    x=random.choice(lnum)
    while x==numr:
        x=random.choice(lnum)
    numr=x
- Then we build a Sequence from
the Partition and the
Lproportion.
# Generate a Sequence by drawing letters from the Lproportion models
# attached to each Segment of the Partition.
import sequence
s=sequence.Sequence()
s.read_Part(p,lp)
- We compute the segmentation log-probabilities of the sequence, under
the models used to build it, for up to 100 classes.
# Compute, for every number of classes from 1 to 100, the segmentation
# log-probability of the sequence under the models that generated it.
import lexique
lx=lexique.Lexique(Lprop=lp)
lprob=lx.probability(s,100)  # lprob[k-1] = log-probability with k classes
from rpy import * # using R (via rpy) to plot it
r.plot(range(1,len(lprob)+1),lprob,ylab="Log-probability",xlab="Nbe of classes")
- We compute the logarithm of the Bayes factor: the probability of
the sequence under the segmentations in k classes for all k>=n,
divided by the probability of the sequence under the segmentations
in k classes for all k<n:
import math
def expmean(l):
    """Return the log of the mean of exponentials of l (log-mean-exp).

    The maximum of l is factored out before exponentiating, so large
    log-probabilities cannot overflow math.exp.
    (Fixed: the function body had lost its indentation.)

    Raises ValueError if l is empty (from max).
    """
    m=max(l)
    x=0
    for i in l:
        x+=math.exp(i-m)
    return(math.log(x/len(l))+m)
# Log Bayes factor for each n >= 2: log of the mean probability over
# all k >= n classes, minus log of the mean probability over k < n.
# (Fixed: `expsmean` was a typo for the undefined name -- it must call
# `expmean` defined above; the loop body had also lost its indentation.)
lbay=[]
for i in range(1,len(lprob)):
    lbay.append(expmean(lprob[i:])-expmean(lprob[:i]))
r.plot(range(2,len(lprob)+1),lbay,ylab="Log of the Bayes factor",xlab="Nbe of classes")
- We select the largest number of classes whose log Bayes factor is at least 3.2.
r.abline(h=3.2,col="red")  # draw the 3.2 threshold on the Bayes-factor plot
# Largest number of classes whose log Bayes factor is still >= 3.2;
# +2 because lbay[0] corresponds to 2 classes.
# NOTE(review): assumes at least one entry reaches the threshold
# (otherwise filter(...)[-1] raises IndexError), and that the selected
# value is unique in lbay -- list.index returns the first occurrence.
nbc=lbay.index(filter(lambda x: x>=3.2,lbay)[-1])+2
print nbc