Welcome, guest | Sign In | My Account | Store | Cart
# Shannon Entropy of a string
# = minimum average number of bits per symbol
# required for encoding the string
#
# So the theoretical limit for data compression:
# Shannon Entropy of the string * string length
# FB - 201011291
import math
from sets import Set

st
= 'aabcddddefffg' # input string
# st = '00010101011110' # Shannon entropy for this would be 1 bit/symbol

print 'Input string:'
print st
print
stList
= list(st)
alphabet
= list(Set(stList)) # list of symbols in the string
print 'Alphabet of symbols in the string:'
print alphabet
print
# calculate the frequency of each symbol in the string
freqList
= []
for symbol in alphabet:
    ctr
= 0
   
for sym in stList:
       
if sym == symbol:
            ctr
+= 1
    freqList
.append(float(ctr) / len(stList))
print 'Frequencies of alphabet symbols:'
print freqList
print
# Shannon entropy
ent
= 0.0
for freq in freqList:
    ent
= ent + freq * math.log(freq, 2)
ent
= -ent
print 'Shannon entropy:'
print ent
print 'Minimum number of bits required to encode each symbol:'
print int(math.ceil(ent))

History