How to implement double-word (bigram) frequency statistics for short English texts in Python?
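One way is to lower-case the text, split it on commas and full stops so that pairs never cross a clause boundary, collect every pair of adjacent words, and then divide each pair's count by the count of its first word, which gives the conditional frequency of the second word given the first. The snippet below (Python 3) follows that idea: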
import re
from collections import Counter

def parserwords(sentence):
    # Return the list of adjacent word pairs (bigrams) in one clause.
    preword = ''
    result = []
    for word in re.findall(r'\w+', sentence.lower()):
        if preword:
            result.append((preword, word))
        preword = word
    return result

context = """
Do you hear the people sing, singing a song of angry men.
It is the music of a people, who will not be slaves again,
When the beating of your heart echoes the beating of the drums.
There is a life about to start when tomorrow comes.
"""

# Split on commas and full stops so that no pair spans a clause boundary,
# then gather every (previous word, current word) pair.
words = []
for sentence in map(parserwords, re.split(r'[,.]', context.lower())):
    words.extend(sentence)

# Count how often each first word appears, and how often each pair appears.
prefixcounter = Counter(pair[0] for pair in words)
counter = Counter(words)

# Conditional frequency of the second word given the first:
# count(first, second) / count(first).
meter = {}
for pre, post in counter:
    meter[(pre, post)] = counter[(pre, post)] / prefixcounter[pre]

# Sort by descending frequency, then alphabetically by pair.
result = sorted(meter.items(), key=lambda item: (-item[1], item[0]))

print(result[:5])
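If all you need are raw pair counts rather than the conditional frequencies above, a shorter sketch is to pair the token list with itself shifted by one and feed the pairs straight into Counter. The names text, tokens and bigram_counts here are just illustrative, and unlike the version above this does not stop at clause boundaries:

import re
from collections import Counter

text = "Do you hear the people sing, singing a song of angry men."
tokens = re.findall(r'\w+', text.lower())

# Raw bigram counts: each token paired with the token that follows it.
bigram_counts = Counter(zip(tokens, tokens[1:]))

# The five most frequent word pairs and their counts.
print(bigram_counts.most_common(5))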