Update your mapper to remove punctuation marks during mapping.
The current mapper code is shown below, followed by the updated version:
#!/usr/bin/env python
# The line above tells the shell to run this file with Python.
# This mapper reads lines of text from stdin and, for every
# whitespace-separated word, writes a 'word TAB 1' line to stdout.
import sys

sys.path.append('.')

for line in sys.stdin:
    line = line.strip()  # trim spaces from beginning and end
    keys = line.split()  # split line by space
    for key in keys:
        value = 1
        # for each word generate 'word TAB 1' line
        print("%s\t%d" % (key, value))
#!/usr/bin/env python
# The line above tells the shell to run this file with Python.
# This mapper reads lines of text from stdin and, for every word, writes a
# 'word TAB 1' line to stdout. Punctuation marks are removed from the text
# before it is split into words.
import sys
import string

sys.path.append('.')

# Characters deleted from every input line (the ASCII punctuation set).
PUNCTUATION = frozenset(string.punctuation)


def remove_punctuation(text):
    """Return *text* with every character in ``string.punctuation`` deleted.

    Punctuation is removed wherever it occurs, not only at the two ends of
    the string, so "Hello, world!" becomes "Hello world".
    """
    return "".join(ch for ch in text if ch not in PUNCTUATION)


def extract_keys(line):
    """Return the words of *line* with punctuation removed.

    The line is split on runs of whitespace after punctuation has been
    deleted, so a token made up only of punctuation (e.g. "--") produces no
    key, and leading/trailing whitespace (including the newline) is ignored.
    """
    return remove_punctuation(line).split()


def main():
    """Read lines from stdin and print one 'word TAB 1' line per word."""
    for line in sys.stdin:
        for key in extract_keys(line):
            value = 1
            # for each word generate 'word TAB 1' line
            print("%s\t%d" % (key, value))


if __name__ == "__main__":
    main()
Get Answers For Free
Most questions are answered within 1 hour.