Skip to content
Browse files

1) Add Emoticons parsing to the Tweet API.

2) Add tests for emoticons in parse_tweet feed
3) Update Authors to reflect this contribution and original parse_tweet API
  • Loading branch information...
1 parent b21bc03 commit 8279bade1f42da60c492751fd6dcbeaaf71bc212 Muthiah Annamalai committed Mar 7, 2015
Showing with 56 additions and 3 deletions.
  1. +2 −1 AUTHORS.rst
  2. +40 −2 twitter/parse_tweet.py
  3. +14 −0 twitter_test.py
View
3 AUTHORS.rst
@@ -25,6 +25,7 @@ Now it's a full-on open source project with many contributors over time:
* Lars Weiler,
* Sebastian Wiesinger,
* Jake Robinson,
+* Muthu Annamalai,
* abloch,
* cahlan,
* dpslwk,
@@ -33,4 +34,4 @@ Now it's a full-on open source project with many contributors over time:
* git-matrix,
* sbywater,
* thefinn93,
-* themylogin,
+* themylogin,
View
42 twitter/parse_tweet.py
@@ -4,11 +4,36 @@
from twitter import TwitterError # import not used?
class Emoticons:
    """Lookup tables of ASCII emoticons, grouped by sentiment.

    Both tables are plain lists of strings and are meant to be used for
    exact membership tests (``token in Emoticons.POSITIVE``).
    """

    # Emoticons treated as expressing a positive sentiment.
    # NOTE(review): "(:;)", "{;:]" and ";-=}" look like two emoticons fused
    # by a missing comma (e.g. "(:", ";)"); they are kept verbatim here --
    # confirm the intended entries before splitting them.
    # NOTE(review): "* *" contains a space, so it can never equal a token
    # produced by splitting a tweet on whitespace.
    POSITIVE = ["*O", "*-*", "*O*", "*o*", "* *",
                ":P", ":D", ":d", ":p",
                ";P", ";D", ";d", ";p",
                ":-)", ";-)", ":=)", ";=)",
                ":<)", ":>)", ";>)",
                "=}", ":)", "(:;)",
                "(;", ":}", "{:", ";}",
                "{;:]",
                "[;", ":')", ";')", ":-3",
                "{;", ":]",
                ";-3", ":-x", ";-x", ":-X",
                ";-X", ":-}", ";-=}", ":-]",
                ";-]", ":-.)",
                "^_^", "^-^"]

    # Emoticons treated as expressing a negative sentiment.
    # NOTE(review): ":,)", "[:" and ";]" read like smiling faces; verify
    # that they belong in this table.
    NEGATIVE = [":(", ";(", ":'(",
                "=(", "={", "):", ");",
                ")':", ")';", ")=", "}=",
                ";-{{", ";-{", ":-{{", ":-{",
                ":-(", ";-(",
                ":,)", ":'{",
                "[:", ";]"]
class ParseTweet:
# compile once on import
# Pattern sources, keyed by the tweet feature they detect:
#   RT / MT  - retweet / modified-tweet marker at the start of the text
#   ALNUM    - an @handle
#   HASHTAG  - a #hashtag
#   URL      - a dotted host/path, optionally preceded by http:// or https://
#   SPACES   - a run of whitespace, used to split a tweet into tokens
# The optional URL scheme is a non-capturing group: a bracketed
# "[https://|http://]" would be a character class matching one character,
# which drops the scheme from the captured URL. The pattern keeps exactly
# one capturing group so findall() still yields plain strings.
regexp = {"RT": "^RT", "MT": r"^MT", "ALNUM": r"(@[a-zA-Z0-9_]+)",
          "HASHTAG": r"(#[\w\d]+)",
          "URL": r"((?:https?://)?[a-zA-Z\d\/]+[\.]+[a-zA-Z\d\/\.]+)",
          "SPACES": r"\s+"}
regexp = dict((key, re.compile(value)) for key, value in regexp.items())
def __init__(self, timeline_owner, tweet):
@@ -25,7 +50,8 @@ def __init__(self, timeline_owner, tweet):
self.URLs = ParseTweet.getURLs(tweet)
self.RT = ParseTweet.getAttributeRT(tweet)
self.MT = ParseTweet.getAttributeMT(tweet)
-
+ self.Emoticon = ParseTweet.getAttributeEmoticon(tweet)
+
# additional intelligence
if ( self.RT and len(self.UserHandles) > 0 ): # change the owner of tweet?
self.Owner = self.UserHandles[0]
@@ -37,6 +63,18 @@ def __str__(self):
self.Owner, len(self.URLs), len(self.Hashtags), len(self.UserHandles), len(self.tweet), self.RT, self.MT)
@staticmethod
def getAttributeEmoticon(tweet):
    """Collect the emoticons that appear in a tweet.

    The stripped tweet text is split on runs of whitespace and every
    token is compared, as an exact string, against Emoticons.POSITIVE
    and Emoticons.NEGATIVE.

    Returns a list of the matching tokens in order of appearance; an
    emoticon that occurs several times is listed several times.
    """
    tokens = ParseTweet.regexp["SPACES"].split(tweet.strip())
    return [
        token
        for token in tokens
        if token in Emoticons.POSITIVE or token in Emoticons.NEGATIVE
    ]
+
+ @staticmethod
def getAttributeRT(tweet):
    """Report whether the tweet is a retweet.

    Returns True when the stripped tweet text matches the "RT" pattern
    (text beginning with "RT"), otherwise False.
    """
    # Identity comparison is the idiomatic None check (PEP 8).
    return re.search(ParseTweet.regexp["RT"], tweet.strip()) is not None
View
14 twitter_test.py
@@ -651,15 +651,29 @@ def testParseTweets(self):
data = twitter.ParseTweet("@twitter", hashtag_n_URL)
self.assertEqual([len(data.Hashtags), len(data.URLs)], [2, 1])
+ self.assertEqual(len(data.Emoticon),0)
url_only = u"""The #Rainbow #Nebula, 544,667 #lightyears away. pic.twitter.com/2A4wSUK25A""";
data = twitter.ParseTweet("@twitter", url_only)
self.assertEqual([data.MT, len(data.Hashtags), len(data.URLs)], [False, 3, 1])
+ self.assertEqual(len(data.Emoticon),0)
url_handle = u"""RT ‏@BarackObama POTUS recommends Python-Twitter #unrelated picture pic.twitter.com/w8lFIfuUmI""";
data = twitter.ParseTweet("@twitter", url_handle)
self.assertEqual([data.RT, len(data.Hashtags), len(data.URLs), len(data.UserHandles)], [True, 1, 1, 1])
+ self.assertEqual(len(data.Emoticon),0)
def testEmoticon(self):
    """ParseTweet.Emoticon lists the emoticon tokens found in a tweet."""
    # One smiley inside an otherwise ordinary retweet with a URL.
    single_smiley = u"""RT ‏@BarackObama POTUS recommends :-) Python-Twitter #unrelated picture pic.twitter.com/w8lFIfuUmI"""
    parsed = twitter.ParseTweet("@twitter", single_smiley)
    observed = [parsed.RT, len(parsed.Hashtags), len(parsed.URLs), len(parsed.UserHandles)]
    self.assertEqual(observed, [True, 1, 1, 1])
    self.assertEqual(len(parsed.Emoticon), 1)

    # One positive and one negative emoticon, reported in tweet order.
    mixed_mood = u"""RT @cats ^-^ cute! But kitty litter :-( #unrelated picture"""
    parsed = twitter.ParseTweet("@cats", mixed_mood)
    observed = [parsed.RT, len(parsed.Hashtags), len(parsed.URLs), len(parsed.UserHandles)]
    self.assertEqual(observed, [True, 1, 0, 1])
    self.assertEqual(len(parsed.Emoticon), 2)
    self.assertEqual(parsed.Emoticon, ['^-^', ':-('])
class MockHTTPBasicAuthHandler(object):
'''A mock replacement for HTTPBasicAuthHandler'''

0 comments on commit 8279bad

Please sign in to comment.
Something went wrong with that request. Please try again.