import re
# Sample input for the demos below. The emoji is written as an escape
# sequence (U+1F601, "grinning face with smiling eyes") so the source stays
# pure ASCII for tools that cannot store emoji characters.
txt = "He is so happy \U0001F601"
# Compiled once at import time; the character class is a constant.
_BASIC_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # miscellaneous symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # regional indicator symbols (flag letters)
    "]+"
)


def deEmojify(text: str) -> str:
    """Return *text* with characters from four core emoji blocks removed.

    Only the ranges U+1F600-1F64F, U+1F300-1F5FF, U+1F680-1F6FF and
    U+1F1E0-1F1FF are stripped. Emoji outside those ranges (for example
    U+2764 or U+1F914), zero-width joiners and variation selectors are
    left untouched, as is any whitespace surrounding a removed emoji.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` without the matched characters.
    """
    return _BASIC_EMOJI_PATTERN.sub("", text)
print(deEmojify(txt))

# Output when txt ends with a space followed by a recognised emoji: only the
# emoji is stripped and the space before it is kept, so the printed text is
# 'He is so happy ' (quotes shown here just to make the trailing space
# visible).
15
# Full version of the emoji cleaner: redefines deEmojify with broader symbol coverage.
# Characters removed by the full cleaner. Every entry is a symbol/pictograph
# block or an individual emoji-related code point, so ordinary text in any
# script (CJK, kana, Hangul, ...) is never touched.
_EMOJI_PATTERN = re.compile(
    "["
    # Supplementary-plane pictograph blocks, Mahjong Tiles through Symbols
    # and Pictographs Extended-A. Covers emoticons, pictographs, transport
    # symbols, regional-indicator flags, skin-tone modifiers and U+1F251.
    "\U0001F000-\U0001FAFF"
    "\U000E0020-\U000E007F"  # tag characters (subdivision-flag sequences)
    # Box Drawing through Miscellaneous Symbols and Arrows; includes the
    # Miscellaneous Symbols (U+2600-26FF) and Dingbats (U+2700-27BF) blocks.
    # NOTE(review): this range is kept from the original and also removes
    # Braille patterns and mathematical operators - confirm that is wanted.
    "\u2500-\u2BEF"
    "\u200d"  # zero width joiner (glues emoji sequences together)
    "\u231a"  # watch
    "\u23cf"  # eject symbol
    "\u23e9"  # fast-forward (black right-pointing double triangle)
    "\u24c2"  # circled latin capital letter M
    "\ufe0f"  # variation selector-16 (requests emoji presentation)
    "\u3030"  # wavy dash
    "\u303d"  # part alternation mark
    "\u3297"  # circled ideograph congratulation
    "\u3299"  # circled ideograph secret
    "]+"
)


def deEmojify(text: str) -> str:
    """Return *text* with emoji and emoji-related code points removed.

    The previous character class contained the range U+24C2-U+1F251 (meant
    as two separate code points) and the range U+10000-U+10FFFF. Together
    they deleted every character from U+24C2 upwards, including all CJK,
    kana and Hangul text. The class is now limited to pictograph blocks and
    the individual code points listed in ``_EMOJI_PATTERN``.

    Whitespace around a removed emoji is preserved.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` without the matched characters.
    """
    return _EMOJI_PATTERN.sub("", text)
# Run the full cleaner on the same sample text.
print(deEmojify(txt))