Does anyone have a Python function that extracts hashtags from text using the same rules Twitter applies for its own hashtag extraction? I've checked Stack Overflow but haven't found any up-to-date code.
Conversation
def extract_hashtags(text):
    """Extract the hashtags in *text*, approximating Twitter's rules.

    A hashtag is a ``#`` followed by one or more word characters, provided
    that the ``#`` is not attached to a preceding word character or ``&``
    and that the tag contains at least one letter.  This is an
    approximation: hashtags inside URLs (e.g. fragments) are not filtered.

    Args:
        text: The string to scan.

    Returns:
        A set of the hashtags found, without the leading ``#``.
    """
    import re  # local import so the snippet runs when pasted on its own

    valid_tags = set()
    # The lookbehind rejects a '#' glued to the previous token ("foo#bar")
    # and HTML numeric entities ("&#39;"), neither of which is a hashtag.
    tags = re.findall(r'(?<![\w&])#(\w+)', text)
    for tag in tags:
        # Twitter requires at least one letter, so "#123" and "#_" are
        # skipped while "#2nd" is kept.
        if any(ch.isalpha() for ch in tag):
            valid_tags.add(tag)
    return valid_tags
2
1
2
def extract_hashtags(text):
    """Return the set of hashtags in *text*, approximating Twitter's rules.

    A ``#`` only starts a hashtag when it is not attached to a preceding
    word character or ``&``, and a tag must contain at least one letter.
    Hashtags inside URLs (e.g. fragments) are not filtered out.

    Args:
        text: The string to scan.

    Returns:
        A set of the hashtags found, without the leading ``#``.
    """
    import re  # local import so the snippet runs when pasted on its own

    # The lookbehind skips "foo#bar" and HTML entities such as "&#39;".
    candidates = re.findall(r'(?<![\w&])#(\w+)', text)
    # Requiring a letter drops "#123" and "#_" but keeps "#2nd".
    return {tag for tag in candidates if any(ch.isalpha() for ch in tag)}
(more concise)
