Clean up numbers for MfA
For the HSI data
from pathlib import Path
# Hard-coded, machine-specific absolute path. Not referenced by the visible
# code; presumably the root directory of the HSI/MFA data -- TODO confirm.
BASEPATH = Path("/home/joregan/hsi_mfa")
%pip install num2words
from num2words import num2words
import re
def get_words(word):
    """Spell out a single numeric token as English words.

    The token is tested against the patterns below in order; the first
    match wins, so the order matters (a four-digit 20xx year must be
    checked before the generic integer case).

    Handled forms:
      * decades          "20s".."90s"      -> cardinal with "y" -> "ies"
      * ordinals         "<digits>th/st/nd/rd" -> ordinal words
      * grouped integers "1,000", "12,345,678" -> cardinal words
      * dollar amounts   "$5", "$1,000"    -> cardinal words + " dollars"
      * years 2010-2099  "2019"            -> "twenty " + last two digits
      * plain integers   "42"              -> cardinal words

    Args:
        word: One whitespace-free token of text.

    Returns:
        The spelled-out form if the token matches one of the patterns,
        otherwise ``word`` unchanged.
    """
    # Decades: the cardinal for 20..90 ends in "y" ("twenty"), so replace
    # that final letter with "ies" to get the plural ("twenties").
    if re.match(r"^[2-9]0s$", word):
        oword = num2words(int(word[0:-1]), lang="en")
        return oword[:-1] + "ies"
    # Ordinals: strip the two-letter suffix and let num2words build the word.
    if re.match(r"^[0-9]+(?:th|st|nd|rd)$", word):
        return num2words(int(word[0:-2]), lang="en", to="ordinal")
    # Integers with thousands separators (at least one ",ddd" group).
    if re.match(r"^[0-9]+(?:,[0-9][0-9][0-9])+$", word):
        return num2words(int(word.replace(",", "")), lang="en")
    # Dollar amounts, with or without thousands separators. Raw string so
    # that "\$" is a regex escape rather than an invalid string escape.
    if re.match(r"^\$[0-9]+(?:,[0-9][0-9][0-9])*$", word):
        return num2words(int(word[1:].replace(",", "")), lang="en") + " dollars"
    # Years 2010-2099 are read as "twenty" plus the two-digit remainder.
    if re.match(r"^20[1-9][0-9]$", word):
        return "twenty " + num2words(int(word[2:]), lang="en")
    # Any other bare integer. The original call omitted the string argument
    # to re.match, which raised TypeError for every token reaching this line.
    if re.match(r"^[0-9]+$", word):
        return num2words(int(word), lang="en")
    # Not a recognised numeric form: pass the token through untouched.
    return word
# Quick check: the leading "$" sends this token down the dollar-amount
# branch, so it is not treated as a 20xx year.
get_words("$2019")