Skip to content

Commit

Permalink
Fixing split() issue
Browse files (browse the repository at this point in the history)
  • Loading branch information
John Hawkins authored and committed on May 31, 2021
1 parent ee366f6 commit 14ac252
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion texturizer/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def load_word_list(filename, escape=False):
rawd = rawd[:-1]
if escape:
rawd = re.escape(rawd)
word_list = str(rawd).split('\n')
word_list = str(rawd).split(r"\n")
_list = [i for i in word_list if i]
return _list

Expand Down
2 changes: 1 addition & 1 deletion texturizer/profanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def prof_features(x, col):
mild_profanity = 0
else:
text = (x[col].lower())
word_array = text.split('\s+')
word_array = text.split()
hard_profanity = len(hard_re.findall(text))
mask_profanity = len(masked_re.findall(text))
if set(mild_profanity_list).intersection(word_array):
Expand Down
2 changes: 1 addition & 1 deletion texturizer/scarcity.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def cal_features(x, col):
else:
text = remove_urls_and_tags( remove_escapes_and_non_printable( x[col] ) ).lower()
text = text.translate(str.maketrans('', '', string.punctuation)).lower()
words = text.split('\s+')
words = text.split()
scarcities = list(map(get_scarcity, words))
mean_scarcity = statistics.mean(scarcities)
median_scarcity = statistics.median(scarcities)
Expand Down
6 changes: 3 additions & 3 deletions texturizer/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ def cal_features(x, col):
punct = sum(1 for c in x[col] if c in ['.','!','?',':',';','-',','])
capital_d = capitals/chars
punct_d = punct/chars
word_array = x[col].lower().split('\s+')
sentence_array = [ x for x in re.split("[.?]", x[col].lower()) if x]
line_array = [ x for x in re.split("[\r\n]+", x[col].lower()) if x]
word_array = x[col].lower().split()
sentence_array = [ x for x in re.split(r"[.?]", x[col].lower()) if x]
line_array = [ x for x in re.split(r"[\r\n]+", x[col].lower()) if x]
non_stop_words = list(set(word_array) - set(stop_word_list))
word_count = len(word_array)
sentence_count = len(sentence_array)
Expand Down

0 comments on commit 14ac252

Please sign in to comment.