-
Notifications
You must be signed in to change notification settings - Fork 0
/
co-occurence-analysis.py
56 lines (42 loc) · 1.57 KB
/
co-occurence-analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from word_freq_analysis import *
# Explore bigrams (pairs of co-occurring words) in the cleaned tweets and
# draw the 20 most frequent ones as a word network.
# NOTE(review): `bigrams`, `tweets_nsw_nc`, `itertools`, `collections`, `pd`,
# `nx` and `plt` are not defined in this file; presumably they all arrive via
# the star import from word_freq_analysis -- confirm.

# Create list of lists containing the bigrams of each tweet
terms_bigram = [list(bigrams(tweet)) for tweet in tweets_nsw_nc]

# View bigrams for the first tweet. Guarded so that an empty corpus does not
# raise IndexError before anything else has run.
if terms_bigram:
    print(f' co-occuring words{terms_bigram[0]}')

# Flatten the per-tweet lists into one list of bigrams. Stored under its own
# name so the `bigrams` callable used above is not overwritten by a list.
all_bigrams = list(itertools.chain.from_iterable(terms_bigram))

# Count how often each bigram occurs and keep the 20 most common
bigram_counts = collections.Counter(all_bigrams)
top_bigrams = bigram_counts.most_common(20)
print(f'Most common bi-grams {top_bigrams}')

# Convert to a pandas DataFrame with one row per (bigram, count) pair
bigram_df = pd.DataFrame(top_bigrams, columns=['bigram', 'count'])

# Create dictionary of bigrams and their counts. to_dict('records') on the
# transposed frame returns a list, so the mapping itself is bigram_dict[0].
bigram_dict = bigram_df.set_index('bigram').T.to_dict('records')
print(f'Bi-gram dictionary from dataframe {bigram_dict}')

# Create network plot
G = nx.Graph()

# Create connections between nodes: one edge per bigram, joining its two
# words, with the count scaled by 10 stored as the edge weight.
for (first_word, second_word), count in bigram_dict[0].items():
    G.add_edge(first_word, second_word, weight=(count * 10))

# NOTE(review): hard-coded node; looks like the search term of the analysed
# tweets -- confirm. Added once, after the edges.
G.add_node("china", weight=100)

fig, ax = plt.subplots(figsize=(10, 8))
pos = nx.spring_layout(G, k=1)

# Plot the network without built-in labels; labels are drawn manually below
nx.draw_networkx(
    G,
    pos,
    font_size=16,
    width=3,
    edge_color='grey',
    node_color='purple',
    with_labels=False,
    ax=ax,
)

# Create offset labels: each node name is drawn slightly right of and above
# the node's layout position.
for key, value in pos.items():
    x, y = value[0] + .135, value[1] + .045
    ax.text(
        x,
        y,
        s=key,
        bbox=dict(facecolor='red', alpha=0.25),
        horizontalalignment='center',
        fontsize=13,
    )

plt.show()