Data Analytics/Python cheat sheets
Python 한글워드클라우드 만들기
peter was here
2020. 1. 4. 12:26
728x90
프로젝트명 : 한글 워드클라우드 만들기¶
1. 한글 자연어 처리 라이브러리 설치¶
In [2]:
# !pip install KoNLPy
In [ ]:
from konlpy.tag import Twitter
from collections import Counter
2. 데이터 불러오기¶
In [42]:
# Load the corpus: `lists` holds one string per line of the input file
# (trailing newlines are kept, exactly as readlines() returns them).
# The `with` block closes the file even if reading raises, and the explicit
# encoding stops Korean text from being decoded with a platform-dependent
# default codec.
# NOTE(review): assumes the text file is saved as UTF-8 — change the
# `encoding` argument if the corpus uses another codec (e.g. cp949).
with open('텍스트파일 경로', 'r', encoding='utf-8') as file:
    lists = file.readlines()
lists
3. 형태소 분석¶
In [69]:
# Run the part-of-speech tagger over every input line. `morphs` ends up with
# one entry per line of `lists`, each entry being whatever `twitter.pos`
# returns for that line (the next cell unpacks it as (word, tag) pairs).
# NOTE(review): newer KoNLPy releases rename this tagger to `Okt` — confirm
# against the installed version before upgrading.
twitter = Twitter()
morphs = [twitter.pos(line) for line in lists]
print(morphs)
In [70]:
# Collect the words to plot: only tokens tagged 'Noun' are kept, and a noun is
# dropped when it CONTAINS any of the substrings below. This is a substring
# test, not an exact match, so longer nouns that merely include one of these
# characters are filtered out as well.
excluded_substrings = ("것", "내", "나", "수", "게", "말")
noun_adj_adv_list = [
    word
    for tagged_line in morphs
    for word, tag in tagged_line
    if tag == 'Noun'
    and not any(fragment in word for fragment in excluded_substrings)
]
print(noun_adj_adv_list)
In [9]:
# Tally how many times each kept word occurs (word -> occurrence count).
count = Counter(noun_adj_adv_list)
In [10]:
# most_common() with no argument returns every (word, count) pair ordered from
# most to least frequent; dict() turns that into the word -> frequency mapping
# that is later passed to generate_from_frequencies().
words = dict(count.most_common())
4. 워드클라우드 만들기¶
- 워드클라우드 라이브러리 설치
In [15]:
# !pip install WordCloud
In [16]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
In [58]:
# IPython magic (notebook only): show matplotlib figures inline in the output.
%matplotlib inline
import matplotlib
from matplotlib import rc
# Switch matplotlib's default font family to NanumBarunGothic.
# NOTE(review): presumably chosen so Korean text renders in figures; assumes
# the font is installed on this machine — confirm.
rc('font', family='NanumBarunGothic')
In [61]:
from wordcloud import WordCloud

# Configure the renderer for an 800x800 canvas.
wordcloud = WordCloud(
    width=800,
    height=800,
    background_color='white',  # canvas background colour
    colormap='Accent_r',  # colormap the word colours are drawn from
    # A font with Korean glyphs must be set explicitly on macOS; this is the
    # macOS system font directory, so adjust the path on other platforms.
    font_path='/Library/Fonts/NanumBarunGothic.ttf',
)
# Lay out the cloud from the word -> frequency mapping built earlier.
# NOTE(review): the rendering cell reads the image from `wordcloud` itself, so
# this call is relied on to populate that object; `wordcloud_words` just keeps
# the call's return value.
wordcloud_words = wordcloud.generate_from_frequencies(words)
In [67]:
# Rasterise the generated cloud and report what came back.
array = wordcloud.to_array()
print(type(array))  # numpy.ndarray
print(array.shape)  # (800, 800, 3)

# Draw the image on a 10x10 inch figure with the axes hidden, display it,
# then write the same figure to a PNG in the current working directory.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(array, interpolation="bilinear")
ax.axis('off')
plt.show()
fig.savefig('business_anlytics_worldcloud.png')
In [ ]:
In [ ]:
728x90