Sentimental_analysis
[Notice] [ML_practical practice_4]
1) Library & Data Import
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
df = pd.read_csv("https://raw.githubusercontent.com/yoonkt200/FastCampusDataset/master/tripadviser_review.csv")
df.head()
rating | text | |
---|---|---|
0 | 4 | 여행에 집중할수 있게 편안한 휴식을 제공하는 호텔이었습니다. 위치선정 또한 적당한 ... |
1 | 4 | 2일 이상 연박시 침대, 이불, 베게등 침구류 교체 및 어메니티 보강이 필요해 보입... |
2 | 4 | 지인에소개로온 호텔 깨끗하고 좋은거같아요 처음에는 없는게 많아 많이 당황했는데 ... |
3 | 5 | 방에 딱 들어서자마자 눈이 휘둥그레질정도로 이렇게 넓은 호텔 처음 와본 것 같아요!... |
4 | 5 | 저녁에 맥주한잔 하는게 좋아서 렌트 안하고 뚜벅이 하기로 했는데 호텔 바로 앞에 버... |
Feature Description
-
rating : Rating score of user reviews
-
text: Contents of user review evaluation
2) Explore the dataset
2-1) Explore basic information
df.shape
(1001, 2)
df.isnull().sum()
rating 0 text 0 dtype: int64
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1001 entries, 0 to 1000 Data columns (total 2 columns): rating 1001 non-null int64 text 1001 non-null object dtypes: int64(1), object(1) memory usage: 15.8+ KB
df['text'][0]
'여행에 집중할수 있게 편안한 휴식을 제공하는 호텔이었습니다. 위치선정 또한 적당한 편이었고 청소나 청결상태도 좋았습니다.'
len(df['text'].values.sum())
223576
3) Korean text data preprocessing
3-1) Apply regular expression
import re
def apply_regular_expression(text):
hangul = re.compile('[^ ㄱ-ㅣ가-힣]')
result = hangul.sub('', text)
return result
apply_regular_expression(df['text'][0])
'여행에 집중할수 있게 편안한 휴식을 제공하는 호텔이었습니다 위치선정 또한 적당한 편이었고 청소나 청결상태도 좋았습니다'
3-2) Korean Morphological Analysis - Noun Units
Noun morpheme extraction
from konlpy.tag import Okt
from collections import Counter
nouns_tagger = Okt()
nouns = nouns_tagger.nouns(apply_regular_expression(df['text'][0]))
nouns
['여행', '집중', '휴식', '제공', '호텔', '위치', '선정', '또한', '청소', '청결', '상태']
nouns = nouns_tagger.nouns(apply_regular_expression("".join(df['text'].tolist())))
counter = Counter(nouns)
counter.most_common(10)
[('호텔', 803), ('수', 498), ('것', 436), ('방', 330), ('위치', 328), ('우리', 327), ('곳', 320), ('공항', 307), ('직원', 267), ('매우', 264)]
Remove Hangul nouns
available_counter = Counter({x : counter[x] for x in counter if len(x) > 1})
available_counter.most_common(10)
[('호텔', 803), ('위치', 328), ('우리', 327), ('공항', 307), ('직원', 267), ('매우', 264), ('가격', 245), ('객실', 244), ('시설', 215), ('제주', 192)]
3-3) stopword dictionary
# source - https://www.ranks.nl/stopwords/korean
stopwords = pd.read_csv("https://raw.githubusercontent.com/yoonkt200/FastCampusDataset/master/korean_stopwords.txt").values.tolist()
print(stopwords[:10])
[['휴'], ['아이구'], ['아이쿠'], ['아이고'], ['어'], ['나'], ['우리'], ['저희'], ['따라'], ['의해']]
jeju_hotel_stopwords = ['제주', '제주도', '호텔', '리뷰', '숙소', '여행', '트립']
for word in jeju_hotel_stopwords:
stopwords.append(word)
3-4) Word Count
Create BoW Vector
from sklearn.feature_extraction.text import CountVectorizer
def text_cleaning(text):
hangul = re.compile('[^ ㄱ-ㅣ가-힣]')
result = hangul.sub('', text)
tagger = Okt()
nouns = nouns_tagger.nouns(result)
nouns = [x for x in nouns if len(x) > 1]
nouns = [x for x in nouns if x not in stopwords]
return nouns
vect = CountVectorizer(tokenizer = lambda x: text_cleaning(x))
bow_vect = vect.fit_transform(df['text'].tolist())
word_list = vect.get_feature_names()
count_list = bow_vect.toarray().sum(axis=0)
word_list
['가가', '가게', '가격', '가격표', '가구', '가급', '가기', '가까이', '가끔', '가능', '가도', '가동', '가두', '가득', '가든', '가라', '가량', '가려움', '가로', '가면', '가몬', '가무', '가물', '가미', '가방', '가버', '가성', '가세', '가스레인지', '가스렌지', '가슴', '가시', '가신', '가야', '가옥', '가요', '가용', '가운데', '가을', '가인', '가장', '가정', '가정식', '가족', '가지', '가짓수', '가차', '가치', '가품', '각각', '각오', '각자', '각종', '각층', '간격', '간곳', '간다', '간단', '간만', '간식', '간이', '간주', '간직', '간판', '간혹', '갈껄', '갈비', '갈비탕', '갈수', '갈수록', '감각', '감동', '감명', '감사', '감상', '감소', '감안', '감자', '감히', '갑인', '갑자기', '갑작스레', '강남', '강력', '강아지', '강압', '강제', '강조', '강추', '개념', '개략', '개미', '개발', '개방', '개별', '개보', '개뿔', '개선', '개수대', '개월', '개인', '개인실', '개인정보', '개조', '개층', '객수', '객실', '갤러리', '갱스터', '거기', '거나', '거두', '거론', '거르세', '거름', '거리', '거린데', '거림', '거문도', '거미', '거부', '거실', '거여', '거울', '거위', '거의', '거절', '거주', '거지', '거참', '거품', '걱정', '건가', '건강', '건너', '건너편', '건물', '건의', '건조', '건조기', '건조대', '건축', '걷기', '걸음', '걸이', '걸즈', '검사', '검색', '검정색', '검토', '것임', '겉보기', '게다가', '게스트', '게스트하우스', '게임', '게재', '겐찮은듯', '겔상', '겨우', '겨울', '겨울철', '격인', '격하', '결과', '결론', '결석', '결재', '결정', '결제', '결코', '결함', '결항', '결혼', '결혼식', '겸비', '겸용', '겹겹', '경고', '경관', '경내', '경로', '경매', '경영', '경영학', '경우', '경쟁', '경쟁력', '경찰', '경치', '경험', '계단', '계란', '계란후라이', '계산', '계속', '계정', '계획', '고가', '고간', '고객', '고급', '고기', '고기국수', '고깃배', '고내포구', '고려', '고루', '고무줄', '고문', '고민', '고봉', '고분', '고생', '고속', '고속도로', '고아', '고양이', '고여', '고오', '고요', '고유', '고작', '고장', '고정', '고층', '고통', '고트', '고함', '고해', '곡부', '곧바로', '곧장', '골드스타', '골목', '골목길', '골퍼', '골프', '골프장', '골프텔', '곰팡이', '곱슬', '곳곳', '곳곳이', '곳도', '곳임', '공간', '공감', '공개', '공공', '공공장소', '공급', '공기', '공덕', '공률', '공물', '공사', '공시', '공실이', '공연', '공연장', '공영', '공용', '공원', '공유', '공짜', '공차', '공터', '공포', '공항', '과거', '과물', '과언', '과일', '과장', '관경', '관계', '관계자', '관광', '관광객', '관광명소', '관광지', '관덕정', '관련', '관리', '관리인', '관리자', '관리직', '관음사', '관해', '광경', '광고', '광천수', '괴체', '교대', '교수', '교외', '교욱받', '교육', '교체', '교통', '교환', '교회', '구가', '구경', '구경만', '구관', '구글', '구나', '구내', '구덩이', '구도', '구두', '구둣주걱', '구들장', '구류', '구만', '구매', '구멍', '구별', '구분', '구비', '구사', '구색', '구석', '구석구석', '구성', '구식', '구암', '구역', '구역질', '구이', '구입', '구조', '구축', '국가', '국내', '국도', '국립', '국수', '국적', '국제', '국제공항', '군더더기', '군데', '군데군데', '굳럭', '굳이', '굿굿', '굿굿굿', '굿앤굿', '굿임', '권내', '권장', '권한', '귀중', '규모', '규율', '규칙', '균형', '그거', '그것', '그게', '그냥', '그네', '그녀', '그다음', '그다지', '그닥', '그대로', '그동안', '그때', '그랜드', '그레이스', '그로', '그룹', '그릇', '그린', '그림', '극복', '극악', '근래', '근무', '근본', '근육통', '근처', '근해', '글래드', '글쎄', '금고', '금늘', '금능', '금릉', '금방', '금속', '금액', '금연', '금요일', '금은', '금지', '금토일', '급상승', '급속', '기간', '기계', '기구', '기기', '기념일', '기능', '기대', '기도', '기류', '기리', '기반', '기본', '기부', '기분', '기사', '기상', '기소', '기숙사', '기술', '기술자', '기억', '기업', '기여', '기용', '기우', '기입', '기적', '기전', '기점', '기존', '기준', '기지', '기타', '기프트샵', '기호', '기회', '기후', '긴장', '길가', '길림', '길목', '길이', '김녕', '김녕해변', '김밥', '김씨', '김치', '김포공항', '까페', '깜빡', '깜짝', '깨끗', '깨끗깔끔', '께빵', '꼭대기', '꽃꺽으러', '꽃사슴', '꾸러미', '꾸밈', '꿀잠', '끝내기', '끼리', '나기', '나누기', '나니', '나라', '나름', '나머지', '나머진', '나무', '나물', '나보', '나오니', '나우', '나은', '나이', '나이트', '나이프', '나중', '나탈리', '낙후', '낚시', '난로', '난리', '난방', '난입', '난타', '날수', '날씨', '날짜', '남녀', '남성', '남아', '남자', '남자친구', '남짓', '남쪽', '남편', '낭만', '내겐', '내내', '내년', '내부', '내부시', '내시', '내야', '내외', '내용', '내의', '내인', '내일', '냄비', '냄새', '냉동', '냉장고', '너븐팡', '넓이', '네스프레소', '네이버', '년대', '년전', '녔던', '노곤', '노래', '노래방', '노력', '노리', '노블레스', '노선', '노을', '노크', '노트북', '노화', '노후', '녹물', '녹음', '녹지', '논평', '놀러와', '놀수', '놀이', '놀이기구', '놀이터', '농부가', '농장', '높이', '놨더군', '누가', '누구', '누군가', '누락', '누리', '누울', '눈앞', '뉴타운', '느낌', '는걸', '늘송', '능리', '다가', '다그', '다다미', '다라', '다락방', '다른', '다른사람', '다리미', '다만', '다미', '다발', '다섯', '다소', '다수', '다시', '다운', '다음', '다이지', '다인', '다정', '다행', '단계', '단기', '단면', '단어', '단위', '단점', '단정', '단지', '단체', '달걀', '달걀프라이', '달라', '달러', '달리', '달성', '닭머르', '담당', '담배', '담소', '담요', '답변', '당구', '당근', '당나귀', '당분간', '당시', '당신', '당일', '당황', '대가', '대가족', '대고', '대관령', '대답', '대당', '대도', '대도시', '대뜸', '대략', '대로', '대리', '대명', '대박', '대부분', '대비', '대상', '대신', '대안', '대여', '대요', '대욕', '대응', '대의', '대입', '대적', '대접', '대정', '대중', '대중교통', '대처', '대체', '대충', '대포', '대표', '대하', '대한', '대한민국', '대한항공', '대해', '대행', '대형', '대화', '대환영', '댐핑할', '더군다나', '더더', '더러', '더블', '더블베드', '더욱', '더원', '덕림사', '덕분', '덕택', '던데', '덮어놓고', '데리', '데스크', '데스크톱', '데이', '데이즈', '델문', '도구', '도달', '도대체', '도도', '도둑', '도로', '도록', '도리어', '도미', '도보', '도서관', '도시', '도시락', '도심', '도심지', '도어', '도어락', '도움', '도움말', '도일', '도정', '도중', '도착', '도처', '도청', '도쿄', '도크', '독립', '독서', '독점', '독채', '돈까스', '돌담', '돌잔치', '동계', '동광양', '동굴', '동남', '동남아', '동네', '동도', '동료', '동문', '동물', '동물원', '동반', '동부', '동북', '동생', '동선', '동시', '동안', '동영상', '동의', '동이', '동인', '동작', '동전', '동정', '동쪽', '돼지', '돼지고기', '됏다', '될껀', '될껄', '두루', '두번째', '두봉', '두부', '두엄', '두운', '두툼', '둘러보기', '둘이서', '둘째', '둥근지붕', '뒤쪽', '뒤척', '뒷골목', '뒷마당', '뒷문', '뒷쪽', '드네', '드라이기', '드라이버', '드라이브', '드라이어', '드롭', '드릴', '드타', '드하', '득시', '듭니', '듯이', '듯해', '등급', '등대', '등등', '등반', '등산', '등정후', '디귿', '디너', '디럭스', '디봇', '디셈버', '디자이너', '디자인', '디저트', '디제이', '따라서', '때로는', '때문', '떡국', '또오', '또한', '뚜벅', '뜨근뜨근', '뜨내기', '라그', '라마', '라며', '라면', '라서', '라스베가스', '라우터', '라운지', '라이센스', '라커룸', '락스', '락심이', '락커', '락타', '란딩', '랍니', '랜드', '랜트', '랜트카', '랜트하', '램프', '러닝', '러브', '럭셔리', '런가', '렀는데', '렀습니', '렀으', '레노', '레드', '레벨', '레비', '레스토랑', '레시', '레오', '레이', '레이크', '레인지', '레저', '레프트', '렌즈', '렌탈업체', '렌터', '렌터카', '렌트', '렌트카', '려고', '려운', '로고', '로그', '로만', '로맨틱', '로부터', '로비', '로서', '로션', '로얄', '로움', '로컬', '로터리', '로프트', '롯데', '롯데리아', '롱보드', '루온토', '루트', '루프', '룸메이트', '룸바닥', '룸상태', '룸서비스', '룸안', '룸키', '룸타입', '를위', '리가', '리기', '리넨', '리뉴', '리뉴얼', '리더', '리도', '리모콘', '리베라', '리베로', '리빙룸', '리셉션', '리움', '리젠시', '리조트', '리지', '리치', '리트', '리플렛', '린스', '링잉', '마누카꿀', '마늘', '마다', '마담', '마당', '마레', '마련', '마루', '마리', '마모', '마무리', '마사지', '마술', '마스코트', '마스크', '마스터', '마시기', '마안', '마운트', '마을', '마음', '마이너스', '마인드', '마일리지', '마자', '마저', '마주', '마지막', '마지막여행', '마차', '마찬가지', '마치', '마침내', '마켓', '마트', '마틸다', '막걸리', '만끽', '만난', '만날', '만남', '만다린', '만두', '만들기', '만료', '만약', '만요', '만원', '만점', '만족', '만족도', '만천원', '만큼', '만하', '만해', '만화책', '말레이시아', '말레이시아인', '말로', '말리', '말씀', '말투', '말함', '맘스', '맛사지', '맛잇엇어', '맛집', '망각', '망신', '망치', '맞은편', '맞이', '매년', '매니', '매니저', '매달', '매듭', '매력', '매번', '매우', '매운탕', '매일', '매장', '매점', '매칭', '매트', '매트리스', '매트릭스', '매하', '맥도날드', '맥도널드', '맥주', '맥주잔', '맨발', '머리', '머리카락', '머신', '머싱', '먹거리', '먹기', '먹방', '먹이', '먼저', '먼지', '멀리', '메가박스', '메뉴', '메리', '메리어트', '메시지', ...]
count_list
array([ 4, 8, 245, ..., 1, 7, 14])
bow_vect.shape
(1001, 3599)
bow_vect.toarray()
array([[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 2, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]])
bow_vect.toarray().sum(axis=0)
array([ 4, 8, 245, ..., 1, 7, 14])
bow_vect.toarray().sum(axis=0).shape
(3599,)
word_count_dict = dict(zip(word_list, count_list))
word_count_dict
{'가가': 4, '가게': 8, '가격': 245, '가격표': 1, '가구': 8, '가급': 1, '가기': 20, '가까이': 20, '가끔': 5, '가능': 10, '가도': 7, '가동': 2, '가두': 1, '가득': 2, '가든': 1, '가라': 3, '가량': 1, '가려움': 1, '가로': 2, '가면': 14, '가몬': 1, '가무': 1, '가물': 1, '가미': 1, '가방': 4, '가버': 1, '가성': 49, '가세': 3, '가스레인지': 1, '가스렌지': 1, '가슴': 1, '가시': 4, '가신': 3, '가야': 10, '가옥': 1, '가요': 5, '가용': 1, '가운데': 3, '가을': 4, '가인': 1, '가장': 42, '가정': 4, '가정식': 2, '가족': 94, '가지': 55, '가짓수': 3, '가차': 1, '가치': 15, '가품': 1, '각각': 7, '각오': 1, '각자': 2, '각종': 3, '각층': 1, '간격': 2, '간곳': 1, '간다': 4, '간단': 1, '간만': 1, '간식': 5, '간이': 3, '간주': 1, '간직': 1, '간판': 2, '간혹': 1, '갈껄': 1, '갈비': 1, '갈비탕': 1, '갈수': 7, '갈수록': 1, '감각': 1, '감동': 12, '감명': 1, '감사': 6, '감상': 3, '감소': 1, '감안': 5, '감자': 1, '감히': 1, '갑인': 1, '갑자기': 4, '갑작스레': 1, '강남': 1, '강력': 9, '강아지': 7, '강압': 2, '강제': 1, '강조': 1, '강추': 8, '개념': 1, '개략': 1, '개미': 1, '개발': 3, '개방': 2, '개별': 3, '개보': 1, '개뿔': 1, '개선': 4, '개수대': 1, '개월': 1, '개인': 23, '개인실': 1, '개인정보': 2, '개조': 5, '개층': 1, '객수': 1, '객실': 244, '갤러리': 2, '갱스터': 1, '거기': 24, '거나': 6, '거두': 1, '거론': 1, '거르세': 1, '거름': 2, '거리': 156, '거린데': 1, '거림': 1, '거문도': 1, '거미': 1, '거부': 4, '거실': 29, '거여': 1, '거울': 5, '거위': 1, '거의': 27, '거절': 3, '거주': 1, '거지': 1, '거참': 1, '거품': 2, '걱정': 27, '건가': 1, '건강': 2, '건너': 8, '건너편': 11, '건물': 55, '건의': 1, '건조': 2, '건조기': 3, '건조대': 2, '건축': 2, '걷기': 2, '걸음': 3, '걸이': 2, '걸즈': 1, '검사': 1, '검색': 13, '검정색': 1, '검토': 3, '것임': 3, '겉보기': 2, '게다가': 5, '게스트': 25, '게스트하우스': 30, '게임': 2, '게재': 1, '겐찮은듯': 1, '겔상': 1, '겨우': 3, '겨울': 15, '겨울철': 2, '격인': 1, '격하': 1, '결과': 2, '결론': 3, '결석': 1, '결재': 2, '결정': 12, '결제': 1, '결코': 2, '결함': 1, '결항': 2, '결혼': 1, '결혼식': 2, '겸비': 1, '겸용': 1, '겹겹': 2, '경고': 1, '경관': 3, '경내': 1, '경로': 1, '경매': 1, '경영': 2, '경영학': 1, '경우': 41, '경쟁': 1, '경쟁력': 2, '경찰': 2, '경치': 17, '경험': 26, '계단': 4, '계란': 11, '계란후라이': 1, '계산': 2, '계속': 23, '계정': 1, '계획': 13, '고가': 1, '고간': 1, '고객': 14, '고급': 8, '고기': 8, '고기국수': 1, '고깃배': 1, '고내포구': 1, '고려': 9, '고루': 1, '고무줄': 1, '고문': 2, '고민': 9, '고봉': 1, '고분': 2, '고생': 1, '고속': 2, '고속도로': 2, '고아': 1, '고양이': 3, '고여': 1, '고오': 1, '고요': 3, '고유': 2, '고작': 1, '고장': 3, '고정': 3, '고층': 2, '고통': 1, '고트': 1, '고함': 2, '고해': 1, '곡부': 1, '곧바로': 2, '곧장': 2, '골드스타': 1, '골목': 6, '골목길': 2, '골퍼': 2, '골프': 9, '골프장': 5, '골프텔': 2, '곰팡이': 14, '곱슬': 1, '곳곳': 4, '곳곳이': 1, '곳도': 8, '곳임': 2, '공간': 73, '공감': 1, '공개': 1, '공공': 2, '공공장소': 1, '공급': 2, '공기': 8, '공덕': 1, '공률': 1, '공물': 1, '공사': 12, '공시': 1, '공실이': 1, '공연': 8, '공연장': 2, '공영': 1, '공용': 8, '공원': 17, '공유': 5, '공짜': 1, '공차': 1, '공터': 1, '공포': 1, '공항': 307, '과거': 1, '과물': 2, '과언': 1, '과일': 9, '과장': 2, '관경': 1, '관계': 3, '관계자': 2, '관광': 38, '관광객': 15, '관광명소': 4, '관광지': 12, '관덕정': 4, '관련': 6, '관리': 39, '관리인': 1, '관리자': 3, '관리직': 2, '관음사': 1, '관해': 5, '광경': 2, '광고': 4, '광천수': 1, '괴체': 1, '교대': 1, '교수': 1, '교외': 1, '교욱받': 1, '교육': 5, '교체': 7, '교통': 30, '교환': 2, '교회': 2, '구가': 3, '구경': 7, '구경만': 1, '구관': 4, '구글': 2, '구나': 2, '구내': 1, '구덩이': 1, '구도': 1, '구두': 2, '구둣주걱': 1, '구들장': 1, '구류': 1, '구만': 2, '구매': 14, '구멍': 7, '구별': 1, '구분': 3, '구비': 11, '구사': 6, '구색': 2, '구석': 2, '구석구석': 5, '구성': 7, '구식': 1, '구암': 1, '구역': 3, '구역질': 2, '구이': 1, '구입': 5, '구조': 12, '구축': 1, '국가': 3, '국내': 1, '국도': 1, '국립': 1, '국수': 3, '국적': 3, '국제': 11, '국제공항': 1, '군더더기': 1, '군데': 8, '군데군데': 2, '굳럭': 1, '굳이': 7, '굿굿': 1, '굿굿굿': 1, '굿앤굿': 1, '굿임': 1, '권내': 1, '권장': 5, '권한': 2, '귀중': 1, '규모': 12, '규율': 2, '규칙': 1, '균형': 1, '그거': 3, '그것': 70, '그게': 1, '그냥': 42, '그네': 1, '그녀': 20, '그다음': 1, '그다지': 4, '그닥': 4, '그대로': 11, '그동안': 4, '그때': 3, '그랜드': 6, '그레이스': 3, '그로': 3, '그룹': 9, '그릇': 3, '그린': 1, '그림': 4, '극복': 1, '극악': 1, '근래': 1, '근무': 4, '근본': 1, '근육통': 1, '근처': 164, '근해': 1, '글래드': 3, '글쎄': 2, '금고': 2, '금늘': 1, '금능': 2, '금릉': 1, '금방': 3, '금속': 1, '금액': 8, '금연': 6, '금요일': 1, '금은': 1, '금지': 1, '금토일': 1, '급상승': 1, '급속': 1, '기간': 3, '기계': 4, '기구': 2, '기기': 4, '기념일': 1, '기능': 4, '기대': 15, '기도': 7, '기류': 3, '기리': 1, '기반': 4, '기본': 45, '기부': 1, '기분': 29, '기사': 8, '기상': 1, '기소': 1, '기숙사': 7, '기술': 3, '기술자': 1, '기억': 11, '기업': 2, '기여': 1, '기용': 1, '기우': 1, '기입': 1, '기적': 1, '기전': 1, '기점': 1, '기존': 1, '기준': 4, '기지': 1, '기타': 5, '기프트샵': 2, '기호': 1, '기회': 11, '기후': 1, '긴장': 1, '길가': 4, '길림': 1, '길목': 2, '길이': 2, '김녕': 1, '김녕해변': 1, '김밥': 1, '김씨': 1, '김치': 4, '김포공항': 1, '까페': 5, '깜빡': 1, '깜짝': 3, '깨끗': 5, '깨끗깔끔': 1, '께빵': 1, '꼭대기': 2, '꽃꺽으러': 1, '꽃사슴': 1, '꾸러미': 1, '꾸밈': 1, '꿀잠': 2, '끝내기': 1, '끼리': 18, '나기': 2, '나누기': 6, '나니': 1, '나라': 2, '나름': 13, '나머지': 6, '나머진': 1, '나무': 13, '나물': 1, '나보': 1, '나오니': 2, '나우': 1, '나은': 5, '나이': 3, '나이트': 2, '나이프': 2, '나중': 8, '나탈리': 2, '낙후': 3, '낚시': 3, '난로': 3, '난리': 3, '난방': 30, '난입': 2, '난타': 9, '날수': 1, '날씨': 12, '날짜': 1, '남녀': 1, '남성': 2, '남아': 5, '남자': 6, '남자친구': 2, '남짓': 1, '남쪽': 1, '남편': 10, '낭만': 2, '내겐': 1, '내내': 8, '내년': 1, '내부': 40, '내부시': 1, '내시': 1, '내야': 1, '내외': 2, '내용': 2, '내의': 2, '내인': 1, '내일': 2, '냄비': 1, '냄새': 58, '냉동': 1, '냉장고': 35, '너븐팡': 2, '넓이': 1, '네스프레소': 1, '네이버': 3, '년대': 2, '년전': 1, '녔던': 1, '노곤': 2, '노래': 1, '노래방': 3, '노력': 8, '노리': 1, '노블레스': 1, '노선': 2, '노을': 1, '노크': 1, '노트북': 2, '노화': 1, '노후': 6, '녹물': 1, '녹음': 4, '녹지': 1, '논평': 1, '놀러와': 2, '놀수': 1, '놀이': 3, '놀이기구': 2, '놀이터': 2, '농부가': 1, '농장': 3, '높이': 2, '놨더군': 1, '누가': 5, '누구': 5, '누군가': 4, '누락': 1, '누리': 1, '누울': 2, '눈앞': 3, '뉴타운': 1, '느낌': 49, '는걸': 2, '늘송': 3, '능리': 1, '다가': 1, '다그': 1, '다다미': 1, '다라': 1, '다락방': 1, '다른': 88, '다른사람': 1, '다리미': 2, '다만': 54, '다미': 1, '다발': 1, '다섯': 1, '다소': 21, '다수': 2, '다시': 93, '다운': 4, '다음': 102, '다이지': 1, '다인': 1, '다정': 2, '다행': 3, '단계': 4, '단기': 1, '단면': 1, '단어': 2, '단위': 2, '단점': 40, '단정': 1, '단지': 16, '단체': 19, '달걀': 3, '달걀프라이': 1, '달라': 13, '달러': 7, '달리': 6, '달성': 1, '닭머르': 1, '담당': 2, '담배': 19, '담소': 2, '담요': 1, '답변': 3, '당구': 2, '당근': 2, '당나귀': 2, '당분간': 1, '당시': 1, '당신': 21, '당일': 3, '당황': 7, '대가': 3, '대가족': 2, '대고': 1, '대관령': 1, '대답': 3, '대당': 1, '대도': 3, '대도시': 2, '대뜸': 1, '대략': 6, '대로': 8, '대리': 3, '대명': 1, '대박': 3, '대부분': 23, '대비': 64, '대상': 1, '대신': 8, '대안': 2, '대여': 3, '대요': 2, '대욕': 1, '대응': 2, '대의': 4, '대입': 1, '대적': 1, '대접': 1, '대정': 1, '대중': 9, '대중교통': 6, '대처': 2, '대체': 2, '대충': 3, '대포': 1, '대표': 4, '대하': 1, '대한': 19, '대한민국': 2, '대한항공': 1, '대해': 21, '대행': 1, '대형': 10, '대화': 11, '대환영': 1, '댐핑할': 1, '더군다나': 1, '더더': 2, '더러': 1, '더블': 29, '더블베드': 4, '더욱': 5, '더원': 1, '덕림사': 1, '덕분': 6, '덕택': 3, '던데': 1, '덮어놓고': 1, '데리': 5, '데스크': 30, '데스크톱': 1, '데이': 1, '데이즈': 1, '델문': 2, '도구': 18, '도달': 3, '도대체': 1, '도도': 1, '도둑': 1, '도로': 41, '도록': 1, '도리어': 1, '도미': 9, '도보': 35, '도서관': 1, '도시': 18, '도시락': 4, '도심': 14, '도심지': 1, '도어': 3, '도어락': 1, '도움': 51, '도움말': 1, '도일': 1, '도정': 1, '도중': 2, '도착': 69, '도처': 1, '도청': 2, '도쿄': 1, '도크': 1, '독립': 6, '독서': 1, '독점': 1, '독채': 5, '돈까스': 1, '돌담': 1, '돌잔치': 1, '동계': 1, '동광양': 1, '동굴': 1, '동남': 1, '동남아': 2, '동네': 7, '동도': 1, '동료': 2, '동문': 14, '동물': 9, '동물원': 2, '동반': 3, '동부': 2, '동북': 1, '동생': 3, '동선': 3, '동시': 7, '동안': 48, '동영상': 1, '동의': 3, '동이': 1, '동인': 2, '동작': 1, '동전': 1, '동정': 1, '동쪽': 5, '돼지': 16, '돼지고기': 4, '됏다': 1, '될껀': 1, '될껄': 1, '두루': 2, '두번째': 2, '두봉': 2, '두부': 1, '두엄': 1, '두운': 2, '두툼': 1, '둘러보기': 1, '둘이서': 3, '둘째': 5, '둥근지붕': 1, '뒤쪽': 4, '뒤척': 1, '뒷골목': 1, '뒷마당': 1, '뒷문': 1, '뒷쪽': 2, '드네': 1, '드라이기': 7, '드라이버': 1, '드라이브': 11, '드라이어': 11, '드롭': 1, '드릴': 1, '드타': 1, '드하': 2, '득시': 1, '듭니': 5, '듯이': 1, '듯해': 1, '등급': 3, '등대': 3, '등등': 8, '등반': 3, '등산': 6, '등정후': 1, '디귿': 1, '디너': 4, '디럭스': 6, '디봇': 1, '디셈버': 2, '디자이너': 1, '디자인': 11, '디저트': 1, '디제이': 2, '따라서': 4, '때로는': 1, '때문': 112, '떡국': 2, '또오': 1, '또한': 76, '뚜벅': 3, '뜨근뜨근': 1, '뜨내기': 1, '라그': 1, '라마': 4, '라며': 3, '라면': 15, '라서': 1, '라스베가스': 1, '라우터': 1, '라운지': 9, '라이센스': 1, '라커룸': 1, '락스': 2, '락심이': 1, '락커': 2, '락타': 1, '란딩': 1, '랍니': 1, '랜드': 1, '랜트': 1, '랜트카': 1, '랜트하': 1, '램프': 2, '러닝': 1, '러브': 3, '럭셔리': 5, '런가': 2, '렀는데': 1, '렀습니': 2, '렀으': 1, '레노': 1, '레드': 1, '레벨': 1, '레비': 1, '레스토랑': 64, '레시': 1, '레오': 2, '레이': 1, '레이크': 1, '레인지': 3, '레저': 1, '레프트': 1, '렌즈': 1, '렌탈업체': 1, '렌터': 1, '렌터카': 4, '렌트': 17, '렌트카': 8, '려고': 4, '려운': 1, '로고': 1, '로그': 3, '로만': 1, '로맨틱': 2, '로부터': 2, '로비': 49, '로서': 2, '로션': 1, '로얄': 1, '로움': 1, '로컬': 3, '로터리': 1, '로프트': 1, '롯데': 6, '롯데리아': 2, '롱보드': 1, '루온토': 1, '루트': 1, '루프': 17, '룸메이트': 1, '룸바닥': 1, '룸상태': 2, '룸서비스': 9, '룸안': 1, '룸키': 2, '룸타입': 1, '를위': 1, '리가': 2, '리기': 1, '리넨': 1, '리뉴': 1, '리뉴얼': 1, '리더': 1, '리도': 1, '리모콘': 3, '리베라': 2, '리베로': 1, '리빙룸': 2, '리셉션': 29, '리움': 2, '리젠시': 1, '리조트': 53, '리지': 1, '리치': 1, '리트': 1, '리플렛': 1, '린스': 2, '링잉': 1, '마누카꿀': 1, '마늘': 1, '마다': 1, '마담': 2, '마당': 2, '마레': 2, '마련': 7, '마루': 5, '마리': 11, '마모': 1, '마무리': 3, '마사지': 4, '마술': 1, '마스코트': 2, '마스크': 1, '마스터': 2, '마시기': 2, '마안': 1, '마운트': 1, '마을': 9, '마음': 31, '마이너스': 1, '마인드': 4, '마일리지': 2, '마자': 2, '마저': 1, '마주': 4, '마지막': 21, '마지막여행': 1, '마차': 1, '마찬가지': 4, '마치': 12, '마침내': 3, '마켓': 9, '마트': 14, '마틸다': 2, '막걸리': 1, '만끽': 1, '만난': 1, '만날': 1, '만남': 1, '만다린': 2, '만두': 1, '만들기': 1, '만료': 1, '만약': 6, '만요': 1, '만원': 20, '만점': 1, '만족': 12, '만족도': 1, '만천원': 1, '만큼': 2, '만하': 2, '만해': 2, '만화책': 1, '말레이시아': 1, '말레이시아인': 1, '말로': 2, '말리': 1, '말씀': 7, '말투': 3, '말함': 2, '맘스': 1, '맛사지': 1, '맛잇엇어': 1, '맛집': 25, '망각': 1, '망신': 2, '망치': 2, '맞은편': 7, '맞이': 5, '매년': 2, '매니': 1, '매니저': 3, '매달': 1, '매듭': 1, '매력': 5, '매번': 1, '매우': 265, '매운탕': 1, '매일': 36, '매장': 3, '매점': 3, '매칭': 1, '매트': 5, '매트리스': 13, '매트릭스': 1, '매하': 1, '맥도날드': 5, '맥도널드': 1, '맥주': 22, '맥주잔': 1, '맨발': 3, '머리': 7, '머리카락': 4, '머신': 3, '머싱': 1, '먹거리': 7, '먹기': 2, '먹방': 1, '먹이': 3, '먼저': 3, '먼지': 3, '멀리': 14, '메가박스': 1, '메뉴': 15, '메리': 1, '메리어트': 1, '메시지': 1, ...}
3-5) Apply TF-IDF
TF-IDF coversion
from sklearn.feature_extraction.text import TfidfTransformer
tfidf_vectorizer = TfidfTransformer()
tf_idf_vect = tfidf_vectorizer.fit_transform(bow_vect)
print(tf_idf_vect.shape)
print(tf_idf_vect[0])
(1001, 3599) (0, 3588) 0.35673213299026796 (0, 2927) 0.2582351368959594 (0, 2925) 0.320251680858207 (0, 2866) 0.48843555212083145 (0, 2696) 0.23004450213863206 (0, 2311) 0.15421663035331626 (0, 1584) 0.48843555212083145 (0, 1527) 0.2928089229786031 (0, 790) 0.2528176728459411
Vector: word mapping
invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}
print(str(invert_index_vectorizer)[:100]+'..')
{2866: '집중', 3588: '휴식', 2696: '제공', 2311: '위치', 1584: '선정', 790: '또한', 2927: '청소', 2925: '청결', 1527..
4) Logistic Regression Classification
4-1) Create dataset
Converting Rating data to binary
df.head()
rating | text | |
---|---|---|
0 | 4 | 여행에 집중할수 있게 편안한 휴식을 제공하는 호텔이었습니다. 위치선정 또한 적당한 ... |
1 | 4 | 2일 이상 연박시 침대, 이불, 베게등 침구류 교체 및 어메니티 보강이 필요해 보입... |
2 | 4 | 지인에소개로온 호텔 깨끗하고 좋은거같아요 처음에는 없는게 많아 많이 당황했는데 ... |
3 | 5 | 방에 딱 들어서자마자 눈이 휘둥그레질정도로 이렇게 넓은 호텔 처음 와본 것 같아요!... |
4 | 5 | 저녁에 맥주한잔 하는게 좋아서 렌트 안하고 뚜벅이 하기로 했는데 호텔 바로 앞에 버... |
df.rating.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7fc1c0d616a0>
def rating_to_label(rating):
if rating > 3:
return 1
else:
return 0
df['y'] = df['rating'].apply(lambda x: rating_to_label(x))
df.head()
rating | text | y | |
---|---|---|---|
0 | 4 | 여행에 집중할수 있게 편안한 휴식을 제공하는 호텔이었습니다. 위치선정 또한 적당한 ... | 1 |
1 | 4 | 2일 이상 연박시 침대, 이불, 베게등 침구류 교체 및 어메니티 보강이 필요해 보입... | 1 |
2 | 4 | 지인에소개로온 호텔 깨끗하고 좋은거같아요 처음에는 없는게 많아 많이 당황했는데 ... | 1 |
3 | 5 | 방에 딱 들어서자마자 눈이 휘둥그레질정도로 이렇게 넓은 호텔 처음 와본 것 같아요!... | 1 |
4 | 5 | 저녁에 맥주한잔 하는게 좋아서 렌트 안하고 뚜벅이 하기로 했는데 호텔 바로 앞에 버... | 1 |
df.y.value_counts()
1 726 0 275 Name: y, dtype: int64
4-2) Dataset Separation
from sklearn.model_selection import train_test_split
y = df['y']
x_train, x_test, y_train, y_test = train_test_split(tf_idf_vect, y, test_size=0.30)
print(x_train.shape)
print(x_test.shape)
(700, 3599) (301, 3599)
4-3) model training
Logistic Regression training
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Train LR model
lr = LogisticRegression(random_state=0)
lr.fit(x_train, y_train)
# classifiacation predict
y_pred = lr.predict(x_test)
evaluation
# classification result for test dataset
print("accuracy: %.2f" % accuracy_score(y_test, y_pred))
print("Precision : %.3f" % precision_score(y_test, y_pred))
print("Recall : %.3f" % recall_score(y_test, y_pred))
print("F1 : %.3f" % f1_score(y_test, y_pred))
accuracy: 0.72 Precision : 0.718 Recall : 1.000 F1 : 0.836
from sklearn.metrics import confusion_matrix
# print confusion matrix
confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confmat)
[[ 3 84] [ 0 214]]
4-4) Re-sampling
1:1 Sampling
positive_random_idx = df[df['y']==1].sample(275, random_state=33).index.tolist()
negative_random_idx = df[df['y']==0].sample(275, random_state=33).index.tolist()
# dataset split to train/test
random_idx = positive_random_idx + negative_random_idx
X = tf_idf_vect[random_idx]
y = df['y'][random_idx]
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)
print(x_train.shape)
print(x_test.shape)
(412, 3599) (138, 3599)
model retraining
lr = LogisticRegression(random_state=0)
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)
evaluation
print("accuracy: %.2f" % accuracy_score(y_test, y_pred))
print("Precision : %.3f" % precision_score(y_test, y_pred))
print("Recall : %.3f" % recall_score(y_test, y_pred))
print("F1 : %.3f" % f1_score(y_test, y_pred))
accuracy: 0.72 Precision : 0.644 Recall : 0.797 F1 : 0.712
confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confmat)
[[53 26] [12 47]]
5) Positive/negative keyword analysis
Coef Analysis of Logistic Regression Models
# print logistic regression's coef
plt.rcParams['figure.figsize'] = [10, 8]
plt.bar(range(len(lr.coef_[0])), lr.coef_[0])
<BarContainer object of 3599 artists>
Positive/negative keyword output
print(sorted(((value, index) for index, value in enumerate(lr.coef_[0])), reverse=True)[:5])
print(sorted(((value, index) for index, value in enumerate(lr.coef_[0])), reverse=True)[-5:])
[(1.3321308087111168, 2400), (1.1098677278465363, 2977), (1.029120247844704, 1247), (0.9474432432978868, 2957), (0.9049132254229898, 26)] [(-0.6491883332225628, 363), (-0.6683241824194205, 3538), (-0.6811855513119685, 1909), (-0.9632209931825515, 1293), (-1.124500886987929, 515)]
coef_pos_index = sorted(((value, index) for index, value in enumerate(lr.coef_[0])), reverse=True)
coef_neg_index = sorted(((value, index) for index, value in enumerate(lr.coef_[0])), reverse=False)
coef_pos_index
[(1.3321308087111168, 2400), (1.1098677278465363, 2977), (1.029120247844704, 1247), (0.9474432432978868, 2957), (0.9049132254229898, 26), (0.8631251640260484, 385), (0.8624237330200107, 2730), (0.7848182816732695, 578), (0.732990219026413, 2311), (0.716865493140725, 246), (0.7161355390234533, 1809), (0.7134163462461057, 956), (0.7044600617626677, 115), (0.6869152801231841, 1384), (0.6556108465327279, 1148), (0.6279495890384094, 2849), (0.6222266165132151, 2779), (0.6161464320403829, 883), (0.5993549427526994, 1491), (0.5957963623120057, 2680), (0.5486926383676386, 2834), (0.5396380473836403, 660), (0.5293505033175993, 416), (0.5268251635528765, 680), (0.5162996339456437, 3447), (0.515397298688398, 2781), (0.5102891400815143, 790), (0.5001491197806007, 3428), (0.4929472812035707, 1816), (0.49144456248364404, 692), (0.48756380000669713, 131), (0.48684210942709405, 1159), (0.45674587837763275, 1217), (0.45540907417568605, 1853), (0.4539368892907782, 981), (0.45364743248610284, 1799), (0.4357968956624657, 2988), (0.4317750704888898, 2606), (0.43044019060177896, 2771), (0.42976241036062235, 2455), (0.42955255071710025, 1981), (0.42879073804182866, 2722), (0.4199707273896906, 1779), (0.4159154526928186, 19), (0.4084324785499222, 2780), (0.4081209617829656, 1067), (0.4042034857154998, 2683), (0.4040469138707011, 3152), (0.4004256014611016, 1028), (0.39815159541886946, 826), (0.3846458902898351, 588), (0.38147465088928206, 627), (0.37718502893618444, 154), (0.3738446974828173, 2351), (0.367332320736131, 2678), (0.3671732270029874, 3016), (0.35993951750388575, 0), (0.35967370276499855, 1885), (0.34265833016423075, 1826), (0.33745159616067577, 2385), (0.3371647391165404, 3598), (0.335633532472175, 136), (0.33354095363617825, 1695), (0.325633618317818, 665), (0.3224469848677877, 1085), (0.3194837275959594, 73), (0.3180284758263984, 2685), (0.3165087876097102, 1554), (0.31480819925031794, 1350), (0.31431164320253163, 2225), (0.31025312484733686, 43), (0.308334722553892, 1263), (0.3029159220243568, 3588), (0.3020625524276556, 2726), (0.30156731773121254, 121), (0.2949888109951585, 1671), (0.29406267557940335, 282), (0.29388674541634185, 3320), (0.29319214345781675, 2167), (0.29257136559515806, 2), (0.2903527229951154, 582), (0.2893656045575609, 979), (0.2865105037602163, 341), (0.28357844878803906, 3347), (0.28264031635232884, 658), (0.28060836686814006, 1935), (0.27970378259167916, 1700), (0.2786399841890623, 3201), (0.27703202112305, 625), (0.27668100652947636, 3029), (0.2743375997046002, 1929), (0.27413594760437765, 3547), (0.27368204754138026, 1926), (0.27183018519065966, 1165), (0.27137529927855913, 71), (0.2685065484060374, 3013), (0.2683925002732845, 1078), (0.26602345854602616, 960), (0.26563651641305047, 2674), (0.2655828802203175, 395), (0.2637574460673181, 1659), (0.2630924155233454, 3425), (0.262576984334013, 1205), (0.2612445245166083, 1750), (0.25955244207216993, 3404), (0.2582453145513143, 860), (0.2573369451118894, 2038), (0.2528728295737944, 3267), (0.25117183576365226, 3150), (0.2502831420241506, 723), (0.24956513735466768, 2075), (0.24845709898575633, 1025), (0.24565832211918145, 1374), (0.2444466387148004, 3238), (0.24345854902783198, 714), (0.24263088287913587, 3353), (0.23959737325461491, 3060), (0.2394235215359124, 1453), (0.2382302727044769, 2595), (0.236693438753923, 3537), (0.2352318898029652, 726), (0.23441224495353918, 100), (0.23406755548226849, 2551), (0.23379140860743455, 2566), (0.23346109278685567, 532), (0.22895808194923195, 3337), (0.22880986570599313, 353), (0.22841556466198995, 1383), (0.22709775600161025, 2691), (0.22652789077304955, 2436), (0.22627009793868919, 2753), (0.22507189125351756, 110), (0.22396460652995273, 147), (0.22259697920818705, 1482), (0.22193733214129846, 2438), (0.22175178779418242, 1552), (0.22151447958091616, 1199), (0.22146319716752702, 544), (0.22127225625738847, 3272), (0.220781024296084, 2274), (0.21982768282476364, 2705), (0.21936837987103552, 718), (0.2193496947270397, 3299), (0.2193496947270397, 172), (0.2184497853928657, 717), (0.21833407390702603, 1505), (0.21809621397373719, 3512), (0.21781685606599935, 322), (0.2176002036083807, 2913), (0.21692615631477355, 3551), (0.21671313079089544, 3102), (0.21671313079089544, 1541), (0.21606244261917687, 3014), (0.21562177834082913, 3127), (0.21556533928126198, 2677), (0.2152860596906335, 769), (0.2138168027519045, 2108), (0.21380867386959615, 263), (0.2132109653607666, 1307), (0.21244302599751747, 3061), (0.21224270498780334, 206), (0.21202778293074742, 192), (0.21201290684914603, 1949), (0.21145270517927034, 1762), (0.21114972847304267, 1455), (0.21083036354582643, 2186), (0.21051142044700794, 3384), (0.21001347415099242, 3286), (0.20998040536146637, 68), (0.20967616965410357, 235), (0.208251046375912, 1768), (0.20760478979685312, 3390), (0.20740315589887034, 3088), (0.20556807206875818, 3414), (0.20234144193644174, 3555), (0.2018656714571112, 2371), (0.2016461772393062, 51), (0.19929383346501198, 1721), (0.1990149202138041, 1010), (0.19896039182139422, 3432), (0.19894547087204073, 340), (0.19839781641856194, 3002), (0.19813175575019054, 3274), (0.1966951722367976, 623), (0.19660389823049113, 2016), (0.19581183146615333, 2168), (0.19505097638548372, 1886), (0.1945003983255703, 1147), (0.19426488558460306, 3408), (0.19403487720548504, 1523), (0.19371889849533547, 356), (0.19358226372345383, 1917), (0.19334929562068714, 3057), (0.19311769483840216, 2804), (0.19291083167167608, 2180), (0.19105979566738465, 3359), (0.19038712527647178, 40), (0.19025428348723153, 839), (0.19004592638299575, 278), (0.1896791185823034, 2965), (0.1892532697410779, 3339), (0.18906755452359386, 1961), (0.18888996868423136, 1432), (0.18781348017991015, 1278), (0.18759776633440292, 1550), (0.1869393811048611, 3549), (0.1864613695267563, 2200), (0.18617798946763775, 1249), (0.18545273365532508, 1386), (0.18465966170413162, 2331), (0.18448194605435825, 3283), (0.18359704376005423, 773), (0.18341863298488395, 2748), (0.18265890303695448, 1291), (0.18243752629350396, 3370), (0.18243752629350396, 663), (0.18242433015171347, 791), (0.18221035881128547, 1960), (0.18170817590567348, 2088), (0.18165649605140002, 281), (0.18136983329265988, 3486), (0.18105293360540595, 109), (0.1809803538967947, 3369), (0.17985798544663784, 2080), (0.17956847966930387, 3034), (0.17955990993913923, 3250), (0.17891844051459602, 2033), (0.17889862025954986, 2764), (0.17879595150453056, 1442), (0.17838512732829997, 1758), (0.17690299960811534, 1918), (0.1767714210124328, 801), (0.1767285112565217, 2141), (0.17659448012000462, 304), (0.17647989044490972, 3516), (0.17634068214069432, 3297), (0.17608186054158145, 2070), (0.17576526782661273, 1030), (0.17515502627906773, 2622), (0.1749737604394968, 3410), (0.17410214749925304, 2759), (0.17407257079494115, 3271), (0.17407257079494115, 2607), (0.17407257079494115, 2465), (0.17407257079494115, 2398), (0.17407257079494115, 571), (0.17311325469340594, 2742), (0.17276920148824162, 321), (0.17256067465778124, 1452), (0.17159599314441507, 903), (0.17103233804318696, 47), (0.16925315589858916, 2350), (0.16920767999211647, 423), (0.1685975878154169, 489), (0.1685895331301071, 440), (0.16749119686465136, 2042), (0.16702579919941596, 382), (0.16700101352781155, 667), (0.16610539591043183, 286), (0.16507120766930342, 2003), (0.16429698469550058, 1120), (0.16331700479491104, 2188), (0.16331700479491104, 2018), (0.16331390385034894, 2793), (0.16300508910149644, 916), (0.16282275467452148, 1843), (0.1624724734816524, 1359), (0.1618759037676759, 3395), (0.16130519387471293, 2975), (0.16025844697165065, 669), (0.16013102293033513, 1953), (0.1597482903610505, 996), (0.15968247106389244, 2767), (0.1595935294251936, 259), (0.15880675549174347, 1602), (0.1587439552243165, 935), (0.15869391556965143, 93), (0.15866592489114187, 2581), (0.15842096948259, 2258), (0.15836742047722766, 2787), (0.15791266057854855, 3058), (0.15765558962932025, 2271), (0.15732721123058058, 1292), (0.15722879615475854, 720), (0.15694585991437593, 3037), (0.1565630030203952, 1910), (0.15653431345464613, 748), (0.15647025349331029, 1485), (0.1560861187506581, 1451), (0.15579667970897626, 1348), (0.15540520631625526, 1326), (0.1549720046769624, 2047), (0.15452615455169144, 543), (0.1541983511016218, 3164), (0.1541983511016218, 683), (0.1539873238718623, 3162), (0.1538488627144806, 888), (0.1536071393576577, 3208), (0.15298690219214506, 1593), (0.15275270485680378, 2386), (0.15240308917035653, 991), (0.151621197000685, 612), (0.15129785599666357, 939), (0.15094776923710648, 2895), (0.15094776923710648, 2425), (0.14962944016742355, 2216), (0.14904033568007535, 290), (0.1486419531495219, 2794), (0.1485083911928923, 2940), (0.14824979715450434, 2013), (0.14749451289822033, 83), (0.1474087004234331, 1805), (0.14710629342492257, 3189), (0.1468532386313502, 937), (0.14658634943466398, 2146), (0.1465384816163645, 1053), (0.14652908216269708, 3356), (0.1464667635763324, 1319), (0.1464667635763324, 987), (0.146232204378051, 1709), (0.14596163518975866, 589), (0.14594613731072828, 2170), (0.1459333390058329, 2956), (0.1456575089375777, 176), (0.14544378809072755, 74), (0.1452359825672161, 393), (0.14490710054390635, 971), (0.14459417700731625, 2897), (0.1435072329559267, 349), (0.1435072329559267, 81), (0.14342614982099325, 237), (0.14334431387987334, 989), (0.14333871647641, 2497), (0.1427597861226763, 1891), (0.14274532098406992, 453), (0.14267158693294138, 2462), (0.14241007268476308, 2792), (0.14239906559751997, 2191), (0.14172778883838677, 2148), (0.14078125131967817, 283), (0.1406282886456704, 311), (0.14042112327259743, 2864), (0.14040331443949813, 2150), (0.14036503615131526, 910), (0.14030418343407003, 1253), (0.14030418343407003, 30), (0.1402567473881267, 2525), (0.1401665654116925, 179), (0.14011839714622298, 1796), (0.13995369350695977, 3125), (0.1390466886476543, 1530), (0.13874050516494102, 1938), (0.13855961822894952, 1440), (0.1385421137946354, 1239), (0.13825954738820223, 1614), (0.13823477220476124, 2708), (0.1377461516784561, 2411), (0.13747373989422332, 3366), (0.1374181381097756, 2475), (0.13638701952286028, 1), (0.13633128481810017, 561), (0.1358404957999536, 852), (0.13575081367693947, 961), (0.13573573877465953, 1106), (0.1357304444802513, 1740), (0.13570313559786057, 326), (0.13504425093295547, 1301), (0.13449235053930575, 3434), (0.13426801036692038, 12), (0.13419735575739455, 2267), (0.13413197224055048, 817), (0.1332035480663822, 2844), (0.13293688071865534, 843), (0.13293254436624813, 1781), (0.1327270864101471, 1616), (0.1327270864101471, 818), (0.13235034222368947, 3567), (0.13192012478547538, 3476), (0.13192012478547538, 1112), (0.13178963598053114, 3426), (0.13167562685657733, 2605), (0.13166726335215087, 1557), (0.13160015043919898, 3581), (0.13160015043919898, 2488), (0.13160015043919898, 2094), (0.1313547383200888, 2972), (0.13031729072402892, 1746), (0.12988487499838028, 1172), (0.12973246724796839, 2573), (0.1296971052433702, 2969), (0.1296971052433702, 767), (0.1293905727630968, 1795), (0.12938453024897784, 1715), (0.12933454503707428, 1728), (0.1289365379022888, 3027), (0.12867150979878933, 1921), (0.12860870968792637, 2078), (0.12810442768317531, 1739), (0.12805439958035297, 3026), (0.1278779808209269, 3442), (0.12780007554324174, 618), (0.12735321130110464, 3552), (0.12693382684487228, 1283), (0.12693382684487228, 774), (0.12681663696770337, 2618), (0.1267241604496154, 2421), (0.1267241604496154, 2166), (0.12564078640260584, 347), (0.12536956468677118, 855), (0.1253512794237119, 2523), (0.12484293651117723, 3015), (0.12446046957444332, 3561), (0.12438503356051701, 2220), (0.12428431698758667, 2335), (0.12428431698758667, 1051), (0.1241217444105632, 1621), (0.12359845049290538, 1506), (0.12312446033672597, 872), (0.1228540584502789, 375), (0.12218510960287418, 1321), (0.12212076174130922, 3121), (0.12191095229953881, 1092), (0.12191095229953881, 122), (0.12178744304758918, 1060), (0.12155319948980126, 3577), (0.12128424197829846, 3508), (0.12125712391650044, 806), (0.12094559865788808, 2298), (0.12081130224026823, 2171), (0.12081130224026823, 703), (0.12081130224026823, 379), (0.1206667914049753, 519), (0.12053482818464879, 671), (0.12016451781173339, 3518), (0.11957468357601171, 3168), (0.11957468357601171, 2632), (0.11930669990535478, 3309), (0.11907549449107033, 3593), (0.11907309519569548, 3513), (0.11907309519569548, 3143), (0.11907309519569548, 401), (0.11900817960475607, 271), (0.11899208138806625, 2869), (0.11895958968877159, 1539), (0.11880440997267704, 3435), (0.11849763128788413, 2546), (0.11809597331801082, 2911), (0.11809597331801082, 2862), (0.11806555686548152, 3262), (0.11806555686548152, 2036), (0.11755582720566768, 2824), (0.1158635500136846, 3412), (0.1158635500136846, 1690), (0.1158635500136846, 867), (0.1158635500136846, 182), (0.1146049615072046, 59), (0.11457215302554612, 758), (0.11452048101166765, 284), (0.11437158955438233, 2899), (0.11437158955438233, 415), (0.11421370258446362, 2533), (0.11402351764493407, 814), (0.11398329625091098, 400), (0.11398004111441176, 2062), (0.11388712615281078, 457), (0.11380483755755394, 825), (0.11376257728774304, 2953), (0.11374483910338776, 2496), (0.11363061333793036, 3427), (0.11363061333793036, 390), (0.11331142513554919, 217), (0.11323824243411867, 2848), (0.11269570106913031, 2158), (0.11260561285741831, 1553), (0.11170726385475563, 2594), (0.11146074951366217, 2765), (0.11143204200991574, 2303), (0.11121357044433934, 337), (0.11121357044433934, 272), (0.11096001854374073, 3089), (0.11095263793825426, 2613), (0.11095263793825426, 1406), (0.11095263793825426, 970), (0.11087589389709121, 2315), (0.11087589389709121, 425), (0.11072422457342575, 3558), (0.11072422457342575, 3419), (0.11072422457342575, 2608), (0.11072422457342575, 2598), (0.11060675699993265, 2426), (0.11022343631120256, 2457), (0.11015920452585921, 2051), (0.11015920452585921, 1725), (0.11015920452585921, 1684), (0.11015920452585921, 95), (0.11010581079286663, 3067), (0.10998171923083665, 1612), (0.10998171923083665, 266), (0.10988929327193322, 1483), (0.10968349322053741, 3423), (0.10968349322053741, 1945), (0.10968349322053741, 1095), (0.10946316414746812, 2832), (0.1090479208217944, 2456), (0.1090479208217944, 493), (0.1090479208217944, 146), (0.10885202678651067, 2971), (0.1085737346294867, 1073), (0.10811028544693015, 1991), (0.1079924384456164, 2085), (0.10794562030046964, 1743), (0.10773901543501041, 267), (0.10712091911923531, 3445), (0.10682046421939902, 741), (0.10682046421939902, 140), (0.10675298330383282, 159), (0.10618581865111706, 2165), (0.10618581865111706, 498), (0.10618581865111706, 432), (0.10605425200156694, 3430), (0.10605425200156694, 2948), (0.10605425200156694, 857), (0.10605425200156694, 346), (0.10605425200156694, 265), (0.10557486423652133, 2766), (0.10557486423652133, 822), (0.10534575807425914, 2491), (0.10519179459973899, 2676), (0.10496503453539867, 479), (0.10473498183402759, 1562), (0.10457183285346207, 472), (0.10425444516416421, 1465), (0.10425423248252984, 2709), (0.10421806120046974, 1676), (0.10403053110147295, 1798), (0.1040240341000106, 2823), (0.1040240341000106, 2501), (0.1039699081798286, 3504), (0.1039699081798286, 942), (0.10388368558329736, 751), (0.10368637066103771, 2250), (0.10368637066103771, 730), (0.10276872872705302, 2719), (0.10276872872705302, 2459), (0.10247708643417394, 2961), (0.1024231846229235, 1339), (0.10236183219704427, 3277), (0.10232708110688793, 735), (0.10228458667189501, 3198), (0.10228458667189501, 2962), (0.10228458667189501, 995), (0.10136053789274621, 2304), (0.10134976554256996, 2688), (0.10134976554256996, 1636), (0.10134976554256996, 1337), (0.10134976554256996, 1324), (0.10134976554256996, 707), (0.1011396883323341, 765), (0.10069601186190272, 1939), (0.10064515702280895, 3344), (0.10064515702280895, 2173), (0.10064515702280895, 1138), (0.10064515702280895, 461), (0.10064515702280895, 34), (0.10042325887796415, 300), (0.10029575753140649, 1989), (0.09998749056664377, 3107), (0.09930697078845709, 2221), (0.09914935926854875, 244), (0.0987669339896394, 3078), (0.0987669339896394, 2164), (0.09836837816997851, 3315), (0.09836837816997851, 875), (0.09817944238454097, 2699), (0.09794705270385755, 1027), (0.0975399452441692, 2587), (0.0975399452441692, 2077), (0.09748748613843083, 332), (0.09735701024742209, 2396), (0.09725019916278516, 258), (0.09699466636036946, 1546), (0.09699466636036946, 1335), (0.0968815397155067, 2907), (0.09686500832458751, 777), (0.09686052023444106, 135), (0.09673046950545752, 1444), (0.09673046950545752, 715), (0.09661595034015592, 446), (0.09653772583290808, 3207), (0.09653772583290808, 2138), (0.09626960058032391, 3120), (0.09618513758438467, 3515), (0.09565542764095197, 2863), (0.09565542764095197, 894), (0.09547947763468337, 2162), (0.09531096377076571, 702), (0.09528243773263785, 1974), (0.09509363527560924, 3003), (0.09498866156805089, 56), (0.09474781428844517, 684), (0.09470186479356585, 3105), (0.0942830918446496, 451), (0.09415396440857329, 2308), (0.09415396440857329, 1642), (0.09405941869361335, 1670), (0.09361290381636478, 3167), (0.09361290381636478, 2182), (0.09355616462549204, 3035), (0.09355616462549204, 1896), (0.09355616462549204, 1184), (0.09341909409291495, 3072), (0.09337462696070649, 438), (0.09296267142463642, 1603), (0.0928458913431107, 2710), (0.0928458913431107, 1213), (0.09284154955685534, 1371), (0.09266999833156657, 406), (0.09207228230404421, 1163), (0.09205966214580999, 2997), (0.09178619545179537, 2752), (0.09143824914038053, 3064), (0.09118758190755498, 601), (0.0904102863515016, 1752), (0.0904102863515016, 518), (0.08994784131054855, 1626), (0.08992899272331892, 1381), (0.08992899272331892, 274), (0.0895027880278346, 749), (0.08922252170461646, 234), (0.08893701129431314, 1801), (0.08888778897986298, 377), (0.08850344945738366, 3461), (0.08850344945738366, 1488), (0.08829971029325993, 674), (0.08820524746883318, 3011), (0.08820524746883318, 946), (0.08820524746883318, 693), (0.08817034107034716, 2919), (0.08817034107034716, 1007), (0.08817034107034716, 421), (0.08804345721280432, 1361), (0.08804345721280432, 383), (0.08762934372453222, 2228), (0.08757617301522154, 2408), (0.08743961293482269, 1848), (0.08723026371545585, 1439), (0.08711481957056837, 1181), (0.08702300996955852, 2415), (0.08665193755012388, 3381), (0.08665193755012388, 2287), (0.08665193755012388, 1836), (0.08665193755012388, 1769), (0.08613066304157803, 1118), (0.08604351125758435, 2905), (0.08604351125758435, 569), (0.08548454984937787, 3179), (0.08548454984937787, 2648), (0.08548454984937787, 230), (0.0851745887022458, 3048), (0.0851745887022458, 1994), (0.0851745887022458, 1204), (0.0847098159525699, 2515), (0.08462148604526211, 2429), (0.08426216025543003, 153), (0.08396067945350467, 1744), (0.08395033773740962, 1591), (0.08392814428727823, 2027), (0.08382123724955291, 2749), (0.08382123724955291, 2473), (0.08382123724955291, 1409), (0.08382123724955291, 1124), (0.08353088199290906, 444), (0.08317371353393369, 2642), (0.08309673441860337, 3111), (0.08309673441860337, 245), (0.08292925047434145, 3259), (0.08292925047434145, 1302), (0.08292925047434145, 293), (0.08291617096613416, 3140), (0.08269105630936234, 2901), (0.08250700752808858, 696), (0.08240929260535937, 1716), (0.0821143663223302, 3529), (0.0821143663223302, 3213), (0.0821143663223302, 2127), (0.0821143663223302, 1525), (0.0821143663223302, 1126), (0.08177476895231811, 2994), (0.08177476895231811, 2993), (0.08177476895231811, 2917), (0.08177476895231811, 1711), (0.08177476895231811, 1304), (0.08151167856717134, 2198), (0.08151167856717134, 945), (0.08151167856717134, 566), (0.081292170917407, 2998), (0.08105200153726197, 2484), (0.08077003694760779, 3076), (0.0806878432489464, 2360), (0.08068159929883512, 721), (0.08061100817847923, 2096), (0.08061100817847923, 1080), (0.08061100817847923, 892), (0.08061100817847923, 116), (0.08054857135944754, 3284), (0.08054857135944754, 2821), (0.08054857135944754, 1143), (0.08054857135944754, 963), (0.08054857135944754, 208), (0.08035021335850029, 1330), (0.08000278922985744, 592), (0.07992964611435344, 2480), (0.07984395488180059, 466), (0.07967648486597259, 3460), (0.07942048197153923, 1248), (0.07942048197153923, 800), (0.07936184382084922, 103), (0.07934695778482571, 1594), (0.0790924779245287, 2836), (0.07878339653169947, 921), (0.07872352699109669, 1427), (0.07832911303100597, 1274), (0.07826986945258207, 2529), (0.07826986945258207, 548), (0.07812731180375966, 2757), (0.07782256783754223, 1394), (0.07772413797438447, 1563), (0.0776677243681878, 1770), (0.0776677243681878, 431), (0.07763906076073131, 1969), (0.07750076957730183, 3443), (0.07750076957730183, 1597), (0.07750076957730183, 141), (0.07725627254289316, 3181), (0.07725627254289316, 2217), (0.07709690524884415, 3431), (0.07709690524884415, 2482), (0.07699366193593116, 2735), (0.0769319438415435, 2157), (0.0769319438415435, 1677), (0.07675730092547896, 909), (0.07667082959004155, 1219), (0.07608187763041152, 2026), (0.07602294882487318, 863), (0.07600285375632602, 1002), (0.0759181697785818, 3454), (0.07577029842086451, 1864), (0.07564976198948453, 2629), (0.07563006827415941, 2876), (0.0754734026281844, 1306), (0.07523752970018145, 893), (0.07523752970018145, 482), (0.07514147802049713, 2773), (0.07506743730551602, 4), (0.07496398600693509, 646), (0.07488884792464619, 3539), (0.07488884792464619, 388), (0.07470692889125978, 3114), (0.07470692889125978, 759), (0.07436171988469886, 1298), (0.07401855876239286, 88), (0.07393391675480251, 2052), (0.07393391675480251, 954), (0.07393391675480251, 812), (0.07355314671246128, 1316), (0.07331021916892902, 3270), (0.07327690911717218, 107), (0.07291562786035617, 2417), (0.0728684083665527, 1962), (0.07280881447765751, 1759), (0.0727652146198062, 619), (0.07247090110087959, 477), (0.07229078508957919, 590), (0.07137989306133816, 2435), (0.07121096313036739, 209), (0.0711621604468216, 3338), (0.07106080111048188, 3199), (0.07093515542997689, 3303), (0.07093515542997689, 1008), (0.07093515542997689, 805), (0.07064298286891789, 2022), (0.07050951911510893, 1033), (0.06999901722433252, 2035), (0.06983294154740267, 1128), (0.06977993867564153, 3210), (0.06969183120786246, 986), (0.06955745792024413, 3453), (0.06927229373883934, 404), (0.06901888758929042, 2352), (0.06901888758929042, 2061), (0.06901888758929042, 1526), (0.06901888758929042, 21), (0.06830194446478273, 1503), (0.06828111020089074, 2740), (0.06828111020089074, 2736), (0.06828111020089074, 1943), (0.06828111020089074, 1847), (0.06828111020089074, 1180), (0.06828111020089074, 829), (0.0682032266181214, 3248), (0.0682032266181214, 1619), (0.0682032266181214, 1257), (0.0682032266181214, 876), (0.06805916422035019, 2918), (0.06805916422035019, 2293), (0.06805916422035019, 1544), (0.06785022063152461, 3503), (0.06780627218311859, 2556), (0.06771935959217608, 2541), (0.06770398717597208, 3364), (0.06770398717597208, 3119), (0.06770398717597208, 3075), (0.06770398717597208, 859), (0.06770398717597208, 450), (0.06754168403023177, 264), (0.06694966279504407, 2806), (0.06694966279504407, 2369), (0.06694966279504407, 1832), (0.06669378295200436, 2082), (0.06648844683125392, 771), (0.06645518694941502, 3251), (0.06645518694941502, 1252), (0.06645518694941502, 1117), (0.06645292906426767, 3041), (0.06645292906426767, 2564), (0.06645292906426767, 2259), (0.06645292906426767, 2059), (0.06645292906426767, 2014), (0.06645292906426767, 1579), (0.06645292906426767, 842), (0.06645292906426767, 815), (0.06645292906426767, 526), (0.06626802899587075, 2797), (0.06584406166184281, 435), (0.06571699962537708, 1194), (0.06559989448715318, 2356), (0.0655031569982287, 396), (0.06412191076896116, 2260), (0.06412191076896116, 1894), (0.06409990552638617, 2246), (0.06396406009757603, 1625), (0.0629882398131742, 3594), (0.06272140140211431, 1565), (0.06272140140211431, 1513), (0.06272140140211431, 232), (0.06272140140211431, 18), (0.06271927037627348, 308), (0.06251092516187161, 725), (0.0621509926564875, 333), (0.061604560505147975, 2530), (0.06152215539240401, 3313), (0.06152215539240401, 3223), (0.06152215539240401, 556), (0.061201092717504006, 2599), (0.061201092717504006, 52), (0.060822883069674344, 2093), (0.06077659974490063, 3305), (0.06077659974490063, 2880), (0.06077659974490063, 2859), (0.06077659974490063, 1627), (0.06077659974490063, 1024), (0.06077659974490063, 657), (0.06061527321340795, 3184), (0.06061527321340795, 3031), (0.06061527321340795, 2944), (0.06061527321340795, 2647), (0.06061527321340795, 2528), (0.06061527321340795, 2261), (0.06061527321340795, 1269), (0.06061527321340795, 1109), (0.06061527321340795, 554), (0.0605113418129938, 10), (0.06025557611898251, 1538), (0.06025557611898251, 1129), (0.06013641813402835, 1131), (0.05999262335652932, 3149), (0.05999262335652932, 2846), (0.05999262335652932, 2624), (0.05999262335652932, 2461), (0.05999262335652932, 2359), (0.05999262335652932, 2279), (0.05999262335652932, 1069), (0.05999262335652932, 364), (0.05927023626041681, 1617), (0.059123169887753284, 3187), (0.05877791360283384, 3455), (0.05877791360283384, 1392), (0.05877791360283384, 704), (0.05826465255311857, 1835), (0.05826465255311857, 516), (0.05820238903108268, 189), (0.05790138179416361, 1342), (0.05763974687012355, 761), (0.05736186569390286, 480), (0.05724320662088986, 2239), (0.05724320662088986, 1734), (0.05720960329569059, 2809), (0.05720960329569059, 2057), (0.05720960329569059, 1691), (0.05664040415694123, 2029), (0.056621937029158506, 3379), (0.056621937029158506, 1774), (0.0564278865235491, 3062), (0.055853771367528685, 2619), (0.05576417002155924, 2951), (0.05576417002155924, 2224), (0.05576417002155924, 1026), (0.05576417002155924, 57), (0.05547547575885028, 2585), (0.055435054504402954, 1850), (0.055435054504402954, 1097), (0.055435054504402954, 807), (0.05502062125426616, 1600), (0.054991052287921414, 2650), (0.05493907332775764, 165), (0.05493907332775764, 45), (0.054863416369423136, 8), (0.054793242034454126, 276), (0.05402699630874494, 2193), (0.05402699630874494, 42), (0.05370254799315957, 1877), (0.05305980956943215, 3295), (0.05305980956943215, 3005), (0.05293101100444882, 1923), (0.05292864156660283, 2811), (0.052918966628638694, 1317), (0.0528978884762766, 2222), (0.05268951274578193, 1090), (0.05266839593639291, 3362), (0.05266839593639291, 1476), (0.05263105940007825, 1417), (0.05259054089966629, 1818), (0.05232043524255482, 1620), (0.05232043524255482, 497), (0.05194868290767959, 487), (0.05183816185568962, 1005), (0.05170169366608983, 2136), (0.05170169366608983, 700), (0.05170169366608983, 565), (0.05138757211673676, 3417), (0.05090385531872281, 2233), (0.05085971069291413, 1162), (0.050453398152483155, 1901), (0.050408082037084294, 94), (0.05033746451820631, 106), (0.05017128862015281, 1966), (0.05012558519824015, 3575), (0.050110577708012825, 1977), (0.04997505051609792, 1260), (0.04957823522570809, 1653), (0.04948672513322782, 2666), (0.04939479567119575, 705), (0.049026286011909795, 3298), (0.04882517448802302, 629), (0.048796833331736136, 2472), (0.048796833331736136, 1775), (0.048796833331736136, 494), (0.04846817068404057, 1595), (0.04817904263490586, 2361), (0.04785381019455731, 3017), (0.04778438162905383, 1857), (0.047749937179018, 3326), (0.047749937179018, 3227), (0.047749937179018, 2305), (0.047749937179018, 2156), (0.04725047923528368, 1489), (0.04673580212471322, 1988), (0.04668402365113107, 1903), (0.04573312371101507, 3361), (0.04573312371101507, 676), (0.0452051431757508, 3510), (0.0452051431757508, 3055), (0.0452051431757508, 2737), (0.0452051431757508, 2292), (0.0452051431757508, 1475), (0.045137142121380584, 2822), (0.04498602637267015, 2161), (0.044786100911622696, 896), (0.0443122935728851, 3235), (0.0443122935728851, 3049), (0.0443122935728851, 2154), (0.0443122935728851, 1978), (0.0443122935728851, 546), (0.04416876165065787, 3279), (0.04416876165065787, 2582), (0.04403938115580219, 2064), (0.04375757986584301, 97), (0.04349807110759742, 3570), (0.043347710079499176, 3399), (0.042665291615604624, 3310), (0.042665291615604624, 2076), (0.042665291615604624, 1407), (0.042665291615604624, 586), (0.041822606779345184, 3287), (0.041818373480833865, 724), (0.04176434834421346, 2532), (0.04125350376404429, 3336), (0.04125350376404429, 2656), (0.04125350376404429, 2570), (0.04125350376404429, 2522), ...]
invert_index_vectorizer = {v: k for k, v in vect.vocabulary_.items()}
for coef in coef_pos_index[:15]:
print(invert_index_vectorizer[coef[1]], coef[0])
이용 1.3321308087111168 추천 1.1098677278465363 버스 1.029120247844704 최고 0.9474432432978868 가성 0.9049132254229898 근처 0.8631251640260484 조식 0.8624237330200107 다음 0.7848182816732695 위치 0.732990219026413 공간 0.716865493140725 시설 0.7161355390234533 맛집 0.7134163462461057 거리 0.7044600617626677 분위기 0.6869152801231841 바다 0.6556108465327279
for coef in coef_neg_index[:15]:
print(invert_index_vectorizer[coef[1]], coef[0])
냄새 -1.124500886987929 별로 -0.9632209931825515 아무 -0.6811855513119685 화장실 -0.6683241824194205 그냥 -0.6491883332225628 모기 -0.6302873381425533 수건 -0.6243491941007028 느낌 -0.5975494080979522 모텔 -0.5971174361320487 다른 -0.5966138818945081 최악 -0.593317479621261 음식 -0.5443424935120069 주위 -0.5321043465183405 진짜 -0.5254380815734122 목욕 -0.5087212885846032
댓글남기기