import json
import urllib.request
import random
import time
# by 만능소보로 in 오늘의유머 when 2016.11.14
def requestComment(clubID, articleID, page, timeout=10):
    """Fetch one page of comments for a Naver Cafe article.

    Requests the CommentView endpoint, decodes the body as UTF-8, parses it
    as JSON and returns the value stored under its "result" key.

    Args:
        clubID: Cafe number, sent as the ``search.clubid`` query parameter.
        articleID: Article number, sent as ``search.articleid``.
        page: Comment page number, sent as ``search.page``.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the script forever.

    Returns:
        The value of the "result" key of the parsed JSON response.

    Raises:
        SystemExit: With status 1, after printing a message, when the
            request fails, the body is not valid JSON, or the response has
            no "result" entry.
    """
    # Function-scope imports keep this block self-contained; the ``from``
    # form avoids rebinding the module-level ``urllib`` name locally.
    from http.client import HTTPException
    from urllib.parse import urlencode

    # urlencode escapes the values, so stray spaces or symbols in the typed
    # IDs cannot produce a malformed URL. Plain numeric IDs yield exactly the
    # same URL as simple concatenation would.
    query = urlencode({
        "search.clubid": clubID,
        "search.articleid": articleID,
        "search.page": page,
    })
    commentURL = "http://cafe.naver.com/CommentView.nhn?" + query

    # Fetch the raw response body.
    try:
        print("요청중[" + str(page) + "] : " + commentURL)
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(commentURL, timeout=timeout) as response:
            requestResult = response.read().decode("utf-8")
    except (OSError, HTTPException, ValueError):
        # OSError covers URLError/HTTPError/timeouts; ValueError covers
        # undecodable bodies. Ctrl-C is deliberately not swallowed.
        print("연결에 문제가 생겼어요...")
        raise SystemExit(1)

    # Parse the body as JSON.
    try:
        commentJson = json.loads(requestResult)
    except ValueError:
        print("혹시 숫자 잘못입력한거 아니죠? 확인하고 다시 시도해주세요.")
        raise SystemExit(1)

    # Check that the payload has the expected shape: a JSON object carrying
    # a "result" entry. Any other valid JSON (list, number, ...) is rejected
    # here instead of crashing on a missing .get().
    commentResult = None
    if isinstance(commentJson, dict):
        commentResult = commentJson.get("result")
    if commentResult is None:
        print("어? 필요한 값이 사라졌어요...(바뀌었나?)")
        raise SystemExit(1)
    return commentResult
# Export every comment of one Naver Cafe article to a tab-separated file.

# Tabs and line breaks inside a text field would split a row, so they are
# mapped to spaces before the field is written.
_FIELD_BREAKS = str.maketrans({"\t": " ", "\r": " ", "\n": " "})


def _flattenField(value):
    """Return *value* as text with tabs and line breaks replaced by spaces."""
    return str(value).translate(_FIELD_BREAKS)


# Input: which cafe and article to export. Surrounding whitespace is dropped
# because the values end up in both the request URL and the file name.
clubID = input("카페 번호(clubid)는? : ").strip()
articleID = input("글 번호(articleid)는? : ").strip()

# Fetch the first page up front: it carries the totals needed to paginate.
commentResult = requestComment(clubID, articleID, 1)

# Work out how many pages there are.
commentTotalCount = commentResult["totalCount"]
commentCountPerPage = commentResult["countPerPage"]
commentPage = 0
if commentTotalCount > 0:
    # Ceiling division: pages needed to hold every comment.
    commentPage = ((commentTotalCount - 1) // commentCountPerPage) + 1

print("")
print("총 댓글수 : " + str(commentTotalCount))
print("댓글 페이지 : " + str(commentPage))
print("")

# Header row. The column order matches the order the rows are written in
# (nickname before user ID); the labels used to be swapped.
resultLines = ["번호\t날짜\t유저닉네임\t유저ID\t내용\t대댓글여부\t삭제여부\n"]

for page in range(1, commentPage + 1):
    if commentResult is None:
        # Pause like a human would between requests, out of politeness to
        # the server. Sleeping here (not after each page) avoids a useless
        # delay once the last page has been processed.
        time.sleep(random.uniform(0.75, 1.25))
        commentResult = requestComment(clubID, articleID, page)
    else:
        # Page 1 was already fetched above to learn the totals.
        print("요청중[" + str(page) + "] : 이미 가져왔었음.")

    # Collect each comment on this page as one output row.
    for eachComment in commentResult["list"]:
        eachCommentID = eachComment["commentid"]
        eachCommentDate = eachComment["writedt"]
        eachCommentUserNickname = eachComment["writernick"]
        eachCommentUserID = eachComment["writerid"]
        eachCommentContent = eachComment["content"]
        eachCommentReplayID = eachComment["refcommentid"]
        eachCommentIsReply = eachComment["refComment"]
        eachCommentISDeleted = eachComment["deleted"]

        # The reply column holds the referenced comment id for replies and
        # stays empty otherwise. The comparison is kept as equality because
        # the payload type of "refComment" is not confirmed here.
        if eachCommentIsReply == True:  # noqa: E712
            replyColumn = str(eachCommentReplayID)
        else:
            replyColumn = ""

        resultLines.append("\t".join([
            str(eachCommentID),
            _flattenField(eachCommentDate),
            _flattenField(eachCommentUserNickname),
            _flattenField(eachCommentUserID),
            _flattenField(eachCommentContent),
            replyColumn,
            str(eachCommentISDeleted),
        ]) + "\n")

    # Clear the page so the next iteration fetches a fresh one.
    commentResult = None

resultText = "".join(resultLines)

# Write the result. UTF-8 is set explicitly so Korean text is written the
# same way on every platform instead of depending on the locale encoding,
# and ``with`` closes the file without masking an error raised by open().
resultFileName = "navercafe_comment_" + clubID + "_" + articleID + ".csv"
with open(resultFileName, "w", encoding="utf-8") as resultFile:
    resultFile.write(resultText)

# Done. The short pauses keep the farewell visible before the console closes.
print("\n\n끝!!!")
time.sleep(random.uniform(1.5, 2.5))
# The backslash is escaped explicitly; "\(" is an invalid escape sequence.
print("\n\n\\(안녕)/\n\n")
time.sleep(random.uniform(1.5, 2.5))