안녕하세요.
네이버 블로그 검색화면의 타이틀을 크롤링 하려고 하는데요.
예전에는 되었는데, 네이버가 뭔가 바뀌면서 잘 안되네요.
import requests
from bs4 import BeautifulSoup
# Fetch the Naver blog search page for `keyword` and print every element
# matched by the post-title selector.
keyword = '종이상자'
url = f'https://section.blog.naver.com/Search/Post.naver?pageNo=1&rangeType=ALL&orderBy=sim&keyword={keyword}'
# A timeout keeps the script from hanging if the server never answers;
# raise_for_status() turns an HTTP error response into an exception instead
# of silently parsing an error page.
res = requests.get(url, timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")
# NOTE(review): the titles are reported to be missing from the raw page
# source, so this selector may match nothing -- presumably the result list is
# filled in by JavaScript after the page loads; confirm in the browser's
# network tab.
titles = soup.select('#content > section > div.area_list_search > div:nth-child(1) > div > div.info_post > div.desc > a.desc_inner > strong > span > strong')
for title in titles:
    print(title)
페이지 소스를 봐도 타이틀명이 보이지 않습니다.
이럴 경우에는 어떻게 해야 할까요?
selenium밖에 답이 없는 걸까요?
아니면 requests + beautifulsoup 조합으로 가능할는지요?
아시는 분께서는 도움 주시면 정말 감사하겠습니다.
# Alternative: request the search.naver.com results URL with where=blog and
# read the titles from the result-list links.
url = f'https://search.naver.com/search.naver?query={keyword}&nso=&where=blog&sm=tab_opt'
# Placeholder for the elided steps -- presumably: fetch `url` and parse the
# response into `soup`.
...
titles = soup.select('ul.lst_total > li.bx > div.total_wrap.api_ani_send > div.total_area > a')
for title in titles:
    # .text keeps only the link's text content, without the surrounding markup.
    print(title.text)
# Request the search-result list from the SearchList.naver AJAX endpoint
# (presumably the call the blog search page itself makes; the Accept header
# asks for JSON).
# NOTE(review): the Cookie header below appears to carry login-session values
# (NID_AUT, NID_SES, JSESSIONID). Replace them with your own, and do not share
# real session cookies publicly.
curl -s 'https://section.blog.naver.com/ajax/SearchList.naver?countPerPage=7&currentPage=1&endDate=&keyword=%EC%A2%85%EC%9D%B4%EC%83%81%EC%9E%90&orderBy=sim&startDate=&type=post' \
-H 'Accept: application/json, text/plain, */*' \
-H 'Accept-Language: ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'Cookie: NNB=SVUVYPZ7AHFWA; _ga_4BKHBFKFK0=GS1.1.1630025832.1.1.1630025836.56; ASID=6e0daafb0000017ddb241b6400000069; _ga=GA1.2.1335475913.1623978760; _ga_7VKFYR6RV1=GS1.1.1640754827.6.1.1640755103.60; userids_word_speech_bubble_20140109=true; stat_yn=1; NSCS=1; NFS=2; page_uid=ickKGlp0JywssvxtZIossssssow-182686; nid_inf=-1297408043; NID_AUT=QrcI07Eke2kilAM0t/7cbMkG27n38W3ZU1x1B003S7u0QnbLIdWamljRKMOEyrG5; NID_JKL=GmbUyrZei3dGquuXzHHiARsEfEMvv+ogzf7p7o3MmTk=; NID_SES=AAABwtZJqiRUv4Mh1l5NaLeAyPLQBYVrryjKSqDVziJSOC3cSWxGn13oxIIBExQ6YjGoxuxecpRhBzjMBnycuXMCQguH5FpSsx6fdTx8v147VbGn3vZG1AmukgBIhV7lncceCN08QlIrNx1JblgG31nCEZtyBrLJ6EG6w9QE3G7uKis6SQvI3eFaF03tBO8UtYY15zpUQ3SZ84FaTJFY0mAzoNN9cJhXR2mVjxOMZmfP6+NxvJnSWofRyOEvLhGTgiWFviYLDosKIatoegesoh1rMokfj+XBUUE/DjXYndURRNTnUHBMQJ8Yq/ELsLhX3y2NlDA5zCYjQ391hqUHkNGC/nodAw7SneEkDx3wk+D9cGzWPkVIfujbGNbgg/aZgnPHY4R93rz0ieTxK30D01PHwLqKSUfAp6ozhufTRZCJxwXOzCv+5qaN3FfnnFlBUKI+eZhODXs9kgpSIeNDbmlMVvVO1wh+TwmLi67piWPJDsDTI9hE3eA8B3WJNhROcAoKhi2OqwLn3KfaEmn/DsOvdYhmQv4gfqy00iwOHO8G28C35/xK/NLiGguqtorjz4nuRhjdFhJ8YrlovkiniNxpAuvlxHhaaxAN38P1DY9qerRs; JSESSIONID=71D03D2A08A9A345303AC672A490DCB0.jvm1' \
-H 'DNT: 1' \
-H 'Pragma: no-cache' \
-H 'Referer: https://section.blog.naver.com/Search/Post.naver?pageNo=1&rangeType=ALL&orderBy=sim&keyword=%EC%A2%85%EC%9D%B4%EC%83%81%EC%9E%90' \
-H 'Sec-Fetch-Dest: empty' \
-H 'Sec-Fetch-Mode: cors' \
-H 'Sec-Fetch-Site: same-origin' \
-H 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36' \
-H 'sec-ch-ua: "Google Chrome";v="113", "Chromium";v="113", "Not-A.Brand";v="24"' \
-H 'sec-ch-ua-mobile: ?0' \
-H 'sec-ch-ua-platform: "Windows"'