
Naver Map review crawling 5

import datetime
import time

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
from requests.adapters import HTTPAdapter
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from urllib3.util.retry import Retry
from webdriver_manager.chrome import ChromeDriverManager

# Chrome options
options = ChromeOptions()
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
options.add_argument('user-agent=' + user_agent)
options.add_argument("lang=ko_KR")
options.add_argument('headless')  # run headless (no visible browser window)
options.add_argument('window-size=1920x1080')
options.add_argument("disable-gpu")
options.add_argument("--no-sandbox")
# Install and use the latest ChromeDriver
service = ChromeService(executable_path=ChromeDriverManager().install())
# Launch the Chrome driver
driver = webdriver.Chrome(service=service, options=options)
# Target URL (Naver Place mobile review page, sorted by most recent)
url = 'https://m.place.naver.com/restaurant/1085956231/review/visitor?entry=ple&reviewSort=recent'
# requests session for secondary access with BeautifulSoup
# (configured here but not actually used below; parsing is done on driver.page_source)
session = requests.Session()
headers = {
    "User-Agent": "user value"}
retries = Retry(total=5,
                backoff_factor=0.1,
                status_forcelist=[500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retries))
# New xlsx file
now = datetime.datetime.now()
xlsx = Workbook()
list_sheet = xlsx.create_sheet('output')
list_sheet.append(['nickname', 'content', 'date', 'revisit'])
# Start crawling/scraping!
try:
    print("Starting webdriver and accessing URL...")
    driver.get(url)
    driver.implicitly_wait(30)
    print("Page loaded. Scrolling down...")
    driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
    count = 0
    try:
        while True:
            print("Clicking on '´õº¸±â' button...")
            driver.find_element(By.XPATH, '//*[@id="app-root"]/div/div/div/div[6]/div[2]/div[3]/div[2]/div/a').click()
            count += 1
            print(f"'´õº¸±â' button clicked {count} times.")
            time.sleep(0.4)
    except Exception as e:
        print(f'No more "´õº¸±â" button found, finished scrolling after {count} clicks.')
    time.sleep(25)
    html = driver.page_source
    bs = BeautifulSoup(html, 'lxml')
    reviews = bs.select('li.YlrAu')
    print(f"Found {len(reviews)} reviews.")
    for r in reviews:
        nickname = r.select_one('div.VYGLG')
        content = r.select_one('div.vg7Fp.CyA_N')
        date_spans = r.select('div.D40bm>span.CKUdu>time')
        revisit_spans = r.select('div.D40bm>span.CKUdu')
        # fall back to empty strings when an element is missing
        # (indexing the select() results directly would raise IndexError before any check runs)
        nickname = nickname.text if nickname else ''
        content = content.text if content else ''
        date = date_spans[0].text if date_spans else ''
        revisit = revisit_spans[1].text if len(revisit_spans) > 1 else ''
        time.sleep(0.06)
        print(f"Review: {nickname} / {content} / {date} / {revisit}")
        list_sheet.append([nickname, content, date, revisit])
        time.sleep(0.06)
    # Save the file
    file_name = 'naver_review_' + now.strftime('%Y-%m-%d_%H-%M-%S') + '.xlsx'
    xlsx.save(file_name)
    print(f"File saved as {file_name}")
except Exception as e:
    print(f"Exception occurred: {e}")
    # Save the file(temp)
    file_name = 'naver_review_' + now.strftime('%Y-%m-%d_%H-%M-%S') + '.xlsx'
    xlsx.save(file_name)
    print(f"File saved as {file_name} after exception")
finally:
    driver.quit()
    print("Webdriver closed.")

This is my Naver Map review crawling code. When it is run as written above,
the Excel file is created, but nothing is actually crawled.
Below is the output.

DevTools listening on ws://127.0.0.1:51574/devtools/browser/16977761-9899-4120-9ba5-2ef94f71fbc6
Starting webdriver and accessing URL...
Page loaded. Scrolling down...
Clicking on '더보기' button...
No more "더보기" button found, finished scrolling after 0 clicks.
Found 0 reviews.
File saved as naver_review_2024-06-07_10-45-31.xlsx
Webdriver closed.
PS C:\Users\ooooo\

That is the output.
The Excel file is created, but it never reads any reviews.
I would appreciate any help.
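For reference, below is a minimal sketch of a check that could show whether the li.YlrAu review items ever appear on the page at all. It reuses the same URL and selector from my code above (which may simply be out of date on Naver's side), and uses an explicit WebDriverWait instead of the fixed sleeps; it is a debugging sketch, not a confirmed fix.

# Sanity check: does li.YlrAu ever appear on the loaded review page?
# Assumes the same URL and selector as the main script; the selector may be stale.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options as ChromeOptions
from webdriver_manager.chrome import ChromeDriverManager

URL = 'https://m.place.naver.com/restaurant/1085956231/review/visitor?entry=ple&reviewSort=recent'

options = ChromeOptions()
options.add_argument('headless')            # same headless setup as the main script
options.add_argument('window-size=1920x1080')
driver = webdriver.Chrome(
    service=ChromeService(executable_path=ChromeDriverManager().install()),
    options=options,
)
try:
    driver.get(URL)
    # Wait up to 30 s for at least one review <li> to be present instead of sleeping.
    items = WebDriverWait(driver, 30).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'li.YlrAu'))
    )
    print(f'li.YlrAu elements found: {len(items)}')
except Exception as exc:
    # A timeout here suggests the selector (or the page structure) has changed.
    print(f'No li.YlrAu elements appeared: {exc}')
    # Dump the HTML so the current class names can be inspected manually.
    with open('page_dump.html', 'w', encoding='utf-8') as f:
        f.write(driver.page_source)
finally:
    driver.quit()

If the wait times out, the saved page_dump.html should show whether the class names (YlrAu, VYGLG, vg7Fp) have been renamed, which would also explain why the '더보기' XPath never matches and the click loop ends after 0 clicks.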
