In the last post, I showed you how to scrape Google's “People also ask” results. Here is the code to scrape Google Maps using Selenium. Please leave your suggestions and comments below, or in our Facebook group, You, Me, and Automation.
'''
Google Map Scraper Using Selenium
Code By Sunil
17 July, 2022
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import csv
import time
# --- Configuration ---------------------------------------------------------
search = "landscaping in dallas"   # query typed into the Google search box
pages = 2                          # number of local-result pages to scrape
header = ["data_cid", "title", "address", "website", "phone", "rating",
          "reviews", "image", "category", "timing", "description", "profiles"]
data = []                          # rows collected by main(), one per listing

# Link that opens a listing's detail panel in the local-results list.
# (The original selector was missing the closing "]".)
RESULT_LINK_SELECTOR = (
    'div#search a[class="vwVdIc wzN8Ac rllt__link a-no-hover-decoration"]'
)


# --- Pure string helpers ---------------------------------------------------
def extract_image_url(style):
    """Return the image URL held in an inline ``background-image`` style.

    A style without ``background`` in it is returned unchanged; ``None`` or
    an empty style yields ``""``.
    """
    style = style or ""
    if 'background' in style:
        style = style.replace('background-image: url("', '')
        style = style.replace('"', '')
        style = style.replace(');', '')
    return style


def clean_timing_html(inner_html):
    """Strip the ``SKNSIb`` class from the hours table markup.

    Returns ``""`` for missing/empty markup, otherwise the cleaned markup
    followed by a newline.
    """
    if not inner_html:
        return ""
    # NOTE(review): the original literal around this expression was split
    # across two lines in the source; it is reproduced here as a trailing
    # newline. It may originally have been HTML tags lost in extraction
    # (e.g. a wrapping table element) - confirm against the author's copy.
    return inner_html.replace(' class="SKNSIb"', '') + "\n"


def join_profiles(links):
    """Concatenate profile links, each one preceded by a newline."""
    # NOTE(review): the separator literal was split across two lines in the
    # source and is reproduced as a newline; it may originally have been an
    # HTML line-break tag - confirm.
    return "".join("\n" + link for link in links)


def page_button_selector(page):
    """Return the CSS selector of the pagination link for *page*."""
    return 'a[aria-label="Page ' + str(page) + '"]'


# --- Element lookup helpers ------------------------------------------------
def find_or_none(driver, selector):
    """Return the first element matching *selector*, or ``None`` if absent."""
    try:
        return driver.find_element(By.CSS_SELECTOR, selector)
    except NoSuchElementException:
        return None


def element_text(driver, selector):
    """Return the text of the first match for *selector*, or ``""``."""
    element = find_or_none(driver, selector)
    return element.text if element is not None else ""


def element_attribute(driver, selector, name):
    """Return attribute *name* of the first match for *selector*, or ``""``.

    ``get_attribute`` may return ``None`` when the attribute is not set; that
    is normalised to ``""`` so callers never see a stale or missing value.
    """
    element = find_or_none(driver, selector)
    if element is None:
        return ""
    return element.get_attribute(name) or ""


# --- Scraping --------------------------------------------------------------
def scrape_listing(driver, data_cid):
    """Read the currently open detail panel and return one CSV row.

    Every field falls back to ``""`` when its element is missing or empty, so
    a value can never leak from the previously scraped listing. Each field is
    printed as it is read. The returned list follows the order of ``header``.
    """
    # A missing title is recorded as "" like every other field instead of
    # aborting the whole run before anything has been written to disk.
    title = element_text(driver, 'h2[data-attrid="title"]')
    print('title: ', title)

    address = element_text(
        driver,
        'div[data-attrid="kc:/location/location:address"] span:nth-child(2)')
    print('address: ', address)

    # The first header link is only the website when it is labelled as such.
    website = ""
    link = find_or_none(
        driver,
        'div[class="kp-header"] div > div > div:nth-child(2) > div > a')
    if link is not None and link.text == 'Website':
        website = link.get_attribute('href') or ""
    print('website:', website)

    phone = element_text(
        driver,
        'div[data-attrid="kc:/collection/knowledge_panels/has_phone:phone"] '
        'span:nth-child(2) > span > a > span')
    print('phone:', phone)

    rating = element_attribute(driver, 'g-review-stars span', 'aria-label')
    print('rating:', rating)

    reviews = element_text(
        driver, 'a[data-async-trigger="reviewDialog"] span')
    print('reviews:', reviews)

    image = extract_image_url(element_attribute(
        driver,
        'div[data-attrid="kc:/location/location:media"] > div > a > div',
        'style'))
    print('image:', image)

    # The category lives in one of two places; the second selector is tried
    # only when the first element does not exist.
    category_element = find_or_none(
        driver,
        'div[data-attrid="kc:/local:lu attribute list"] > div > div > span')
    if category_element is None:
        category_element = find_or_none(
            driver,
            'div[data-attrid="kc:/local:one line summary"] > div > span')
    category = category_element.text if category_element is not None else ""
    print('category:', category)

    timing = clean_timing_html(element_attribute(
        driver,
        'div[data-attrid="kc:/location/location:hours"] > div > div > '
        'div:nth-child(2) > div > table',
        'innerHTML'))
    print('timing:', timing)

    # The original also carried a disabled fallback that read the innerHTML of
    # 'div[data-attrid="kc:/local:merchant_description"] > c-wiz > div >
    # div:nth-child(2)'; it stays disabled here.
    description = element_attribute(
        driver, 'div[data-long-text]', 'data-long-text')
    print('description:', description)

    # Up to five social profile links; stop at the first missing slot.
    links = []
    for slot in range(1, 6):
        anchor = find_or_none(
            driver,
            'div[data-attrid="kc:/common/topic:social media presence"] '
            'div:nth-child(2) > div:nth-child(' + str(slot) + ') > div > '
            'g-link > a')
        if anchor is None:
            break
        href = anchor.get_attribute('href')
        if href:  # skip anchors without a usable href
            links.append(href)
    profiles = join_profiles(links)
    print('profiles: ', profiles)

    return [data_cid, title, address, website, phone, rating, reviews,
            image, category, timing, description, profiles]


def scrape_pages(driver, page_count):
    """Scrape *page_count* result pages and return the collected rows.

    The page that is already open counts as page 1. After each page except
    the last, the link to the next page is clicked; scraping stops early if
    that link does not exist.
    """
    rows = []
    for page in range(1, page_count + 1):
        for link in driver.find_elements(By.CSS_SELECTOR,
                                         RESULT_LINK_SELECTOR):
            data_cid = link.get_attribute('data-cid')
            link.click()
            print('item click... 5 seconds...')
            time.sleep(5)  # give the detail panel time to load
            rows.append(scrape_listing(driver, data_cid))

        if page == page_count:
            break  # last requested page has been scraped; nothing to click
        try:
            driver.find_element(
                By.CSS_SELECTOR, page_button_selector(page + 1)).click()
        except NoSuchElementException:
            break  # no further result pages
        print('page click... 10 seconds...')
        time.sleep(10)  # give the next result page time to load
    return rows


def open_local_results(driver, query):
    """Search Google for *query* and open the full local-results list."""
    driver.get('https://www.google.com')
    driver.implicitly_wait(2)
    driver.find_element(By.NAME, "q").send_keys(query + Keys.ENTER)
    more = driver.find_element(By.TAG_NAME, "g-more-link")
    more.find_element(By.TAG_NAME, "a").click()
    time.sleep(10)  # wait for the local-results page to load


def write_csv(path, rows):
    """Write ``header`` followed by *rows* to *path* as UTF-8 CSV."""
    with open(path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        writer.writerows(rows)


def main():
    """Run the scrape for ``search`` and save the results to ``gmap.csv``."""
    options = webdriver.ChromeOptions()
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    # options.headless = True
    driver = webdriver.Chrome(options=options)
    try:
        open_local_results(driver, search)
        data.extend(scrape_pages(driver, pages))
    finally:
        # Always close the browser, even when scraping fails part-way.
        driver.quit()
    write_csv('gmap.csv', data)


if __name__ == "__main__":
    main()