Prerequisites
- Python 3
- Selenium Python bindings (the selenium package; a quick version check follows this list)
- Google Chrome and a matching ChromeDriver executable
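The walk-through below drives Chrome through the Selenium Python bindings. A minimal sketch to confirm the package is installed and to see which version you have (the script in this walk-through uses the older Selenium 3 style API, where the driver path is passed directly to webdriver.Chrome):

import selenium

# Print the installed Selenium version; the walk-through script relies on the
# Selenium 3 style calls such as find_element_by_id and a positional driver path.
print(selenium.__version__)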
Walk-through
demo_app_hotels.py
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# Import required package
import json


def get_user_name_password():
    # Read the Facebook credentials from config.json
    with open('config.json') as json_file:
        config_data = json.load(json_file)
    return config_data["facebook"]


if __name__ == "__main__":
    print("Web Scraping Application Started ...")

    # Launch Chrome using the local ChromeDriver executable
    chromedriver_path = r"D:\work\web_scrapping\chromedriver.exe"
    driver_obj = webdriver.Chrome(chromedriver_path)
    driver_obj.maximize_window()
    driver_obj.get("https://www.facebook.com")

    # Fill in the login form and submit it
    user_detail = get_user_name_password()
    username_element = driver_obj.find_element_by_id("email")
    password_element = driver_obj.find_element_by_id("pass")
    submit_btn_element = driver_obj.find_element_by_id("loginbutton")
    username_element.send_keys(user_detail["user_name"])
    password_element.send_keys(user_detail["password"])
    submit_btn_element.click()

    # Wait until the login form is gone (navigation finished) before reading the title
    wait = WebDriverWait(driver_obj, 5)
    wait.until(EC.staleness_of(submit_btn_element))
    print(driver_obj.title)

    # Open the target group and wait for the profile links to load
    driver_obj.get("https://www.facebook.com/groups/214914325234824/")
    wait = WebDriverWait(driver_obj, 15)
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, "profileLink")))

    # Print the name and profile URL of every member link found on the page
    profile_name_list = driver_obj.find_elements_by_class_name("profileLink")
    for profile_name in profile_name_list:
        print("profile_name: " + profile_name.text)
        print(profile_name.get_attribute("href"))

    driver_obj.close()
    print("Web Scraping Application Completed.")
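The credentials are not hard-coded; get_user_name_password() reads them from a config.json file kept next to the script. Based on the keys the function looks up ("facebook", "user_name", "password"), the file is assumed to look roughly like this, with the placeholder values replaced by your own:

{
    "facebook": {
        "user_name": "your_facebook_email@example.com",
        "password": "your_facebook_password"
    }
}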
Summary
In this article, we learned how to extract useful information from Hotels.com (https://in.hotels.com) using web scraping with Python and Selenium. Please work through all of the steps, share your feedback, and post any questions or doubts you may have. Thank you, and happy learning!