引言:随着互联网的快速发展,越来越多的数据以网页形式呈现。Python作为一种功能强大的编程语言,在数据抓取方面有着广泛的应用。本教程将带你通过Python爬取闲鱼数据,让你轻松掌握数据抓取技巧。第一章……
随着互联网的快速发展,越来越多的数据以网页形式呈现。Python作为一种功能强大的编程语言,在数据抓取方面有着广泛的应用。本教程将带你通过Python爬取闲鱼数据,让你轻松掌握数据抓取技巧。
import requests
from bs4 import BeautifulSoup
def get_goods_info(url, timeout=10):
    """Download an item page and extract its title, price and image URL.

    Args:
        url: Address of the item page to fetch.
        timeout: Seconds to wait for the server; forwarded to
            ``requests.get`` so a stalled connection cannot hang forever.

    Returns:
        A ``(title, price, img)`` tuple. ``title`` and ``price`` are the text
        of the first ``<div class="title">`` and ``<div class="price">``;
        ``img`` is the ``src`` attribute of the first ``<img class="image">``.
        A slot is ``None`` when the matching element is not on the page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.3'
        )
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # soup.find() returns None when nothing matches, so check each tag
    # before reading from it.
    title_tag = soup.find('div', class_='title')
    price_tag = soup.find('div', class_='price')
    img_tag = soup.find('img', class_='image')

    title = title_tag.text if title_tag is not None else None
    price = price_tag.text if price_tag is not None else None
    img = img_tag.get('src') if img_tag is not None else None
    return title, price, img
# Example: scrape the details of a Xianyu item
url = 'https://www.xianyu.com/item/xxxxxx'
print(get_goods_info(url))


def get_comments_info(url, timeout=10):
    """Download an item page and extract the comments listed on it.

    Args:
        url: Address of the item page to fetch.
        timeout: Seconds to wait for the server; forwarded to
            ``requests.get`` so a stalled connection cannot hang forever.

    Returns:
        A list with one dict per ``<div class="comment-content">`` element,
        in page order. Each dict has the keys ``'content'`` (text of the
        first ``<p>``), ``'user'`` (text of the first
        ``<a class="user-name">``) and ``'score'`` (text of the first
        ``<span class="score">``). A value is ``None`` when the matching
        element is missing from that comment.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.3'
        )
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    def text_of(tag):
        # find() returns None when nothing matches; keep the slot empty
        # rather than raising AttributeError on a partial comment.
        return tag.text if tag is not None else None

    return [
        {
            'content': text_of(comment.find('p')),
            'user': text_of(comment.find('a', class_='user-name')),
            'score': text_of(comment.find('span', class_='score')),
        }
        for comment in soup.find_all('div', class_='comment-content')
    ]
# Example: scrape the comments of a Xianyu item
url = 'https://www.xianyu.com/item/xxxxxx'
print(get_comments_info(url))

# Used by save_to_csv below.
import csv
def _as_row(item):
    """Return ``[title, price, image]`` for a single goods record."""
    try:
        return [item['title'], item['price'], item['image']]
    except TypeError:
        # Tuples and lists, such as the (title, price, img) tuple returned
        # by get_goods_info, cannot be indexed with string keys.
        title, price, image = item
        return [title, price, image]


def save_to_csv(data, filename):
    """Write goods records to a CSV file with a header row.

    Args:
        data: Iterable of records. Each record is either a mapping with the
            keys ``'title'``, ``'price'`` and ``'image'``, or a
            three-element sequence in ``(title, price, image)`` order.
        filename: Path of the CSV file to create; an existing file is
            overwritten.

    Raises:
        KeyError: If a mapping record lacks one of the three keys.
        ValueError: If a sequence record does not have exactly three items.
    """
    # newline='' lets the csv module control line endings; utf-8-sig writes
    # a BOM at the start of the file.
    with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        # Header columns: title, price, image.
        writer.writerow(['标题', '价格', '图片'])
        writer.writerows(_as_row(item) for item in data)