一位伙计自己开了个游戏室,想在群里拉点人,就用所学知识帮帮忙,于是就有了这篇文章,今天小编特此通过实例代码给大家介绍下Python selenium 加载并保存QQ群成员去除其群主、管理员信息的示例代码
模拟登陆页面
页面分析
思路:
点击登陆按钮
选择要登陆的账号
代码实现
# Author:smart_num_1
# Blog:https://blog.csdn.net/smart_num_1
# WeChat:Be_a_lucky_dog
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def login(driver = None):
already_dic = {}
# 创建一个字典,保存电脑登陆的QQ
login_button = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//p[@class="user-info"]/a')))
login_button.click()
# 点击登录,获取电脑登陆的QQ
already_login_number = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//div[@id="loginWin"]/iframe')))
driver.get(url = already_login_number.get_attribute('src'))
# 此步骤目的,是因为登录框是一个子页面,在上一级页面中获得到的这个子页面
already_login_numbers = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_all_elements_located((By.XPATH,'//span[contains(@class,"nick")]')))
# 获取电脑登陆的QQ
print('在以下账号中选择所需账号')
for already_login_number in already_login_numbers:
already_dic[already_login_number.get_attribute('innerText')] = already_login_number
print(already_login_number.get_attribute('innerText'))
QQ_NeedToLogin = input('需要登陆: ')
# 通过获取键名,在 already_dic 获得相应的键值
already_dic[QQ_NeedToLogin].click()
# 模拟点击要登陆的QQ,达到登陆的效果
time.sleep(1)
if __name__ == '__main__':
start_url = 'https://qun.qq.com/index.html#click'
# 群首页,用来登陆账号
driver = webdriver.Chrome(executable_path = './chromedriver.exe')
# 因为selenium 需要用到浏览器、浏览器驱动,但是还要配置环境变量,很麻烦,如果这样指定 webdriver 路径的话,就可以省去那一步
driver.get(url=start_url)
login(driver=driver)
选择所需加载群
页面分析
打开群管理界面,会看到这样的信息,我们的目的是爬取已加入群的成员信息
代码实现
# Author:smart_num_1
# Blog:https://blog.csdn.net/smart_num_1
# WeChat:Be_a_lucky_dog
def get_group_number(driver = None):
group_number_dic = {}
# 同样的,利用字典储存信息
my_group_list = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_all_elements_located((By.XPATH,'//ul[@class="my-group-list"]/li')))
# 获取每个已加入群的节点信息
print('在以下群中选择:')
i = 1
for my_group in my_group_list:
try:
group_number_dic[str(i)] = my_group
print('第 %s 个--- '%str(i) + my_group.get_attribute('title') + ' ' + my_group.get_attribute('data-id'))
i += 1
except:
continue
# 打印出获得的群信息,获取所有的目标群
group = input('获取群编号 : ')
# 通过键名获取键值,得到要点击的目标
group_number_dic[group].click()
return driver
if __name__ == '__main__':
member_url_test = 'https://qun.qq.com/member.html'
driver.get(url = member_url_test)
driver = get_group_number(driver=driver)
保存所需信息
页面分析
可以看到,是个动态加载的页面,因为用的是selenium,所以就没必要分析到底是通过请求那个url得到的信息,直接模拟滚动获取就可以了
代码实现
# Author:smart_num_1
# Blog:https://blog.csdn.net/smart_num_1
# WeChat:Be_a_lucky_dog
def get_group_member(driver = None):
driver.refresh()
# 刷新一下界面,防止上一步点击过后,页面不更新的情况
elem_end = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//td[@class="td-user-nick"]/img')))
# 添加了等待,这个定位可以随便的选择,确保页面加载完毕的
for i in range(10):
time.sleep(0.5)
driver.execute_script("var action=document.documentElement.scrollTop=10000")
print('加载中······')
# 这个滚动范围可以任选,因为每次会加载21个信息,我看过我加的群,在10次过后的成员基本属于潜水的人了,要不要的就无所谓了
group_members = driver.find_elements_by_xpath('//tr[contains(@class,"mb")]')
for group_member in group_members:
try:
data = group_member.text.split('\n')[2].split(' ')[0]
# 这一步,得到一个列表,从第一位开始分别是成员、群昵称、QQ号、性别、Q龄、入群时间、等级(积分)、最后发言,在这里我是只需要QQ号码
#对于其他信息,根据自己需要,添加代码即可
if data.isdigit() == True:
with open('./record.txt','a',encoding = 'utf-8') as record:
record.write(data + '@qq.com')
record.write('\n')
except:
continue
print('Loaded')
完整代码
# Author:smart_num_1
# Blog:https://blog.csdn.net/smart_num_1
# WeChat:Be_a_lucky_dog
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
import time
import random
import os
def get_group_member(driver = None):
driver.refresh()
elem_end = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//td[@class="td-user-nick"]/img')))
for i in range(10):
time.sleep(0.5)
driver.execute_script("var action=document.documentElement.scrollTop=10000")
print('加载中······')
group_members = driver.find_elements_by_xpath('//tr[contains(@class,"mb")]')
for group_member in group_members:
try:
data = group_member.text.split('\n')[2].split(' ')[0]
if data.isdigit() == True:
with open('./record.txt','a',encoding = 'utf-8') as record:
record.write(data + '@qq.com')
record.write('\n')
except:
continue
print('Loaded')
def get_group_number(driver = None):
group_number_dic = {}
my_group_list = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_all_elements_located((By.XPATH,'//ul[@class="my-group-list"]/li')))
print('在以下群中选择:')
i = 1
for my_group in my_group_list:
try:
group_number_dic[str(i)] = my_group
print('第 %s 个--- '%str(i) + my_group.get_attribute('title') + ' ' + my_group.get_attribute('data-id'))
i += 1
except:
continue
group = input('获取群编号 : ')
group_number_dic[group].click()
return driver
def login(driver = None):
already_dic = {}
login_button = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//p[@class="user-info"]/a')))
login_button.click()
already_login_number = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_element_located((By.XPATH,'//div[@id="loginWin"]/iframe')))
driver.get(url = already_login_number.get_attribute('src'))
already_login_numbers = WebDriverWait(driver = driver,timeout = 100).until(EC.presence_of_all_elements_located((By.XPATH,'//span[contains(@class,"nick")]')))
print('在以下账号中选择所需账号')
for already_login_number in already_login_numbers:
already_dic[already_login_number.get_attribute('innerText')] = already_login_number
print(already_login_number.get_attribute('innerText'))
QQ_NeedToLogin = input('需要登陆: ')
already_dic[QQ_NeedToLogin].click()
time.sleep(1)
def start(driver = None,url = None):
print('Please wait for loading\n')
driver.get(url = url)
driver = get_group_number(driver=driver)
print('Please wait for loading\n')
get_group_member(driver=driver)
if __name__ == '__main__':
print('Please wait for loading')
chrome_options=Options()
chrome_options.add_argument('--headless')
try:
random.seed(time.time())
QQ_number = '738334209'
start_url = 'https://qun.qq.com/index.html#click'
member_url = 'https://qun.qq.com/member.html#gid=%s'%QQ_number
member_url_test = 'https://qun.qq.com/member.html'
driver = webdriver.Chrome(executable_path = './chromedriver.exe',chrome_options=chrome_options)
try:
driver.get(url=start_url)
login(driver=driver)
while True:
start(driver = driver,url = member_url_test)
flag = input('是否继续爬取? yes or no : ')
if flag == 'no':
break
os.system('cls')
driver.quit()
except:
print('Something wrong')
driver.quit()
except:
print('Something wrong!!!!!!')
os.system('pause')
转载请标明出处:https://blog.csdn.net/smart_num_1/article/details/106326488
总结
《魔兽世界》大逃杀!60人新游玩模式《强袭风暴》3月21日上线
暴雪近日发布了《魔兽世界》10.2.6 更新内容,新游玩模式《强袭风暴》即将于3月21 日在亚服上线,届时玩家将前往阿拉希高地展开一场 60 人大逃杀对战。
艾泽拉斯的冒险者已经征服了艾泽拉斯的大地及遥远的彼岸。他们在对抗世界上最致命的敌人时展现出过人的手腕,并且成功阻止终结宇宙等级的威胁。当他们在为即将于《魔兽世界》资料片《地心之战》中来袭的萨拉塔斯势力做战斗准备时,他们还需要在熟悉的阿拉希高地面对一个全新的敌人──那就是彼此。在《巨龙崛起》10.2.6 更新的《强袭风暴》中,玩家将会进入一个全新的海盗主题大逃杀式限时活动,其中包含极高的风险和史诗级的奖励。
《强袭风暴》不是普通的战场,作为一个独立于主游戏之外的活动,玩家可以用大逃杀的风格来体验《魔兽世界》,不分职业、不分装备(除了你在赛局中捡到的),光是技巧和战略的强弱之分就能决定出谁才是能坚持到最后的赢家。本次活动将会开放单人和双人模式,玩家在加入海盗主题的预赛大厅区域前,可以从强袭风暴角色画面新增好友。游玩游戏将可以累计名望轨迹,《巨龙崛起》和《魔兽世界:巫妖王之怒 经典版》的玩家都可以获得奖励。
更新日志
- 小骆驼-《草原狼2(蓝光CD)》[原抓WAV+CUE]
- 群星《欢迎来到我身边 电影原声专辑》[320K/MP3][105.02MB]
- 群星《欢迎来到我身边 电影原声专辑》[FLAC/分轨][480.9MB]
- 雷婷《梦里蓝天HQⅡ》 2023头版限量编号低速原抓[WAV+CUE][463M]
- 群星《2024好听新歌42》AI调整音效【WAV分轨】
- 王思雨-《思念陪着鸿雁飞》WAV
- 王思雨《喜马拉雅HQ》头版限量编号[WAV+CUE]
- 李健《无时无刻》[WAV+CUE][590M]
- 陈奕迅《酝酿》[WAV分轨][502M]
- 卓依婷《化蝶》2CD[WAV+CUE][1.1G]
- 群星《吉他王(黑胶CD)》[WAV+CUE]
- 齐秦《穿乐(穿越)》[WAV+CUE]
- 发烧珍品《数位CD音响测试-动向效果(九)》【WAV+CUE】
- 邝美云《邝美云精装歌集》[DSF][1.6G]
- 吕方《爱一回伤一回》[WAV+CUE][454M]


