import datetime
import time
import re
import requests
import pywikibot
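
# Bot script: fetch the daily "most read" article list from the Wikimedia
# featured-content feed for zhwiki, publish it as a ranked table on
# [[Wikipedia:动态热门]], and record each update's top-1 article on that
# article's talk page via the {{Mostread}} template.
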
# Fetch the "mostread" data for a given day
def Get_mostread_data(t):
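    """Fetch the featured-feed data for the date string ``t`` ("YYYY/MM/DD").

    Returns the feed's 'mostread' block (a dict with 'date' and 'articles')
    if present, otherwise None.
    """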
    # Build the request URL using the given date `t` as parameter
url = 'https://zh.wikipedia.org/api/rest_v1/feed/featured/' + t
# url = 'https://zh.wikipedia.org/api/rest_v1/feed/featured/2023/02/18'
    # Send a GET request to the URL and fetch the response
    response = requests.get(url)
    # Parse the response body as JSON
    data = response.json()
    # Check whether the data contains a 'mostread' key
if 'mostread' in data:
return data['mostread']
else:
return None
# Yesterday's date (now minus one day)
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
# Fetch yesterday's mostread data; used below to compute rank changes
old_mostread = Get_mostread_data(yesterday.strftime("%Y/%m/%d"))
# Record the start time; the wait loop below gives up after more than one day
begin_time = datetime.datetime.now()
old_data = {}
if old_mostread:
old_articles = old_mostread['articles']
    # Iterate over yesterday's list of articles
for i in range(len(old_articles)):
        # pageid uniquely identifies an article; map it to yesterday's rank (1-based)
old_data[old_articles[i]['pageid']] = i+1
# print(old_data)
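# old_data now maps pageid -> yesterday's rank, e.g. {123456: 1, 234567: 2} (illustrative ids)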
# Create the Site object once, up front, so it is also available to the
# talk-page update code below even if the wait loop times out
site = pywikibot.Site('wikipedia:zh')
mostread_exist = True
sleep_times = 0
while mostread_exist:
mostread = Get_mostread_data(
pywikibot.Timestamp.utcnow().strftime("%Y/%m/%d"))
if mostread:
        # Date of the feed, formatted for display
        mostread_date = time.strftime(
            "%Y年%-m月%-d日", time.strptime(mostread['date'], "%Y-%m-%dZ"))  # strip leading zeros; %-m/%-d may not work on Windows
# print(mostread_date)
text = '''
{{{{/header|date={}|update=~~~~~}}}}
{{| class="wikitable" style="margin:0 auto;"
|-
|+ <big>{}</big>
! 排名 !! 条目 !! 评级 !! 浏览量 !! 排名变化'''
text = text.format(mostread_date, mostread_date)
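        # The doubled braces in the template above are escapes so that
        # str.format() leaves literal '{{' and '}}' in the wikitext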
# text = text + '\n'
cell = ''
# parameters = {'action':'query','prop':'pageassessments','titles':item['titles']['normalized']}
# import json
# with open('data.json') as f:
# old_data = json.load(f)
# print(old_data)
        n = 0  # rank counter
for item in mostread['articles']:
if n == 0:
                # views of the top-ranked article, used to scale the bar chart
view_top1 = int(item['views'])
top1_title = item['title']
n += 1
pageid = item['pageid']
# print(pageid)
if old_data:
if pageid in old_data:
old_rank = old_data[pageid]
                    # Compare today's rank with yesterday's and build the change marker
if n == old_rank:
rank_change = '<span style="color:grey">-</span>'
elif n > old_rank:
rank_change = '<span style="color:red"><b>▼</b></span>'
elif n < old_rank:
rank_change = '<span style="color:green"><b>▲</b></span>'
else:
                    # Not in yesterday's list: mark it as a new entry
rank_change = '<span style="color:blue"><b>+</b></span>'
else:
                # Yesterday's data is missing, so leave the rank-change column empty
rank_change = ''
            # Build an API Request to query the page's assessment (rating) information
pa_query = pywikibot.data.api.Request(site=site,
parameters={'action': 'query',
'prop': 'pageassessments',
'titles': item['titles']['normalized']}
).submit()
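            # Expected response shape (roughly): {'query': {'pages': {<pageid>:
            # {'pageassessments': {<project>: {'class': ..., 'importance': ...}}}}}};
            # the 'pageassessments' key is absent when the page has no assessments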
try:
                # Extract the assessment information from the query result
pa = list(pa_query['query']['pages'].values())[
0]['pageassessments']
# print(pa)
for p in list(pa.values()):
                    # Build a {{Class/icon}} template from the assessment 'class';
                    # if several projects rated the page, the last one wins
p_clss = '{{{{Class/icon|{}}}}}'.format(p['class'])
# print(p_clss)
except KeyError:
                # No pageassessments data: leave the rating cell empty
p_clss = ''
# print(pa)
            # Percentage of the top article's views that this article received
bar = (int(item['views'])/view_top1)*100
# data[pageid] =
            # Row template for one table row
cell_text = """
|-
| {} || [[{}]]{{{{/bar|{}}}}}|| {} || {} ||style="text-align:center"| {}"""
            # Fill the row template with the formatted data
cell += cell_text.format(n, item['titles']['normalized'],
bar, p_clss, item['views'], rank_change)
# print(item['views'])
# print(n,item['views'],item['titles']['normalized'], p_clss)
# print(item['articles']['title'])
# page = pywikibot.Page(site, item['title'])
# print(page.getlatestrevisionid())
# print(data)
# with open('data.json', 'w') as f:
# json.dump(data, f)
text = text + cell + '\n|}\n{{/footer}}'
# print (text)
page = pywikibot.Page(site, "Wikipedia:动态热门")
page.text = text
comment = '更新数据:' + mostread_date + ';第一:[[' + top1_title + ']]'
page.save(comment)
print(comment)
print('Update time:', time.strftime("%Y-%m-%d %H:%M:%S"))
mostread_exist = False
else:
t = 1800
        # First wait: print the initial message
if sleep_times == 0:
            print('mostread does not exist, wait...', end='')
else:
print(' ......', end="", flush=True)
sleep_times += 1
# print('mostread does not exist,wait {} s...'.format(t))
time.sleep(t)
        # Elapsed time since the script started
        delta = datetime.datetime.now() - begin_time
        # Less than one day has passed: keep waiting
        if delta.days == 0:
            mostread_exist = True
        # One day or more has passed: give up
        else:
            print('more than 1 day, STOP!!!')
            print('Stop time:', time.strftime("%Y-%m-%d %H:%M:%S"))
            # Stop the loop
            mostread_exist = False
# To avoid errors, wait one minute before updating the corresponding article talk page
time.sleep(60)
# Extract the date, revision id and top-1 article title from the edit summary
# of the latest revision of [[Wikipedia:动态热门]]
def Get_mostreadpage_rev(site):
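    """Collect update data from the edit summaries of [[Wikipedia:动态热门]].

    Parses summaries of the form '更新数据:<date>;第一:[[<title>]]' and returns
    a dict keyed by 'YYYYMMDD' with (revid, top-1 title) tuples. With total=1
    only the latest revision is examined.
    """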
page = pywikibot.Page(site, 'Wikipedia:动态热门')
    rev = page.revisions(reverse=False, total=1)  # latest revision only
data = {}
# print(rev)
    # Iterate over the returned revisions
for r in rev:
# print(r)
# print(r.comment)
res = re.search(r'更新数据:(.*);第一:\[\[(.*)\]\]', r.comment)
try:
t = time.strftime("%Y%m%d", time.strptime(
res.group(1), "%Y年%m月%d日"))
# print(t,r.revid)
# print(r.timestamp.isoformat(),r.comment)
            # Store the revision id and top-1 title keyed by the update date
data[t] = (r.revid, res.group(2))
except AttributeError:
pass
# print(data)
    # Return format: {'20230311': (76318999, '2023年世界棒球經典賽')}
return data
# Fetch the wikitext of section 0 of the top-1 article's talk page
def Get_section0(talk_title):
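    """Return the section-0 wikitext of ``talk_title`` via the MediaWiki API
    (action=query, prop=revisions, rvsection=0)."""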
url = 'https://zh.wikipedia.org/w/api.php'
# https://zh.wikipedia.org/w/api.php?action=query&prop=revisions&titles=User%20talk:Shizhao/mostread&rvslots=*&rvprop=content&rvsection=0
    url_params = {  # API request parameters
"action": "query",
"prop": "revisions",
"titles": talk_title,
"rvslots": "*",
"rvprop": "content",
"rvsection": 0,
"format": "json"
}
response = requests.get(url, params=url_params)
data = response.json()
    # Extract the section-0 wikitext from the response
section0_text = list(data['query']['pages'].values())[
0]['revisions'][0]['slots']['main']['*']
return section0_text
# Main program: record each update's top-1 article on its talk page
for date, v in Get_mostreadpage_rev(site).items():
revid = v[0]
title = v[1]
# print(date,v[0],v[1])
    # Talk page of the top-1 article
talk_page = pywikibot.Page(site, title).toggleTalkPage()
talk_title = talk_page.title()
# print(talk_page)
if talk_page.exists():
# print('p',talk_page.templatesWithParams())
for item in talk_page.templatesWithParams():
# print('item',item)
            # Check whether {{Mostread}} is already present on the talk page
if pywikibot.Page(site, 'Template:Mostread') in item:
# print(item[1])
                # Current parameter values of the {{Mostread}} template
most_data = item[1]
                # Guard against a {{Mostread}} template with no parameters, which
                # would otherwise leave date_exist unset below
                date_exist = False
for index in range(len(most_data)):
                    # each parameter looks like '20220304:333333' (date:revid)
                    # check whether this date is already recorded in the current parameter
                    if most_data[index].find(date, 0, 8) == -1:  # date not found here
date_exist = False
# most_data.append(date + ':' + str(revid))
# break
else:
date_exist = True
break
# n += 1
                # The date already exists: update the entry if the recorded revid differs
# print('index',index)
# print('date_exist',date_exist)
if date_exist:
if most_data[index].find(str(revid), 8) == -1:
most_data[index] = date + ':' + str(revid)
# print('date_exist',most_data)
else:
                    # Date not present yet: append a new entry
most_data.append(date + ':' + str(revid))
# print('no date_exist',most_data)
break
else:
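            # for-else: runs only when the loop above finished without break,
            # i.e. no {{Mostread}} template was found on the talk page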
most_data = [date + ':' + str(revid)]
# print(most_data)
        # Build the final template text
mostread_template = '{{Mostread|%s}}' % '|'.join(most_data)
# print(mostread_template)
# talkpage_text = talk_page.text
        # Get the section-0 wikitext of the article talk page
talkpage_text = Get_section0(talk_title)
        # Pattern matching the {{Mostread}} template and its parameters in the text
most_pattern = pywikibot.textlib.MultiTemplateMatchBuilder(
site).pattern('mostread')
searchObj = re.search(most_pattern, talkpage_text)
        # If a {{mostread}} template already exists, replace it
if searchObj:
talkpage_text = re.sub(
most_pattern, mostread_template, talkpage_text)
else:
            # No template yet: append it to the end of the section-0 text
talkpage_text = talkpage_text + '\n' + mostread_template
else:
        # The talk page does not exist: build the text from scratch
talkpage_text = '{{Mostread|%s:%s}}' % (date, str(revid))
# print(talkpage_text)
# print('talkpage_text: ', talkpage_text)
talk_page.text = talkpage_text
    # Write the text back to section 0 of the talk page
# text must be used with section
talk_page.save("BOT更新:%s条目浏览量TOP 1" % date, text=talkpage_text, section=0)
pywikibot.stopme()