# (Skip to content)
#
# User:Non-robot/mostread.py
#
# Wikipedia, the free encyclopedia
import datetime
import time
import re
import requests
import pywikibot

# 获取mostread数据


def Get_mostread_data(t, timeout=30):
    """Return the 'mostread' section of the zh.wikipedia featured feed.

    Args:
        t: Feed date as a 'YYYY/MM/DD' string; it is appended to the
            REST feed URL as-is.
        timeout: Seconds to wait for the HTTP request before requests
            raises a timeout error, so a stalled connection cannot hang
            the bot forever.

    Returns:
        The value stored under the 'mostread' key of the JSON response,
        or None when the key is absent or the body is not a JSON object.
    """
    url = 'https://zh.wikipedia.org/api/rest_v1/feed/featured/' + t
    response = requests.get(url, timeout=timeout)
    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON; report "no data" so callers that poll
        # for availability can simply retry later.
        return None
    if not isinstance(data, dict):
        return None
    return data.get('mostread')


# Fetch the previous day's feed so today's ranks can be compared with it.
# "Yesterday" is computed in UTC because today's feed is requested with a
# UTC date (pywikibot.Timestamp.utcnow()); using local time here could make
# both requests hit the same day on hosts that are not running in UTC.
yesterday = (datetime.datetime.now(datetime.timezone.utc)
             - datetime.timedelta(days=1))
old_mostread = Get_mostread_data(yesterday.strftime("%Y/%m/%d"))

# Local start time; the polling loop measures its total waiting time
# against this value.
begin_time = datetime.datetime.now()

# Maps pageid (used as the unique article identifier) to the article's
# 1-based rank in the previous day's list. Stays empty when the previous
# day's data is unavailable.
old_data = {}
if old_mostread:
    old_data = {
        article['pageid']: rank
        for rank, article in enumerate(old_mostread['articles'], start=1)
    }

# mostread = Get_mostread_data(pywikibot.Timestamp.utcnow().strftime("%Y/%m/%d"))
# mostread = Get_mostread_data('2023/02/21')
# data = {}

# Poll today's (UTC) feed until its 'mostread' section is available, then
# publish the ranking table to [[Wikipedia:动态热门]]. While the data is
# missing, sleep 30 minutes between attempts and give up once a full day
# has passed since `begin_time`.
mostread_exist = True
sleep_times = 0
while mostread_exist:
    mostread = Get_mostread_data(
        pywikibot.Timestamp.utcnow().strftime("%Y/%m/%d"))
    if mostread:
        # NOTE: `site` is only bound on this success path.
        site = pywikibot.Site('wikipedia:zh')

        # Human-readable date without zero padding, e.g. "2023年3月11日".
        # Built from the parsed fields instead of strftime's "%-m"/"%-d",
        # which are platform-specific and not available on Windows.
        parsed_date = time.strptime(mostread['date'], "%Y-%m-%dZ")
        mostread_date = '{}年{}月{}日'.format(
            parsed_date.tm_year, parsed_date.tm_mon, parsed_date.tm_mday)

        # Table header; doubled braces are literal braces for str.format.
        text = '''
{{{{/header|date={}|update=~~~~~}}}}
{{| class="wikitable" style="margin:0 auto;"
|-
|+ <big>{}</big>
! 排名 !! 条目 !! 评级 !! 浏览量 !! 排名变化'''
        text = text.format(mostread_date, mostread_date)

        # Template for one table row:
        # rank, title, bar percentage, class icon, views, rank change.
        cell_text = """
|-
| {} || [[{}]]{{{{/bar|{}}}}}|| {} || {} ||style="text-align:center"| {}"""

        cell = ''
        for n, item in enumerate(mostread['articles'], start=1):
            if n == 1:
                # The first article is the most viewed; its view count is
                # the 100% reference for every bar.
                view_top1 = int(item['views'])
                top1_title = item['title']
            pageid = item['pageid']

            # Rank change relative to the previous day's list.
            if not old_data:
                # Previous day's data is missing: leave the column empty.
                rank_change = ''
            elif pageid not in old_data:
                # Newly entered the list.
                rank_change = '<span style="color:blue"><b>+</b></span>'
            else:
                old_rank = old_data[pageid]
                if n == old_rank:
                    rank_change = '<span style="color:grey">-</span>'
                elif n > old_rank:
                    rank_change = '<span style="color:red"><b>▼</b></span>'
                else:
                    rank_change = '<span style="color:green"><b>▲</b></span>'

            # Query the article's assessment (class) information.
            pa_query = pywikibot.data.api.Request(
                site=site,
                parameters={'action': 'query',
                            'prop': 'pageassessments',
                            'titles': item['titles']['normalized']}
            ).submit()
            # Reset for every article so an empty assessment mapping can
            # neither reuse the previous article's icon nor leave the
            # name unbound.
            p_clss = ''
            try:
                pa = list(pa_query['query']['pages'].values())[
                    0]['pageassessments']
                # When several assessments exist, the last one wins.
                for p in pa.values():
                    p_clss = '{{{{Class/icon|{}}}}}'.format(p['class'])
            except KeyError:
                # No pageassessments data for this article.
                p_clss = ''

            # Views as a percentage of the top article's views.
            bar = (int(item['views'])/view_top1)*100
            cell += cell_text.format(n, item['titles']['normalized'],
                                     bar, p_clss, item['views'], rank_change)

        text = text + cell + '\n|}\n{{/footer}}'
        page = pywikibot.Page(site, "Wikipedia:动态热门")
        page.text = text
        # Get_mostreadpage_rev() parses this edit summary with a regular
        # expression, so its format must stay in sync with that pattern.
        comment = '更新数据:' + mostread_date + ';第一:[[' + top1_title + ']]'
        page.save(comment)

        print(comment)
        print('Update time:', time.strftime("%Y-%m-%d %H:%M:%S"))
        mostread_exist = False
    else:
        # Seconds to wait before asking the feed again.
        t = 1800
        if sleep_times == 0:
            # First wait: print the full message.
            print('mostread does not exist,wait...', end='')
        else:
            print(' ......', end="", flush=True)
        sleep_times += 1
        time.sleep(t)
        # Total time spent since the script started.
        delta = datetime.datetime.now() - begin_time
        if delta.days == 0:
            # Less than one day has passed: keep polling.
            mostread_exist = True
        else:
            print('more than 1 day, STOP!!!')
            print('Stop time:', time.strftime("%Y-%m-%d %H:%M:%S"))
            mostread_exist = False

# To avoid errors, pause before updating the article's talk page.
# (An earlier comment said 5 minutes; the code has always slept 60 seconds.)
time.sleep(60)

# 从[[Wikipedia:动态热门]]最新版本的摘要信息中获取日期、版本号和第一名标题的数据


def Get_mostreadpage_rev(site):
    """Read date, revision id and top title from the newest page revision.

    Looks at the edit summary of the latest revision of
    [[Wikipedia:动态热门]] and extracts the update date and the first-ranked
    title from it. A summary that does not match the expected pattern is
    skipped.

    Args:
        site: pywikibot site object the page is loaded from.

    Returns:
        A dict mapping the date as 'YYYYMMDD' to a (revid, top_title)
        tuple, e.g. {'20230311': (76318999, '2023年世界棒球經典賽')}.
        Empty when the summary did not match.
    """
    summary_re = re.compile(r'更新数据:(.*);第一:\[\[(.*)\]\]')
    hot_page = pywikibot.Page(site, 'Wikipedia:动态热门')
    found = {}
    # total=1 with reverse=False limits the iteration to the newest revision.
    for revision in hot_page.revisions(reverse=False, total=1):
        match = summary_re.search(revision.comment)
        if match is None:
            # Summary was not written by the update step; nothing to record.
            continue
        parsed = time.strptime(match.group(1), "%Y年%m月%d日")
        date_key = time.strftime("%Y%m%d", parsed)
        found[date_key] = (revision.revid, match.group(2))
    return found

# 获取第一名条目对话页章节0处的文本


def Get_section0(talk_title, timeout=30):
    """Return the wikitext of section 0 of a zh.wikipedia page.

    Args:
        talk_title: Full title of the page to read (used as the API
            'titles' parameter).
        timeout: Seconds to wait for the HTTP request before requests
            raises a timeout error, so a stalled connection cannot hang
            the bot forever.

    Returns:
        The content of the main slot of the returned revision, restricted
        to section 0.

    Raises:
        KeyError: If the response carries no revision content, e.g.
            because the page does not exist.
    """
    url = 'https://zh.wikipedia.org/w/api.php'
    # Equivalent request, for reference:
    # https://zh.wikipedia.org/w/api.php?action=query&prop=revisions&titles=User%20talk:Shizhao/mostread&rvslots=*&rvprop=content&rvsection=0
    url_params = {
        "action": "query",
        "prop": "revisions",
        "titles": talk_title,
        "rvslots": "*",
        "rvprop": "content",
        "rvsection": 0,
        "format": "json"
    }
    response = requests.get(url, params=url_params, timeout=timeout)
    data = response.json()
    # 'pages' is keyed by page id; only one title was requested, so the
    # first (only) entry is the page asked for.
    first_page = list(data['query']['pages'].values())[0]
    section0_text = first_page['revisions'][0]['slots']['main']['*']
    return section0_text


# Main program: record the newest [[Wikipedia:动态热门]] revision in the
# {{Mostread}} template on the talk page of the first-ranked article.
for date, v in Get_mostreadpage_rev(site).items():
    revid, title = v
    # One template parameter per day, formatted as 'YYYYMMDD:revid'.
    entry = date + ':' + str(revid)

    # Talk page of the article.
    talk_page = pywikibot.Page(site, title).toggleTalkPage()
    talk_title = talk_page.title()
    if talk_page.exists():
        # Default when the talk page carries no {{Mostread}} (including a
        # page without any template at all, which previously left
        # `most_data` unbound).
        most_data = [entry]
        mostread_tpl = pywikibot.Page(site, 'Template:Mostread')
        for item in talk_page.templatesWithParams():
            # Each item holds the template page and its parameter list.
            if mostread_tpl not in item:
                continue
            # Current parameter values of {{Mostread}}.
            most_data = item[1]
            for index, param in enumerate(most_data):
                # `date` is 8 characters long, so this is the same check as
                # looking for it within the first 8 characters.
                if param.startswith(date):
                    # Date already listed: update only when the stored
                    # revision id differs.
                    if str(revid) not in param[8:]:
                        most_data[index] = entry
                    break
            else:
                # Date not listed yet (also covers an empty parameter
                # list): append it.
                most_data.append(entry)
            break

        # Final template text.
        mostread_template = '{{Mostread|%s}}' % '|'.join(most_data)
        # Only section 0 of the talk page is read and rewritten.
        talkpage_text = Get_section0(talk_title)
        # Pattern matching the template together with its parameters.
        most_pattern = pywikibot.textlib.MultiTemplateMatchBuilder(
            site).pattern('mostread')
        searchObj = re.search(most_pattern, talkpage_text)
        if searchObj:
            # Template present in section 0: replace it. A callable is used
            # so the replacement is inserted literally, without regex
            # escape processing of its content.
            talkpage_text = re.sub(
                most_pattern, lambda _match: mostread_template,
                talkpage_text)
        else:
            # Template absent from section 0: append it at the end.
            talkpage_text = talkpage_text + '\n' + mostread_template
    else:
        # Talk page does not exist: the new text is just the template.
        talkpage_text = '{{Mostread|%s:%s}}' % (date, str(revid))
    talk_page.text = talkpage_text
    # Save into section 0 of the talk page; pywikibot requires `text` to be
    # passed explicitly whenever `section` is given.
    talk_page.save("BOT更新:%s条目浏览量TOP 1" % date, text=talkpage_text, section=0)

pywikibot.stopme()