-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge_categories.py
71 lines (54 loc) · 2.11 KB
/
merge_categories.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# coding=utf-8
import pywikibot
import re
from pywikibot import pagegenerators
from time import sleep
# CATEGORY_RU is the Russian Wikipedia category whose subcategories are scanned.
# CATEGORY_EN is presumably its English counterpart.
# NOTE(review): CATEGORY_EN is not referenced in the visible code - confirm it is still needed.
CATEGORY_EN = 'Flags by year of introduction'
CATEGORY_RU = 'Флаги по годам'
# Title templates for the per-year categories. PATTERN_EN is filled with a
# year string; PATTERN_RU is filled with PATTERN_YEAR to form a regex.
PATTERN_EN = 'Category:Flags introduced in %s'
PATTERN_RU = 'Категория:Флаги %s года'
# Regex capture group matching one or more ASCII digits (the year).
PATTERN_YEAR = '([0-9]+)'
# Site handles for English and Russian Wikipedia, used to build Category objects.
site_en = pywikibot.Site('en', 'wikipedia')
site_ru = pywikibot.Site('ru', 'wikipedia')
# Wikidata site handle.
# NOTE(review): `repo` is not referenced in the visible code - confirm it is still needed.
repo = pywikibot.Site('wikidata', 'wikidata')
def raw_merge(target_item, redirect_item):
    """Merge ``redirect_item`` into ``target_item`` and make it a redirect.

    The merge is requested with description conflicts ignored. If the
    merged-away item does not report itself as a redirect afterwards, all of
    its descriptions are blanked and the redirect is set explicitly.

    Args:
        target_item: Item that receives the merged data.
        redirect_item: Item that is merged away and turned into a redirect.
    """
    target_id = target_item.getID()
    source_id = redirect_item.getID()
    print('MERGE: %s <- %s' % (target_id, source_id))

    redirect_item.mergeInto(target_item, ignore_conflicts='description')

    if not redirect_item.isRedirectPage():
        # Re-read the item, then blank every description before forcing the
        # redirect (see the edit summary below).
        leftover = redirect_item.get(force=True)['descriptions']
        blanked = {code: '' for code in leftover}
        redirect_item.editDescriptions(blanked, summary='Clearing item to prepare for redirect')
        redirect_item.set_redirect_target(target_item, force=True)
def merge(item1, item2):
    """Merge two Wikidata items, keeping the one with the lower numeric ID.

    Args:
        item1: First item; must provide ``getID()`` and ``getID(numeric=True)``.
        item2: Second item; same requirement.

    Returns:
        False when both arguments have the same ID (nothing is merged),
        True after ``raw_merge`` has been called.
    """
    if item1.getID() == item2.getID():
        return False
    # Order by the numeric part of the ID. Comparing the ID strings is
    # lexicographic ('Q9' > 'Q10'), which picks the wrong merge target
    # whenever the two IDs have a different number of digits.
    if item1.getID(numeric=True) < item2.getID(numeric=True):
        raw_merge(item1, item2)
    else:
        raw_merge(item2, item1)
    return True
def iterate_items():
    """Pair per-year flag categories on ru/en Wikipedia and merge their items.

    Walks the subcategories of ``CATEGORY_RU``, extracts the year from each
    title with ``PATTERN_RU``, builds the matching English category title
    from ``PATTERN_EN`` and merges the two categories' Wikidata items.

    Subcategories whose title does not fit the pattern, and categories
    without a Wikidata item on either side, are reported and skipped rather
    than aborting the whole run. Sleeps 5 seconds after each actual merge.
    """
    # Compile once; the pattern does not change between iterations.
    year_regex = re.compile(PATTERN_RU % PATTERN_YEAR)
    cat_ru = pywikibot.Category(site_ru, CATEGORY_RU)
    subcats_ru = cat_ru.subcategories()
    subcats_generator = pagegenerators.PreloadingGenerator(subcats_ru, 500)
    for subcat_ru in subcats_generator:
        title_ru = subcat_ru.title()
        matches = year_regex.fullmatch(title_ru)
        if matches is None:
            # Not a per-year category; indexing a None match would raise
            # TypeError and stop the run.
            print('NO MATCH: %s' % title_ru)
            continue
        year = matches.group(1)
        title_en = PATTERN_EN % year
        subcat_en = pywikibot.Category(site_en, title_en)
        try:
            item_en = subcat_en.data_item()
        except pywikibot.exceptions.NoPage:
            print('NO PAGE: %s' % title_en)
            continue
        try:
            item_ru = subcat_ru.data_item()
        except pywikibot.exceptions.NoPage:
            # Same failure mode as the English side: no item to merge.
            print('NO PAGE: %s' % title_ru)
            continue
        if merge(item_en, item_ru):
            print('MERGE: %s + %s' % (item_en.getID(), item_ru.getID()))
            sleep(5)
        else:
            print('SKIP: %s = %s' % (item_en.getID(), item_ru.getID()))
# Run the merge pass. There is no __main__ guard, so this also executes when
# the module is imported.
iterate_items()