Commit 8dbd5715 authored by Jim Tittsler

Copy partner info to WikiEducator

parents
# don't include the configuration file
options.*
.wikieducator.rc
# compiled python
*.py[co]
# editor debris
*~
.#*
._*
.*.sw?
# OS debris
.DS_Store
# ignore temp/state files
.netrwhist
#
The MIT License (MIT)
Copyright (c) 2014 Open Education Resource Foundation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
# oerupartners
This is a simple Python script which uses
[lxml] to scrape the [OERu.org]
partner pages to keep the descriptions and
logos up to date on [WikiEducator].
## License
MIT
[lxml]: http://lxml.de/
[OERu.org]: https://OERu.org/
[WikiEducator]: https://WikiEducator.org/
import os
import re
import time
import math
import ConfigParser
from lxml import html, etree
import requests
import hashlib
try:
from cStringIO import StringIO
except:
from StringIO import StringIO
from PIL import Image
import logging
import mwclient
# Base URL of the OERu site; scraped hrefs and image srcs are appended to it.
OERu = 'http://OERu.org'
# Index page that lists all partners.
OERu_partner_page = OERu + '/oeru-partners/'
# Bot credentials are read from `.wikieducator.rc` (relative path), which must
# provide a [login] section with `user` and `password` options.
config = ConfigParser.ConfigParser()
config.read('.wikieducator.rc')
we_user = config.get('login', 'user')
we_pass = config.get('login', 'password')
# Shared mwclient handle for WikiEducator; the login happens at import time.
we = mwclient.Site('wikieducator.org', path='/')
we.login(username = we_user, password = we_pass)
# DEBUG-level logging is written to a fixed file under /tmp.
logging.basicConfig(level=logging.DEBUG, filename='/tmp/oerupartners.log')
def get_partners(u):
    """Scrape the partner index page and return one record per partner.

    Args:
        u: URL of the partner index page.

    Returns:
        A list of dicts with the keys ``name`` (the logo's alt text),
        ``url`` (partner page URL) and ``img`` (logo URL).  Both URLs are
        built by prefixing the scraped attribute value with ``OERu``.
        Entries under a ``partners-donor`` container are excluded by the
        XPath query.
    """
    page = requests.get(u)
    tree = html.fromstring(page.text)
    partner_nodes = tree.xpath('//*[@id="isotope-partners"]/div[not(contains(@class,"partners-donor"))]/div/a')
    partners = []
    for node in partner_nodes:
        logos = node.xpath('img')
        if not logos:
            # Later steps need a logo for every partner; an anchor without
            # an <img> used to raise IndexError and abort the whole run.
            logging.warning('partner link %s has no logo image; skipped',
                            node.xpath('string(@href)'))
            continue
        img = logos[0]
        partners.append({
            "name": img.xpath('string(@alt)'),
            "url": OERu + node.xpath('string(@href)'),
            "img": OERu + img.xpath('string(@src)'),
        })
    return partners
# Wikitext written to the File: page of a newly uploaded logo.  The three
# placeholders are: partner name, partner name, upload date.
_LOGO_DESCRIPTION = """
== Summary ==
{{Information
|Description=%s logo. Institutional logo used under fair dealing for OERu membership recognition on WikiEducator.
|Source=http://OERu.org/
|Author=%s
|Date=%s
|Permission=
|other_versions=
}}
== Licensing ==
{{Fair-use}}"""


def _logo_filename(name, img_url):
    """Return the wiki file name for a partner logo: cleaned name + extension."""
    # Keep only letters, digits, spaces, '-' and '_' from the partner name.
    pname = re.sub(r'[^- _a-z0-9]', '', name, flags=re.IGNORECASE)
    # Take the real extension of the URL path rather than the last four
    # characters, so ".jpeg" or a trailing query string do not corrupt it.
    path = img_url.split('?', 1)[0].split('#', 1)[0]
    return pname + os.path.splitext(path)[1]


def get_image_files(we, partners):
    """Make sure every partner logo exists on the wiki and record its details.

    For each partner the logo is downloaded and looked up on the wiki by its
    SHA-1.  If no file with that hash exists, the logo is uploaded and its
    File: page is given a description/licence block.

    Args:
        we: logged-in ``mwclient.Site``.
        partners: list of partner dicts containing ``name`` and ``img``.

    Returns:
        The same list; each dict gains ``file`` (wiki file name), ``width``
        and ``height`` (logo size in pixels).
    """
    for partner in partners:
        # fetch the image
        print(partner['img'])
        img = requests.get(partner['img'])
        sha1 = hashlib.sha1(img.content).hexdigest()
        # see if we already have this image
        print('  ' + sha1)
        found = False
        for existing in we.allimages(sha1=sha1, generator=True):
            # if several files share the hash, the last one listed wins
            partner['file'] = existing.page_title
            print(" Exists: %s" % existing.page_title.encode('utf-8'))
            found = True
        if not found:
            # upload the image
            filename = _logo_filename(partner['name'], partner['img'])
            print("upload: %s" % filename)
            try:
                we.upload(file=StringIO(img.content), filename=filename,
                          description='OERu partner %s logo' % (partner['name']))
            except mwclient.errors.FileExists:
                # a different image already uses this name; it is reused
                print("FILE %s EXISTS!" % (filename))
            partner['file'] = filename
            page = we.Pages['File:' + filename]
            description = _LOGO_DESCRIPTION % (
                partner['name'], partner['name'], time.strftime('%Y-%m-%d'))
            page.save(text=description,
                      summary=u'update description/license information',
                      minor=True)
        # the pixel size is needed later to scale the featured-partner logo
        im = Image.open(StringIO(img.content))
        (partner['width'], partner['height']) = im.size
    return partners
# Wikitext of a partner page.  Placeholders, in order: display name,
# tagline, converted body, logo file name, display name (alt text), and the
# partner's OERu.org URL.
_PARTNER_PAGE_TEMPLATE = '''
<div style="width:640px; float:left;">
{{OERu_Partner_Header|name=%s}}
<onlyinclude>{{OERu_Partner_Summary|text=%s}}</onlyinclude>
{{OERu_Partner_Body|text=%s}}
</div>
{{OERu_Partner_Logo|img=File:%s|alt=%s|link=%s}}
[[Category:OERu Partner]]
'''


def _html_to_wikitext(body):
    """Very quick and very dirty conversion of an HTML fragment to wikitext."""
    # The order of these substitutions matters: links are rewritten before
    # the generic "drop every closing tag" pass removes the remaining markup.
    body = body.replace('</a>', ']')
    body = re.sub(r'<a.*?href="([^"]*)"[^>]*>', r"[\1 ", body)
    body = re.sub(r'<((div[^>]*)|(p[^>]*)|(br/?))>', "\n", body)
    body = re.sub(r'</?em>', "''", body)
    body = re.sub(r'</[^>]*>', "", body)
    body = re.sub('<((p[^>]*)|(br/?))>', "\n", body)
    # numeric character references for common typographic characters
    body = body.replace('&#8211;', "-")
    body = body.replace('&#8217;', "'")
    body = re.sub(r'&#822[01];', '"', body)
    body = body.replace('&#160;', ' ')
    body = re.sub(r'<((span)|(ul)|(ol))[^>]*>', '', body)
    body = body.replace('<li>', '*')
    return body


def make_partner_pages(we, partners):
    """Copy each partner's OERu.org page to ``OERu/Home/Partners/<name>``.

    The first ``<h3>`` is used as the display name, the first ``<h5>`` as the
    tagline (either may be missing and then becomes an empty string), and the
    main content block is converted to wikitext for the body.

    Args:
        we: logged-in ``mwclient.Site``.
        partners: list of partner dicts containing ``name``, ``url`` and
            ``file``.
    """
    for partner in partners:
        print(partner['name'])
        # fetch the page
        partner_page = requests.get(partner['url'])
        tree = html.fromstring(partner_page.text)
        headings = tree.xpath('//h3/text()')
        printname = headings[0] if headings else ''
        taglines = tree.xpath('//h5/text()')
        tagline = taglines[0] if taglines else ''
        sections = tree.xpath('//*[@id="main"]/div/div[2]/div')
        if not sections:
            # An unexpected page layout used to raise IndexError here and
            # abort the updates for all remaining partners.
            logging.warning('no description block found on %s; page for %s not updated',
                            partner['url'], partner['name'])
            continue
        body = _html_to_wikitext(etree.tostring(sections[0]))
        pagebody = _PARTNER_PAGE_TEMPLATE % (
            printname, tagline, body, partner['file'], printname, partner['url'])
        page = we.Pages['OERu/Home/Partners/%s' % (partner['name'])]
        page.save(text=pagebody, summary=u'copy partner page from OERu.org')
def featured_partner(we, partners):
    """Rewrite ``Template:OERu Featured Partner`` to rotate through partners.

    The generated template uses a ``#switch`` on the current Unix time so
    that, over a cycle of 345600 seconds (four days), every partner is shown
    for an equal slot, in alphabetical order of ``name``.

    Args:
        we: logged-in ``mwclient.Site``.
        partners: list of partner dicts containing ``name``, ``file``,
            ``width`` and ``url``.  If it is empty the template is left
            untouched.
    """
    numparts = len(partners)
    if numparts == 0:
        # Nothing to rotate through; avoid dividing by zero and keep the
        # template that is already on the wiki.
        logging.warning('no partners found; featured partner template not updated')
        return
    cycle = 345600
    # Slot length rounded UP.  With a rounded-down slot the computed index
    # floor(t / slot) can reach numparts near the end of the cycle, which
    # matches no #switch case.  Integer arithmetic keeps the result
    # independent of the interpreter's division semantics.
    slot = (cycle + numparts - 1) // numparts
    body = [
        '{{Lozenge|style=background:#C3A8C9;|text=Featured OERu Partner}}',
        '',
        '',
        '{{#switch:{{#expr: floor(({{#time:U}} mod %d)/%d)}}' % (cycle, slot),
    ]
    for i, partner in enumerate(sorted(partners, key=lambda k: k['name'])):
        # logos are displayed at their own width, capped at 200px
        body.append(' | %d = [[File:%s|left|%dpx|link=%s]] {{OERu Partner Feature|%s|%s}}' %
                    (i, partner['file'], min(200, partner['width']),
                     partner['url'], partner['name'], partner['url']))
    body.append('}}')
    page = we.Pages['Template:OERu Featured Partner']
    page.save(text="\n".join(body), summary=u'update featured partner display')
# Pipeline:
#   1. scrape the partner index into a list of partner dicts
#   2. attach the wiki image file name to each dict (uploading new logos)
#   3. create/refresh one wiki page per partner
#   4. rebuild the rotating featured-partner template
partners = get_image_files(we, get_partners(OERu_partner_page))

# dump what was collected before anything is written to the partner pages
for partner in partners:
    print(partner['name'])
    print('  %s' % partner['url'])
    print('  %s' % partner['file'])
    print(' %s (%d x %d)' % (partner['img'], partner['width'], partner['height']))
print(len(partners))

make_partner_pages(we, partners)
featured_partner(we, partners)
[login]
user = TheBot
password = wikieducatorbotpassword
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment