diff --git a/README.couch b/README.couch
deleted file mode 100644
index 777779120fd316657ec86ecde72c30ab7894111f..0000000000000000000000000000000000000000
--- a/README.couch
+++ /dev/null
@@ -1,31 +0,0 @@
-# CouchDB actions
-
-## Access via the Futon interface:
-
-In a browser, use: https://couch.oerfoundation.org/_utils/
-For a specific document, use something like: https://couch.oerfoundation.org/_utils/document.html?mentions/_design/ids
-
-## Reading a view
-
-To copy a view locally (pretty-printed, so you can edit it):
-
-`curl https://user:pass@couch.oerfoundation.org/mentions/_design/ids | python -m json.tool > ids.json`
-
-## Replacing a downloaded view with an update
-
-`curl -X PUT https://user:pass@couch.oerfoundation.org/mentions/_design/ids -d @ids.json`
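-
-Note: CouchDB requires the document's current `_rev` on updates, so the PUT only succeeds while `ids.json` still holds the `_rev` captured when it was downloaded. To check the current revision (CouchDB returns it in the ETag header), something like:
-
-`curl -I https://user:pass@couch.oerfoundation.org/mentions/_design/ids`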
-
-## Getting rid of [error] nonode@nohost database_does_not_exist re _users
-
-To fix this, create the _users, _replicator, and _global_changes databases:
-
-`curl -X PUT https://user:pass@couch.oerfoundation.org/_users`
-`curl -X PUT https://user:pass@couch.oerfoundation.org/_replicator`
-`curl -X PUT https://user:pass@couch.oerfoundation.org/_global_changes`
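-
-To confirm the databases now exist, list them (this should include the three above):
-
-`curl https://user:pass@couch.oerfoundation.org/_all_dbs`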
-
-(see http://docs.couchdb.org/en/master/install/setup.html, http://guide.couchdb.org/draft/views.html)
-
-More complex queries (run from a shell; note the quoting):
-
-`curl 'https://user:pass@couch.dev.oerfoundation.org/blog-feeds-hetzner/_design/ids/_view/by_site_and_wp_id?key=\[179,"66"\]'`
-`curl 'https://user:pass@couch.dev.oerfoundation.org/blog-feeds-hetzner/_design/ids/_view/by_site_id?key="66"'`
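-
-Note: curl treats `[` and `]` as URL-globbing characters, hence the `\[`/`\]` escapes (alternatively, pass `-g` to disable globbing). View keys are JSON, so string keys need literal double quotes inside the single-quoted URL.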
diff --git a/ask.py b/ask.py
deleted file mode 100644
index c60646b28d81d8c4b503ae10ea452f716611de0d..0000000000000000000000000000000000000000
--- a/ask.py
+++ /dev/null
@@ -1,142 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2012 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-from datetime import datetime
-import couchdb
-import feedparser
-import sys
-import json
-import hashlib
-import HTMLParser
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('./options.json', 'rt'))
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-h = HTMLParser.HTMLParser()
-tags = options['tags']
-
-# serial number of messages retrieved this poll to uniquify ID
-serial = 0
-
-# get the last time for a ask post in the database
-view = db.view('ids/ask', descending=True, limit=1)
-if len(view) == 1:
-    for row in view:
-        lasttime = row.key
-else:
-    lasttime = "2000-01-01T00:00:00.000Z"
-
-def gravatar(e):
-    return 'http://www.gravatar.com/avatar/' + \
-            hashlib.md5(e.strip().lower()).hexdigest() + '?s=48&d=identicon&r=pg'
-
-def process_tag(tag):
-    def comment_or_answer(mo):
-        if mo.group(1) == 'Comment':
-            return 'Re:'
-        return 'Answer:'
-
-    global serial
-    feed = 'http://ask.oeruniversity.org/feeds/atom/?tags=' + tag
-    qfeed = 'http://ask.oeruniversity.org/feeds/questiona/%s/'
-    qpattern = re.compile(r'http://ask\.oeruniversity\.org/question/(?P<q>\d+)', re.IGNORECASE)
-
-    # find all of the questions
-    rss = feedparser.parse(feed)
-    qitems = rss['items']
-    qitems.reverse()
-
-    qs = []
-    for qitem in qitems:
-        mo = qpattern.match(qitem['link'])
-        if mo:
-            qs.append(mo.group('q'))
-
-    # for each of the questions, find the new questions, answers, comments
-    for q in qs:
-        rss = feedparser.parse(qfeed % q)
-        feedtitle = rss['channel']['title']
-
-        items = rss['items']
-
-        for item in items:
-            if item['title'] == 'RSS Error' and item['description'] == 'Error reading RSS data':
-                break
-            truncated = False
-            dt = datetime.strptime(item['date'], '%Y-%m-%dT%H:%M:%S+00:00')
-            we_timestamp = dt.strftime('%Y-%m-%dT%H:%M:%S.000Z')
-            if we_timestamp <= lasttime:
-                continue
-            seconds = time.mktime(dt.timetuple())
-            # strip out HTML markup before abridging, so we don't stop midtag
-            body = item['title'] + ' ' + item['summary']
-            abridged = re.sub(r'<[^>]*>', '', body)
-            abridged = h.unescape(abridged)
-            # remove square brackets (link anchors)
-            abridged = re.sub(r'\[|]', ' ', abridged)
-            abridged = re.sub(r'\s+', ' ', abridged)
-            # remove inline attribution, already have author
-            abridged = re.sub(r'(Comment|Answer) by (.*?) for',
-                    comment_or_answer, abridged, 1)
-            abridged = abridged[:500].strip()
-            abridged = abridged.replace('&nbsp;', ' ')
-            abridged = abridged.replace('\n', ' ')
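-            # truncate at a word boundary near 137 characters,
-            # flagging the mention as truncated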
-            i = len(abridged)
-            if i > 137:
-                i = 137
-                while abridged[i] != ' ' and i > 0:
-                    i -= 1
-                abridged = abridged[:i] + '...'
-                truncated = True
-
-            author = item['author_detail']['name']
-
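-            # synthesise a sortable id: epoch seconds + zero-padded
-            # question number + per-run serial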
-            mention = {
-                    'from_user': author,
-                    'from_user_name': author,
-                    'created_at': item['date'],
-                    'profile_image_url':
-                        gravatar(item['author_detail']['email']),
-                    'text': abridged,
-                    'truncated': truncated,
-                    'id': '%d%05d%03d' % (seconds, int(q), serial),
-                    'profile_url': item['author_detail']['href'],
-                    'we_source': 'ask',
-                    'we_feed': '%s: %d' % (feedtitle, int(q)),
-                    'we_tags': [tag],
-                    'we_timestamp': we_timestamp,
-                    'we_link': item['link']
-                    }
-            if tag == 'sp4edu':
-                mention['we_tags'] = ['sp4ed']
-            print mention
-            print '==========='
-            db.save(mention)
-            serial += 1
-
-for tag in tags:
-    process_tag(tag)
diff --git a/blog_feeds.py b/blog_feeds.py
deleted file mode 100644
index 257ec3f8abb847531e2de48931f677bd6c8df996..0000000000000000000000000000000000000000
--- a/blog_feeds.py
+++ /dev/null
@@ -1,261 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2018 Open Education Resource Foundation
-# developed by Dave Lane dave@oerfoundation.org, with help from
-# code written by Jim Tittsler
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-import calendar
-from datetime import datetime
-import cookielib
-import urllib, urllib2
-import couchdb
-import feedparser
-import lxml.html
-import xml.sax
-import json
-# for debugging
-import logging
-import pprint
-# to fix various unicode issues
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('../config/options.json', 'rt'))
-
-# version info
-scanner = 'WEnotes Blog Feed Scanner'
-scanner_version = '0.3'
-
-# logging configuration
-LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-#LogLevel = logging.INFO # or logging.INFO or logging.WARN, etc.
-LogFilename = options['logdir'] + '/blog_feeds.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-
-# configure the Couch db for mentions
-couch = couchdb.Server(options['url'])
-dbmentions = couch[options['db']]
-
-# configure the Couch db for finding the feed details
-couch2 = couchdb.Server(options['localcouch'])
-dbfeeds = couch2[options['dbfeeds']]
-
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# return a tuple (text, truncated) of abridged text and truncated flag
-def abridge(text):
-    truncated = False
-    # strip out HTML comments (and MSOffice conditionals)
-    abridged = re.sub(r'<!--.*?-->', '', text)
-    # strip out HTML markup before abridging,
-    #   so we don't stop midtag
-    abridged = re.sub(r'<[^>]*>', ' ', abridged)
-    abridged = re.sub(r'\s*by [^.]+\.\n?', '', abridged)
-    abridged = abridged[:500].strip()
-    abridged = abridged.replace('&nbsp;', ' ')
-    abridged = abridged.replace('&#8211;', "--")
-    abridged = abridged.replace('&#8216;', "'")
-    abridged = abridged.replace('&#8217;', "'")
-    abridged = abridged.replace('&#8220;', '"')
-    abridged = abridged.replace('&#8221;', '"')
-    abridged = abridged.replace('&#8230;', "...")
-    abridged = abridged.replace('&#38;', "&")
-    abridged = abridged.replace('\n', ' ')
-    # get rid of multiple spaces (which the above may have introduced)
-    abridged = re.sub(r'  +', ' ', abridged)
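-    # truncate at a word boundary near 200 characters, flagging truncation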
-    i = len(abridged)
-    if i > 200:
-        i = 200
-        while abridged[i] != ' ' and i > 0:
-            i -= 1
-        abridged = abridged[:i] + '...'
-        truncated = True
-    return (abridged, truncated)
-
-def process_feed(feed):
-    new_docs = []
-    doc = feed.doc
-    logging.debug("== %s", doc['feed_url'])
-    try:
-        # high-water mark; matches the 'last_updated' field written below
-        feed_last = doc['last_updated']
-    except KeyError:
-        feed_last = None
-    type = doc['feed_type']
-    url = doc['feed_url']
-    if type == 'application/rss+xml':
-        logging.debug('processing RSS feed %s', url)
-        content = feedparser.parse(url)
-    elif type == 'application/atom+xml':
-        logging.debug('processing Atom feed %s', url)
-        content = feedparser.parse(url)
-    else:
-        logging.debug('unsupported feed type %s ', type)
-
-    # print the feed...
-    logging.debug("------ the feed ------ %s", pp.pformat(feed));
-
-    # if there is no timestamp, probably no entries
-    try:
-        if not content.feed.has_key('updated_parsed') or content.feed['updated_parsed'] is None:
-            return None
-    except UnboundLocalError:
-        logging.debug('content object not defined')
-        return None
-
-
-    feed_timestamp = time.strftime('%Y-%m-%dT%H:%M:%S.000Z', content.feed['updated_parsed'])
-
-    # check last_updated of feed table
-    if feed_timestamp <= feed_last:
-        return None     # no need to even bother checking items
-
-    # check last timestamp in the mentions database for this feed
-    items = content['items']
-    items.reverse()
-    for item in items:
-        # FIXME perhaps should query to see if article exists
-        #  to avoid "updates" that change pub time
-        we_timestamp = time.strftime('%Y-%m-%dT%H:%M:%S.000Z', item['updated_parsed'])
-        if we_timestamp > feed_last:
-            truncated = False
-            text = ''
-            if len(item['title'].strip()) > 0:
-                text = item['title'].strip() + ': '
-            text = text + item['summary']
-            (abridged, truncated) = abridge(text)
-            mention = {
-                    'from_user': doc['username'],
-                    'from_user_name': doc['display_name'],
-                    'created_at': item['updated'],
-                    'profile_url': item['link'],
-                    #'profile_image_url': doc['profile_image_url'],
-                    'title': item['title'].strip(),
-                    'text': abridged,
-                    'truncated': truncated,
-                    'id': calendar.timegm(item['updated_parsed']),
-                    'we_source': 'feed',
-                    'we_identifier': 'blog_feed',
-                    'we_scanner': scanner,
-                    'we_scanner_version': scanner_version,
-                    'we_feed': content.feed['title'],
-                    'we_feed_url': doc['feed_url'],
-                    'we_timestamp': we_timestamp,
-                    'we_link': item['link']
-                    }
-
-            # if there is an id, use it instead of our made up one
-            if item.has_key('id'):
-                mention['id'] = item['id']
-
-            # if there is width and/or height, copy them
-            if doc.has_key('profile_image_width'):
-                mention['profile_image_width'] = doc['profile_image_width']
-            if doc.has_key('profile_image_height'):
-                mention['profile_image_height'] = doc['profile_image_height']
-
-            # if there is a gravatar hash, copy it
-            if doc.has_key('gravatar'):
-                mention['gravatar'] = doc['gravatar']
-
-            # if tags is empty, we take everything and apply we_tags
-            if len(doc['tags']) == 0:
-                mention['we_tags'] = doc['we_tags']
-                new_docs.append(mention)
-            else:
-                mention['we_tags'] = []
-                # only save things tagged with tags
-                # or that mention the tag in the title
-                keep = False
-                for we_tag in doc['tags']:
-                    if item.has_key('tags'):
-                        for tag in item['tags']:
-                            logging.debug("==tag: %s we_tag: %s", tag, we_tag)
-                            if tag.has_key('term') and tag['term'].lower().find(we_tag) > -1 and we_tag not in mention['we_tags']:
-                                mention['we_tags'].append(we_tag)
-                                continue
-                    if text.lower().find(we_tag) > -1 and we_tag not in mention['we_tags']:
-                        logging.debug("==we_tag: %s in text search", we_tag)
-                        mention['we_tags'].append(we_tag)
-
-                # keep it if there was one or more interesting tags
-                if len(mention['we_tags']) > 0:
-                    # see if we already have this one
-                    #   only update if tags have changed
-                    existing = dbmentions.view('ids/feed')
-                    exists = existing[mention['we_link']]
-                    if len(exists) > 0:
-                        # ideally there should be at most one...
-                        for exi in exists:
-                            if set(mention['we_tags']) != set(exi['value']):
-                                logging.debug("++ need to update tags, old>new %s %s", exi['value'], mention['we_tags'])
-                                odoc = dbmentions[exi['id']]
-                                odoc['we_tags'] = mention['we_tags']
-                                dbmentions[exi['id']] = odoc
-                                logging.debug("++updated tags in post")
-                            else:
-                                logging.debug("--old post already has all the tags")
-                                pass
-                    else:
-                        logging.debug("??don't seem to have this post: %s", mention['we_tags'])
-                        new_docs.append(mention)
-                else:
-                    logging.debug("!!!Skipping post with no interesting tags: %s", doc['feed_url'])
-                    #print "!!!", item
-                    pass
-
-    #import pdb; pdb.set_trace()
-    if len(new_docs) > 0:
-        logging.info("**** updating %d new docs", len(new_docs))
-        result = dbmentions.update(new_docs)
-        logging.debug("    %s", result)
-    return time.strftime('%Y-%m-%dT%H:%M:%S.000Z', content.feed['updated_parsed'])
-
-for feed in dbfeeds.view('_design/ids/_view/by_wp_id', include_docs=True):
-    try:
-        logging.debug('checking feed %s for references to %s', feed.doc['feed_url'], feed.doc['tags'])
-        try:
-            last_checked = process_feed(feed)
-        except xml.sax._exceptions.SAXException:
-            last_checked = None
-        except TypeError:
-            logging.debug('Exception hit - TypeError on feed %s', feed.id)
-        if last_checked:
-            logging.debug('doc _rev: %s', feed.doc['_rev'])
-            doc = feed.doc
-            doc['last_updated'] = last_checked
-            doc['last_successful'] = time.strftime(
-                        '%Y-%m-%dT%H:%M:%S.000Z', time.gmtime())
-            try: 
-                dbfeeds[feed.id] = doc
-            except couchdb.http.ResourceConflict:
-                logging.debug('Exception hit - resourceConflict %s', doc['_id'])
-    except IndexError:
-        logging.debug('issue accessing relevant data on doc id %s', feed.doc['_id'])
-logging.info("run finished\n")
diff --git a/bookmarks.py b/bookmarks.py
deleted file mode 100644
index 77f1e13677dfd981c323add55388d3a58ac6d105..0000000000000000000000000000000000000000
--- a/bookmarks.py
+++ /dev/null
@@ -1,226 +0,0 @@
-#!/usr/bin/python
-
-""" Harvest bookmarks with tags specified in options file."""
-
-# Copyright 2017 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-from datetime import datetime, timedelta
-import couchdb
-import urllib
-import json
-import feedparser
-import argparse
-import requests
-import HTMLParser
-import hashlib
-# for debugging
-import logging
-import pprint
-# to fix various unicode issues
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-
-# version info
-scanner = 'WEnotes Semantic Scuttle (Bookmarks) Scanner'
-scanner_version = '0.4.1'
-
-DEBUG = False
-#DEBUG = True
-DELAY = 1.0         # delay in seconds between Discourse hits
-MAX_TEXT_LEN = 300  # max characters before truncation
-
-# retrieve URL from config JSON
-options = json.load(open('../config/options.json', 'r'))
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-# get tag list from URL
-tagurl = options['settings-url']
-jsoncontent = urllib.urlopen(tagurl)
-reference_tags = json.loads(jsoncontent.read())["tags"]
-# set header this app will report itself as
-headers = {'User-Agent' : 'WEnotes-Bookmarks/0.1'}
-# the URL of the bookmarks RSS feed
-bookmarks_url = options["bookmarks"]["url"]
-bookmarks_rss_url = bookmarks_url + '/rss.php/all'
-# for parsing HTML in bookmark text
-h = HTMLParser.HTMLParser()
-
-# length of content for messages
-message_length = 200
-
-#logging configuration
-LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-#LogLevel = logging.INFO # or logging.INFO or logging.WARN, etc.
-LogFilename = options['logdir'] + '/bookmarks.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-
-# initialising
-lasttime = "2000-01-01T00:00:00.000Z"
-
-# set up the command-line argument parser
-parser = argparse.ArgumentParser(description='Harvest posts from Bookmarks, our Semantic Scuttle instance.')
-parser.add_argument('-f', '--full', action='store_false',
-        help='get list of categories, and then every topic in each')
-args = parser.parse_args([])
-
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# get all the mention ids currently in the db so we can
-# ensure we don't duplicate
-all_mentions = db.view('ids/bookmarks')
-
-logging.debug('avoiding these mentions we already have: %s', pp.pformat(all_mentions))
-
-# check if we have this mention already
-def have_mention(msg_id):
-    """Return boolean showing if we already have this message."""
-    #print 'id = %s' % msg_id
-    for mention in all_mentions:
-        if msg_id==mention['value']:
-            #print 'Found id %s' % msg_id
-            return True
-    logging.debug('failed to find %s', msg_id)
-    return False
-
-# deal with the +0000 time offset, not supported by datetime
-# see https://stackoverflow.com/questions/23940551/why-z-is-not-supported-by-pythons-strptime
-def dt_parse(t):
-    ret = datetime.strptime(t[0:25], '%a, %d %b %Y %H:%M:%S')
-    # normalise to UTC: a positive offset means local time is ahead of UTC
-    if t[26]=='+':
-        ret -= timedelta(hours=int(t[27:29]))
-    elif t[26]=='-':
-        ret += timedelta(hours=int(t[27:29]))
-    return ret
-
-# strip any leading "#" from any tags 
-def striphashes(a):
-    for i in a:
-        if i[0] == '#':
-            i = i[1:]
-    return a
-
-
-# find all of bookmarks
-rss = feedparser.parse(bookmarks_rss_url)
-# find the channel title
-feedtitle = rss['channel']['title']
-
-items = rss['items']
-# reverse them, so oldest is first
-items.reverse()
-logging.debug("found %d items", len(items))
-
-# for each item in RSS check if it has one (or more) of our tags
-for item in items:
-    # is this an error item? If so, bail
-    if item['title'] == 'RSS Error' and item['description'] == 'Error reading RSS data':
-        break
-    logging.debug("looking at bookmark: %s", item['title'])
-    # is this an item with a relevant tag...
-    try:
-        taglist = [t['term'] for t in item['tags']]
-        # strip any leading "#" from tags
-        taglist = striphashes(taglist)
-    except KeyError:
-        logging.debug("no tags defined for %s", item['title'])
-        continue
-    common_tags = list(set(taglist) & set(reference_tags))
-    #logging.debug("common tags: %s", common_tags)
-    #logging.debug("taglist: %s\nreference tags: %s\ncommon tags: %s", taglist, reference_tags, common_tags)
-    logging.debug("taglist: %s\ncommon tags: %s", taglist, common_tags)
-    if not common_tags:
-        logging.debug("no common tags, not interested in this one");
-        continue
-    # initialise
-    truncated = False
-    dt = dt_parse(item['published'])
-    we_timestamp = dt.strftime('%Y-%m-%dT%H:%M:%S.000Z')
-    if we_timestamp <= lasttime:
-        logging.debug('the timestamp %s is before our reference time of %s... bailing.', we_timestamp, lasttime)
-        continue
-    seconds = time.mktime(dt.timetuple())
-    # check if we've seen the gid before...
-    if have_mention(item['id']):
-        continue
-    # strip out HTML markup before abridging, so we don't stop midtag
-    body = item['title'] + ' ' + item['summary']
-    # pull out all the html tags
-    abridged = re.sub(r'<[^>]*>', '', body)
-    # remove any escaped tags
-    abridged = h.unescape(abridged)
-    # remove square brackets (link anchors)
-    abridged = re.sub(r'\[|]', ' ', abridged)
-    # remove multiple spaces
-    abridged = re.sub(r'\s+', ' ', abridged)
-    # remove line feeds and non-breaking spaces
-    abridged = abridged.replace('&nbsp;', ' ')
-    abridged = abridged.replace('\n', ' ')
-    # abridge so the total stays within message_length: reserve 48
-    # characters for the 'Link added: ' prefix, a URL of up to ~32
-    # characters, and the trailing '...'
-    i = len(abridged)
-    if i > (message_length - 48):
-        i = (message_length - 48)
-        while abridged[i] != ' ' and i > 0:
-            i -= 1
-        abridged = abridged[:i] + '...'
-        truncated = True
-    # prepend link:
-    abridged = 'Link added: ' + item['link'] + ' - ' + abridged
-    # get author's name
-    author = item['author_detail']['name']
-    # username
-    username = item['author']
-    # create the mention object
-    mention = {
-        'user': {
-            'name': item['contributors'][0]['name'],
-            'username': item['contributors'][0]['href'].rsplit('/', 1)[-1],
-            'profile_url': item['contributors'][0]['href']
-        },
-        'from_user_name': author,
-        'created_at': we_timestamp,
-        'text': abridged,
-        'truncated': truncated,
-        'id': item['id'],
-        #'profile_url': item['author'],
-        'we_source': 'bookmarks',
-        'we_feed': '%s' % (feedtitle),
-        'we_tags': common_tags,
-        'we_timestamp': we_timestamp,
-        'we_scanner': scanner,
-        'we_scanner_version': scanner_version,
-        'we_link': item['link']
-    }
-    #pp.pprint(mention)
-    logging.info('adding %s', item['id'])
-    if DEBUG:
-        logging.debug('or would be if debugging was off')
-    else:
-        db.save(mention)
diff --git a/community-discourse.py b/community-discourse.py
deleted file mode 100644
index 9a2834aa78a7a6ef5e579d5ed1ba7866019ddced..0000000000000000000000000000000000000000
--- a/community-discourse.py
+++ /dev/null
@@ -1,190 +0,0 @@
-#!/usr/bin/python
-
-""" Harvest messages from forums category URLs specified in options file."""
-
-# Copyright 2016 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-import couchdb
-import json
-import urllib
-import argparse
-import requests
-# for debugging
-import logging
-import pprint
-# to deal with nasty characters included in Hypothesis quotes
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-
-DEBUG = False
-DELAY = 0.1         # delay in seconds between Discourse hits
-MAX_TEXT_LEN = 300  # max characters before truncation
-SOURCE = "community"  # the source of each mention in CouchDB
-POSTS_PER_PAGE = 20 # Discourse returns posts for a topic in lots of 20 per page
-
-# retrieve URL from config JSON
-options = json.load(open('../config/options.json', 'r'))
-
-#logging configuration
-LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-#LogLevel = logging.INFO # or logging.INFO or logging.WARN, etc.
-LogFilename = options['logdir'] + '/community.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-#print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-
-# get tag list from URL
-tagurl = options['settings-url']
-jsoncontent = urllib.urlopen(tagurl)
-reference_tags = json.loads(jsoncontent.read())["tags"]
-
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# create the CouchDB object
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# some other settings.
-baseurl = options['community']['url']
-logging.debug('baseurl = %s', pp.pformat(baseurl))
-version = '0.3'
-headers = {'User-Agent' : 'WEnotes-Community-Discourse/%s' % (version)}
-#print headers
-
-# create the parser for returned content from Discourse
-parser = argparse.ArgumentParser(description='Harvest posts from Discourse Forums.')
-parser.add_argument('-f', '--full', action='store_false',
-        help='get list of categories, and then every topic in each')
-args = parser.parse_args([])
-
-def have_mention(msg_id):
-    """Return boolean showing if we already have this message."""
-    view = db.view('ids/communityids')
-    have = (len(view[msg_id]) > 0)
-    return have
-
-# check a tag list against our reference_tags
-def interesting_tags(tags):
-    """Return list of interesting tags, or false if none."""
-    common_tags = list(set(tags) & set(reference_tags))
-    #logging.debug("taglist: %s\nreference tags: %s\ncommon tags: %s", tags, reference_tags, common_tags)
-    if common_tags:
-        logging.debug("interesting tags: %s", common_tags)
-        return common_tags
-    else:
-        return False
-
-if args.full:
-        # get the list of categories
-        categories = '%s/categories.json' % (baseurl)
-        time.sleep(DELAY)
-        r = requests.get(categories, headers=headers, verify=False)
-        d = json.loads(r.text)
-        cat_list = d['category_list']['categories']
-        #logging.debug("categories: %s", json.dumps(cat_list, indent=2, sort_keys=True))
-        for cat in cat_list:
-            logging.debug("category: %s(%s)", cat['name'], cat['id'])
-            topics = '%s/c/%s.json' % (baseurl, cat['id'])
-            if DEBUG:
-                print ">>>>>>>>>>>>>>>>>>>>>>>>>>>", \
-                        cat['id'], cat['name'], cat['slug'], topics
-            #logging.debug("category: %s, %s, %s, %s", cat['id'], cat['name'], cat['slug'], topics)
-            time.sleep(DELAY)
-            r2 = requests.get(topics, headers=headers, verify=False)
-            d = json.loads(r2.text)
-            topic_list = d['topic_list']['topics']
-            #logging.debug("topics: %s", json.dumps(topic_list, indent=2, sort_keys=True))
-            for topic in topic_list:
-                # number of pages for this topic, rounding up
-                pages = topic['posts_count'] / POSTS_PER_PAGE
-                if (topic['posts_count'] % POSTS_PER_PAGE > 0):
-                    #print "adding a page with modulo %f" % (topic['posts_count'] % POSTS_PER_PAGE)
-                    pages += 1
-                #print "%d pages for %d posts for topic %s" % (pages, topic['posts_count'], topic['title'])
-                for page in range(1, pages + 1):
-                    posts = '%s/t/%s.json?page=%d' % (baseurl, topic['id'], page)
-                    logging.debug("topic: %s(%s), page %d", topic['title'], topic['id'], page)
-                    #logging.debug("topic %s: ", json.dumps(topic, indent=2, sort_keys=True))
-                    #logging.debug('     tags: %s', json.dumps(topic['tags'], indent=2, sort_keys=True))
-                    common_tags = interesting_tags(topic['tags'])
-                    if not common_tags:
-                        logging.debug('no interesting tags')
-                        continue
-                    time.sleep(DELAY)
-                    r3 = requests.get(posts, headers=headers, verify=False)
-                    p = json.loads(r3.text)
-                    post_list = p['post_stream']['posts']
-                    #logging.debug("post_list %s: ", json.dumps(post_list, indent=2, sort_keys=True))
-                    if True:
-                        for post in post_list:
-                            #logging.debug("post %s: ", json.dumps(post, indent=2, sort_keys=True))
-                            if post['deleted_at']:
-                                continue
-                            link = "%s/t/%s/%s" % (baseurl, post['topic_id'], post['post_number'])
-                            logging.debug('link: %s', link)
-                            if have_mention(link):
-                                logging.debug('existing link: %s', link)
-                                continue
-                            logging.debug('interesting link: %s', link)
-                            text = post['cooked'].replace('\n', ' ')
-                            text = re.sub(r'<[^>]*?>', ' ', text)   # remove HTML tags
-                            text = re.sub(r' {2,}', ' ', text)      # collapse spaces
-                            text = topic['fancy_title'].strip() + ': ' + text.strip()
-                            truncated = False
-                            i = len(text)
-                            if i > MAX_TEXT_LEN:
-                                i = MAX_TEXT_LEN
-                                while text[i] != ' ' and i > 0:
-                                    i -= 1
-                                text = text[:i] + '...'
-                                truncated = True
-                            from_user_name = post['display_username']
-                            if from_user_name == '':
-                                from_user_name = post['username']
-                            profile_image_url = post['avatar_template'].replace('{size}', '48')
-                            if not re.match(r'^(https?:)?//', profile_image_url):
-                                profile_image_url = baseurl + profile_image_url
-                            mention = {
-                                    'created_at': post['created_at'],
-                                    'from_user': post['username'],
-                                    'from_user_name': from_user_name,
-                                    'id': link,
-                                    'post_id': post['id'],
-                                    'profile_image_url': profile_image_url,
-                                    'profile_url': baseurl + '/users/' + post['username'],
-                                    'text': text,
-                                    'truncated': truncated,
-                                    'we_link': link,
-                                    'we_source': SOURCE,
-                                    'we_version': version,
-                                    'we_tags': common_tags,
-                                    'we_timestamp': post['created_at']
-                                    }
-                            if DEBUG:
-                                print json.dumps(mention, indent=2, sort_keys=True)
-                            else:
-                                logging.info('adding post %s by %s with tag(s): %s', mention['id'], from_user_name, common_tags)
-                                db.save(mention)
diff --git a/feeds.py b/feeds.py
deleted file mode 100644
index af3925cd9e9783eca4de44e401d6ad9c8de5a918..0000000000000000000000000000000000000000
--- a/feeds.py
+++ /dev/null
@@ -1,222 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2012 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-import calendar
-from datetime import datetime
-import cookielib
-import urllib, urllib2
-import couchdb
-import feedparser
-import lxml.html
-import xml.sax
-import sys
-import json
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('./options.json', 'rt'))
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-couch2 = couchdb.Server(options['localcouch'])
-dbfeeds = couch2[options['dbfeeds']]
-
-# get the last time for a moodle post in the database
-#view = db.view('ids/moodle', descending=True, limit=1)
-#if len(view) == 1:
-#    for row in view:
-#        lasttime = row.key
-#else:
-#    lasttime = "2000-01-01T00:00:00.000Z"
-
-# return a tuple (text, truncated) of abridged text and truncated flag
-def abridge(text):
-    truncated = False
-    # strip out HTML comments (and MSOffice conditionals)
-    abridged = re.sub(r'<!--.*?-->', '', text)
-    # strip out HTML markup before abridging,
-    #   so we don't stop midtag
-    abridged = re.sub(r'<[^>]*>', ' ', abridged)
-    abridged = re.sub(r'\s*by [^.]+\.\n?', '', abridged)
-    abridged = abridged[:500].strip()
-    abridged = abridged.replace('&nbsp;', ' ')
-    abridged = abridged.replace('&#8211;', "--")
-    abridged = abridged.replace('&#8216;', "'")
-    abridged = abridged.replace('&#8217;', "'")
-    abridged = abridged.replace('&#8220;', '"')
-    abridged = abridged.replace('&#8221;', '"')
-    abridged = abridged.replace('&#8230;', "...")
-    abridged = abridged.replace('&#38;', "&")
-    abridged = abridged.replace('\n', ' ')
-    # get rid of multiple spaces (which the above may have introduced)
-    abridged = re.sub(r'  +', ' ', abridged)
-    i = len(abridged)
-    if i > 200:
-        i = 200
-        while abridged[i] != ' ' and i > 0:
-            i -= 1
-        abridged = abridged[:i] + '...'
-        truncated = True
-    return (abridged, truncated)
-
-def process_feed(feed):
-    new_docs = []
-    #print "==", feed.doc['url']
-    feed_last = feed.doc['last_updated']
-    rss = feedparser.parse(feed.doc['url'])
-
-    #if 'edem630' in feed.doc['tags']:
-    #    print "!!edem630 blog, re-check the last week"
-    #    feed_last = '2013-07-21T00:00:00.000Z'
-
-    # if there is no timestamp, probably no entries
-    if not rss.feed.has_key('updated_parsed') or rss.feed['updated_parsed'] is None:
-        return None
-
-    feed_timestamp = time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
-            rss.feed['updated_parsed'])
-
-    # check last_updated of feed table
-    if feed_timestamp <= feed_last:
-        return None     # no need to even bother checking items
-
-    # check last timestamp in the mentions database for this feed
-    items = rss['items']
-    items.reverse()
-    for item in items:
-        # FIXME perhaps should query to see if article exists
-        #  to avoid "updates" that change pub time
-        we_timestamp = time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
-                item['updated_parsed'])
-        if we_timestamp > feed_last:
-            truncated = False
-            text = ''
-            if len(item['title'].strip()) > 0:
-                text = item['title'].strip() + ': '
-            text = text + item['summary']
-            (abridged, truncated) = abridge(text)
-            mention = {
-                    'from_user': feed.doc['from_user'],
-                    'from_user_name': feed.doc['from_user_name'],
-                    'created_at': item['updated'],
-                    'profile_url': feed.doc['profile_url'],
-                    'profile_image_url': feed.doc['profile_image_url'],
-                    'title': item['title'].strip(),
-                    'text': abridged,
-                    'truncated': truncated,
-                    'id': calendar.timegm(item['updated_parsed']),
-                    'we_source': 'feed',
-                    'we_feed': rss.feed['title'],
-                    'we_feed_url': feed.doc['url'],
-                    'we_timestamp': we_timestamp,
-                    'we_link': item['link']
-                    }
-
-            # if there is an id, use it instead of our made up one
-            if item.has_key('id'):
-                mention['id'] = item['id']
-
-            # if there is width and/or height, copy them
-            if feed.doc.has_key('profile_image_width'):
-                mention['profile_image_width'] = \
-                        feed.doc['profile_image_width']
-            if feed.doc.has_key('profile_image_height'):
-                mention['profile_image_height'] = \
-                        feed.doc['profile_image_height']
-
-            # if there is a gravatar hash, copy it
-            if feed.doc.has_key('gravatar'):
-                mention['gravatar'] = feed.doc['gravatar']
-
-            # if tags is empty, we take everything and apply we_tags
-            if len(feed.doc['tags']) == 0:
-                mention['we_tags'] = feed.doc['we_tags']
-                new_docs.append(mention)
-            else:
-                mention['we_tags'] = []
-                # only save things tagged with tags
-                # or that mention the tag in the title
-                keep = False
-                for tg in feed.doc['tags']:
-                    if item.has_key('tags'):
-                        for tag in item['tags']:
-                            #print "==tag: %s tg: %s" % (tag, tg)
-                            if tag.has_key('term') and tag['term'].lower().find(tg) > -1 and tg not in mention['we_tags']:
-                                mention['we_tags'].append(tg)
-                                continue
-                    if text.lower().find(tg) > -1 and tg not in mention['we_tags']:
-                        #print "==tg: %s in text search" % tg
-                        mention['we_tags'].append(tg)
-
-                # keep it if there was one or more interesting tags
-                if len(mention['we_tags']) > 0:
-                    # see if we already have this one
-                    #   only update if tags have changed
-                    exists = db.view('ids/feed')
-                    ex = exists[mention['we_link']]
-                    if len(ex) > 0:
-                        # ideally there should be at most one...
-                        for exi in ex:
-                            if set(mention['we_tags']) != set(exi['value']):
-                                #print "++ need to update tags, old>new", exi['value'], mention['we_tags']
-                                odoc = db[exi['id']]
-                                odoc['we_tags'] = mention['we_tags']
-                                db[exi['id']] = odoc
-                                #print "++updated tags in post"
-                            else:
-                                #print "--old post already has all the tags"
-                                pass
-                    else:
-                        #print "??don't seem to have this post", mention['we_tags']
-                        new_docs.append(mention)
-                else:
-                    #print "!!!Skipping post with no interesting tags."
-                    #print "!!!", feed.doc['url']
-                    #print "!!!", item
-                    pass
-
-    #import pdb; pdb.set_trace()
-    if len(new_docs) > 0:
-        #print "**** updating %d new docs" % len(new_docs)
-        result = db.update(new_docs)
-        #print "    ", result
-    return time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
-            rss.feed['updated_parsed'])
-
-for feed in dbfeeds.view('feed/activerss', include_docs=True):
-    # process feeds based on the 'freq' attribute
-    # FIXME for now, only on/off, but could be polling frequency
-    if feed.doc['freq'] > 0:
-        #print feed.id, feed.value, feed.doc
-        try:
-            last_updated = process_feed(feed)
-        except xml.sax._exceptions.SAXException:
-            last_updated = None
-        if last_updated:
-            doc = feed.doc
-            doc['last_updated'] = last_updated
-            doc['last_successful'] = time.strftime(
-                        '%Y-%m-%dT%H:%M:%S.000Z', time.gmtime())
-            dbfeeds[feed.id] = doc
diff --git a/forums-discourse.py b/forums-discourse.py
deleted file mode 100644
index 8545b4562ebcb81fbf9aec793a299726a2e7afef..0000000000000000000000000000000000000000
--- a/forums-discourse.py
+++ /dev/null
@@ -1,190 +0,0 @@
-#!/usr/bin/python
-
-""" Harvest messages from forums category URLs specified in options file."""
-
-# Copyright 2016 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-import couchdb
-import json
-import urllib
-import argparse
-import requests
-# for debugging
-import logging
-import pprint
-# to deal with nasty characters included in Hypothesis quotes
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-
-DEBUG = False
-DELAY = 0.1         # delay in seconds between Discourse hits
-MAX_TEXT_LEN = 300  # max characters before truncation
-SOURCE = "forums"  # the source of each mention in CouchDB
-POSTS_PER_PAGE = 20 # Discourse returns posts for a topic in lots of 20 per page
-
-# retrieve URL from config JSON
-options = json.load(open('../config/options.json', 'r'))
-
-#logging configuration
-LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-#LogLevel = logging.INFO # or logging.INFO or logging.WARN, etc.
-LogFilename = options['logdir'] + '/forums.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-#print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-
-# get tag list from URL
-tagurl = options['settings-url']
-jsoncontent = urllib.urlopen(tagurl)
-reference_tags = json.loads(jsoncontent.read())["tags"]
-
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# create the CouchDB object
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# some other settings.
-baseurl = options['forums']['url']
-logging.debug('baseurl = %s', pp.pformat(baseurl))
-version = '0.3'
-headers = {'User-Agent' : 'WEnotes-Forum-Discourse/%s' % (version)}
-#print headers
-
-# create the parser for returned content from Discourse
-parser = argparse.ArgumentParser(description='Harvest posts from Discourse Forums.')
-parser.add_argument('-f', '--full', action='store_false',
-        help='get list of categories, and then every topic in each')
-args = parser.parse_args([])
-
-def have_mention(msg_id):
-    """Return boolean showing if we already have this message."""
-    view = db.view('ids/forumsids')
-    have = (len(view[msg_id]) > 0)
-    return have
-
-# check a tag list against our reference_tags
-def interesting_tags(tags):
-    """Return list of interesting tags, or false if none."""
-    common_tags = list(set(tags) & set(reference_tags))
-    #logging.debug("taglist: %s\nreference tags: %s\ncommon tags: %s", tags, reference_tags, common_tags)
-    if common_tags:
-        logging.debug("interesting tags: %s", common_tags)
-        return common_tags
-    else:
-        return False
-
-if args.full:
-        # get the list of categories
-        categories = '%s/categories.json' % (baseurl)
-        time.sleep(DELAY)
-        r = requests.get(categories, headers=headers, verify=False)
-        d = json.loads(r.text)
-        cat_list = d['category_list']['categories']
-        #logging.debug("categories: %s", json.dumps(cat_list, indent=2, sort_keys=True))
-        for cat in cat_list:
-            logging.debug("category: %s(%s)", cat['name'], cat['id'])
-            topics = '%s/c/%s.json' % (baseurl, cat['id'])
-            if DEBUG:
-                print ">>>>>>>>>>>>>>>>>>>>>>>>>>>", \
-                        cat['id'], cat['name'], cat['slug'], topics
-            #logging.debug("category: %s, %s, %s, %s", cat['id'], cat['name'], cat['slug'], topics)
-            time.sleep(DELAY)
-            r2 = requests.get(topics, headers=headers, verify=False)
-            d = json.loads(r2.text)
-            topic_list = d['topic_list']['topics']
-            #logging.debug("topics: %s", json.dumps(topic_list, indent=2, sort_keys=True))
-            for topic in topic_list:
-                # number of pages for this topic, rounding up
-                pages = topic['posts_count'] / POSTS_PER_PAGE
-                if (topic['posts_count'] % POSTS_PER_PAGE > 0):
-                    #print "adding a page with modulo %f" % (topic['posts_count'] % POSTS_PER_PAGE)
-                    pages += 1
-                #print "%d pages for %d posts for topic %s" % (pages, topic['posts_count'], topic['title'])
-                for page in range(1, pages + 1):
-                    posts = '%s/t/%s.json?page=%d' % (baseurl, topic['id'], page)
-                    logging.debug("topic: %s(%s), page %d", topic['title'], topic['id'], page)
-                    #logging.debug("topic %s: ", json.dumps(topic, indent=2, sort_keys=True))
-                    #logging.debug('     tags: %s', json.dumps(topic['tags'], indent=2, sort_keys=True))
-                    common_tags = interesting_tags(topic['tags'])
-                    if not common_tags:
-                        logging.debug('no interesting tags')
-                        continue
-                    time.sleep(DELAY)
-                    r3 = requests.get(posts, headers=headers, verify=False)
-                    p = json.loads(r3.text)
-                    post_list = p['post_stream']['posts']
-                    #logging.debug("post_list %s: ", json.dumps(post_list, indent=2, sort_keys=True))
-                    if True:
-                        for post in post_list:
-                            #logging.debug("post %s: ", json.dumps(post, indent=2, sort_keys=True))
-                            if post['deleted_at']:
-                                continue
-                            link = "%s/t/%s/%s" % (baseurl, post['topic_id'], post['post_number'])
-                            logging.debug('link: %s', link)
-                            if have_mention(link):
-                                logging.debug('existing link: %s', link)
-                                continue
-                            logging.debug('interesting link: %s', link)
-                            text = post['cooked'].replace('\n', ' ')
-                            text = re.sub(r'<[^>]*?>', ' ', text)   # remove HTML tags
-                            text = re.sub(r' {2,}', ' ', text)      # collapse spaces
-                            text = topic['fancy_title'].strip() + ': ' + text.strip()
-                            truncated = False
-                            i = len(text)
-                            if i > MAX_TEXT_LEN:
-                                i = MAX_TEXT_LEN
-                                while text[i] != ' ' and i > 0:
-                                    i -= 1
-                                text = text[:i] + '...'
-                                truncated = True
-                            from_user_name = post['display_username']
-                            if from_user_name == '':
-                                from_user_name = post['username']
-                            profile_image_url = post['avatar_template'].replace('{size}', '48')
-                            if not re.match(r'^(https?:)?//', profile_image_url):
-                                profile_image_url = baseurl + profile_image_url
-                            mention = {
-                                    'created_at': post['created_at'],
-                                    'from_user': post['username'],
-                                    'from_user_name': from_user_name,
-                                    'id': link,
-                                    'post_id': post['id'],
-                                    'profile_image_url': profile_image_url,
-                                    'profile_url': baseurl + '/users/' + post['username'],
-                                    'text': text,
-                                    'truncated': truncated,
-                                    'we_link': link,
-                                    'we_source': SOURCE,
-                                    'we_version': version,
-                                    'we_tags': common_tags,
-                                    'we_timestamp': post['created_at']
-                                    }
-                            if DEBUG:
-                                print json.dumps(mention, indent=2, sort_keys=True)
-                            else:
-                                logging.info('adding post %s by %s with tag(s): %s', mention['id'], from_user_name, common_tags)
-                                db.save(mention)
diff --git a/gplus.py b/gplus.py
deleted file mode 100644
index 8c31b17f4f51765cb94d11889942645fa948f01b..0000000000000000000000000000000000000000
--- a/gplus.py
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2012-2016 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import urllib
-import urllib2
-import time
-import couchdb
-import json
-from operator import itemgetter
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('./options.json', 'rt'))
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-couch2 = couchdb.Server(options['localcouch'])
-dbfeeds = couch2[options['dbfeeds']]
-
-tags = options['tags']
-communities = options['communities']
-
-gplus_search = 'https://www.googleapis.com/plus/v1/activities'
-gplus_activities_list = "https://www.googleapis.com/plus/v1/people/%s/activities/public"
-
-def abridge(s):
-    "return possibly truncated string and truncated flag"
-    truncated = False
-    # strip out HTML markup before abridging, so we don't stop midtag
-    abridged = re.sub(r'<[^>]*>', ' ', s)
-    abridged = re.sub(r'\s*by [^.]+\.\n?', '', abridged)
-    abridged = abridged.replace('&nbsp;', ' ')
-    abridged = abridged.replace('&#39;', "'")
-    abridged = abridged.replace('\n', ' ')
-    abridged = abridged.strip()
-    # get rid of multiple spaces (which the above may have introduced)
-    abridged = re.sub(r'  +', ' ', abridged)
-    i = len(abridged)
-    if i > 280:
-        i = 280
-        while abridged[i] != ' ' and i > 0:
-            i -= 1
-        abridged = abridged[:i] + '...'
-        truncated = True
-    return (abridged, truncated)
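-# Worked example (illustrative input, not from the original call sites):
-# abridge('<p>Open planning for the next course starts today</p>') strips
-# the markup and returns ('Open planning for the next course starts today',
-# False); only bodies longer than 280 characters are cut at a word
-# boundary, suffixed with '...', and returned with truncated == True.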
-
-def getitems(url, query=None, person=None):
-    # avoid a mutable default argument: the dict is modified below
-    # ('key' and 'pageToken'), which would leak state between calls
-    if query is None:
-        query = {}
-    items = []
-    query['key'] = options['googleapikey']
-    if person:
-        url = url % (person)
-    while 1:
-        d = json.loads(urllib2.urlopen(url + '?' + urllib.urlencode(query)).read())
-        if d.has_key('items'):
-            items += d['items']
-        if not d.has_key('nextPageToken') or (d.has_key('items') and len(d['items']) <= 0):
-            return items
-        query['pageToken'] = d['nextPageToken']
-        time.sleep(1)
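-# Usage sketch (illustrative arguments): getitems() drives both entry
-# points in main(), e.g.
-#   getitems(gplus_search, {'query': '#oeru', 'maxResults': 20})
-#   getitems(gplus_activities_list, {'maxResults': 100}, person)
-# where person is substituted into the people/%s/activities URL; the
-# loop follows nextPageToken until the API stops returning items.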
-
-def formatitem(item, tag):
-    doc = item.copy()
-    doc['we_source'] = 'g+'
-    doc['we_tags'] = [tag]
-    doc['we_timestamp'] = item['published']
-    doc['from_user'] = item['actor']['displayName']
-    doc['from_user_name'] = item['actor']['displayName']
-    doc['profile_image_url'] = item['actor']['image']['url']
-    doc['profile_url'] = item['actor']['url']
-    if not doc.has_key('text'):
-        doc['text'], truncated = abridge(item['object']['content'])
-        if truncated:
-            doc['truncated'] = True
-    # make a consistent we_link to thread
-    if doc.has_key('url'):
-        doc['we_link'] = doc['url']
-        del doc['url']
-    else:
-        doc['we_link'] = item['inReplyTo'][0]['url']
-    return doc
-
-def processitems(items, tag, get_replies = True):
-    time.sleep(1)
-    # loop through all of the items
-    # if we don't have it, save it
-    # if the reply count has changed, process those items
-    for item in items:
-        view = db.view('ids/google', key=item['id'], include_docs=True,
-                limit=1)
-        if len(view) == 1:
-            update = False
-            # check if comments, plusoners, or resharers have changed
-            # or if we are missing this tag
-            for row in view:
-                doc = row.doc
-                newobj = item['object']
-                dbobj = doc['object']
-                if (newobj.has_key('plusoners') and newobj['plusoners']['totalItems'] <> dbobj['plusoners']['totalItems']) \
-                or (newobj.has_key('resharers') and newobj['resharers']['totalItems'] <> dbobj['resharers']['totalItems']):
-                    update = True
-                if newobj.has_key('replies') and newobj['replies']['totalItems'] <> dbobj['replies']['totalItems']:
-                    update = True
-                    processitems(getitems(newobj['replies']['selfLink']), tag)
-                if tag not in doc['we_tags']:
-                    doc['we_tags'].append(tag)
-                    update = True
-            if update:
-                # copy only the counters the API actually returned
-                for field in ('plusoners', 'resharers', 'replies'):
-                    if newobj.has_key(field):
-                        doc['object'][field] = newobj[field].copy()
-                # update CouchDB
-                db[row['id']] = doc
-        else:
-            doc = formatitem(item, tag)
-            db.save(doc)
-            if get_replies and item['object'].has_key('replies') and item['object']['replies']['totalItems'] > 0:
-                processitems(getitems(item['object']['replies']['selfLink']), tag)
-    return
-
-#   for each activity
-#       if we have it in db
-#           get the we_tags
-#           if pluses count changed
-#               update it
-#           if comments count changed
-#               update it
-#               process comments using current we_tags
-#       else
-#           scan the text looking for we_tags
-#           save
-#           process comments using current we_tags
-def plusSearch(tag):
-    """search for all activities for this tag"""
-    query = {'query': '#' + tag,
-            'maxResults': 20}
-    items = getitems(gplus_search, query)
-    for item in items:
-        if item['kind'] <> u'plus#activity':
-            print "unknown item kind tag: %s kind: %s" % (tag, item['kind'])
-        view = db.view('ids/google', key=item['id'], include_docs=True,
-                limit=1)
-        if len(view) == 1:
-            for row in view:
-                #for k,v in row.doc.items():
-                #    print "%20s: %s" % (k, v)
-                doc = row.doc
-                o = item['object']
-                od = doc['object']
-                update = False
-                replies_changed = False
-                if od['plusoners']['totalItems'] <> o['plusoners']['totalItems']:
-                    update = True
-                    doc['object']['plusoners'] = o['plusoners'].copy()
-                if od['resharers']['totalItems'] <> o['resharers']['totalItems']:
-                    update = True
-                    doc['object']['resharers'] = o['resharers'].copy()
-                if od['replies']['totalItems'] <> o['replies']['totalItems']:
-                    update = True
-                    doc['object']['replies'] = o['replies'].copy()
-                    replies_changed = True
-                if update:
-                    db[row['id']] = doc
-                if replies_changed:
-                    # fetch and store the new replies, as processitems() does above
-                    processitems(getitems(o['replies']['selfLink']), tag)
-        else:
-            print "---news to me"
-
-def main():
-    for tag in tags:
-        items = getitems(gplus_search,
-                         {'maxResults': 20,
-                             'query': '#' + tag})
-        # sort them chronologically
-        items = sorted(items, key=itemgetter('published'))
-        processitems(items, tag)
-        time.sleep(2)
-    # get communities and "g+ as blog" from feed table
-    for feed in dbfeeds.view('feed/activegplus', include_docs=True):
-        print "activegplus", feed
-        id = feed.doc['url']
-        # skip things that don't seem to have a tag
-        if len(feed.doc['we_tags']) < 1:
-            continue
-        tag = feed.doc['we_tags'][0]
-        print "  tag:", tag
-        items = getitems(gplus_activities_list, {'maxResults': 100}, id)
-
-        # sort them chronologically
-        items = sorted(items, key=itemgetter('published'))
-        print "  items:"
-        print items
-        print '======'
-        if 0:
-            for item in items:
-                ob = item['object']
-        if 0:
-            kinds = {}
-            for item in items:
-                kind = item['kind']
-                if kinds.has_key(kind):
-                    kinds[kind] += 1
-                else:
-                    kinds[kind] = 1
-                replies = item['object']['replies']['totalItems']
-                print item['id'], replies, item['object']['replies']['selfLink']
-            print kinds
-        processitems(items, tag,
-               get_replies = (feed.doc['type'] <> 'gplusblog'))
-        time.sleep(2)
-
-if __name__ == "__main__":
-    main()
diff --git a/groups.py b/groups.py
deleted file mode 100644
index 7232c5f99c0fb2c10c6f2d1d649a0249f7eff794..0000000000000000000000000000000000000000
--- a/groups.py
+++ /dev/null
@@ -1,157 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2015 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-from datetime import datetime
-import urllib, urllib2
-import couchdb
-import feedparser
-import lxml.html
-import sys
-import json
-
-DEBUG = False
-
-# memoized dict with keys: profile url
-#                  values: dict with keys url, height, width
-images = {}
-
-def profile_img(groups, profile):
-    """Return a dictionary of image url, height, width
-
-    Scrapes information from web forum."""
-    global images
-    if not images.has_key(profile):
-        # fetch the user profile
-        f = groups.open(profile)
-        html = lxml.html.parse(f).getroot()
-        img_div = html.find_class('userimage')
-        pics = img_div[0].find('img')
-        attrs = pics.attrib
-        img_url = attrs['src']
-        if len(img_url)<5 or img_url[:5] <> 'data:':
-            img_url = 'http://groups.oeru.org' + img_url
-        images[profile] = {'url': img_url}
-        if attrs.has_key('height'):
-            images[profile]['height'] = attrs['height']
-        if attrs.has_key('width'):
-            images[profile]['width'] = attrs['width']
-    return images[profile]
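-# Example (hypothetical profile URL): profile_img(groups,
-# 'http://groups.oeru.org/p/someuser') returns a dict such as
-# {'url': 'http://groups.oeru.org/...', 'height': '48', 'width': '48'},
-# with height/width present only when the <img> tag carries them; repeat
-# lookups for the same profile are served from the images cache.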
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('./options.json', 'rt'))
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# get the last time for a groups post in the database
-view = db.view('ids/groups', descending=True, limit=1)
-if len(view) == 1:
-    for row in view:
-        lasttime = row.key
-else:
-    lasttime = "2000-01-01T00:00:00.000Z"
-
-feeds = ['http://groups.oeru.org/s/search.atom?t=0&p=1&r=1&l=20']
-
-# we will only fetch "public" posts, so don't bother logging in
-groups = urllib2.build_opener()
-groups.addheaders = [('User-agent', 'WEnotes-Fetcher/0.1')]
-
-for feed in feeds:
-    rss = feedparser.parse(feed)
-    #feedtitle = rss['channel']['title']
-
-    items = rss['items']
-    items.reverse()
-
-    for item in items:
-        if DEBUG:
-            print item['updated'], item['author'], item['title']
-            print '<<<<<'
-            print item['summary']
-            print '>>>>>', ord(item['summary'][-1])
-            for k,v in item.items():
-                print k.rjust(20), v
-            print '====='
-
-        if item['title'] == 'RSS Error' and item['description'] == 'Error reading RSS data':
-            break
-        if item['updated'] <= lasttime and not DEBUG:
-            continue
-        author = item['author']
-        profile_url = item['author_detail']['href']
-        img = profile_img(groups, profile_url)
-
-        # try to remove signature blocks from summary
-        #  (some posters use a non-standard emdash rather than "-- ")
-        summary = item['summary']
-        summary_parts = re.split(u" ((--)|\u2014) ", summary, maxsplit=1)
-        if len(summary_parts) > 1:
-            summary = summary_parts[0]
-
-        # try to remove quoted text
-        # FIXME this assumes they are doing evil top posting
-        #       by just looking for attribution line and cutting there
-        #       and then adding ellipsis whether it is warranted or not
-        summary_parts = re.split(u"\sOn ((\w{3}, \w{3} \d{1,2})|(\d{4}-\d\d-\d\d)),.*?wrote:\s*",
-                summary, maxsplit=1)
-        if len(summary_parts) > 1:
-            summary = summary_parts[0] + u"\u2026"
-
-        abridged = item['title'] + ':  ' + summary
-        abridged = abridged.replace(u"\u2019", "'")
-
-        # OnlineGroups appears to truncate summaries with unicode ellipsis
-        truncated = False
-        if abridged[-1] == u"\u2026":
-            abridged = abridged[:-1] + '...'
-            truncated = True
-
-        mention = {
-                'from_user': author,
-                'from_user_name': author,
-                'created_at': item['updated'],
-                'profile_image_url': img['url'],
-                'text': abridged,
-                'truncated': truncated,
-                'id': item['id'],
-                'profile_url': profile_url,
-                'we_source': 'groups',
-                #'we_feed': feedtitle,
-                'we_tags': ['oeru'],
-                'we_timestamp': item['updated'],
-                'we_link': item['link']
-                }
-        if img.has_key('height'):
-            mention['profile_image_height'] = img['height']
-        if img.has_key('width'):
-            mention['profile_image_width'] = img['width']
-
-        if DEBUG:
-            print 'vvvvvvvvvvvvvvvvvv'
-            print mention
-            print '^^^^^^^^^^^^^^^^^^'
-        else:
-            db.save(mention)
diff --git a/hypothesis.py b/hypothesis.py
deleted file mode 100644
index 6553023c43af48da48f192a313c5a7cfd2c9f097..0000000000000000000000000000000000000000
--- a/hypothesis.py
+++ /dev/null
@@ -1,289 +0,0 @@
-#!/usr/bin/python
-
-""" Harvest hypothesis feed for tags specified in options file."""
-
-# Copyright 2017 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-from datetime import datetime, timedelta
-import couchdb
-import urllib, urllib2
-import json
-import feedparser
-import argparse
-import requests
-import HTMLParser
-import hashlib
-# for debugging
-import logging
-import pprint
-# to deal with nasty characters included in Hypothesis quotes
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-
-
-# set header this app will report itself as
-headers = {'User-Agent' : 'WEnotes-Hypothesis/0.2'}
-
-# retrieve URL from config JSON
-options = json.load(open('../config/options.json', 'r'))
-
-# Change the database? False for no, usually when debugging
-DryRun = False
-
-#logging configuration
-#LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-LogLevel = logging.INFO # or logging.INFO or logging.WARN, etc.
-LogFilename = options['logdir'] + '/hypothesis.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-
-# database configuration
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# get tag list from URL
-tagurl = options['settings-url']
-jsoncontent = urllib.urlopen(tagurl)
-reference_tags = json.loads(jsoncontent.read())["tags"]
-
-# the URL of the hypothesis RSS feed
-hypothesis_url = options["hypothesis"]["url"]
-hypothesis_rss_url = hypothesis_url + '/stream.rss?tags='
-hypothesis_rss_url_user = hypothesis_url + '/stream.rss?user='
-
-# for parsing HTML in bookmark text
-h = HTMLParser.HTMLParser()
-
-# length of content for messages
-message_length = 200
-
-lasttime = "2000-01-01T00:00:00.000Z"
-
-# command-line arguments (parsed from an empty list, so defaults apply)
-parser = argparse.ArgumentParser(description='Harvest posts from Hypothes.is, global web annotation.')
-parser.add_argument('-f', '--full', action='store_false',
-        help='get list of categories, and then every topic in each')
-args = parser.parse_args([])
-
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# get all hypothesis mentions already in our database
-# (so we can check if ones we find are already held)
-all_mentions = {}
-all_mention_ids = db.view('ids/hypothesis')
-if len(all_mention_ids):
-    for row in all_mention_ids:
-        try:
-            logging.debug('looking for id %s', row['id'])
-            result = db.get(row['id'])
-            all_mentions[result['id']] = row['id']
-        except:
-            logging.exception('failed to get valid response from db looking for id %s', row['id'])
-else:
-    logging.debug('no previous hypothesis mentions!')
-
-# get the last ID for each of the tags
-#stats = db.view('ids/hypothesis_stats', group=True)
-#last = {}
-#logging.debug("len = %d", len(stats))
-#if len(stats) > 0:
-#    for row in stats:
-#        last[row.key] = row.value['max']
-
-def have_mention(item_id, tag):
-    """Return boolean showing if we already have this message."""
-    logging.debug('item_id = %s', item_id)
-    try:
-        if all_mentions[item_id]:
-            logging.debug('Found id %s', item_id)
-            # now check if this is the same tag as previously associated
-            try:
-                mention = db.get(all_mentions[item_id])
-                try:
-                    if mention['we_tags']:
-                        logging.debug('we_tags = %s', mention['we_tags'])
-                        if tag in mention['we_tags']:
-                            logging.debug('we already have this mention with tag %s', tag)
-                        else:
-                            logging.debug('%s is a new tag for this mention - adding', tag)
-                            # provide the document id, not the hypothesis id, and an array of tags
-                            add_tag_to_mention(all_mentions[item_id], tag)
-                except KeyError:
-                    logging.debug('no we_tags defined')
-            except:
-                logging.exception('failed to retrieve mention %s', all_mentions[item_id])
-            return True
-    except KeyError:
-        logging.debug('failed to find %s', item_id)
-        return False
-
-# add a tag or tags to the existing tags on a hypothesis mention,
-# removing we_tag if set, populating we_tags with any existing tag(s) and
-# any new tags
-def add_tag_to_mention(id, tag):
-    # get the mention
-    mention = db.get(id)
-    mention['we_tags'].append(tag)
-    mention['we_tags'] = unique_tags(mention['we_tags'])
-    mention['we_tags'] = sort_tags(mention['we_tags'])
-    new_id, rev = db.save(mention)
-    logging.debug('updated mention %s (%s), added tag %s', id, new_id, tag)
-
-# ensure there aren't any duplicate tags
-# https://stackoverflow.com/questions/480214/how-do-you-remove-duplicates-from-a-list-in-whilst-preserving-order
-def unique_tags(tags):
-    seen = set()
-    seen_add = seen.add
-    return [tag for tag in tags if not (tag in seen or seen_add(tag))]
-
-# sort tags in alphabetical order
-# https://stackoverflow.com/questions/10269701/case-insensitive-list-sorting-without-lowercasing-the-result
-def sort_tags(tags):
-    return sorted(tags, key=lambda s: s.lower())
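-# Example (illustrative tags): unique_tags(['oeru', 'lida101', 'oeru'])
-# gives ['oeru', 'lida101'], and sort_tags(['oeru', 'LiDA101']) gives
-# ['LiDA101', 'oeru'], since the sort compares lowercased keys without
-# rewriting the tags themselves.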
-
-# deal with the +0000 time offset, not supported by datetime
-# see https://stackoverflow.com/questions/23940551/why-z-is-not-supported-by-pythons-strptime
-def dt_parse(t):
-    # t looks like 'Tue, 05 Sep 2017 12:34:56 +0000': 25 characters of
-    # date and time, a space, then a +/-HHMM offset starting at index 26
-    ret = datetime.strptime(t[0:25], '%a, %d %b %Y %H:%M:%S')
-    if t[26]=='+':
-        ret += timedelta(hours=int(t[27:29]))
-    elif t[26]=='-':
-        ret -= timedelta(hours=int(t[27:29]))
-    return ret
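-# e.g. dt_parse('Tue, 05 Sep 2017 12:34:56 +0000') returns
-# datetime(2017, 9, 5, 12, 34, 56); only whole-hour offsets are applied.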
-
-def abridge(text, link):
-    logging.debug('mention text: %s', pp.pformat(text))
-    # initialise
-    truncated = False
-    # drop opening blockquote tags; closing ones become an ellipsis
-    abridged = re.sub(r'<blockquote>', '', text)
-    abridged = re.sub(r'</blockquote>', '... ', abridged)
-    # pull out all the html tags
-    abridged = re.sub(r'<[^>]*>', '', abridged)
-    # remove any escaped tags
-    abridged = h.unescape(abridged)
-    # remove square brackets (link anchors)
-    abridged = re.sub(r'\[|]', ' ', abridged)
-    # remove | char
-    abridged = re.sub(r'\|', '', abridged)
-    # remove multiple spaces
-    abridged = re.sub(r'\s+', ' ', abridged)
-    # remove line feeds and non-breaking spaces
-    abridged = abridged.replace('&nbsp;', ' ')
-    abridged = abridged.replace('\n', ' ')
-    # leave 48 characters of headroom within message_length:
-    # 3 for the '...' suffix, 13 for a 'Link added - ' style prefix,
-    # and up to 32 for the link URL itself
-    i = len(abridged)
-    if i > (message_length - 48):
-        i = (message_length - 48)
-        while abridged[i] != ' ' and i > 0:
-            i -= 1
-        abridged = abridged[:i] + '...'
-        truncated = True
-    # prepend link:
-    abridged = 'Annotation: ' + link + ' - ' + abridged
-    return (abridged, truncated)
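-# Example (hypothetical annotation and link):
-# abridge('<blockquote>quoted passage</blockquote> my note', 'https://hyp.is/abc')
-# returns ('Annotation: https://hyp.is/abc - quoted passage... my note',
-# False); longer bodies are cut at a word boundary and flagged truncated.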
-
-def save_mentions(tag, reference_tags):
-    mentions = {}
-    # find all of hypothesis
-    rss = feedparser.parse(hypothesis_rss_url + tag)
-
-    # find the channel title
-    feedtitle = rss['channel']['title']
-
-    items = rss['items']
-    # reverse them, so oldest is first
-    items.reverse()
-
-    # for each item in RSS check if it has one (or more) of our tags
-    for item in items:
-        # is this an error item? If so, bail
-        if item['title'] == 'RSS Error':
-            break
-        #
-        dt = dt_parse(item['published'])
-        we_timestamp = dt.strftime('%Y-%m-%dT%H:%M:%S.000Z')
-        #print we_timestamp
-        if we_timestamp <= lasttime:
-            continue
-        seconds = time.mktime(dt.timetuple())
-        # check if we've seen the gid before...
-        if have_mention(item['id'], tag):
-            continue
-        # strip out HTML markup before abridging, so we don't stop midtag
-        body = item['title'] + ' - ' + item['summary']
-        (abridged, truncated) = abridge(body, item['link'])
-        # get author's name
-        author = item['author_detail']['name']
-        # username
-        username = item['author']
-        # create the mention object
-        mention = {
-            'user': {
-                'name': author,
-                'username': username,
-                'feed_url': hypothesis_rss_url_user + username,
-                'profile_url': hypothesis_url + '/users/' + username
-            },
-            'from_user_name': author,
-            'created_at': item['published'],
-            'text': abridged,
-            'truncated': truncated,
-            'id': item['id'],
-            #'profile_url': item['author'],
-            'we_source': 'hypothesis',
-            'we_feed': '%s' % (feedtitle),
-            'we_tags': [tag],
-            'we_timestamp': we_timestamp,
-            'we_link': item['link']
-        }
-        logging.debug(pp.pformat(mention))
-        logging.info('adding %s', item['id'])
-        logging.info('===========')
-        if not DryRun:
-            logging.info('writing mention %s', item['id'])
-            try:
-                id, rev = db.save(mention)
-                logging.debug('id = %s, rev = %s', id, rev)
-                # add to all_mentions to avoid getting this one again
-                all_mentions[item['id']] = id
-            except:
-                logging.exception('save of item %s failed!', item['id'])
-
-# harvest any new mentions for each of the reference tags
-for tag in reference_tags:
-    try:
-        logging.debug('trying tag %s', tag)
-        # harvest any new mentions for a given tag
-        save_mentions(tag, reference_tags)
-    except urllib2.HTTPError:
-        logging.exception("hypothesis: unable to fetch tag %s", tag)
-        pass
-logging.info('Done.\n')
diff --git a/id.py b/id.py
deleted file mode 100644
index 14a76afeb99a69e6c7cef223188dc6b30d588b70..0000000000000000000000000000000000000000
--- a/id.py
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2012 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-from datetime import datetime
-import cookielib
-import urllib, urllib2
-import couchdb
-import sys
-import json
-
-def formatDent(dent, tag):
-    timestamp = dent['created_at']
-    dt = datetime.strptime(dent['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
-    dent['we_timestamp'] = dt.isoformat() + '.000Z'
-    print dent['we_timestamp']
-    dent['we_source'] = 'identica'
-    dent['we_tags'] = [tag]
-    return dent
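-# e.g. a dent with created_at 'Sat Jul 14 10:25:00 +0000 2012' gains
-# we_timestamp '2012-07-14T10:25:00.000Z', we_source 'identica' and the
-# matching tag in we_tags.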
-
-def getDents(newDents, tag, sinceID):
-    url = 'http://identi.ca/api/statusnet/tags/timeline/%s.json?since_id=%d' % (tag, sinceID)
-    response = urllib2.urlopen(url)
-    dents = json.loads(response.read())
-    for dent in dents:
-        if dent['id'] > sinceID:
-            id = dent['id']
-            if newDents.has_key(id):
-                print "appending tag", tag
-                newDents[id]['we_tags'].append(tag)
-            else:
-                print "new interesting dent", id, tag
-                newDents[id] = formatDent(dent, tag)
-
-    return
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('./options.json', 'rt'))
-
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# get the last ID for each of the tags
-stats = db.view('ids/identica_stats', group=True)
-last = {}
-for row in stats:
-    last[row.key] = row.value['max']
-
-# build a dictionary, indexed by ID so we can quickly merge we_tags
-newDents = {}
-
-for tag in options['tags']:
-    try:
-        getDents(newDents, tag, last.get(tag, 0))
-    except urllib2.HTTPError:
-        print "identica: unable to fetch tag %s" % tag
-        pass
-
-# convert to a list
-newDents = newDents.values()
-if len(newDents):
-    print len(newDents), newDents
-    for doc in db.update(newDents):
-        print repr(doc)
diff --git a/makefeed.py b/makefeed.py
deleted file mode 100644
index 2a2ea3282f10c00780e4c707c9c5f3b29dcf9e3e..0000000000000000000000000000000000000000
--- a/makefeed.py
+++ /dev/null
@@ -1,217 +0,0 @@
-#!/usr/bin/python
-
-## create an Atom feed of WEnotes for a tag
-#
-# 20130712 jim@OERfoundation.org
-#
-# License: MIT
-
-import re
-import os
-import json
-import copy
-from datetime import datetime
-import couchdb
-from xml.etree.ElementTree import Element, SubElement, Comment, tostring
-import xml.dom.minidom
-from bottle import route, run, default_app, response
-
-item_count = 30
-hostname = 'wenotes.wikieducator.org'
-
-options = json.load(open('./options.json', 'rt'))
-couchurl = options['localcouch']
-dbname = options['db']
-
-couch = couchdb.Server(couchurl)
-db = couch[dbname]
-
-def prettify(x):
-    xm = xml.dom.minidom.parseString(tostring(x))
-    return xm.toprettyxml()
-
-def children(parent, elements):
-    for (e, t) in elements:
-        el = SubElement(parent, e)
-        el.text = t
-
-def unHTML(s):
-    s = re.sub(r'<[^>]*>', '', s)
-    s = s.replace('&amp;', '&')
-    s = s.replace('&#39;', "'")
-    s = s.replace('&quot;', '"')
-    return s
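-# e.g. unHTML('<b>Tom &amp; Jerry&#39;s &quot;note&quot;</b>') returns
-# 'Tom & Jerry's "note"'.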
-
-def canonical(d):
-    r = copy.deepcopy(d)
-
-    #print r
-    source = d['we_source']
-    user = d.get('user', d.get('from_user', ''))
-    r['user'] = user
-    if source == 'wikieducator':
-        r['profileURL'] = 'http://WikiEducator.org/User:' + user
-        r['profileURL'] = r['profileURL'].replace(' ', '_')
-        r['profileIMG'] = r['profile_image_url']
-        r['we_link'] = 'http://wikieducator.org/Special:WEnotes?wenoteid=%s' % (r['_id'])
-    elif source == 'twitter':
-        r['profileURL'] = 'http://twitter.com/' + user
-        r['profileIMG'] = r['profile_image_url']
-        r['we_link'] = r['profileURL'] + '/status/' + r['id_str']
-    elif source == 'identica':
-        pass
-    elif source == 'g+' or source == 'gplus':
-        #for k,v in r.items():
-        #    print k,'=',v
-        actor = d['actor']
-        r['text'] = d['object']['content']
-        r['user'] = ''
-        r['from_user_name'] = actor['displayName']
-        r['profileURL'] = actor['url'].replace('https://', 'http://')
-        r['profileIMG'] = actor['image']['url'].replace(
-                'https://', 'http://')
-        r['we_link'] = d['url']
-        r['title'] = unHTML(r['title'])
-    elif source == 'feed':
-        r['profileURL'] = d['profile_url']
-        r['profileURL'] = r['profileURL'].replace(' ', '_')
-        if r['profileURL'] == '' and d.get('gravatar', '') <> '':
-            r['profileURL'] = 'http://gravatar.com/' + d['gravatar']
-        r['profileIMG'] = d['profile_image_url']
-        if r['profileIMG'] == '' and d.get('gravatar', '') <> '':
-            r['profileIMG'] = 'http://gravatar.com/avatar/' + \
-                    d['gravatar'] + '?d=identicon'
-    elif source == 'ask':
-        pass
-    elif source == 'moodle':
-        pass
-    r['text'] = unHTML(r['text'])
-
-    # if we still don't have a profileIMG, but have a gravatar hash use it
-    if r.get('profileIMG', '') == '' and r.has_key('gravatar') \
-        and r['gravatar'] <> '':
-            r['profileIMG'] = 'http://www.gravatar.com/avatar/%s?s=48&d=identicon' % r['gravatar']
-    return r
-
-def return_feed(tag, kinds, item_count=20):
-    kinds = kinds.split(',')
-    # latest update
-    latest = '2010-01-01T00:00:00Z'
-    now = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-
-    root = Element('feed')
-    root.set('xmlns', 'http://www.w3.org/2005/Atom')
-    root.set('xml:lang', 'en')
-    root.set('xml:base', 'http://OERfoundation.org')
-    root.set('xmlns:gd', 'http://schemas.google.com/g/2005')
-
-    children(root, [
-        ('title', 'WEnotes %s' % tag),
-        ('id', 'http://wenotes.wikieducator.org/atom/%s' % tag),
-        ('generator', 'WEnotes 0.1.0'),
-        ('logo', 'http://t.oerfoundation.org/images/OERu_logo_1420x710.png'),
-        ('icon', 'http://t.oerfoundation.org/images/OERu_logo.ico')
-        ])
-    # updated.text will be filled in after we've scanned through entries
-    updated = SubElement(root, 'updated')
-    link = SubElement(root, 'link')
-    link.set('href', 'http://wikieducator.org/%s' % tag)
-    linkself = SubElement(root, 'link')
-    linkself.set('rel', 'self')
-    if '*' in kinds:
-        linkself.set('href', 'http://%s/atom/%s' % (hostname,tag))
-    else:
-        linkself.set('href', 'http://%s/atom/%s/%s' %
-               (hostname, tag, ",".join(kinds)))
-
-    if 'blog' in kinds:
-        kinds.append('feed')
-    if 'gplus' in kinds:
-        kinds.append('g+')
-    taglc = tag.lower()
-    mentions = db.view('messages/tag_time', startkey=[taglc, '2099-12-31T00:00:00.000Z'], endkey=[taglc, '2011-01-01T00:00:00.000Z'], descending=True, include_docs=True)
-    for item in mentions:
-        if item.has_key('we_d'):
-            continue
-        if (not '*' in kinds) and (not item.doc['we_source'] in kinds):
-            continue
-        doc = canonical(item.doc)
-        entry = SubElement(root, 'entry')
-        source = doc['we_source']
-        if source == 'feed':
-            source = 'blog'
-        if doc.has_key('title'):
-            tstr = doc['title']
-        else:
-            words = doc['text'].split(' ')
-            words = filter(lambda w: len(w)>0 and w[0]<>'@' and w<>'RT', words)
-            tstr = " ".join(words[:5])
-        # don't allow a zero length title
-        if len(tstr) == 0:
-            tstr = doc['from_user_name']
-        # kinds was split into a list above, so test membership
-        if '*' in kinds:
-            tstr += ' (%s)' % source
-        title = tstr
-        children(entry, [
-            ('id', doc['we_link']),
-            ('title', title),
-            ('updated', doc['we_timestamp']),
-            ('summary', doc['text']),
-            ('content', doc['text'])
-            ])
-        if doc['we_timestamp'] > latest:
-            latest = doc['we_timestamp']
-        link = SubElement(entry, 'link')
-        link.set('href', doc['we_link'])
-        linkalt = SubElement(entry, 'link')
-        linkalt.set('rel', 'alternate')
-        linkalt.set('type', 'text/html')
-        linkalt.set('href', doc['we_link'])
-
-        author = SubElement(entry, 'author')
-        children(author, [
-            ('name', doc['from_user_name']),
-            ('uri', doc['profileURL']),
-            ('email', 'noreply@OERfoundation.org'),
-            ])
-        image = SubElement(author, 'gd:image')
-        image.set('rel', 'http://schemas.google.com/g/2005#thumbnail')
-        image.set('src', doc['profileIMG'])
-
-        category = SubElement(entry, 'category')
-        category.set('term', tag)
-
-        item_count -= 1
-        if item_count <= 0:
-            break
-
-    updated.text = latest
-
-    #print prettify(root)
-    return prettify(root)
-
-@route('/atom/<tag>')
-def feed(tag='wenotes'):
-    global item_count
-    #for k,v in os.environ.items():
-    #    print k,"=",v
-    #print "tag %s" % tag
-    print tag
-    response.content_type = 'application/atom+xml; charset=utf-8'
-    return return_feed(tag, '*', item_count)
-
-@route('/atom/<tag>/<kinds>')
-def typefeed(tag='wenotes', kinds='*'):
-    global item_count
-    print tag, kinds
-    response.content_type = 'application/atom+xml; charset=utf-8'
-    return return_feed(tag, kinds, item_count)
-
-if __name__ == "__main__":
-    # Interactive mode
-    run(host='10.10.10.2', port=3000)
-else:
-    # Mod WSGI launch
-    #os.chdir(os.path.dirname(__file__))
-    application = default_app()
diff --git a/mastodon.py b/mastodon.py
deleted file mode 100644
index 4f4f7706d8264659ba82fa12a326674ddd9fe313..0000000000000000000000000000000000000000
--- a/mastodon.py
+++ /dev/null
@@ -1,336 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2017 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-from datetime import datetime
-import cookielib
-import urllib, urllib2
-import couchdb
-# to deal with nasty characters included in Hypothesis quotes
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-# others
-import json
-# for debugging
-import logging
-import pprint
-import random
-
-# Defaults
-DEBUG = False
-
-# return a tuple (text, truncated) of abridged text and truncated flag
-def abridge(text):
-    truncated = False
-    # strip out HTML comments (and MSOffice conditionals)
-    abridged = re.sub(r'<!--.*?-->', '', text)
-    # strip out HTML markup before abridging,
-    #   so we don't stop midtag
-    abridged = re.sub(r'<[^>]*>', '', abridged)
-    abridged = re.sub(r'\s*by [^.]+\.\n?', '', abridged)
-    abridged = abridged[:500].strip()
-    abridged = abridged.replace('&nbsp;', ' ')
-    abridged = abridged.replace('&#8211;', "--")
-    abridged = abridged.replace('&#8216;', "'")
-    abridged = abridged.replace('&#8217;', "'")
-    abridged = abridged.replace('&#8220;', '"')
-    abridged = abridged.replace('&#8221;', '"')
-    abridged = abridged.replace('&#8230;', "...")
-    abridged = abridged.replace('&#38;', "&")
-    abridged = abridged.replace('\n', ' ')
-    # get rid of multiple spaces (which the above may have introduced)
-    abridged = re.sub(r'  +', ' ', abridged)
-    i = len(abridged)
-    if i > 200:
-        i = 200
-        while abridged[i] != ' ' and i > 0:
-            i -= 1
-        abridged = abridged[:i] + '...'
-        truncated = True
-    return (abridged, truncated)
-
-def formatToot(toot, tag, instance, instance_url):
-    timestamp = toot['created_at']
-    # the first 19 characters of created_at hold 'YYYY-MM-DDTHH:MM:SS'
-    dt = datetime.strptime(toot['created_at'][0:19], '%Y-%m-%dT%H:%M:%S')
-    toot['we_timestamp'] = dt.isoformat() + '.000Z'
-    #logging.debug('timestamp: %s', toot['we_timestamp'])
-    toot['we_source'] = 'mastodon'
-    toot['instance'] = instance
-    toot['instance_url'] = instance_url
-    toot['we_tags'] = [tag]
-    # convert native structure to compatible structure for WEnotes.js
-    #logging.debug('unabridged content: %s' % toot['content'])
-    (abridged, truncated) = abridge(toot['content']) # strip HTML tags
-    #logging.debug('abridged content: %s' % abridged)
-    toot['text'] = abridged
-    toot['truncated'] = truncated
-    logging.debug('user URL: %s', toot['account']['url'])
-    toot['profile_url'] = toot['account']['url']
-    logging.debug('toot text: %s', pp.pformat(toot['text']))
-    toot['user'] = {}
-    toot['user']['screen_name'] = toot['account']['username']
-    toot['user']['name'] = toot['account']['display_name']
-    toot['user']['profile_image_url'] = toot['account']['avatar_static']
-    return toot
-
-
-def profileToot(toot):
-    #profile = str(toot['account']['id'])+' :: '+str(toot['created_at'])+' :: '+toot['content']
-    #profile = str(toot['account']['id'])+' :: '+str(toot['created_at'])[:-5]+' :: '+toot['content']
-    #profile = str(toot['account']['id'])+' :: '+str(toot['created_at'])[:-5]
-    #profile = str(toot['created_at'])[:-5]
-    #profile = str(toot['uri']+'::'+toot['text'])
-    profile = str(toot['text'])
-    final = abs(hash(profile))
-    logging.debug('profile for toot %s (%s): %s', str(toot['id']), final, profile)
-    return final
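-# Because the profile is derived from the toot text alone, two copies of
-# the same status harvested from different instances hash to the same
-# value; findTootIdWithSameProfile() below relies on that to spot the
-# duplicates that the keep/discard pass weeds out.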
-
-# a URL is in the form of https://mastodon.milll.ws/@lightweight
-def instanceUrlFromAccountUrl(toot):
-    url = toot['profile_url']
-    x = re.search(r"(http\w?)://(.*)/@", url)
-    instance_url = x.group(1)+'://'+x.group(2)
-    #logging.debug('instance URL: %s', instance_url)
-    return instance_url
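-# e.g. a profile_url of 'https://mastodon.milll.ws/@lightweight' yields
-# 'https://mastodon.milll.ws'.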
-        
-def findTootIdWithSameProfile(toots, find_profile, not_id):
-    for id in toots:
-        toot = toots[id]
-        profile = profileToot(toot)
-        logging.debug('not_id %s = %s vs. compare to toot id %s = %s', not_id, find_profile, id, profile)
-        id = toot['id']
-        if profile == find_profile and not_id != id:
-            logging.debug('found a duplicate for %s -> %s', not_id, id)
-            return id
-    return False
-        
-# return True if the Mastodon instance url provided is among the instances being scanned, False if not.
-def originInstanceIncluded(instanceUrl):
-    included = False
-    for instance in instances:
-        position = instances[instance].find(instanceUrl)
-        #logging.debug('checking position %d of %s in %s', position, instanceUrl, instances[instance])
-        # position is -1 if the instanceUrl *isn't* in instances[instance]
-        if position >= 0:
-            included = True
-    return included
-
-# get toots for a given tag from a given instance
-def getToots(newToots, instance, instance_url, tag, sinceID):
-    rss_url = instance_url + options['mastodon']['rss_path']
-    url = '%s/%s?since_id=%d' % (rss_url, tag, int(sinceID))
-    response = urllib2.urlopen(url)
-    toots = json.loads(response.read())
-    #logging.debug("processing %s.", tag)
-    # for all the toots for the tag, see if any are of interest!
-    for toot in toots:
-        #logging.debug('checking on %s for %d against max %d', rss_url, int(toot['id']), int(sinceID))
-        # if the toot id is more recent (i.e. greater) than the last toot
-        # harvested from this instance, process it further...
-        if int(toot['id']) > int(sinceID):
-            # get the toot's ID
-            id = str(toot['id'])
-            logging.debug('working with id %s', id)
-            # if that ID is already represented in the list of 'newToots'
-            if newToots.has_key(id):              
-                count = len(newToots[id])
-                logging.debug('another Toot with id = %s and instance %s', id, instance)
-                #logging.debug('length of newToots[%s] = %d, details: %s', id, count, pp.pformat(newToots[id]))
-                if newToots[id]:
-                    #logging.debug('value of newToots[%s][%s] = %s', id, instance, pp.pformat(newToots[id][instance]))
-                    if newToots[id].has_key('we_tags'):
-                        logging.debug("appending tag %s", tag)
-                        newToots[id]['we_tags'].append(tag)
-                    else:
-                        logging.debug('weird - id %s was already harvested for %s, but has no we_tags', id, instance)
-                else:     
-                    logging.debug('adding new toot for id %s for instance %s', id, instance)
-                    # replace, not append: newToots[id] holds a single toot dict
-                    newToots[id] = formatToot(toot, tag, instance, instance_url)
-
-            # if not, create a new element with an array, and assign this toot object as the 0th one
-            else:
-                #newToots[id] = []
-                #newToots[id][0]['we_tags'].append(tag)
-                #newToots[id] = [toot]
-                newToots[id] = formatToot(toot, tag, instance, instance_url)
-                logging.debug('adding new toot for id %s', id);
-                #logging.info("new interesting toot %s (%s)", id, tag)
-    return
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('../config/options.json', 'rt'))
-
-#logging configuration
-if DEBUG:
-    LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-else:
-    LogLevel = logging.INFO # or logging.INFO or logging.WARN, etc.
-LogFilename = options['logdir'] + '/mastodon.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-logging.info('Starting run')
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# couch database
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# get tag list from URL
-tagurl = options['settings-url']
-jsoncontent = urllib.urlopen(tagurl)
-reference_tags = json.loads(jsoncontent.read())["tags"]
-#reference_tags = [u'ds4oers', u'oeru', u'wenotes']
-#
-# randomise the order of tags: in prior runs some tags might not have
-# been checked because of rate limiting on the Mastodon instances, so
-# shuffling ensures every tag eventually gets its turn.
-random.shuffle(reference_tags)
-
-logging.debug("tags we're looking for: %s", reference_tags)
-# set header this app will report itself as
-headers = {'User-Agent' : 'WEnotes-Mastodon/0.5'}
-
-def maxIdForInstance(instance):
-    max_id = 0
-    for row in db.view('ids/mastodon_instance_max_id'):
-        #logging.debug('instance: %s', pp.pformat(row))
-        #logging.debug('instance id: %s', pp.pformat(row.key))
-        if (row.key == instance and row.value > max_id):
-            #logging.debug('instance max id: %d', int(row.value))
-            max_id = row.value
-    return max_id
-
-# build a dictionary, indexed by ID so we can quickly merge we_tags
-TootsToSave = []
-newToots = {}
-instances = options['mastodon']['instances']
-logging.info('starting run...')
-logging.debug('listed instances: %s', pp.pformat(instances))
-# rotate among instances for each tag to distribute API hits across the instances
-# to reduce the likelihood of tripping any API rate limits (see https://docs.joinmastodon.org/api/rate-limits/)
-for tag in reference_tags:
-    for instance, instance_url in instances.items():
-        instance_max_id = maxIdForInstance(instance)
-        try:
-            logging.debug("Checking instance %s for tag %s", instance_url, tag)
-            getToots(newToots, instance, instance_url, tag, instance_max_id)
-        except urllib2.HTTPError:
-            logging.warning("mastodon: unable to fetch tag %s from instance %s - exceeding rate limits?", tag, instance_url)
-            pass
-    # convert Toots to a list
-logging.debug('The number of toots found: %d', len(newToots))
-profileMap = {}
-tootCount = len(newToots)
-if tootCount > 0:
-    count = 0
-
-    for id in newToots:
-        toot = newToots[id]
-        count += 1
-        #logging.debug('looking at toot %s = %s', id, pp.pformat(toot))
-        #id = int(toot['id'])
-        profile = profileToot(toot)
-        logging.debug('profile = %d', int(profile))
-        if profileMap.has_key(profile):
-            profileMap[profile].append(id)
-        else:
-            profileMap[profile] = [id]
-
-    logging.debug('profileMap: %s', str(profileMap))
-    kept_count = 0
-    internal_count = 0
-    external_count = 0
-    popped_count = 0
-    for profile in profileMap:
-        ids = profileMap[profile]
-#        if len(ids) > 1:
-        logging.debug('%d toots for profile %s', len(ids), str(ids))
-        count = 0
-        kept = False
-        for id in ids:
-            count += 1
-            acct = newToots[id]['account']['acct']
-            logging.debug('%d. Toot %s, acct %s', count, id, acct)
-            # check if the author URL has an "@" in it, which means this toot originates at a
-            # separate instance... If there's no @, it's been harvested from the current instance
-            result = acct.split('@')
-            # if the number of array elements is less than 2, it means no '@' was detected, so the reference
-            # comes from one of our scanned instances - so we'll go with that
-            if len(result) < 2 and not kept:
-                logging.debug('%s is a keeper (internal)', id)
-                kept = True
-                kept_count += 1
-                internal_count += 1
-            # if we've already kept one of the posts from this profile, discard the rest.
-            elif kept:                    
-                logging.debug('discard %s (because previous kept)', id)
-                newToots.pop(id)
-                popped_count += 1
-                if not newToots.has_key(id):
-                    logging.debug('popped id %s...', id)
-
-            # otherwise, check if the toot is a) an external toot from an instance we're not checking,
-            # in which case we'll keep it, or b) if it's a toot from an instance we're checking but
-            # isn't the original, in which case discard it
-            else:
-                #logging.debug('the result = %s', str(result))
-                # check if the source is not an instance we're polling
-                if len(result) > 1 and not originInstanceIncluded(result[1]):
-                    logging.debug('%s is a domain we\'re not scanning!', result[1])
-                    logging.debug('++++++++++++ %s is a keeper (external) with profile %s and %s', id, profile, repr(profileMap[profile]))
-                    logging.debug('============ profile: %s,  text: %s', profile, newToots[id]['text'])
-                    kept = True
-                    kept_count += 1
-                    external_count += 1
-                # if it is a reference to another instance that we're polling as well,
-                # discard it - we'll use the original one instead.
-                else:
-                    logging.debug('discard %s (default)', id)
-                    newToots.pop(id)
-                    popped_count += 1
-                    if not newToots.has_key(id):
-                        logging.debug('popped id %s...', id)
-                    
-    logging.debug('Kept %d out of %d Toots (%d internal, %d external), popped %d', kept_count, tootCount, internal_count, external_count, popped_count)
-logging.debug('finishing with %d newToots to publish! ids: %s', len(newToots), newToots.keys())
-
-for id in newToots:
-    toot = newToots[id]
-    #logging.debug('Working with %s: %s', id, pp.pformat(toot))
-    if DEBUG:
-        logging.info('we would have saved toot %s', id)
-    else:
-        try:
-            cid, rev = db.save(toot)
-            logging.info('saved toot %s with CouchDB id %s, revision %s', id, cid, rev) 
-        except:
-            logging.info('failed to save toot %s: %s', id, repr(toot))
-
-logging.info('End of run...\n\n')
-      
diff --git a/medium.py b/medium.py
deleted file mode 100644
index 58ddb8b47c0c053b5601368efbe87a97631a50a1..0000000000000000000000000000000000000000
--- a/medium.py
+++ /dev/null
@@ -1,323 +0,0 @@
-#!/usr/bin/python
-
-""" Harvest medium feed for tags specified in options file."""
-
-# Copyright 2017 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-from datetime import datetime, timedelta
-import couchdb
-import urllib, urllib2
-import json
-import feedparser
-import argparse
-import requests
-import HTMLParser
-import hashlib
-# for debugging
-import logging
-import pprint
-# to deal with nasty characters included in Hypothesis quotes
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-
-# set header this app will report itself as
-headers = {'User-Agent' : 'WEnotes-Medium/0.2'}
-
-# retrieve URL from config JSON
-options = json.load(open('../config/options.json', 'r'))
-
-# Change the database? False for no, usually when debugging
-DryRun = False
-
-#logging configuration
-#LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-LogLevel = logging.INFO # or logging.INFO or logging.WARN, etc.
-LogFilename = options['logdir'] + '/medium.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-
-# database configuration
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# get tag list from URL
-tagurl = options['settings-url']
-jsoncontent = urllib.urlopen(tagurl)
-reference_tags = json.loads(jsoncontent.read())["tags"]
-#reference_tags = ['oer', 'react']
-logging.debug("reference_tags = %s", reference_tags)
-
-# the URL of the medium RSS feed
-medium_url = options["medium"]["url"]
-medium_rss_url = medium_url + '/feed/tag/'
-
-# for parsing HTML in bookmark text
-h = HTMLParser.HTMLParser()
-
-# length of content for messages
-message_length = 200
-
-lasttime = "2000-01-01T00:00:00.000Z"
-
-# command-line arguments (parsed from an empty list, so defaults apply)
-parser = argparse.ArgumentParser(description='Harvest posts from Medium tag feeds.')
-parser.add_argument('-f', '--full', action='store_false',
-        help='get list of categories, and then every topic in each')
-args = parser.parse_args([])
-
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# get all medium mentions already in our database
-# (so we can check if ones we find are already held)
-all_mentions = {}
-all_mention_ids = db.view('ids/medium')
-if len(all_mention_ids):
-    for row in all_mention_ids:
-        try:
-            logging.debug('looking for id %s', row['id'])
-            result = db.get(row['id'])
-            all_mentions[result['id']] = row['id']
-        except:
-            logging.exception('failed to get valid response from db looking for id %s', row['id'])
-else:
-    logging.debug('no previous medium mentions!')
-
-def have_mention(item_id, tag):
-    """Return boolean showing if we already have this message."""
-    logging.debug('item_id = %s', item_id)
-    try:
-        if all_mentions[item_id]:
-            logging.debug('Found id %s', item_id)
-            # now check if this is the same tag as previously associated
-            try:
-                mention = db.get(all_mentions[item_id])
-                try:
-                    if mention['we_tags']:
-                        logging.debug('we_tags = %s', mention['we_tags'])
-                        if tag in mention['we_tags']:
-                            logging.debug('we already have this mention with tag %s', tag)
-                        else:
-                            logging.debug('%s is a new tag for this mention - adding', tag)
-                            # provide the document id, not the medium id, and an array of tags
-                            add_tag_to_mention(all_mentions[item_id], tag)
-                except KeyError:
-                    logging.debug('no we_tags defined')
-            except:
-                logging.exception('failed to retrieve mention %s', all_mentions[item_id])
-            return True
-    except KeyError:
-        logging.debug('failed to find %s', item_id)
-        return False
-
-# add a tag to the existing we_tags on a medium mention,
-# de-duplicating and sorting the resulting tag list
-def add_tag_to_mention(doc_id, tag):
-    # get the mention
-    mention = db.get(doc_id)
-    mention['we_tags'].append(tag)
-    mention['we_tags'] = unique_tags(mention['we_tags'])
-    mention['we_tags'] = sort_tags(mention['we_tags'])
-    new_id, rev = db.save(mention)
-    logging.debug('updated mention %s (%s), added tag %s', doc_id, new_id, tag)
-
-# ensure there aren't any duplicate tags
-# https://stackoverflow.com/questions/480214/how-do-you-remove-duplicates-from-a-list-in-whilst-preserving-order
-def unique_tags(tags):
-    seen = set()
-    seen_add = seen.add
-    return [tag for tag in tags if not (tag in seen or seen_add(tag))]
-
-# sort tags in alphabetical order
-# https://stackoverflow.com/questions/10269701/case-insensitive-list-sorting-without-lowercasing-the-result
-def sort_tags(tags):
-    return sorted(tags, key=lambda s: s.lower())
-
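-# A quick illustration of how the two helpers above compose (made-up tags):
-#   sort_tags(unique_tags(['OERu', 'lida101', 'OERu']))
-#   -> ['lida101', 'OERu']  # duplicate dropped, case-insensitive order
-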
-# deal with the +0000 time offset, not supported by datetime
-# see https://stackoverflow.com/questions/23940551/why-z-is-not-supported-by-pythons-strptime
-def dt_parse(t):
-    # e.g. 'Tue, 24 Apr 2017 19:55:34 +0000': chars 0-24 hold the
-    # date/time, char 26 the offset sign, chars 27-28 the offset hours
-    ret = datetime.strptime(t[0:25], '%a, %d %b %Y %H:%M:%S')
-    # normalise to UTC: a positive offset means local time is ahead of
-    # UTC, so subtract it (offset minutes are ignored here)
-    if t[26] == '+':
-        ret -= timedelta(hours=int(t[27:29]))
-    elif t[26] == '-':
-        ret += timedelta(hours=int(t[27:29]))
-    return ret
-
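-# A worked example (assuming the feed's RFC 2822 date format):
-#   dt_parse('Tue, 24 Apr 2017 19:55:34 +0000')
-#   -> datetime(2017, 4, 24, 19, 55, 34)
-# which save_mentions() below renders as '2017-04-24T19:55:34.000Z'
-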
-def abridge(text, link):
-    # initialise
-    truncated = False
-    # pull out all the html tags
-    abridged = re.sub(r'<[^>]*>', '', text)
-    # replace non-breaking space entities before unescaping the rest
-    abridged = abridged.replace('&nbsp;', ' ')
-    # unescape any remaining html entities
-    abridged = h.unescape(abridged)
-    # remove square brackets (link anchors)
-    abridged = re.sub(r'\[|]', ' ', abridged)
-    # collapse runs of whitespace (including line feeds) to single spaces
-    abridged = re.sub(r'\s+', ' ', abridged)
-    # abridge to message_length characters, reserving 48 for the
-    # '...' ellipsis (3) and the 'Post: <link> - ' prefix with its URL
-    i = len(abridged)
-    if i > (message_length - 48):
-        i = (message_length - 48)
-        while abridged[i] != ' ' and i > 0:
-            i -= 1
-        abridged = abridged[:i] + '...'
-        truncated = True
-    # prepend link:
-    abridged = 'Post: ' + link + ' - ' + abridged
-    return (abridged, truncated)
-
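-# A sketch of what abridge() returns (made-up link and text):
-#   abridge('<p>Some <b>long</b> summary</p>', 'https://medium.com/@u/x')
-#   -> ('Post: https://medium.com/@u/x - Some long summary', False)
-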
-def save_mentions(tag, reference_tags):
-    # fetch the Medium RSS feed for this tag
-    rss = feedparser.parse(medium_rss_url + tag)
-    logging.info('rss structure: %s', pp.pformat(rss.get('channel')))
-
-    # find the channel title
-    if 'channel' not in rss:
-        logging.debug('no channel in rss object, bailing')
-        return False
-    try:
-        feedtitle = rss['channel']['title']
-    except KeyError:
-        logging.debug('no title for channel')
-        return False
-
-    items = rss['items']
-    # reverse them, so oldest is first
-    items.reverse()
-
-    # for each item in RSS check if it has one (or more) of our tags
-    for item in items:
-        # is this an error item? If so, bail
-        if item['title'] == 'RSS Error':
-            break
-        #
-        dt = dt_parse(item['published'])
-        we_timestamp = dt.strftime('%Y-%m-%dT%H:%M:%S.000Z')
-        #print we_timestamp
-        if we_timestamp <= lasttime:
-            continue
-        # check if we've seen this item id before...
-        if have_mention(item['id'], tag):
-            continue
-        # print the content of the item
-        #logging.debug('item %s = %s', item['id'], pp.pformat(item))
-        # keep only the item tags that appear in our reference tags
-        tags = []
-        for t in item['tags']:
-            logging.debug('t = %s', t['term'])
-            tags.append(t['term'])
-        tags = list(set(tags) & set(reference_tags))
-        # strip out HTML markup before abridging, so we don't stop midtag
-        body = item['title'] + ' - ' + item['summary']
-        (abridged, truncated) = abridge(body, item['link'])
-        # get author's name
-        author = item['author_detail']['name']
-        # username
-        pattern = re.compile(r'https://([.\-a-z0-9]*)/([@._\-a-zA-Z0-9]*)', re.UNICODE)
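-        # e.g. for a (made-up) link 'https://medium.com/@someuser/a-title'
-        # group(1) is 'medium.com' and group(2) is '@someuser'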
-        custom_domain = False
-        subsite = False
-        try:
-            domain_name = pattern.search(item['link']).group(1)
-            first_path = pattern.search(item['link']).group(2)
-            logging.debug('link = %s, group 1 = %s, group 2 = %s', item['link'], domain_name, first_path)
-            if domain_name == 'medium.com':
-                # medium users start with a @
-                if first_path[0] == '@':
-                    logging.debug('a proper "@" user - a keeper')
-                    username = first_path[1:]
-                # medium "sub-sites", for collectives of bloggers have no @
-                else:
-                    # we haven't got a user... so we bail out
-                    logging.debug('not a proper "@" user, bailing')
-                    username = first_path
-                    continue
-            else:
-                # not a user we're interested in...
-                custom_domain = 'https://' + domain_name
-                continue
-        except AttributeError:
-            # pattern.search() returned None, so .group() raised
-            username = 'no username'
-            logging.debug('no username, bailing')
-            continue
-        logging.debug('found username %s from link %s', username, item['link'])
-        feed_url = medium_url + '/feed/@' + username
-        profile_url = medium_url + '/@' + username
-        logging.debug('profile url %s', profile_url)
-        # create the mention object
-        mention = {
-            'user': {
-                'name': author,
-                'username': username,
-                'feed_url': feed_url,
-                'profile_url': profile_url
-            },
-            'from_user_name': author,
-            'created_at': item['published'],
-            'text': abridged,
-            'truncated': truncated,
-            'id': item['id'],
-            #'profile_url': item['author'],
-            'we_source': 'medium',
-            'we_feed': '%s' % (feedtitle),
-            'we_tags': tags,
-            'we_timestamp': we_timestamp,
-            'we_link': item['link']
-        }
-        #logging.debug(pp.pformat(mention))
-        logging.info('adding %s', item['id'])
-        logging.info('===========')
-        if not DryRun:
-            logging.info('writing mention %s', item['id'])
-            try:
-                id, rev = db.save(mention)
-                logging.debug('id = %s, rev = %s', id, rev)
-                # add to all_mentions to avoid getting this one again
-                all_mentions[item['id']] = id
-            except:
-                logging.exception('save of item %s failed!', item['id'])
-
-# harvest any new mentions for each tag we're interested in
-for tag in reference_tags:
-    try:
-        logging.debug('trying tag %s', tag)
-        # harvest any new mentions for a given tag
-        save_mentions(tag, reference_tags)
-    except urllib2.HTTPError:
-        logging.exception("medium: unable to fetch tag %s", tag)
-logging.info('Done.\n')
diff --git a/milll-discourse.py b/milll-discourse.py
deleted file mode 100644
index c5c8276cc8457c06a7948f712c9c8672367ebbe4..0000000000000000000000000000000000000000
--- a/milll-discourse.py
+++ /dev/null
@@ -1,190 +0,0 @@
-#!/usr/bin/python
-
-""" Harvest messages from forums category URLs specified in options file."""
-
-# Copyright 2016 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-import couchdb
-import json
-import urllib
-import argparse
-import requests
-# for debugging
-import logging
-import pprint
-# to deal with non-ASCII characters included in forum posts
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-
-DEBUG = False
-DELAY = 0.1         # delay in seconds between Discourse hits
-MAX_TEXT_LEN = 300  # max characters before truncation
-SOURCE = "milllforum"  # the source of each mention in CouchDB
-POSTS_PER_PAGE = 20 # Discourse returns posts for a topic in lots of 20 per page
-
-# retrieve URL from config JSON
-options = json.load(open('../config/options.json', 'r'))
-
-#logging configuration
-LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-#LogLevel = logging.INFO # or logging.INFO or logging.WARN, etc.
-LogFilename = options['logdir'] + '/milll-forum.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-#print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-
-# get tag list from URL
-tagurl = options['settings-url']
-jsoncontent = urllib.urlopen(tagurl)
-reference_tags = json.loads(jsoncontent.read())["tags"]
-
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# create the CouchDB object
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# some other settings.
-baseurl = options['milll']['url']
-logging.debug('baseurl = %s', pp.pformat(baseurl))
-version = '0.3'
-headers = {'User-Agent' : 'WEnotes-Forum-Discourse/%s' % (version)}
-#print headers
-
-# create the command line argument parser
-parser = argparse.ArgumentParser(description='Harvest posts from Discourse Forums.')
-parser.add_argument('-f', '--full', action='store_false',
-        help='get list of categories, and then every topic in each')
-args = parser.parse_args([])
-
-def have_mention(msg_id):
-    """Return boolean showing if we already have this message."""
-    view = db.view('ids/milllids')
-    have = (len(view[msg_id]) > 0)
-    return have
-
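-# For example (made-up link), assuming the ids/milllids view emits the
-# mention's link/id as its key:
-#   have_mention(baseurl + '/t/259/1')  # -> True if already harvested
-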
-# check a tag list against our reference_tags
-def interesting_tags(tags):
-    """Return list of interesting tags, or false if none."""
-    common_tags = list(set(tags) & set(reference_tags))
-    #logging.debug("taglist: %s\nreference tags: %s\ncommon tags: %s", tags, reference_tags, common_tags)
-    if common_tags:
-        logging.debug("interesting tags: %s", common_tags)
-        return common_tags
-    else:
-        return False
-
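-# For example, if reference_tags were ['lida101', 'oeru']:
-#   interesting_tags(['lida101', 'cooking'])  # -> ['lida101']
-#   interesting_tags(['cooking'])             # -> False
-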
-if args.full:
-        # get the list of categories
-        categories = '%s/categories.json' % (baseurl)
-        time.sleep(DELAY)
-        r = requests.get(categories, headers=headers, verify=False)
-        d = json.loads(r.text)
-        cat_list = d['category_list']['categories']
-        #logging.debug("categories: %s", json.dumps(cat_list, indent=2, sort_keys=True))
-        for cat in cat_list:
-            logging.debug("category: %s(%s)", cat['name'], cat['id'])
-            topics = '%s/c/%s.json' % (baseurl, cat['id'])
-            if DEBUG:
-                print ">>>>>>>>>>>>>>>>>>>>>>>>>>>", \
-                        cat['id'], cat['name'], cat['slug'], topics
-            #logging.debug("category: %s, %s, %s, %s", cat['id'], cat['name'], cat['slug'], topics)
-            time.sleep(DELAY)
-            r2 = requests.get(topics, headers=headers, verify=False)
-            d = json.loads(r2.text)
-            topic_list = d['topic_list']['topics']
-            #logging.debug("topics: %s", json.dumps(topic_list, indent=2, sort_keys=True))
-            for topic in topic_list:
-                pages = topic['posts_count'] / POSTS_PER_PAGE
-                if topic['posts_count'] % POSTS_PER_PAGE > 0:
-                    #print "adding a page with modulo %f" % (topic['posts_count'] % POSTS_PER_PAGE)
-                    pages += 1
-                #print "%d pages for %d posts for topic %s" % (pages, topic['posts_count'], topic['title'])
-                # iterate over every page, not just the tuple (1, pages)
-                for page in range(1, pages + 1):
-                    posts = '%s/t/%s.json?page=%d' % (baseurl, topic['id'], page)
-                    logging.debug("topic: %s(%s), page %d", topic['title'], topic['id'], page)
-                    #logging.debug("topic %s: ", json.dumps(topic, indent=2, sort_keys=True))
-                    #logging.debug('     tags: %s', json.dumps(topic['tags'], indent=2, sort_keys=True))
-                    common_tags = interesting_tags(topic['tags'])
-                    if not common_tags:
-                        logging.debug('no interesting tags')
-                        continue
-                    time.sleep(DELAY)
-                    r3 = requests.get(posts, headers=headers, verify=False)
-                    p = json.loads(r3.text)
-                    post_list = p['post_stream']['posts']
-                    #logging.debug("post_list %s: ", json.dumps(post_list, indent=2, sort_keys=True))
-                    for post in post_list:
-                        #logging.debug("post %s: ", json.dumps(post, indent=2, sort_keys=True))
-                        if post['deleted_at']:
-                            continue
-                        link = "%s/t/%s/%s" % (baseurl, post['topic_id'], post['post_number'])
-                        logging.debug('link: %s', link)
-                        if have_mention(link):
-                            logging.debug('existing link: %s', link)
-                            continue
-                        logging.debug('interesting link: %s', link)
-                        text = post['cooked'].replace('\n', ' ')
-                        text = re.sub(r'<[^>]*?>', ' ', text)   # remove HTML tags
-                        text = re.sub(r' {2,}', ' ', text)      # collapse spaces
-                        text = topic['fancy_title'].strip() + ': ' + text.strip()
-                        truncated = False
-                        i = len(text)
-                        if i > MAX_TEXT_LEN:
-                            i = MAX_TEXT_LEN
-                            while text[i] != ' ' and i > 0:
-                                i -= 1
-                            text = text[:i] + '...'
-                            truncated = True
-                        from_user_name = post['display_username']
-                        if from_user_name == '':
-                            from_user_name = post['username']
-                        profile_image_url = post['avatar_template'].replace('{size}', '48')
-                        if not re.match(r'^(https?:)?//', profile_image_url):
-                            profile_image_url = baseurl + profile_image_url
-                        mention = {
-                                'created_at': post['created_at'],
-                                'from_user': post['username'],
-                                'from_user_name': from_user_name,
-                                'id': link,
-                                'post_id': post['id'],
-                                'profile_image_url': profile_image_url,
-                                'profile_url': baseurl + '/users/' + post['username'],
-                                'text': text,
-                                'truncated': truncated,
-                                'we_link': link,
-                                'we_source': SOURCE,
-                                'we_version': version,
-                                'we_tags': common_tags,
-                                'we_timestamp': post['created_at']
-                                }
-                        if DEBUG:
-                            print json.dumps(mention, indent=2, sort_keys=True)
-                        else:
-                            logging.info('adding post %s by %s with tag(s): %s', mention['id'], from_user_name, common_tags)
-                            db.save(mention)
diff --git a/moodle.py b/moodle.py
deleted file mode 100644
index 939d3892c496acd207b5aff396f7fc06c48984ad..0000000000000000000000000000000000000000
--- a/moodle.py
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2012 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-from datetime import datetime
-import cookielib
-import urllib, urllib2
-import couchdb
-import feedparser
-import lxml.html
-import sys
-import json
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('./options.json', 'rt'))
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# serial number of messages retrieved this poll to uniquify ID
-serial = 0
-
-# get the last time for a moodle post in the database
-view = db.view('ids/moodle', descending=True, limit=1)
-if len(view) == 1:
-    for row in view:
-        lasttime = row.key
-else:
-    lasttime = "2000-01-01T00:00:00.000Z"
-
-feeds = options['feedsMoodle']
-
-cj = cookielib.CookieJar()
-moodle = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
-data = urllib.urlencode({'username': options['moodleuser'],
-                        'password': options['moodlepass']})
-
-li = moodle.open('http://moodle.wikieducator.org/login/index.php', data)
-
-feedno = 0
-for feed in feeds:
-    rss = feedparser.parse(feed)
-    feedtitle = rss['channel']['title']
-
-    items = rss['items']
-    items.reverse()
-
-    for item in items:
-        if item['title'] == 'RSS Error' and item['description'] == 'Error reading RSS data':
-            break
-        truncated = False
-        dt = datetime.strptime(item['date'], '%a, %d %b %Y %H:%M:%S GMT')
-        we_timestamp = dt.strftime('%Y-%m-%dT%H:%M:%S.000Z')
-        if we_timestamp <= lasttime:
-            continue
-        seconds = time.mktime(dt.timetuple())
-        # strip out HTML markup before abridging, so we don't stop midtag
-        abridged = re.sub(r'<[^>]*>', '', item['summary'])
-        abridged = re.sub(r'\s*by [^.]+\.\n?', '', abridged)
-        abridged = abridged.replace('&nbsp;', ' ')
-        abridged = abridged.replace('\n', ' ')
-        abridged = abridged.strip()
-        i = len(abridged)
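-        # 137 leaves room for a three-character ellipsis within an
-        # assumed 140-character display limit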
-        if i > 137:
-            i = 137
-            while abridged[i] != ' ' and i > 0:
-                i -= 1
-            abridged = abridged[:i] + '...'
-            truncated = True
-
-        # fetch the original article, try to find author/img
-        f = moodle.open(item['link'])
-        html = lxml.html.parse(f).getroot()
-        authordiv = html.find_class('author')[0]
-        author = authordiv.findtext('a')
-        profile_url = authordiv.find('a').attrib['href']
-        pics = html.find_class('userpicture')
-        attrs = pics[0].attrib
-        imgurl = attrs['src']
-        mention = {
-                'from_user': author,
-                'from_user_name': author,
-                'created_at': item['date'],
-                'profile_image_url': imgurl,
-                'text': abridged,
-                'truncated': truncated,
-                'id': '%d%02d%03d' % (seconds, feedno, serial),
-                'profile_url': profile_url,
-                'we_source': 'moodle',
-                'we_feed': feedtitle,
-                'we_tags': ['oeru'],
-                'we_timestamp': we_timestamp,
-                'we_link': item['link']
-                }
-        db.save(mention)
-        serial += 1
-    feedno += 1
diff --git a/oeglobal-discourse.py b/oeglobal-discourse.py
deleted file mode 100644
index 38d6221ce022e7de8ffc877a3fc3b60b77964408..0000000000000000000000000000000000000000
--- a/oeglobal-discourse.py
+++ /dev/null
@@ -1,191 +0,0 @@
-#!/usr/bin/python
-
-""" Harvest messages from OEGlobal's connect forum category URLs specified in options file."""
-
-# Copyright 2016 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-import couchdb
-import json
-import urllib
-import argparse
-import requests
-# for debugging
-import logging
-import pprint
-# to deal with non-ASCII characters included in forum posts
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-
-DEBUG = False
-DELAY = 0.1         # delay in seconds between Discourse hits
-MAX_TEXT_LEN = 300  # max characters before truncation
-SOURCE = "connectoeglobal"  # the source of each mention in CouchDB
-POSTS_PER_PAGE = 20 # Discourse returns posts for a topic in lots of 20 per page
-
-# retrieve URL from config JSON
-options = json.load(open('../config/options.json', 'r'))
-#options = json.load(open('./options.json', 'r'))
-
-#logging configuration
-LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-#LogLevel = logging.INFO # or logging.INFO or logging.WARN, etc.
-LogFilename = options['logdir'] + '/connect.oeglobal.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-
-# get tag list from URL
-tagurl = options['settings-url']
-jsoncontent = urllib.urlopen(tagurl)
-reference_tags = json.loads(jsoncontent.read())["tags"]
-
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# create the CouchDB object
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-# some other settings.
-baseurl = options['oeglobal']['url']
-logging.debug('baseurl = %s', pp.pformat(baseurl))
-version = '0.3'
-headers = {'User-Agent' : 'WEnotes-OEGlobal-Discourse/%s' % (version)}
-print headers
-
-# create the command line argument parser
-parser = argparse.ArgumentParser(description='Harvest posts from Discourse Forums.')
-parser.add_argument('-f', '--full', action='store_false',
-        help='get list of categories, and then every topic in each')
-args = parser.parse_args([])
-
-def have_mention(msg_id):
-    """Return boolean showing if we already have this message."""
-    view = db.view('ids/oeglobalconnect')
-    have = (len(view[msg_id]) > 0)
-    return have
-
-# check a tag list against our reference_tags
-def interesting_tags(tags):
-    """Return list of interesting tags, or false if none."""
-    common_tags = list(set(tags) & set(reference_tags))
-    logging.debug("taglist: %s\nreference tags: %s\ncommon tags: %s", tags, reference_tags, common_tags)
-    if common_tags:
-        logging.debug("interesting tags: %s", common_tags)
-        return common_tags
-    else:
-        return False
-
-if args.full:
-    # get the list of categories
-    categories = '%s/categories.json' % (baseurl)
-    time.sleep(DELAY)
-    r = requests.get(categories, headers=headers, verify=False)
-    d = json.loads(r.text)
-    cat_list = d['category_list']['categories']
-    logging.debug("categories: %s", json.dumps(cat_list, indent=2, sort_keys=True))
-    for cat in cat_list:
-        logging.debug("category: %s(%s)", cat['name'], cat['id'])
-        topics = '%s/c/%s.json' % (baseurl, cat['id'])
-        if DEBUG:
-            print ">>>>>>>>>>>>>>>>>>>>>>>>>>>", \
-                cat['id'], cat['name'], cat['slug'], topics
-        logging.debug("category: %s, %s, %s, %s", cat['id'], cat['name'], cat['slug'], topics)
-        time.sleep(DELAY)
-        r2 = requests.get(topics, headers=headers, verify=False)
-        d = json.loads(r2.text)
-        topic_list = d['topic_list']['topics']
-        logging.debug("topics: %s", json.dumps(topic_list, indent=2, sort_keys=True))
-        for topic in topic_list:
-            pages = topic['posts_count'] / POSTS_PER_PAGE
-            if topic['posts_count'] % POSTS_PER_PAGE > 0:
-                print "adding a page with modulo %d" % (topic['posts_count'] % POSTS_PER_PAGE)
-                pages += 1
-            print "%d pages for %d posts for topic %s" % (pages, topic['posts_count'], topic['title'])
-            # iterate over every page, not just the tuple (1, pages)
-            for page in range(1, pages + 1):
-                posts = '%s/t/%s.json?page=%d' % (baseurl, topic['id'], page)
-                logging.debug("topic: %s(%s), page %d", topic['title'], topic['id'], page)
-                logging.debug("topic %s: ", json.dumps(topic, indent=2, sort_keys=True))
-                logging.debug('     tags: %s', json.dumps(topic['tags'], indent=2, sort_keys=True))
-                common_tags = interesting_tags(topic['tags'])
-                if not common_tags:
-                    logging.debug('no interesting tags')
-                    continue
-                time.sleep(DELAY)
-                r3 = requests.get(posts, headers=headers, verify=False)
-                p = json.loads(r3.text)
-                post_list = p['post_stream']['posts']
-                logging.debug("post_list %s: ", json.dumps(post_list, indent=2, sort_keys=True))
-                for post in post_list:
-                    logging.debug("post %s: ", json.dumps(post, indent=2, sort_keys=True))
-                    if post['deleted_at']:
-                        continue
-                    link = "%s/t/%s/%s" % (baseurl, post['topic_id'], post['post_number'])
-                    logging.debug('link: %s', link)
-                    if have_mention(link):
-                        logging.debug('existing link: %s', link)
-                        continue
-                    logging.debug('interesting link: %s', link)
-                    text = post['cooked'].replace('\n', ' ')
-                    text = re.sub(r'<[^>]*?>', ' ', text)   # remove HTML tags
-                    text = re.sub(r' {2,}', ' ', text)      # collapse spaces
-                    text = topic['fancy_title'].strip() + ': ' + text.strip()
-                    truncated = False
-                    i = len(text)
-                    if i > MAX_TEXT_LEN:
-                        i = MAX_TEXT_LEN
-                        while text[i] != ' ' and i > 0:
-                            i -= 1
-                        text = text[:i] + '...'
-                        truncated = True
-                    from_user_name = post['display_username']
-                    if from_user_name == '':
-                        from_user_name = post['username']
-                    profile_image_url = post['avatar_template'].replace('{size}', '48')
-                    if not re.match(r'^(https?:)?//', profile_image_url):
-                        profile_image_url = baseurl + profile_image_url
-                    # build and save the mention for every post, whether or
-                    # not its avatar URL needed the baseurl prefix
-                    mention = {
-                            'created_at': post['created_at'],
-                            'from_user': post['username'],
-                            'from_user_name': from_user_name,
-                            'id': link,
-                            'post_id': post['id'],
-                            'profile_image_url': profile_image_url,
-                            'profile_url': baseurl + '/users/' + post['username'],
-                            'text': text,
-                            'truncated': truncated,
-                            'we_link': link,
-                            'we_source': SOURCE,
-                            'we_version': version,
-                            'we_tags': common_tags,
-                            'we_timestamp': post['created_at']
-                            }
-                    if DEBUG:
-                        print json.dumps(mention, indent=2, sort_keys=True)
-                    else:
-                        logging.info('adding post %s by %s with tag(s): %s', mention['id'], from_user_name, common_tags)
-                        db.save(mention)
diff --git a/samples/forums_tags.sample b/samples/forums_tags.sample
new file mode 100644
index 0000000000000000000000000000000000000000..4d8dade193f7128ce85a03902f7272d9cc4703bd
--- /dev/null
+++ b/samples/forums_tags.sample
@@ -0,0 +1,475 @@
+{   u'primary_groups': [],
+    u'topic_list': {   u'can_create_topic': False,
+                       u'draft': None,
+                       u'draft_key': u'new_topic',
+                       u'draft_sequence': None,
+                       u'per_page': 30,
+                       u'tags': [   u'ccom102',
+                                    u'ccom101',
+                                    u'ipm104',
+                                    u'csf101',
+                                    u'ccom103',
+                                    u'art101',
+                                    u'lida103',
+                                    u'lida102',
+                                    u'ient101',
+                                    u'lida101',
+                                    u'whme103',
+                                    u'whme102',
+                                    u'ient102',
+                                    u'lida104',
+                                    u'whme101',
+                                    u'ipm101',
+                                    u'support',
+                                    u'ipm103',
+                                    u'rrap101',
+                                    u'askoeru',
+                                    u'ipm102',
+                                    u'technology',
+                                    u'rrap102',
+                                    u'pman102',
+                                    u'rrap103',
+                                    u'rrap104',
+                                    u'pman101',
+                                    u'pman103',
+                                    u'mae101',
+                                    u'csf103'],
+                       u'topics': [   {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T19:55:34.356Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T19:55:34.278Z',
+                                          u'fancy_title': u'CCOM102 Discussion Forum: Course Wrap',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 259,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T19:55:34.356Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-discussion-forum-course-wrap',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Discussion Forum: Course Wrap',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 55,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T19:52:50.172Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T19:52:50.092Z',
+                                          u'fancy_title': u'CCOM102 Introducing a speaker',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 258,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T19:52:50.172Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-introducing-a-speaker',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Introducing a speaker',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 48,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T19:49:20.342Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T19:49:20.210Z',
+                                          u'fancy_title': u'CCOM102 Effective Sound Bites',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 257,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T19:49:20.342Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-effective-sound-bites',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Effective Sound Bites',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 57,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T19:43:50.318Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T19:43:50.245Z',
+                                          u'fancy_title': u'CCOM102 Discussion Forum: Body Language Analysis',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 256,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T19:43:50.318Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-discussion-forum-body-language-analysis',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Discussion Forum: Body Language Analysis',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 62,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T19:41:33.551Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T19:41:33.419Z',
+                                          u'fancy_title': u'CCOM102 Elevator Speech',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 255,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T19:41:33.551Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-elevator-speech',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Elevator Speech',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 58,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T18:46:45.255Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T18:46:45.152Z',
+                                          u'fancy_title': u'CCOM102 Ethical vs. Unethical Persuasion',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 254,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T18:46:45.255Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-ethical-vs-unethical-persuasion',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Ethical vs. Unethical Persuasion',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 61,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T18:40:08.637Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T18:40:08.511Z',
+                                          u'fancy_title': u'CCOM102 Informative Presentation Challenge',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 253,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T18:40:08.637Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-informative-presentation-challenge',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Informative Presentation Challenge',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 50,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T18:15:39.947Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T18:15:39.853Z',
+                                          u'fancy_title': u'CCOM102 Discussion Forum - Informative Presentation Outline',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 252,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T18:15:39.947Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-discussion-forum-informative-presentation-outline',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Discussion Forum - Informative Presentation Outline',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 53,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T18:00:40.047Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T18:00:39.973Z',
+                                          u'fancy_title': u'CCOM102 Communication Style Flexing Challenge',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 251,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T18:00:40.047Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-communication-style-flexing-challenge',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Communication Style Flexing Challenge',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 52,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-04-24T17:59:04.952Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-04-24T17:59:04.868Z',
+                                          u'fancy_title': u'CCOM102 Discussion forum - public speaking strategies',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 250,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-04-24T17:59:04.952Z',
+                                          u'last_poster_username': u'nsingular',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 39}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom102-discussion-forum-public-speaking-strategies',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM102 Discussion forum - public speaking strategies',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 56,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2017-02-03T23:37:20.855Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2017-02-03T23:37:20.799Z',
+                                          u'fancy_title': u'CCOM 102: Creating Effective Business Presentations',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 204,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2017-02-03T23:37:20.855Z',
+                                          u'last_poster_username': u'gmorong',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 30}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom-102-creating-effective-business-presentations',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM 102: Creating Effective Business Presentations',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 66,
+                                          u'visible': True},
+                                      {   u'archetype': u'regular',
+                                          u'archived': False,
+                                          u'bookmarked': None,
+                                          u'bumped': True,
+                                          u'bumped_at': u'2016-10-11T20:48:21.319Z',
+                                          u'category_id': 22,
+                                          u'closed': False,
+                                          u'created_at': u'2016-09-15T22:00:29.913Z',
+                                          u'fancy_title': u'CCOM 102 Creating Effective Business Presentations',
+                                          u'featured_link': None,
+                                          u'has_summary': False,
+                                          u'highest_post_number': 1,
+                                          u'id': 141,
+                                          u'image_url': None,
+                                          u'last_posted_at': u'2016-09-15T22:00:31.987Z',
+                                          u'last_poster_username': u'mackiwg',
+                                          u'like_count': 0,
+                                          u'liked': None,
+                                          u'pinned': False,
+                                          u'pinned_globally': False,
+                                          u'posters': [   {   u'description': u'Original Poster, Most Recent Poster',
+                                                              u'extras': u'latest single',
+                                                              u'primary_group_id': None,
+                                                              u'user_id': 2}],
+                                          u'posts_count': 1,
+                                          u'reply_count': 0,
+                                          u'slug': u'ccom-102-creating-effective-business-presentations',
+                                          u'tags': [u'ccom102'],
+                                          u'title': u'CCOM 102 Creating Effective Business Presentations',
+                                          u'unpinned': None,
+                                          u'unseen': False,
+                                          u'views': 158,
+                                          u'visible': True}]},
+    u'users': [   {   u'avatar_template': u'/letter_avatar_proxy/v2/letter/n/c2a13f/{size}.png',
+                      u'id': 39,
+                      u'username': u'nsingular'},
+                  {   u'avatar_template': u'/letter_avatar_proxy/v2/letter/g/b3f665/{size}.png',
+                      u'id': 30,
+                      u'username': u'gmorong'},
+                  {   u'avatar_template': u'/user_avatar/forums.oeru.org/mackiwg/{size}/24_1.png',
+                      u'id': 2,
+                      u'username': u'mackiwg'}]}
+
diff --git a/samples/mastodon_toot.json b/samples/mastodon_toot.json
new file mode 100644
index 0000000000000000000000000000000000000000..c04498ac0cfbd379c70b10f4dc4fea694730aae0
--- /dev/null
+++ b/samples/mastodon_toot.json
@@ -0,0 +1,126 @@
+{   '108915921927907934': {   u'account': {   u'acct': u'Sohyell',
+                                                      u'avatar': u'https://mastodon.oeru.org/avatars/original/missing.png',
+                                                      u'avatar_static': u'https://mastodon.oeru.org/avatars/original/missing.png',
+                                                      u'bot': False,
+                                                      u'created_at': u'2022-01-27T00:00:00.000Z',
+                                                      u'discoverable': False,
+                                                      u'display_name': u'Sohyell',
+                                                      u'emojis': [],
+                                                      u'fields': [],
+                                                      u'followers_count': 1,
+                                                      u'following_count': 2,
+                                                      u'group': False,
+                                                      u'header': u'https://mastodon.oeru.org/headers/original/missing.png',
+                                                      u'header_static': u'https://mastodon.oeru.org/headers/original/missing.png',
+                                                      u'id': u'93556',
+                                                      u'last_status_at': u'2022-08-31',
+                                                      u'locked': True,
+                                                      u'note': u'',
+                                                      u'statuses_count': 4,
+                                                      u'url': u'https://mastodon.oeru.org/@Sohyell',
+                                                      u'username': u'Sohyell'},
+                                      u'application': None,
+                                      u'card': None,
+                                      u'content': u'<p>Bula All,</p><p>I believe this will be a great idea as the teachers will be engaged in creating original TR. This would allow the teachers to collaborate and communicate with the other teachers. Teachers would also feel challenged when they see other teachers&#39; work, which would likely motivate them to do better. </p><p>This type of initiative will significantly enhance the teachers&#39; ability to develop TRs suitable to their individual classes.</p><p><a href="https://mastodon.oeru.org/tags/DS4OERS" class="mention hashtag" rel="tag">#<span>DS4OERS</span></a><br /><a href="https://mastodon.oeru.org/tags/thoughtactivity" class="mention hashtag" rel="tag">#<span>thoughtactivity</span></a></p><p>Should Fiji have an OER day?</p>',
+                                      u'created_at': u'2022-08-31T05:51:18.249Z',
+                                      u'edited_at': None,
+                                      u'emojis': [],
+                                      u'favourites_count': 1,
+                                      u'id': u'108915921927907934',
+                                      u'in_reply_to_account_id': None,
+                                      u'in_reply_to_id': None,
+                                      'instance': u'mastodon.oeru',
+                                      u'language': u'en',
+                                      u'media_attachments': [],
+                                      u'mentions': [],
+                                      u'poll': {   u'emojis': [],
+                                                   u'expired': False,
+                                                   u'expires_at': u'2022-09-01T05:51:18.240Z',
+                                                   u'id': u'8062',
+                                                   u'multiple': False,
+                                                   u'options': [   {   u'title': u'Yes',
+                                                                       u'votes_count': 1},
+                                                                   {   u'title': u'No',
+                                                                       u'votes_count': 0}],
+                                                   u'voters_count': 1,
+                                                   u'votes_count': 1},
+                                      'profile_url': u'https://mastodon.oeru.org/@Sohyell',
+                                      u'reblog': None,
+                                      u'reblogs_count': 0,
+                                      u'replies_count': 0,
+                                      u'sensitive': False,
+                                      u'spoiler_text': u'',
+                                      u'tags': [   {   u'name': u'ds4oers',
+                                                       u'url': u'https://mastodon.oeru.org/tags/ds4oers'},
+                                                   {   u'name': u'thoughtactivity',
+                                                       u'url': u'https://mastodon.oeru.org/tags/thoughtactivity'}],
+                                      'text': u'Bula All,I believe this will be a great idea as the teachers will be engaged in creating original TR. This would allow the teachers to collaborate and communicate with the other teachers. Teachers...',
+                                      'truncated': True,
+                                      u'uri': u'https://mastodon.oeru.org/users/Sohyell/statuses/108915921927907934',
+                                      u'url': u'https://mastodon.oeru.org/@Sohyell/108915921927907934',
+                                      'user': {   'name': u'Sohyell',
+                                                  'profile_image_url': u'https://mastodon.oeru.org/avatars/original/missing.png',
+                                                  'screen_name': u'Sohyell'},
+                                      u'visibility': u'public',
+                                      'we_source': 'mastodon',
+                                      'we_tags': [u'ds4oers'],
+                                      'we_timestamp': '2022-08-31T05:51:01.000Z'},
+            '108919831298971808': {   u'account': {   u'acct': u'Sohyell',
+                                                      u'avatar': u'https://mastodon.oeru.org/avatars/original/missing.png',
+                                                      u'avatar_static': u'https://mastodon.oeru.org/avatars/original/missing.png',
+                                                      u'bot': False,
+                                                      u'created_at': u'2022-01-27T00:00:00.000Z',
+                                                      u'discoverable': False,
+                                                      u'display_name': u'Sohyell',
+                                                      u'emojis': [],
+                                                      u'fields': [],
+                                                      u'followers_count': 1,
+                                                      u'following_count': 2,
+                                                      u'group': False,
+                                                      u'header': u'https://mastodon.oeru.org/headers/original/missing.png',
+                                                      u'header_static': u'https://mastodon.oeru.org/headers/original/missing.png',
+                                                      u'id': u'93556',
+                                                      u'last_status_at': u'2022-08-31',
+                                                      u'locked': True,
+                                                      u'note': u'',
+                                                      u'statuses_count': 4,
+                                                      u'url': u'https://mastodon.oeru.org/@Sohyell',
+                                                      u'username': u'Sohyell'},
+                                      u'application': None,
+                                      u'card': None,
+                                      u'content': u'<p>Bula All,</p><p>OER has the ability to improve the classroom learning process and as a primary school teacher I believe students should also be included in the OER development process</p><p><a href="https://mastodon.oeru.org/tags/DS4OERS" class="mention hashtag" rel="tag">#<span>DS4OERS</span></a></p>',
+                                      u'created_at': u'2022-08-31T22:25:30.521Z',
+                                      u'edited_at': None,
+                                      u'emojis': [],
+                                      u'favourites_count': 0,
+                                      u'id': u'108919831298971808',
+                                      u'in_reply_to_account_id': None,
+                                      u'in_reply_to_id': None,
+                                      'instance': u'mastodon.oeru',
+                                      u'language': u'en',
+                                      u'media_attachments': [],
+                                      u'mentions': [],
+                                      u'poll': None,
+                                      'profile_url': u'https://mastodon.oeru.org/@Sohyell',
+                                      u'reblog': None,
+                                      u'reblogs_count': 0,
+                                      u'replies_count': 0,
+                                      u'sensitive': False,
+                                      u'spoiler_text': u'',
+                                      u'tags': [   {   u'name': u'ds4oers',
+                                                       u'url': u'https://mastodon.oeru.org/tags/ds4oers'}],
+                                      'text': u'Bula All,OER has the ability to improve the classroom learning process and as a primary school teacher I believe students should also be included in the OER development process#DS4OERS',
+                                      'truncated': False,
+                                      u'uri': u'https://mastodon.oeru.org/users/Sohyell/statuses/108919831298971808',
+                                      u'url': u'https://mastodon.oeru.org/@Sohyell/108919831298971808',
+                                      'user': {   'name': u'Sohyell',
+                                                  'profile_image_url': u'https://mastodon.oeru.org/avatars/original/missing.png',
+                                                  'screen_name': u'Sohyell'},
+                                      u'visibility': u'public',
+                                      'we_source': 'mastodon',
+                                      'we_tags': [u'ds4oers'],
+                                      'we_timestamp': '2022-08-31T22:25:03.000Z'}}
diff --git a/samples/mastodon_toot2.json b/samples/mastodon_toot2.json
new file mode 100644
index 0000000000000000000000000000000000000000..bca3ed059535636570ba98d66ca51cc9f8da0564
--- /dev/null
+++ b/samples/mastodon_toot2.json
@@ -0,0 +1,152 @@
+{
+    '108915921927907934': {
+        u'account': {
+            u'acct': u'Sohyell',
+            u'avatar': u'https://mastodon.oeru.org/avatars/original/missing.png',
+            u'avatar_static': u'https://mastodon.oeru.org/avatars/original/missing.png',
+            u'bot': False,
+            u'created_at': u'2022-01-27T00:00:00.000Z',
+            u'discoverable': False,
+            u'display_name': u'Sohyell',
+            u'emojis': [],
+            u'fields': [],
+            u'followers_count': 1,
+            u'following_count': 2,
+            u'group': False,
+            u'header': u'https://mastodon.oeru.org/headers/original/missing.png',
+            u'header_static': u'https://mastodon.oeru.org/headers/original/missing.png',
+            u'id': u'93556',
+            u'last_status_at': u'2022-08-31',
+            u'locked': True,
+            u'note': u'',
+            u'statuses_count': 4,
+            u'url': u'https://mastodon.oeru.org/@Sohyell',
+            u'username': u'Sohyell'
+        },
+        u'application': None,
+        u'card': None,
+        u'content': u'<p>Bula All,</p><p>I believe this will be a great idea as the teachers will be engaged in creating original TR. This would allow the teachers to collaborate and communicate with the other teachers. Teachers would also feel challenged when they see other teachers&#39; work, which would likely motivate them to do better. </p><p>This type of initiative will significantly enhance the teachers&#39; ability to develop TRs suitable to their individual classes.</p><p><a href="https://mastodon.oeru.org/tags/DS4OERS" class="mention hashtag" rel="tag">#<span>DS4OERS</span></a><br /><a href="https://mastodon.oeru.org/tags/thoughtactivity" class="mention hashtag" rel="tag">#<span>thoughtactivity</span></a></p><p>Should Fiji have an OER day?</p>',
+        u'created_at': u'2022-08-31T05:51:18.249Z',
+        u'edited_at': None,
+        u'emojis': [],
+        u'favourites_count': 1,
+        u'id': u'108915921927907934',
+        u'in_reply_to_account_id': None,
+        u'in_reply_to_id': None,
+        'instance': u'mastodon.oeru',
+        u'language': u'en',
+        u'media_attachments': [],
+        u'mentions': [],
+        u'poll': {
+            u'emojis': [],
+            u'expired': False,
+            u'expires_at': u'2022-09-01T05:51:18.240Z',
+            u'id': u'8062',
+            u'multiple': False,
+            u'options': [
+                {
+                    u'title': u'Yes',
+                    u'votes_count': 1
+                },{
+                    u'title': u'No',
+                    u'votes_count': 0
+                }
+            ],
+            u'voters_count': 1,
+            u'votes_count': 1
+        },
+        'profile_url': u'https://mastodon.oeru.org/@Sohyell',
+        u'reblog': None,
+        u'reblogs_count': 0,
+        u'replies_count': 0,
+        u'sensitive': False,
+        u'spoiler_text': u'',
+        u'tags': [
+            {
+                u'name': u'ds4oers',
+                u'url': u'https://mastodon.oeru.org/tags/ds4oers'
+            },{
+                u'name': u'thoughtactivity',
+                u'url': u'https://mastodon.oeru.org/tags/thoughtactivity'
+            }
+        ],
+        'text': u'Bula All,I believe this will be a great idea as the teachers will be engaged in creating original TR. This would allow the teachers to collaborate and communicate with the other teachers. Teachers...',
+        'truncated': True,
+        u'uri': u'https://mastodon.oeru.org/users/Sohyell/statuses/108915921927907934',
+        u'url': u'https://mastodon.oeru.org/@Sohyell/108915921927907934',
+        'user': {
+            'name': u'Sohyell',
+            'profile_image_url': u'https://mastodon.oeru.org/avatars/original/missing.png',
+            'screen_name': u'Sohyell'
+        },
+        u'visibility': u'public',
+        'we_source': 'mastodon',
+        'we_tags': [u'ds4oers'],
+        'we_timestamp': '2022-08-31T05:51:01.000Z'
+    },
+    '108919831298971808': {
+        u'account': {
+            u'acct': u'Sohyell',
+            u'avatar': u'https://mastodon.oeru.org/avatars/original/missing.png',
+            u'avatar_static': u'https://mastodon.oeru.org/avatars/original/missing.png',
+            u'bot': False,
+            u'created_at': u'2022-01-27T00:00:00.000Z',
+            u'discoverable': False,
+            u'display_name': u'Sohyell',
+            u'emojis': [],
+            u'fields': [],
+            u'followers_count': 1,
+            u'following_count': 2,
+            u'group': False,
+            u'header': u'https://mastodon.oeru.org/headers/original/missing.png',
+            u'header_static': u'https://mastodon.oeru.org/headers/original/missing.png',
+            u'id': u'93556',
+            u'last_status_at': u'2022-08-31',
+            u'locked': True,
+            u'note': u'',
+            u'statuses_count': 4,
+            u'url': u'https://mastodon.oeru.org/@Sohyell',
+            u'username': u'Sohyell'
+        },
+        u'application': None,
+        u'card': None,
+        u'content': u'<p>Bula All,</p><p>OER has the ability to improve the classroom learning process and as a primary school teacher I believe students should also be included in the OER development process</p><p><a href="https://mastodon.oeru.org/tags/DS4OERS" class="mention hashtag" rel="tag">#<span>DS4OERS</span></a></p>',
+        u'created_at': u'2022-08-31T22:25:30.521Z',
+        u'edited_at': None,
+        u'emojis': [],
+        u'favourites_count': 0,
+        u'id': u'108919831298971808',
+        u'in_reply_to_account_id': None,
+        u'in_reply_to_id': None,
+        'instance': u'mastodon.oeru',
+        u'language': u'en',
+        u'media_attachments': [],
+        u'mentions': [],
+        u'poll': None,
+        'profile_url': u'https://mastodon.oeru.org/@Sohyell',
+        u'reblog': None,
+        u'reblogs_count': 0,
+        u'replies_count': 0,
+        u'sensitive': False,
+        u'spoiler_text': u'',
+        u'tags': [
+            {
+                u'name': u'ds4oers',
+                u'url': u'https://mastodon.oeru.org/tags/ds4oers'
+            }
+        ],
+        'text': u'Bula All,OER has the ability to improve the classroom learning process and as a primary school teacher I believe students should also be included in the OER development process#DS4OERS',
+        'truncated': False,
+        u'uri': u'https://mastodon.oeru.org/users/Sohyell/statuses/108919831298971808',
+        u'url': u'https://mastodon.oeru.org/@Sohyell/108919831298971808',
+        'user': {
+            'name': u'Sohyell',
+            'profile_image_url': u'https://mastodon.oeru.org/avatars/original/missing.png',
+            'screen_name': u'Sohyell'
+        },
+        u'visibility': u'public',
+        'we_source': 'mastodon',
+        'we_tags': [u'ds4oers'],
+        'we_timestamp': '2022-08-31T22:25:03.000Z'
+    }
+}
diff --git a/samples/mastodon_toot_short.json b/samples/mastodon_toot_short.json
new file mode 100644
index 0000000000000000000000000000000000000000..f603f8a056e0439c76287204831eccd77ff41eaa
--- /dev/null
+++ b/samples/mastodon_toot_short.json
@@ -0,0 +1,5 @@
+{
+    '108915921927907934': {
+        u'id': u'108915921927907934'
+    }
+}
diff --git a/samples/mastodon_toot_test.py b/samples/mastodon_toot_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d6cb471055fedc27a7bb36de3248d07ffac8bfd
--- /dev/null
+++ b/samples/mastodon_toot_test.py
@@ -0,0 +1,26 @@
+import json
+import pprint
+
+pp = pprint.PrettyPrinter(indent=4)
+
+data = {
+    's1': {
+        '1': {
+            'id': '1', 'val': 1, 'other': 'blue'
+        }
+    },
+    's2': {
+        '1': {
+            'id': '2', 'val': 2, 'other': 'red'
+        }
+    },
+    # NB: duplicate key -- in a Python dict literal the last value for a
+    # repeated key silently wins, so this entry replaces the 's1' above
+    's1': {
+        '1': {
+            'id': '2', 'val': 3, 'other': 'green'
+        }
+    }
+}
+
+print(data.items())
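+
+# A minimal check of the duplicate-key behaviour this sample appears to
+# probe (an assumption -- the original states no intent): only two keys
+# survive, and 's1' carries the values from its second definition.
+assert len(data) == 2
+assert data['s1']['1']['val'] == 3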
diff --git a/samples/regextest.py b/samples/regextest.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae4a347f4d3d2dc3f95f37ceabb90a95035fa564
--- /dev/null
+++ b/samples/regextest.py
@@ -0,0 +1,16 @@
+import re
+
+txt = "https://mastodon.milll.ws/@lightweight"
+
+try:
+#    x = re.search(r"^https?://(.*)/@(.*)$", txt)
+    x = re.search(r"(https?)://(.*)/@", txt)
+except Exception as e:
+    print('error', e)
+
+# x is None if the URL didn't match the pattern above
+print(x.group(1) + '://' + x.group(2))   # scheme and host
+
+y = x.group(2).split('.')
+
+print(y[0] + '.' + y[1])                 # first two labels of the host
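+
+# For comparison, a regex-free sketch of the same extraction using only
+# the standard library (an illustrative addition, assuming Python 3; the
+# original script stops at the prints above):
+from urllib.parse import urlsplit
+
+parts = urlsplit(txt)
+print(parts.scheme + '://' + parts.netloc)        # https://mastodon.milll.ws
+print('.'.join(parts.netloc.split('.')[:2]))      # mastodon.milll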
diff --git a/samples/wenotes-lida101-20180313_231643.py b/samples/wenotes-lida101-20180313_231643.py
new file mode 100644
index 0000000000000000000000000000000000000000..4430e9d11313bcf2d25a693e2f3bb152184b3237
--- /dev/null
+++ b/samples/wenotes-lida101-20180313_231643.py
@@ -0,0 +1,110 @@
+feeds = [
+{ 'from_user': '', 'from_user_name': 'Jadia7', 'from_user_wp_id': 318, 'site_id': 65, 'from_user_email': 'Kerinesylvester@gmail.com', 'tag': 'lida101', 'feed_url': 'Jadia', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Aakash', 'from_user_wp_id': 161, 'site_id': 65, 'from_user_email': 'akashmita10@gmail.com', 'tag': 'lida101', 'feed_url': 'eaxample.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Musivale', 'from_user_wp_id': 300, 'site_id': 65, 'from_user_email': 'edwardlusimbo@gmail.com', 'tag': 'lida101', 'feed_url': 'http://CBT.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Dr. Naraginti Amareswaran', 'from_user_wp_id': 356, 'site_id': 65, 'from_user_email': 'amareswaran@gmail.com', 'tag': 'lida101', 'feed_url': 'amareswaran', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Angelica Martinez', 'from_user_wp_id': 223, 'site_id': 65, 'from_user_email': 'angelica.martinez.ochoa@gmail.com', 'tag': 'lida101', 'feed_url': 'https://drive.google.com/open?id=1R5SS7Lma2JFzU4yYT3b0h1QeWHzBNT1E', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Angelos Konstantinidis', 'from_user_wp_id': 439, 'site_id': 65, 'from_user_email': 'angelntini@gmail.com', 'tag': 'lida101', 'feed_url': 'http://angelos.ict4all.gr/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'anikkumar', 'from_user_wp_id': 297, 'site_id': 65, 'from_user_email': 'anikkumar551@gmail.com', 'tag': 'lida101', 'feed_url': 'aktv', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Anil Prasad P', 'from_user_wp_id': 434, 'site_id': 65, 'from_user_email': 'apletters@gmail.com', 'tag': 'lida101', 'feed_url': 'http://www.wikieducator.org/Anil_Prasad', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'assanakammana', 'from_user_wp_id': 328, 'site_id': 65, 'from_user_email': 'ak.kayalad@gmail.com', 'tag': 'lida101', 'feed_url': 'lida101', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Ashok Atrey', 'from_user_wp_id': 162, 'site_id': 65, 'from_user_email': 'Ashok.supp@gmail.com', 'tag': 'lida101', 'feed_url': 'Google.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Charlie Gihiala', 'from_user_wp_id': 282, 'site_id': 65, 'from_user_email': 'awocapitalelsuru@gmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'dave', 'from_user_wp_id': 2, 'site_id': 65, 'from_user_email': 'dave@oerfoundation.org', 'tag': 'lida101', 'feed_url': 'https://davelane.nz', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'James', 'from_user_wp_id': 220, 'site_id': 65, 'from_user_email': 'fantasyworld23@outlook.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'David Bravo Ortiz', 'from_user_wp_id': 252, 'site_id': 65, 'from_user_email': 'davidbravoortiz@yahoo.es', 'tag': 'lida101', 'feed_url': 'trytospeakit.blogspot.com.es', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'deekonda', 'from_user_wp_id': 326, 'site_id': 65, 'from_user_email': 'deekonda2014@gmail.com', 'tag': 'lida101', 'feed_url': 'http://ravi.com/feed.rss', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Rama Devchand', 'from_user_wp_id': 431, 'site_id': 65, 'from_user_email': 'devchand_r@usp.ac.fj', 'tag': 'lida101', 'feed_url': 'http://course.oeru.org/lida/101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Durgesh Kumar Verma', 'from_user_wp_id': 294, 'site_id': 65, 'from_user_email': 'durgeshkumarverma4@gmail.com', 'tag': 'lida101', 'feed_url': 'Fisheries science', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Epio Tom', 'from_user_wp_id': 372, 'site_id': 65, 'from_user_email': 'epiotom@gmai.com', 'tag': 'lida101', 'feed_url': 'http//www.Google.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Fayrouz Elserogy', 'from_user_wp_id': 128, 'site_id': 65, 'from_user_email': 'fayrouzelserogy@aucegypt.edu', 'tag': 'lida101', 'feed_url': 'fayrouzelserogy', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'hollissankar', 'from_user_wp_id': 413, 'site_id': 65, 'from_user_email': 'hcsankar@gmail.com', 'tag': 'lida101', 'feed_url': 'https://www.facebook.com/hollis.sankar', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Ismail', 'from_user_wp_id': 334, 'site_id': 65, 'from_user_email': 'Ismailyassin106@gmail.com', 'tag': 'lida101', 'feed_url': 'Physics', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Iva Rawaico', 'from_user_wp_id': 429, 'site_id': 65, 'from_user_email': 'ivarawaico@gmail.com', 'tag': 'lida101', 'feed_url': 'ivarawaico@gmail.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Julie Reed', 'from_user_wp_id': 187, 'site_id': 65, 'from_user_email': 'jreed@jcsu.edu', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Justine Pepperell', 'from_user_wp_id': 251, 'site_id': 65, 'from_user_email': 'justine.pepperell@gmail.com', 'tag': 'lida101', 'feed_url': 'www.citizenmoderno.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Ana Rosa', 'from_user_wp_id': 209, 'site_id': 65, 'from_user_email': 'arblue@shaw.ca', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/interactions/course-feed/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'mamafantu', 'from_user_wp_id': 164, 'site_id': 65, 'from_user_email': 'afratparabodar@gmail.com', 'tag': 'lida101', 'feed_url': 'http://kingtv.org/all-sports/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Maria', 'from_user_wp_id': 134, 'site_id': 65, 'from_user_email': 'mariamaged@aucgept.edu', 'tag': 'lida101', 'feed_url': 'http://mariamaged.wordpress.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Ayo', 'from_user_wp_id': 385, 'site_id': 65, 'from_user_email': 'Victorypublishing77@yahoo.com', 'tag': 'lida101', 'feed_url': 'Graphics', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'mingle76', 'from_user_wp_id': 210, 'site_id': 65, 'from_user_email': 'mingle76@gmail.com', 'tag': 'lida101', 'feed_url': 'https://megingle.wordpress.com/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'moasaf', 'from_user_wp_id': 129, 'site_id': 65, 'from_user_email': 'mohamedsafwat@aucegypt.edu', 'tag': 'lida101', 'feed_url': 'https://diglit.creativitycourse.org/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Mohamed Hatem', 'from_user_wp_id': 130, 'site_id': 65, 'from_user_email': 'm_hatem@aucegypt.edu', 'tag': 'lida101', 'feed_url': 'https://mohamedhatemblog.wordpress.com/about/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Narayan Prasad Chaudhari', 'from_user_wp_id': 321, 'site_id': 65, 'from_user_email': 'npc.riebhopal@gmail.com', 'tag': 'lida101', 'feed_url': 'https://wikieducator.org/User:Narayana', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'NELLIE DEUTSCH', 'from_user_wp_id': 387, 'site_id': 65, 'from_user_email': 'nellie.muller.deutsch@gmail.com', 'tag': 'lida101', 'feed_url': 'https://nellie-deutsch.com/feed.rss', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'nishasingh', 'from_user_wp_id': 342, 'site_id': 65, 'from_user_email': 'nisha2k04@gmail.com', 'tag': 'lida101', 'feed_url': 'https://myonlinevoyage.blogspot.in/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'napathak02', 'from_user_wp_id': 391, 'site_id': 65, 'from_user_email': 'niyatipathak02@gmail.com', 'tag': 'lida101', 'feed_url': 'niaytipathak.blogspot.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Parveen', 'from_user_wp_id': 172, 'site_id': 65, 'from_user_email': 'parveenwrites@gmail.com', 'tag': 'lida101', 'feed_url': 'Unskilled.in', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Pradip babasaheb shelke', 'from_user_wp_id': 289, 'site_id': 65, 'from_user_email': 'pradipshelke49@gmail.com', 'tag': 'lida101', 'feed_url': 'www.researchfellowjrf.wordpress.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Dr. Rajeev Tyagi', 'from_user_wp_id': 199, 'site_id': 65, 'from_user_email': 'rajeevtyagi1971@gmail.com', 'tag': 'lida101', 'feed_url': 'drrajeev.in', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Rajesh Kumar', 'from_user_wp_id': 259, 'site_id': 65, 'from_user_email': 'rajeshkrm1545@gmail.com', 'tag': 'lida101', 'feed_url': 'b sc it', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Rekha', 'from_user_wp_id': 248, 'site_id': 65, 'from_user_email': 'govind.up28@gmail.com', 'tag': 'lida101', 'feed_url': 'DLEd', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'royal2018', 'from_user_wp_id': 358, 'site_id': 65, 'from_user_email': 'theoabel2018@gmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Sagar Mandal', 'from_user_wp_id': 304, 'site_id': 65, 'from_user_email': 'sagarmandal.m@gmail.com', 'tag': 'lida101', 'feed_url': 'online learning.edu', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'SA Shuvo sheikh', 'from_user_wp_id': 330, 'site_id': 65, 'from_user_email': 'mdsuvosheikh@gmail.com', 'tag': 'lida101', 'feed_url': 'facebook.com/diary.unfinished', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'smartie', 'from_user_wp_id': 264, 'site_id': 65, 'from_user_email': 'srisarvamech@gmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Sandeep', 'from_user_wp_id': 240, 'site_id': 65, 'from_user_email': 'sr480029@gmail.com', 'tag': 'lida101', 'feed_url': 'ssdds1087.blogspot.co.in', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Terezinha Marcondes Diniz Biazi', 'from_user_wp_id': 217, 'site_id': 65, 'from_user_email': 'emebiazi@hotmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Cheryl Todd', 'from_user_wp_id': 257, 'site_id': 65, 'from_user_email': 'toddcher@meredith.edu', 'tag': 'lida101', 'feed_url': 'MCOER', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Hamaluba', 'from_user_wp_id': 215, 'site_id': 65, 'from_user_email': 'thamaluba@staff.bou.ac.bw', 'tag': 'lida101', 'feed_url': 'https://www.blogger.com/blogger.g?blogID=7674153765285536169#allposts', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Venkata', 'from_user_wp_id': 253, 'site_id': 65, 'from_user_email': 'Umavenkata1992@gmail.com', 'tag': 'lida101', 'feed_url': 'Google', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Lisa', 'from_user_wp_id': 206, 'site_id': 65, 'from_user_email': 'mccutchen_l@usp.ac.fj', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'vili', 'from_user_wp_id': 430, 'site_id': 65, 'from_user_email': 'togavou8@gmail.com', 'tag': 'lida101', 'feed_url': 'none', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'vulani', 'from_user_wp_id': 355, 'site_id': 65, 'from_user_email': 'prosperitynkalo@gmail.com', 'tag': 'lida101', 'feed_url': 'travel operation', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'wasilat raji', 'from_user_wp_id': 165, 'site_id': 65, 'from_user_email': 'wasilatraji@yahoo.com', 'tag': 'lida101', 'feed_url': 'www.nutrion.comblogfeed URL', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+{ 'from_user': '', 'from_user_name': 'Georgios Zardavas', 'from_user_wp_id': 274, 'site_id': 65, 'from_user_email': 'zardavas@csd.auth.gr', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+
+]
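+
+# Illustrative sketch only (an assumption, not part of the captured data):
+# entries shaped like the above are presumably written to the wenotes
+# CouchDB by array-to-feeds.py along these lines, mirroring how the
+# harvesters connect via couchdb.Server and an options.json:
+#
+#     import couchdb, json
+#     options = json.load(open('../config/options.json', 'r'))
+#     db = couchdb.Server(options['url'])[options['db']]
+#     for feed in feeds:
+#         db.save(feed)   # CouchDB assigns an _id when none is given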
diff --git a/samples/wenotes-lida101.py b/samples/wenotes-lida101.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a973a3da8bac2a49dfc2d02f0528ddb08cc3bf0
--- /dev/null
+++ b/samples/wenotes-lida101.py
@@ -0,0 +1,56 @@
+feeds = [
+{ 'from_user': '537jadia', 'from_user_name': 'Jadia7', 'from_user_wp_id': 318, 'site_id': 65, 'from_user_email': 'Kerinesylvester@gmail.com', 'tag': 'lida101', 'feed_url': 'Jadia', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'aakshmita10', 'from_user_name': 'Aakash', 'from_user_wp_id': 161, 'site_id': 65, 'from_user_email': 'akashmita10@gmail.com', 'tag': 'lida101', 'feed_url': 'eaxample.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'aeyz', 'from_user_name': 'Musivale', 'from_user_wp_id': 300, 'site_id': 65, 'from_user_email': 'edwardlusimbo@gmail.com', 'tag': 'lida101', 'feed_url': 'http://CBT.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'amareswaran', 'from_user_name': 'Dr. Naraginti Amareswaran', 'from_user_wp_id': 356, 'site_id': 65, 'from_user_email': 'amareswaran@gmail.com', 'tag': 'lida101', 'feed_url': 'amareswaran', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'angelicamartinez', 'from_user_name': 'Angelica Martinez', 'from_user_wp_id': 223, 'site_id': 65, 'from_user_email': 'angelica.martinez.ochoa@gmail.com', 'tag': 'lida101', 'feed_url': 'https://drive.google.com/open?id=1R5SS7Lma2JFzU4yYT3b0h1QeWHzBNT1E', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'angelntini', 'from_user_name': 'Angelos Konstantinidis', 'from_user_wp_id': 439, 'site_id': 65, 'from_user_email': 'angelntini@gmail.com', 'tag': 'lida101', 'feed_url': 'http://angelos.ict4all.gr/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'anikkumar', 'from_user_name': 'anikkumar', 'from_user_wp_id': 297, 'site_id': 65, 'from_user_email': 'anikkumar551@gmail.com', 'tag': 'lida101', 'feed_url': 'aktv', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'anil', 'from_user_name': 'Anil Prasad P', 'from_user_wp_id': 434, 'site_id': 65, 'from_user_email': 'apletters@gmail.com', 'tag': 'lida101', 'feed_url': 'http://www.wikieducator.org/Anil_Prasad', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'assanakammana', 'from_user_name': 'assanakammana', 'from_user_wp_id': 328, 'site_id': 65, 'from_user_email': 'ak.kayalad@gmail.com', 'tag': 'lida101', 'feed_url': 'lida101', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'atrey', 'from_user_name': 'Ashok Atrey', 'from_user_wp_id': 162, 'site_id': 65, 'from_user_email': 'Ashok.supp@gmail.com', 'tag': 'lida101', 'feed_url': 'Google.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'charlie', 'from_user_name': 'Charlie Gihiala', 'from_user_wp_id': 282, 'site_id': 65, 'from_user_email': 'awocapitalelsuru@gmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'dave', 'from_user_name': 'dave', 'from_user_wp_id': 2, 'site_id': 65, 'from_user_email': 'dave@oerfoundation.org', 'tag': 'lida101', 'feed_url': 'https://davelane.nz', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'dblwindmill', 'from_user_name': 'James', 'from_user_wp_id': 220, 'site_id': 65, 'from_user_email': 'fantasyworld23@outlook.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'dbo1972', 'from_user_name': 'David Bravo Ortiz', 'from_user_wp_id': 252, 'site_id': 65, 'from_user_email': 'davidbravoortiz@yahoo.es', 'tag': 'lida101', 'feed_url': 'trytospeakit.blogspot.com.es', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'deekonda', 'from_user_name': 'deekonda', 'from_user_wp_id': 326, 'site_id': 65, 'from_user_email': 'deekonda2014@gmail.com', 'tag': 'lida101', 'feed_url': 'http://ravi.com/feed.rss', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'devchandr1', 'from_user_name': 'Rama Devchand', 'from_user_wp_id': 431, 'site_id': 65, 'from_user_email': 'devchand_r@usp.ac.fj', 'tag': 'lida101', 'feed_url': 'http://course.oeru.org/lida/101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'dkverma8588', 'from_user_name': 'Durgesh Kumar Verma', 'from_user_wp_id': 294, 'site_id': 65, 'from_user_email': 'durgeshkumarverma4@gmail.com', 'tag': 'lida101', 'feed_url': 'Fisheries science', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'epiotom04', 'from_user_name': 'Epio Tom', 'from_user_wp_id': 372, 'site_id': 65, 'from_user_email': 'epiotom@gmai.com', 'tag': 'lida101', 'feed_url': 'http//www.Google.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'fayrouzelserogy', 'from_user_name': 'Fayrouz Elserogy', 'from_user_wp_id': 128, 'site_id': 65, 'from_user_email': 'fayrouzelserogy@aucegypt.edu', 'tag': 'lida101', 'feed_url': 'fayrouzelserogy', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'hollissankar', 'from_user_name': 'hollissankar', 'from_user_wp_id': 413, 'site_id': 65, 'from_user_email': 'hcsankar@gmail.com', 'tag': 'lida101', 'feed_url': 'https://www.facebook.com/hollis.sankar', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'ismail1', 'from_user_name': 'Ismail', 'from_user_wp_id': 334, 'site_id': 65, 'from_user_email': 'Ismailyassin106@gmail.com', 'tag': 'lida101', 'feed_url': 'Physics', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'ivarawaico1', 'from_user_name': 'Iva Rawaico', 'from_user_wp_id': 429, 'site_id': 65, 'from_user_email': 'ivarawaico@gmail.com', 'tag': 'lida101', 'feed_url': 'ivarawaico@gmail.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'julie440', 'from_user_name': 'Julie Reed', 'from_user_wp_id': 187, 'site_id': 65, 'from_user_email': 'jreed@jcsu.edu', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'justine', 'from_user_name': 'Justine Pepperell', 'from_user_wp_id': 251, 'site_id': 65, 'from_user_email': 'justine.pepperell@gmail.com', 'tag': 'lida101', 'feed_url': 'www.citizenmoderno.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'lifelonglearner4ever', 'from_user_name': 'Ana Rosa', 'from_user_wp_id': 209, 'site_id': 65, 'from_user_email': 'arblue@shaw.ca', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/interactions/course-feed/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'mamafantu', 'from_user_name': 'mamafantu', 'from_user_wp_id': 164, 'site_id': 65, 'from_user_email': 'afratparabodar@gmail.com', 'tag': 'lida101', 'feed_url': 'http://kingtv.org/all-sports/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'mariamagedd', 'from_user_name': 'Maria', 'from_user_wp_id': 134, 'site_id': 65, 'from_user_email': 'mariamaged@aucgept.edu', 'tag': 'lida101', 'feed_url': 'http://mariamaged.wordpress.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'martins', 'from_user_name': 'Ayo', 'from_user_wp_id': 385, 'site_id': 65, 'from_user_email': 'Victorypublishing77@yahoo.com', 'tag': 'lida101', 'feed_url': 'Graphics', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'mingle76', 'from_user_name': 'mingle76', 'from_user_wp_id': 210, 'site_id': 65, 'from_user_email': 'mingle76@gmail.com', 'tag': 'lida101', 'feed_url': 'https://megingle.wordpress.com/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'moasaf', 'from_user_name': 'moasaf', 'from_user_wp_id': 129, 'site_id': 65, 'from_user_email': 'mohamedsafwat@aucegypt.edu', 'tag': 'lida101', 'feed_url': 'https://diglit.creativitycourse.org/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'mohamedhatemm', 'from_user_name': 'Mohamed Hatem', 'from_user_wp_id': 130, 'site_id': 65, 'from_user_email': 'm_hatem@aucegypt.edu', 'tag': 'lida101', 'feed_url': 'https://mohamedhatemblog.wordpress.com/about/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'narayana', 'from_user_name': 'Narayan Prasad Chaudhari', 'from_user_wp_id': 321, 'site_id': 65, 'from_user_email': 'npc.riebhopal@gmail.com', 'tag': 'lida101', 'feed_url': 'https://wikieducator.org/User:Narayana', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'nelliemuller', 'from_user_name': 'NELLIE DEUTSCH', 'from_user_wp_id': 387, 'site_id': 65, 'from_user_email': 'nellie.muller.deutsch@gmail.com', 'tag': 'lida101', 'feed_url': 'https://nellie-deutsch.com/feed.rss', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'nishasingh', 'from_user_name': 'nishasingh', 'from_user_wp_id': 342, 'site_id': 65, 'from_user_email': 'nisha2k04@gmail.com', 'tag': 'lida101', 'feed_url': 'https://myonlinevoyage.blogspot.in/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'niyatipathak02', 'from_user_name': 'napathak02', 'from_user_wp_id': 391, 'site_id': 65, 'from_user_email': 'niyatipathak02@gmail.com', 'tag': 'lida101', 'feed_url': 'niaytipathak.blogspot.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'parveenwrites', 'from_user_name': 'Parveen', 'from_user_wp_id': 172, 'site_id': 65, 'from_user_email': 'parveenwrites@gmail.com', 'tag': 'lida101', 'feed_url': 'Unskilled.in', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'pradipshelke49', 'from_user_name': 'Pradip babasaheb shelke', 'from_user_wp_id': 289, 'site_id': 65, 'from_user_email': 'pradipshelke49@gmail.com', 'tag': 'lida101', 'feed_url': 'www.researchfellowjrf.wordpress.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'rajeevtyagi1971', 'from_user_name': 'Dr. Rajeev Tyagi', 'from_user_wp_id': 199, 'site_id': 65, 'from_user_email': 'rajeevtyagi1971@gmail.com', 'tag': 'lida101', 'feed_url': 'drrajeev.in', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'rajeshkrm1545', 'from_user_name': 'Rajesh Kumar', 'from_user_wp_id': 259, 'site_id': 65, 'from_user_email': 'rajeshkrm1545@gmail.com', 'tag': 'lida101', 'feed_url': 'b sc it', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'rekhajoshi', 'from_user_name': 'Rekha', 'from_user_wp_id': 248, 'site_id': 65, 'from_user_email': 'govind.up28@gmail.com', 'tag': 'lida101', 'feed_url': 'DLEd', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'royal2018', 'from_user_name': 'royal2018', 'from_user_wp_id': 358, 'site_id': 65, 'from_user_email': 'theoabel2018@gmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'sagarmandal18', 'from_user_name': 'Sagar Mandal', 'from_user_wp_id': 304, 'site_id': 65, 'from_user_email': 'sagarmandal.m@gmail.com', 'tag': 'lida101', 'feed_url': 'online learning.edu', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'sasuvosheikh', 'from_user_name': 'SA Shuvo sheikh', 'from_user_wp_id': 330, 'site_id': 65, 'from_user_email': 'mdsuvosheikh@gmail.com', 'tag': 'lida101', 'feed_url': 'facebook.com/diary.unfinished', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'smartsarva', 'from_user_name': 'smartie', 'from_user_wp_id': 264, 'site_id': 65, 'from_user_email': 'srisarvamech@gmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'ssdd', 'from_user_name': 'Sandeep', 'from_user_wp_id': 240, 'site_id': 65, 'from_user_email': 'sr480029@gmail.com', 'tag': 'lida101', 'feed_url': 'ssdds1087.blogspot.co.in', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'terezinha', 'from_user_name': 'Terezinha Marcondes Diniz Biazi', 'from_user_wp_id': 217, 'site_id': 65, 'from_user_email': 'emebiazi@hotmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'toddcher', 'from_user_name': 'Cheryl Todd', 'from_user_wp_id': 257, 'site_id': 65, 'from_user_email': 'toddcher@meredith.edu', 'tag': 'lida101', 'feed_url': 'MCOER', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'tommie', 'from_user_name': 'Hamaluba', 'from_user_wp_id': 215, 'site_id': 65, 'from_user_email': 'thamaluba@staff.bou.ac.bw', 'tag': 'lida101', 'feed_url': 'https://www.blogger.com/blogger.g?blogID=7674153765285536169#allposts', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'umavenkata', 'from_user_name': 'Venkata', 'from_user_wp_id': 253, 'site_id': 65, 'from_user_email': 'Umavenkata1992@gmail.com', 'tag': 'lida101', 'feed_url': 'Google', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'uspnauru', 'from_user_name': 'Lisa', 'from_user_wp_id': 206, 'site_id': 65, 'from_user_email': 'mccutchen_l@usp.ac.fj', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'vili', 'from_user_name': 'vili', 'from_user_wp_id': 430, 'site_id': 65, 'from_user_email': 'togavou8@gmail.com', 'tag': 'lida101', 'feed_url': 'none', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'vulaniconfidence', 'from_user_name': 'vulani', 'from_user_wp_id': 355, 'site_id': 65, 'from_user_email': 'prosperitynkalo@gmail.com', 'tag': 'lida101', 'feed_url': 'travel operation', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'wasilatraji007', 'from_user_name': 'wasilat raji', 'from_user_wp_id': 165, 'site_id': 65, 'from_user_email': 'wasilatraji@yahoo.com', 'tag': 'lida101', 'feed_url': 'www.nutrion.comblogfeed URL', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+{ 'from_user': 'zardavas', 'from_user_name': 'Georgios Zardavas', 'from_user_wp_id': 274, 'site_id': 65, 'from_user_email': 'zardavas@csd.auth.gr', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
+]
diff --git a/saylor-discourse.py b/saylor-discourse.py
deleted file mode 100644
index 78fdaec76baa0c3156be404cf9f87eb6eee82370..0000000000000000000000000000000000000000
--- a/saylor-discourse.py
+++ /dev/null
@@ -1,192 +0,0 @@
-#!/usr/bin/python
-
-""" Harvest messages from Saylor forum category URLs specified in options file."""
-
-# Copyright 2016 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import re
-import time
-import couchdb
-import json
-import urllib
-import argparse
-import requests
-# for debugging
-import logging
-import pprint
-# to deal with nasty characters included in Hypothesis quotes
-import sys
-reload(sys)
-sys.setdefaultencoding('utf8')
-
-#DEBUG = True
-DEBUG = False
-DELAY = 0.1         # delay in seconds between Discourse hits
-MAX_TEXT_LEN = 300  # max characters before truncation
-SOURCE = "saylor-discourse"  # the source of each post in CouchDB
-POSTS_PER_PAGE = 20 # Discourse returns posts for a topic in lots of 20 per page
-
-# retrieve URL from config JSON
-options = json.load(open('../config/options.json', 'r'))
-
-#logging configuration
-#LogLevel = logging.DEBUG # or logging.INFO or logging.WARN, etc.
-LogLevel = logging.INFO # or logging.DEBUG or logging.WARN, etc.
-LogFilename = options['logdir'] + '/saylor-forums.log'
-LogFormat = '%(asctime)s - %(levelname)s: %(message)s'
-#print 'logfile %s, level %s' % (LogFilename, LogLevel)
-logging.basicConfig(format=LogFormat,level=LogLevel,filename=LogFilename)
-
-# get tag list from URL
-tagurl = options['settings-url']
-jsoncontent = urllib.urlopen(tagurl)
-reference_tags = json.loads(jsoncontent.read())["tags"]
-
-# Set up the prettyprinter object for debugging
-pp = pprint.PrettyPrinter(indent=4)
-
-# create the CouchDB object
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-#print "couch info: %s, %s" % (couch, db)
-
-# some other settings.
-baseurl = options['saylor']['url']
-logging.debug('baseurl = %s', pp.pformat(baseurl))
-version = '0.3'
-headers = {'User-Agent' : 'WEnotes-Saylor-Discourse/%s' % (version)}
-#print headers
-
-# command-line argument parser (note: store_false makes args.full default
-# to True, and parse_args([]) ignores the real command line, so the full
-# harvest always runs)
-parser = argparse.ArgumentParser(description='Harvest posts from Discourse Forums.')
-parser.add_argument('-f', '--full', action='store_false',
-        help='get list of categories, and then every topic in each')
-args = parser.parse_args([])
-
-def have_mention(msg_id):
-    """Return boolean showing if we already have this message."""
-    view = db.view('ids/saylorids')
-    have = (len(view[msg_id]) > 0)
-    return have
-
-# check a tag list against our reference_tags
-def interesting_tags(tags):
-    """Return list of interesting tags, or false if none."""
-    common_tags = list(set(tags) & set(reference_tags))
-    #logging.debug("taglist: %s\nreference tags: %s\ncommon tags: %s", tags, reference_tags, common_tags)
-    if common_tags:
-        logging.debug("interesting tags: %s", common_tags)
-        return common_tags
-    else:
-        return False
-
-if args.full:
-        # get the list of tags.. 18 is the "OERu" Tag Group...
-        tags = '%s/tag_groups/18.json' % (baseurl)
-        time.sleep(DELAY)
-        r = requests.get(tags, headers=headers, verify=False)
-        d = json.loads(r.text)
-        #print d['tag_group']['tag_names']
-        for tag in d['tag_group']['tag_names']:
-            posts = '%s/tags/%s.json' % (baseurl, tag)
-            logging.debug("tag: %s", tag)
-            ## unnecessary test, as all tags in this tag_group are OERu tags...
-            time.sleep(DELAY)
-            r2 = requests.get(posts, headers=headers, verify=False)
-            t = json.loads(r2.text)
-            topic_list = t['topic_list']['topics']
-            for topic in topic_list:
-                pages = topic['posts_count'] / POSTS_PER_PAGE
-                if (topic['posts_count'] % POSTS_PER_PAGE > 0):
-                    # ceiling division: a partial last page still needs fetching
-                    print "adding a page with modulo %d" % (topic['posts_count'] % POSTS_PER_PAGE)
-                    pages += 1
-                print "%d pages for %d posts for topic %s" % (pages, topic['posts_count'], topic['title'])
-                for page in range(1, pages + 1):
-                    #print "%d %s" % (topic['id'], topic['title'])
-                    #print topic_list
-                    posts = '%s/t/%s.json?page=%d' % (baseurl, topic['id'], page)
-                    logging.debug("topic: %s(%s), page %d", topic['title'], topic['id'], page)
-                    time.sleep(DELAY)
-                    r3 = requests.get(posts, headers=headers, verify=False)
-                    p = json.loads(r3.text)
-                    post_list = p['post_stream']['posts']
-                    logging.debug("post_list %s: ", json.dumps(post_list, indent=2, sort_keys=True))
-                    # we only get here because we know this *topic* has a tag of interest.
-                    common_tags = interesting_tags(topic['tags'])
-                    #print "tags: %s, common tags: %s" % (topic['tags'], common_tags)
-                    for post in post_list:
-                        logging.debug("post %s: ", json.dumps(post, indent=2, sort_keys=True))
-                        if post['deleted_at']:
-                            continue
-                        link = "%s/t/%s/%s" % (baseurl, post['topic_id'], post['post_number'])
-                        logging.debug('link: %s', link)
-                        if have_mention(link):
-                            logging.debug('existing link: %s', link)
-                            continue
-                        #common_tags = interesting_tags(tags)
-                        #print "tags: %s, common tags: %s" % (tags, common_tags)
-                        #if not common_tags:
-                        #    logging.debug('no interesting tags')
-                        #    continue
-                        logging.debug('interesting link: %s', link)
-                        text = post['cooked'].replace('\n', ' ')
-                        text = re.sub(r'<[^>]*?>', ' ', text)   # remove HTML tags
-                        text = re.sub(r' {2,}', ' ', text)      # collapse spaces
-                        text = topic['fancy_title'].strip() + ': ' + text.strip()
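-                        # truncate to MAX_TEXT_LEN, backing up to the nearest
-                        # word boundary and appending an ellipsis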
-                        truncated = False
-                        i = len(text)
-                        if i > MAX_TEXT_LEN:
-                            i = MAX_TEXT_LEN
-                            while text[i] != ' ' and i > 0:
-                                i -= 1
-                            text = text[:i] + '...'
-                            truncated = True
-                        try:
-                            from_user_name = post['display_username']
-                        except KeyError:
-                            from_user_name = post['username']
-                        #print "avatar template: %s" % (post['avatar_template'])
-                        profile_image_url = post['avatar_template'].replace('{size}', '64')
-                        if not re.match(r'^(https?:)?//', profile_image_url):
-                            profile_image_url = baseurl + profile_image_url
-                        mention = {
-                                'created_at': post['created_at'],
-                                'from_user': post['username'],
-                                'from_user_name': from_user_name,
-                                'id': link,
-                                'post_id': post['id'],
-                                'profile_image_url': profile_image_url,
-                                'profile_url': baseurl + '/users/' + post['username'],
-                                'text': text,
-                                'truncated': truncated,
-                                'we_link': link,
-                                'we_source': SOURCE,
-                                'we_version': version,
-                                'we_tags': common_tags,
-                                'we_timestamp': post['created_at']
-                                }
-                        if DEBUG:
-                            print json.dumps(mention, indent=2, sort_keys=True)
-                        else:
-                            logging.info('adding post %s by %s with tag(s): %s', mention['id'], from_user_name, common_tags)
-                            db.save(mention)
diff --git a/tools/age_votes.py b/tools/age_votes.py
deleted file mode 100644
index 3c05d54738ab8d4c1c047adc626bea88bc6c9711..0000000000000000000000000000000000000000
--- a/tools/age_votes.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/python
-
-## set old=1 on WEnotes votes past a certain age
-
-import os
-import json
-import time
-import couchdb
-
-freshsecs = 2 * 24 * 60 * 60
-startkey = '2013-01-01T00:00:00.000Z'
-endkey = time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
-       time.gmtime(time.time() - freshsecs))
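-# e.g. with freshsecs of two days, a run at 2015-06-03T12:00:00Z marks
-# votes timestamped before 2015-06-01T12:00:00.000Z as old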
-
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['url'])
-db = couch[options['dbvotes']]
-
-updates = []
-
-fresh = db.view('vote/WEnotesFresh', include_docs=True)
-
-stale = fresh[startkey:endkey]
-
-for s in stale:
-    doc = s.doc
-    doc['old'] = 1
-    updates.append(doc)
-
-db.update(updates)
-
diff --git a/tools/array-to-feeds.py b/tools/array-to-feeds.py
deleted file mode 100644
index ecb006465c26d45fc706d78e0d94d16646173b52..0000000000000000000000000000000000000000
--- a/tools/array-to-feeds.py
+++ /dev/null
@@ -1,122 +0,0 @@
-#!/usr/bin/python
-
-# use a Google Sheet to set up feeds to harvest
-#  sheet-to-feeds.py GOOGLE-SHEETS-URL (readable by bot)
-
-import os
-import sys
-import json
-from hashlib import md5
-import urllib2
-from bs4 import BeautifulSoup
-import couchdb
-import gspread
-
-# note: Lida101 has site_id 65 on course.oeru.org
-
-# sheet layout constants, assumed to match tools/sheet-to-feeds.py
-STARTROW = 1    # 0 indexed
-SETTAGS = ['lida101']
-SHEET_FULL_NAME = 1     # column indices
-SHEET_EMAIL = 2
-SHEET_USER = 3
-SHEET_URL = 4
-
-feeds = [
-    { 'from_user': '537jadia', 'from_user_name': 'Jadia7', 'from_user_wp_id': 318, 'site_id': 65, 'from_user_email': 'Kerinesylvester@gmail.com', 'tag': 'lida101', 'feed_url': 'Jadia', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'aakshmita10', 'from_user_name': 'Aakash', 'from_user_wp_id': 161, 'site_id': 65, 'from_user_email': 'akashmita10@gmail.com', 'tag': 'lida101', 'feed_url': 'eaxample.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'aeyz', 'from_user_name': 'Musivale', 'from_user_wp_id': 300, 'site_id': 65, 'from_user_email': 'edwardlusimbo@gmail.com', 'tag': 'lida101', 'feed_url': 'http://CBT.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'amareswaran', 'from_user_name': 'Dr. Naraginti Amareswaran', 'from_user_wp_id': 356, 'site_id': 65, 'from_user_email': 'amareswaran@gmail.com', 'tag': 'lida101', 'feed_url': 'amareswaran', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'angelicamartinez', 'from_user_name': 'Angelica Martinez', 'from_user_wp_id': 223, 'site_id': 65, 'from_user_email': 'angelica.martinez.ochoa@gmail.com', 'tag': 'lida101', 'feed_url': 'https://drive.google.com/open?id=1R5SS7Lma2JFzU4yYT3b0h1QeWHzBNT1E', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'angelntini', 'from_user_name': 'Angelos Konstantinidis', 'from_user_wp_id': 439, 'site_id': 65, 'from_user_email': 'angelntini@gmail.com', 'tag': 'lida101', 'feed_url': 'http://angelos.ict4all.gr/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'anikkumar', 'from_user_name': 'anikkumar', 'from_user_wp_id': 297, 'site_id': 65, 'from_user_email': 'anikkumar551@gmail.com', 'tag': 'lida101', 'feed_url': 'aktv', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'anil', 'from_user_name': 'Anil Prasad P', 'from_user_wp_id': 434, 'site_id': 65, 'from_user_email': 'apletters@gmail.com', 'tag': 'lida101', 'feed_url': 'http://www.wikieducator.org/Anil_Prasad', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'assanakammana', 'from_user_name': 'assanakammana', 'from_user_wp_id': 328, 'site_id': 65, 'from_user_email': 'ak.kayalad@gmail.com', 'tag': 'lida101', 'feed_url': 'lida101', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'atrey', 'from_user_name': 'Ashok Atrey', 'from_user_wp_id': 162, 'site_id': 65, 'from_user_email': 'Ashok.supp@gmail.com', 'tag': 'lida101', 'feed_url': 'Google.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'charlie', 'from_user_name': 'Charlie Gihiala', 'from_user_wp_id': 282, 'site_id': 65, 'from_user_email': 'awocapitalelsuru@gmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'dave', 'from_user_name': 'dave', 'from_user_wp_id': 2, 'site_id': 65, 'from_user_email': 'dave@oerfoundation.org', 'tag': 'lida101', 'feed_url': 'https://davelane.nz', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'dblwindmill', 'from_user_name': 'James', 'from_user_wp_id': 220, 'site_id': 65, 'from_user_email': 'fantasyworld23@outlook.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'dbo1972', 'from_user_name': 'David Bravo Ortiz', 'from_user_wp_id': 252, 'site_id': 65, 'from_user_email': 'davidbravoortiz@yahoo.es', 'tag': 'lida101', 'feed_url': 'trytospeakit.blogspot.com.es', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'deekonda', 'from_user_name': 'deekonda', 'from_user_wp_id': 326, 'site_id': 65, 'from_user_email': 'deekonda2014@gmail.com', 'tag': 'lida101', 'feed_url': 'http://ravi.com/feed.rss', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'devchandr1', 'from_user_name': 'Rama Devchand', 'from_user_wp_id': 431, 'site_id': 65, 'from_user_email': 'devchand_r@usp.ac.fj', 'tag': 'lida101', 'feed_url': 'http://course.oeru.org/lida/101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'dkverma8588', 'from_user_name': 'Durgesh Kumar Verma', 'from_user_wp_id': 294, 'site_id': 65, 'from_user_email': 'durgeshkumarverma4@gmail.com', 'tag': 'lida101', 'feed_url': 'Fisheries science', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'epiotom04', 'from_user_name': 'Epio Tom', 'from_user_wp_id': 372, 'site_id': 65, 'from_user_email': 'epiotom@gmai.com', 'tag': 'lida101', 'feed_url': 'http//www.Google.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'fayrouzelserogy', 'from_user_name': 'Fayrouz Elserogy', 'from_user_wp_id': 128, 'site_id': 65, 'from_user_email': 'fayrouzelserogy@aucegypt.edu', 'tag': 'lida101', 'feed_url': 'fayrouzelserogy', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'hollissankar', 'from_user_name': 'hollissankar', 'from_user_wp_id': 413, 'site_id': 65, 'from_user_email': 'hcsankar@gmail.com', 'tag': 'lida101', 'feed_url': 'https://www.facebook.com/hollis.sankar', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'ismail1', 'from_user_name': 'Ismail', 'from_user_wp_id': 334, 'site_id': 65, 'from_user_email': 'Ismailyassin106@gmail.com', 'tag': 'lida101', 'feed_url': 'Physics', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'ivarawaico1', 'from_user_name': 'Iva Rawaico', 'from_user_wp_id': 429, 'site_id': 65, 'from_user_email': 'ivarawaico@gmail.com', 'tag': 'lida101', 'feed_url': 'ivarawaico@gmail.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'julie440', 'from_user_name': 'Julie Reed', 'from_user_wp_id': 187, 'site_id': 65, 'from_user_email': 'jreed@jcsu.edu', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'justine', 'from_user_name': 'Justine Pepperell', 'from_user_wp_id': 251, 'site_id': 65, 'from_user_email': 'justine.pepperell@gmail.com', 'tag': 'lida101', 'feed_url': 'www.citizenmoderno.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'lifelonglearner4ever', 'from_user_name': 'Ana Rosa', 'from_user_wp_id': 209, 'site_id': 65, 'from_user_email': 'arblue@shaw.ca', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/interactions/course-feed/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'mamafantu', 'from_user_name': 'mamafantu', 'from_user_wp_id': 164, 'site_id': 65, 'from_user_email': 'afratparabodar@gmail.com', 'tag': 'lida101', 'feed_url': 'http://kingtv.org/all-sports/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'mariamagedd', 'from_user_name': 'Maria', 'from_user_wp_id': 134, 'site_id': 65, 'from_user_email': 'mariamaged@aucgept.edu', 'tag': 'lida101', 'feed_url': 'http://mariamaged.wordpress.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'martins', 'from_user_name': 'Ayo', 'from_user_wp_id': 385, 'site_id': 65, 'from_user_email': 'Victorypublishing77@yahoo.com', 'tag': 'lida101', 'feed_url': 'Graphics', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'mingle76', 'from_user_name': 'mingle76', 'from_user_wp_id': 210, 'site_id': 65, 'from_user_email': 'mingle76@gmail.com', 'tag': 'lida101', 'feed_url': 'https://megingle.wordpress.com/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'moasaf', 'from_user_name': 'moasaf', 'from_user_wp_id': 129, 'site_id': 65, 'from_user_email': 'mohamedsafwat@aucegypt.edu', 'tag': 'lida101', 'feed_url': 'https://diglit.creativitycourse.org/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'mohamedhatemm', 'from_user_name': 'Mohamed Hatem', 'from_user_wp_id': 130, 'site_id': 65, 'from_user_email': 'm_hatem@aucegypt.edu', 'tag': 'lida101', 'feed_url': 'https://mohamedhatemblog.wordpress.com/about/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'narayana', 'from_user_name': 'Narayan Prasad Chaudhari', 'from_user_wp_id': 321, 'site_id': 65, 'from_user_email': 'npc.riebhopal@gmail.com', 'tag': 'lida101', 'feed_url': 'https://wikieducator.org/User:Narayana', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'nelliemuller', 'from_user_name': 'NELLIE DEUTSCH', 'from_user_wp_id': 387, 'site_id': 65, 'from_user_email': 'nellie.muller.deutsch@gmail.com', 'tag': 'lida101', 'feed_url': 'https://nellie-deutsch.com/feed.rss', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'nishasingh', 'from_user_name': 'nishasingh', 'from_user_wp_id': 342, 'site_id': 65, 'from_user_email': 'nisha2k04@gmail.com', 'tag': 'lida101', 'feed_url': 'https://myonlinevoyage.blogspot.in/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'niyatipathak02', 'from_user_name': 'napathak02', 'from_user_wp_id': 391, 'site_id': 65, 'from_user_email': 'niyatipathak02@gmail.com', 'tag': 'lida101', 'feed_url': 'niaytipathak.blogspot.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'parveenwrites', 'from_user_name': 'Parveen', 'from_user_wp_id': 172, 'site_id': 65, 'from_user_email': 'parveenwrites@gmail.com', 'tag': 'lida101', 'feed_url': 'Unskilled.in', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'pradipshelke49', 'from_user_name': 'Pradip babasaheb shelke', 'from_user_wp_id': 289, 'site_id': 65, 'from_user_email': 'pradipshelke49@gmail.com', 'tag': 'lida101', 'feed_url': 'www.researchfellowjrf.wordpress.com', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'rajeevtyagi1971', 'from_user_name': 'Dr. Rajeev Tyagi', 'from_user_wp_id': 199, 'site_id': 65, 'from_user_email': 'rajeevtyagi1971@gmail.com', 'tag': 'lida101', 'feed_url': 'drrajeev.in', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'rajeshkrm1545', 'from_user_name': 'Rajesh Kumar', 'from_user_wp_id': 259, 'site_id': 65, 'from_user_email': 'rajeshkrm1545@gmail.com', 'tag': 'lida101', 'feed_url': 'b sc it', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'rekhajoshi', 'from_user_name': 'Rekha', 'from_user_wp_id': 248, 'site_id': 65, 'from_user_email': 'govind.up28@gmail.com', 'tag': 'lida101', 'feed_url': 'DLEd', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'royal2018', 'from_user_name': 'royal2018', 'from_user_wp_id': 358, 'site_id': 65, 'from_user_email': 'theoabel2018@gmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'sagarmandal18', 'from_user_name': 'Sagar Mandal', 'from_user_wp_id': 304, 'site_id': 65, 'from_user_email': 'sagarmandal.m@gmail.com', 'tag': 'lida101', 'feed_url': 'online learning.edu', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'sasuvosheikh', 'from_user_name': 'SA Shuvo sheikh', 'from_user_wp_id': 330, 'site_id': 65, 'from_user_email': 'mdsuvosheikh@gmail.com', 'tag': 'lida101', 'feed_url': 'facebook.com/diary.unfinished', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'smartsarva', 'from_user_name': 'smartie', 'from_user_wp_id': 264, 'site_id': 65, 'from_user_email': 'srisarvamech@gmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'ssdd', 'from_user_name': 'Sandeep', 'from_user_wp_id': 240, 'site_id': 65, 'from_user_email': 'sr480029@gmail.com', 'tag': 'lida101', 'feed_url': 'ssdds1087.blogspot.co.in', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'terezinha', 'from_user_name': 'Terezinha Marcondes Diniz Biazi', 'from_user_wp_id': 217, 'site_id': 65, 'from_user_email': 'emebiazi@hotmail.com', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'toddcher', 'from_user_name': 'Cheryl Todd', 'from_user_wp_id': 257, 'site_id': 65, 'from_user_email': 'toddcher@meredith.edu', 'tag': 'lida101', 'feed_url': 'MCOER', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'tommie', 'from_user_name': 'Hamaluba', 'from_user_wp_id': 215, 'site_id': 65, 'from_user_email': 'thamaluba@staff.bou.ac.bw', 'tag': 'lida101', 'feed_url': 'https://www.blogger.com/blogger.g?blogID=7674153765285536169#allposts', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'umavenkata', 'from_user_name': 'Venkata', 'from_user_wp_id': 253, 'site_id': 65, 'from_user_email': 'Umavenkata1992@gmail.com', 'tag': 'lida101', 'feed_url': 'Google', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'uspnauru', 'from_user_name': 'Lisa', 'from_user_wp_id': 206, 'site_id': 65, 'from_user_email': 'mccutchen_l@usp.ac.fj', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'vili', 'from_user_name': 'vili', 'from_user_wp_id': 430, 'site_id': 65, 'from_user_email': 'togavou8@gmail.com', 'tag': 'lida101', 'feed_url': 'none', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'vulaniconfidence', 'from_user_name': 'vulani', 'from_user_wp_id': 355, 'site_id': 65, 'from_user_email': 'prosperitynkalo@gmail.com', 'tag': 'lida101', 'feed_url': 'travel operation', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'wasilatraji007', 'from_user_name': 'wasilat raji', 'from_user_wp_id': 165, 'site_id': 65, 'from_user_email': 'wasilatraji@yahoo.com', 'tag': 'lida101', 'feed_url': 'www.nutrion.comblogfeed URL', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-    { 'from_user': 'zardavas', 'from_user_name': 'Georgios Zardavas', 'from_user_wp_id': 274, 'site_id': 65, 'from_user_email': 'zardavas@csd.auth.gr', 'tag': 'lida101', 'feed_url': 'https://course.oeru.org/lida101/', 'we_source': 'array-to-feeds.py', 'we_wp_version': 'na', 'type': 'feed' },
-]
-
-print feeds
-
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['localcouch'])
-db = couch[options['dbfeeds']]
-
-print sys.argv[1]
-gc = gspread.login(options['gdata']['user'], options['gdata']['pass'])
-wks = gc.open_by_url(sys.argv[1]).sheet1
-
-sheet = wks.get_all_values()
-print sheet
-for row in sheet[STARTROW:]:
-    web = row[SHEET_URL]
-    page = urllib2.urlopen(web).read()
-    soup = BeautifulSoup(page)
-    url = ''
-    freq = 0
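-    # RSS autodiscovery: use the first <link rel="alternate"
-    # type="application/rss+xml"> advertised in the page head; if none is
-    # found the doc is saved with an empty url and freq = 0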
-    for l in soup.find_all('link', attrs={
-        'rel': 'alternate',
-        'type': 'application/rss+xml'}):
-        url = l['href']
-        freq = 1
-        break
-
-    doc = {
-            'freq':                 freq,
-            'from_user':            row[SHEET_USER],
-            'from_user_name':       row[SHEET_FULL_NAME],
-            'gravatar':             md5(row[SHEET_EMAIL].lower().strip()).hexdigest(),
-            'last_successful':      '2015-01-01T00:00:00.000Z',
-            'last_updated':         '2015-01-01T00:00:00.000Z',
-            'profile_image_url':    '',
-            'profile_image_width':  48,
-            'profile_url':          'http://WikiEducator.org/User:' + \
-                    row[SHEET_USER].replace(' ', '_'),
-            'tags':                 SETTAGS,
-            'type':                 'feed',
-            'url':                  url,
-            'we_tags':              '',
-            'web':                  row[SHEET_URL]
-            }
-    print doc
-    db.save(doc)
-    print
diff --git a/tools/convert-tag-to-tags.py b/tools/convert-tag-to-tags.py
deleted file mode 100644
index b3a789af51bfdd3d4cd7feb83521e739b061c7ed..0000000000000000000000000000000000000000
--- a/tools/convert-tag-to-tags.py
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2016 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-# make we_tags for every mention that has only we_tag
-
-import os
-import json
-import couchdb
-
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['localcouch'])
-db = couch[options['db']]
-
-has_tag = 0
-has_both = 0
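-# copy each legacy scalar we_tag into a one-element we_tags list (this
-# overwrites any existing we_tags value) and drop the old field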
-for row in db.view('_all_docs', include_docs=True):
-    doc = row.doc
-    if doc.has_key('we_tag'):
-        has_tag = has_tag + 1
-        if doc.has_key('we_tags'):
-            has_both = has_both + 1
-        doc['we_tags'] = [doc['we_tag']]
-        del doc['we_tag']
-        db[doc.id] = doc
-
-print "mentions with we_tag: %d with both we_tag and we_tags: %d\n" % (has_tag, has_both)
-
diff --git a/tools/del_feed_mentions.py b/tools/del_feed_mentions.py
deleted file mode 100644
index 2946aab80a96ef4cdf9863e12757a6957e193d95..0000000000000000000000000000000000000000
--- a/tools/del_feed_mentions.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/python
-
-import json
-import couchdb
-import pprint
-pp = pprint.PrettyPrinter(indent=4)
-
-options = json.load(open('../options.json', 'rt'))
-
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-count = 1
-mentions = db.view('messages/feed-combined', include_docs=True)
-print('number of mentions: %d' % len(mentions))
-for row in mentions:
-    print pp.pformat(row.doc)
-    print count #, row
-    #print '----', row.id, row.doc.id, row.doc.rev, row.doc
-    count += 1
-    try:
-        if row.doc['we_identifier'] == "blog_feed":
-            print "deleting!"
-            db.delete(row.doc)
-    except (KeyError, AttributeError):  # doc lacks we_identifier
-        print "deleting anyway"
-        db.delete(row.doc)
diff --git a/tools/disable_feeds.py b/tools/disable_feeds.py
deleted file mode 100644
index be42e7d499030da2e49aa67391cb8a852bcb1fba..0000000000000000000000000000000000000000
--- a/tools/disable_feeds.py
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/usr/bin/python
-
-# disable some feeds
-
-import os
-import json
-import couchdb
-
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['localcouch'])
-db = couch[options['dbfeeds']]
-
-for row in db.view('_all_docs', include_docs=True):
-    if row.id[0] == '_':
-        continue
-    doc = row.doc
-    tags = []
-    if doc.has_key('tags'):
-        tags = doc['tags']
-    print row.id, doc['freq'], doc['from_user'], tags
-    
-    #if doc['freq'] == 0:
-    #    # disable people that were registered only for SP4Ed
-    #    if len(tags) == 1 and tags[0] == 'ocl4ed':
-    #        print "  disable"
-    #        doc['freq'] = 0
-    #        db[doc.id] = doc
-
-    if doc['freq'] > 0:
-        # disable people that were registered only for OCL4Ed
-        if len(tags) == 1 and tags[0] == 'ocl4ed':
-            print "  disable"
-            doc['freq'] = 0
-            db[doc.id] = doc
-
diff --git a/tools/discourse-clean.py b/tools/discourse-clean.py
deleted file mode 100644
index d7d7850e8b7f9a290153c5a6cd58c2fae900011f..0000000000000000000000000000000000000000
--- a/tools/discourse-clean.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2015 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import os
-import re
-from datetime import datetime
-import couchdb
-import hashlib
-import json
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-tags = options['tags']
-services = ['discourse']
-
-ids = {}
-
-# change http to https, change forum to community
-for d in db.view('ids/community', include_docs=True):
-    doc = d.doc
-    id = doc['id']
-    print id
-    if id in ids:
-        print "duplicate id", id, ids[id], doc['_id']
-    else:
-        ids[id] = doc['_id']
-
diff --git a/tools/dup-twitter-tags.py b/tools/dup-twitter-tags.py
deleted file mode 100644
index a4860711b70de80cced35dcdfd5006be631f5572..0000000000000000000000000000000000000000
--- a/tools/dup-twitter-tags.py
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/python
-# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
-
-# Copyright 2015 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import os
-import re
-from datetime import datetime
-import couchdb
-import hashlib
-import json
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-tags = options['tags']
-
-for d in db.view('ids/twitter', include_docs=True):
-    doc = d.doc
-    if 'we_tag' in doc:
-        # legacy single tag
-        continue
-    tags = doc['we_tags']
-    tagset = set(tags)
-    # if there are duplicates, update to only save unique tags
-    if len(tags) != len(tagset):
-        print tags,'<>',tagset
-        doc['we_tags'] = list(tagset)
-        db.save(doc)
-
-
diff --git a/tools/dupcheck.py b/tools/dupcheck.py
deleted file mode 100644
index c0080e28fb4ff7c25fdbcf25ee69585b8fcad640..0000000000000000000000000000000000000000
--- a/tools/dupcheck.py
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2012 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import couchdb
-import hashlib
-import json
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('options.json', 'rt'))
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-tags = options['tags']
-services = ['twitter', 'identica']
-
-"""
-# dup by text content
-print '============== check by text content ============='
-for tag in tags:
-    print tag
-    for service in services:
-        print service
-        sums = {}
-        for id in db:
-            doc = db[id]
-            if doc['we_source'] == service and doc['we_tag'] == tag:
-                sha1 = hashlib.sha1(doc['text'].encode('utf-8')).hexdigest()
-                if sums.has_key(sha1):
-                    print 'duplicates:', sums[sha1], id
-                    if service == 'twitter':
-                        hashtags = doc['entities']['hashtags']
-                        print " ",
-                        for v in range(len(hashtags)):
-                            print hashtags[v]['text'],
-                        print
-                else:
-                    sums[sha1] = id
-"""
-
-# dup by timestamp
-print '============== check by timestamp ============='
-for tag in tags:
-    print tag
-    #for service in services:
-    for service in ['twitter']:
-        print service
-        stamps = {}
-        for id in db:
-            doc = db[id]
-            if doc['we_source'] == service and doc['we_tag'] == tag:
-                stamp = doc['we_timestamp']
-                if stamps.has_key(stamp):
-                    print 'duplicates:', stamps[stamp], id
-                    if service == 'twitter':
-                        hashtags = doc['entities']['hashtags']
-                        print " ",
-                        for v in range(len(hashtags)):
-                            print hashtags[v]['text'],
-                        print
-                else:
-                    stamps[stamp] = id
-
-
diff --git a/tools/dupcheckgplus.py b/tools/dupcheckgplus.py
deleted file mode 100644
index 92bb47bf7b559a5d65dc5384e0896cdb36563323..0000000000000000000000000000000000000000
--- a/tools/dupcheckgplus.py
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2012 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import os
-from datetime import datetime
-import couchdb
-import hashlib
-import json
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-tags = options['tags']
-services = ['g+']
-
-"""
-# dup by text content
-print '============== check by text content ============='
-for tag in tags:
-    print tag
-    for service in services:
-        print service
-        sums = {}
-        for id in db:
-            doc = db[id]
-            if doc['we_source'] == service and doc['we_tag'] == tag:
-                sha1 = hashlib.sha1(doc['text'].encode('utf-8')).hexdigest()
-                if sums.has_key(sha1):
-                    print 'duplicates:', sums[sha1], id
-                    if service == 'twitter':
-                        hashtags = doc['entities']['hashtags']
-                        print " ",
-                        for v in range(len(hashtags)):
-                            print hashtags[v]['text'],
-                        print
-                else:
-                    sums[sha1] = id
-"""
-
-"""
-# dup by timestamp
-print '============== check by timestamp ============='
-for tag in tags:
-    print tag
-    #for service in services:
-    for service in ['twitter']:
-        print service
-        stamps = {}
-        for id in db:
-            doc = db[id]
-            if doc['we_source'] == service and doc['we_tag'] == tag:
-                stamp = doc['we_timestamp']
-                if stamps.has_key(stamp):
-                    print 'duplicates:', stamps[stamp], id
-                    if service == 'twitter':
-                        hashtags = doc['entities']['hashtags']
-                        print " ",
-                        for v in range(len(hashtags)):
-                            print hashtags[v]['text'],
-                        print
-                else:
-                    stamps[stamp] = id
-"""
-
-# dup by gplus id
-print '============== check by gplus-id ============='
-
-gids = {}
-
-for g in db.view('ids/google', include_docs=True):
-    doc = g.doc
-    gid = doc['id']
-    ourid = g['id']
-    # if it hasn't been deleted, remember it
-    if not doc.has_key('we_d'):
-        if gids.has_key(gid):
-            gids[gid].append(ourid)
-        else:
-            gids[gid] = [ourid]
-
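-# keep the first-seen copy of each G+ id; every later copy is soft-deleted
-# by setting the we_d/we_d_by/we_d_at tombstone fields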
-count = 0
-for gid, ids in gids.items():
-    count += 1
-    if len(ids) > 1:
-        print gid, ids
-        for id in ids[1:]:
-            print "del", id
-            doc = db[id]
-            doc['we_d'] = True
-            doc['we_d_by'] = 'dupbot'
-            doc['we_d_at'] = datetime.utcnow().strftime(
-                    "%Y-%m-%dT%H:%M:%SZ")
-            db[id] = doc
-print count
diff --git a/tools/makefeed.py b/tools/makefeed.py
deleted file mode 100644
index d7d43d088ae0a6d5cac4cbe12c0b6a51ff3297cb..0000000000000000000000000000000000000000
--- a/tools/makefeed.py
+++ /dev/null
@@ -1,139 +0,0 @@
-#!/usr/bin/python
-
-## create an Atom feed of WEnotes for a tag
-#
-# 20130712 jim@OERfoundation.org
-#
-# License: MIT
-
-import re
-import os
-import json
-import copy
-from datetime import datetime
-import couchdb
-from xml.etree.ElementTree import Element, SubElement, Comment, tostring
-import xml.dom.minidom
-
-item_count = 10
-dest = '/home/www/oeru/ucan/EDEM630/wenotes.atom'
-
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-tag = 'EDEM630'
-tag_specific = True    # make tag-specific feeds?
-
-def prettify(x):
-    xm = xml.dom.minidom.parseString(tostring(x))
-    return xm.toprettyxml()
-
-def children(parent, elements):
-    for (e, t) in elements:
-        el = SubElement(parent, e)
-        el.text = t
-
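-# normalize a mention doc into a common shape: fill user, profileURL and
-# profileIMG per source, and strip HTML tags from the text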
-def canonical(d):
-    r = copy.deepcopy(d)
-    source = d['we_source']
-    user = d.get('user', d.get('from_user', ''))
-    r['user'] = user
-    if source == 'wikieducator':
-        r['profileURL'] = 'http://WikiEducator.org/User:' + user
-    elif source == 'twitter':
-        r['profileURL'] = 'http://twitter.com/' + user
-    elif source == 'identica':
-        pass
-    elif source == 'g+':
-        actor = d['actor']
-        r['text'] = d['object']['content']
-        r['user'] = ''
-        r['from_user_name'] = actor['displayName']
-        r['profileURL'] = actor['url'].replace('https://', 'http://')
-        r['profileIMG'] = actor['image']['url'].replace('https://', 'http://')
-        r['we_link'] = d['object']['url']
-    elif source == 'feed':
-        r['profileURL'] = d['profile_url']
-        if r['profileURL'] == '' and d.get('gravatar', '') != '':
-            r['profileURL'] = 'http://gravatar.com/' + d['gravatar']
-        r['profileIMG'] = d['profile_image_url']
-        if r['profileIMG'] == '' and d.get('gravatar', '') != '':
-            r['profileIMG'] = 'http://gravatar.com/avatar/' + d['gravatar'] \
-                    + '?d=identicon'
-    elif source == 'ask':
-        pass
-    elif source == 'moodle':
-        pass
-    r['text'] = re.sub(r'<[^>]*>', '', r['text'])
-    return r
-
-now = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-
-root = Element('feed')
-root.set('xmlns', 'http://www.w3.org/2005/Atom')
-root.set('xml:lang', 'en')
-root.set('xml:base', 'http://OERuniversity.org')
-root.set('xmlns:gd', 'http://schemas.google.com/g/2005')
-
-children(root, [
-    ('title', 'OERu UCan EDEM630 WEnotes'),
-    ('id', 'http://wikieducator.org/Scenario_planning_for_educators/EDEM630'),
-    ('updated', now),
-    ('generator', 'WEnotes 0.1.0'),
-    ])
-link = SubElement(root, 'link')
-link.set('href', 'http://wikieducator.org/Scenario_planning_for_educators/EDEM630')
-linkself = SubElement(root, 'link')
-linkself.set('rel', 'self')
-linkself.set('href', 'http://UCan.OERuniversity.org/EDEM630/wenotes.atom')
-
-taglc = tag.lower()
-mentions = db.view('messages/tag_time', startkey=[taglc, '2099-12-31T00:00:00.000Z'], endkey=[taglc, '2011-01-01T00:00:00.000Z'], descending=True, include_docs=True)
-for item in mentions:
-    if item.doc.has_key('we_d'):    # we_d lives on the doc, not the view row
-        continue
-    doc = canonical(item.doc)
-    entry = SubElement(root, 'entry')
-    source = doc['we_source']
-    if source == 'feed':
-        source = 'blog'
-    title = "%s (%s)" % (doc['from_user_name'], source)
-    children(entry, [
-        ('id', doc['we_link']),
-        ('title', title),
-        ('updated', doc['we_timestamp']),
-        ('summary', doc['text']),
-        ('content', doc['text'])
-        ])
-    link = SubElement(entry, 'link')
-    link.set('href', doc['we_link'])
-    linkalt = SubElement(entry, 'link')
-    linkalt.set('rel', 'alternate')
-    linkalt.set('type', 'text/html')
-    linkalt.set('href', doc['we_link'])
-
-    author = SubElement(entry, 'author')
-    children(author, [
-        ('name', doc['from_user_name']),
-        ('uri', doc['profileURL']),
-        ('email', 'noreply@OERuniversity.org'),
-        ])
-    image = SubElement(author, 'gd:image')
-    image.set('rel', 'http://schemas.google.com/g/2005#thumbnail')
-    image.set('src', doc['profileIMG'])
-
-    category = SubElement(entry, 'category')
-    category.set('term', 'EDEM630')
-
-    item_count -= 1
-    if item_count <= 0:
-        break
-
-#print prettify(root)
-open(dest, 'wt').write('<?xml version="1.0" encoding="utf-8"?>\n'
-        + tostring(root))
-
diff --git a/tools/opml.py b/tools/opml.py
deleted file mode 100644
index 04e7748603581718a17a346edf39dbabd6829451..0000000000000000000000000000000000000000
--- a/tools/opml.py
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/python
-
-## create an OPML file for the registered blogs in the course
-#
-# 20130710 jim@OERfoundation.org
-#
-# License: MIT
-
-import re
-import os
-import json
-from datetime import datetime
-import couchdb
-from xml.etree.ElementTree import Element, SubElement, Comment, tostring
-import xml.dom.minidom
-
-dest = '/home/www/oeru/ucan/EDEM630/blogs.xml'
-
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['url'])
-dbfeeds = couch[options['dbfeeds']]
-tag = 'EDEM630'
-tag_specific = True    # make tag-specific feeds?
-
-def prettify(x):
-    xm = xml.dom.minidom.parseString(tostring(x))
-    return xm.toprettyxml()
-
-def children(parent, elements):
-    for (e, t) in elements:
-        el = SubElement(parent, e)
-        el.text = t
-
-now = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S GMT")
-
-root = Element('opml')
-root.set('version', '2.0')
-
-head = SubElement(root, 'head')
-children(head, [
-    ('title', 'UCan EDEM630 Blogs'),
-    ('dateCreated', now)
-    ])
-
-body = SubElement(root, 'body')
-blogs = SubElement(body, 'outline')
-blogs.set('text', 'EDEM630 Blogs')
-#blogs.set('title', 'EDEM630 Blogs')
-
-active = dbfeeds.view('feed/activebytag', include_docs=True)
-for feed in active[tag.lower()]:
-    b = SubElement(blogs, 'outline')
-    b.set('text', feed.doc['from_user_name'])
-    b.set('type', 'rss')
-    if (feed.doc['type'] == 'gplus'):
-        url = 'http://gplus-to-rss.appspot.com/rss/%s' % feed.doc['url']
-    else:
-        url = feed.doc['url']
-        # use special case knowledge to find tag specific feeds
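-        # e.g. http://example.blogspot.com/ becomes
-        # http://example.blogspot.com/-/EDEM630/ and
-        # https://example.wordpress.com/feed becomes
-        # https://example.wordpress.com/tag/EDEM630/feed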
-        if tag_specific:
-            if re.search(r'\.((blogger)|(blogspot))\.co', url):
-                if url[-1] != '/':
-                    url += '/'
-                url = url + '-/%s/' % tag
-            else:
-                mo = re.match(r'(?P<site>.*?\.wordpress\.com/)feed.*', url)
-                if mo:
-                    url = mo.group('site') + 'tag/%s/feed' % tag
-    b.set('xmlUrl', url)
-    b.set('htmlUrl', feed.doc['web'])
-
-print prettify(root)
-open(dest, 'wt').write('<?xml version="1.0" encoding="utf-8"?>\n'
-        + tostring(root))
-
diff --git a/tools/sheet-to-feeds.py b/tools/sheet-to-feeds.py
deleted file mode 100644
index f6792e4de7039eed7decba75f884b1f396aa20c2..0000000000000000000000000000000000000000
--- a/tools/sheet-to-feeds.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/python
-
-# use a Google Sheet to set up feeds to harvest
-#  sheet-to-feeds.py GOOGLE-SHEETS-URL (readable by bot)
-
-import os
-import sys
-import json
-from hashlib import md5
-import urllib2
-from bs4 import BeautifulSoup
-import couchdb
-import gspread
-
-STARTROW = 1    # 0 indexed
-SETTAGS = ['ds4oer']
-
-SHEET_FULL_NAME = 1     # column indicies
-SHEET_EMAIL = 2
-SHEET_USER = 3
-SHEET_URL = 4
-
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['localcouch'])
-db = couch[options['dbfeeds']]
-
-print sys.argv[1]
-gc = gspread.login(options['gdata']['user'], options['gdata']['pass'])
-wks = gc.open_by_url(sys.argv[1]).sheet1
-
-sheet = wks.get_all_values()
-print sheet
-for row in sheet[STARTROW:]:
-    web = row[SHEET_URL]
-    page = urllib2.urlopen(web).read()
-    soup = BeautifulSoup(page)
-    url = ''
-    freq = 0
-    for l in soup.find_all('link', attrs={
-        'rel': 'alternate',
-        'type': 'application/rss+xml'}):
-        url = l['href']
-        freq = 1
-        break
-
-    doc = {
-            'freq':                 freq,
-            'from_user':            row[SHEET_USER],
-            'from_user_name':       row[SHEET_FULL_NAME],
-            'gravatar':             md5(row[SHEET_EMAIL].lower().strip()).hexdigest(),
-            'last_successful':      '2015-01-01T00:00:00.000Z',
-            'last_updated':         '2015-01-01T00:00:00.000Z',
-            'profile_image_url':    '',
-            'profile_image_width':  48,
-            'profile_url':          'http://WikiEducator.org/User:' + \
-                    row[SHEET_USER].replace(' ', '_'),
-            'tags':                 SETTAGS,
-            'type':                 'feed',
-            'url':                  url,
-            'we_tags':              '',
-            'web':                  row[SHEET_URL]
-            }
-    print doc
-    db.save(doc)
-    print
-
diff --git a/tools/text.py b/tools/text.py
deleted file mode 100644
index 7855043b1a71ba6ddcce42877ab8e7cfca12e8b3..0000000000000000000000000000000000000000
--- a/tools/text.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/python
-
-""" Pull out WEnotes text for a tag for building wordcloud """
-
-# Copyright 2012 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import couchdb
-import json
-import re
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('options.json', 'rt'))
-couch = couchdb.Server(options['localcouch'])
-db = couch[options['db']]
-
-f = open('ocl4ed.txt', 'wt')
-results = db.view('messages/tag_time', include_docs=True)
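-# couchdb-python view results can be sliced by key range: this takes
-# rows with keys between [tag, start-date] and [tag, end-date]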
-for row in results[['ocl4ed', '2012-10-01']:['ocl4ed', '2099-12-31']]:
-    source = row.doc['we_source']
-    text = row.doc['text']
-    if source in ('twitter', 'wikieducator', 'ask'):
-        user = row.doc['from_user']
-        username = row.doc['from_user_name']
-    elif source == 'identica':
-        user = row.doc['user']['screen_name']
-        username = row.doc['user']['name']
-    elif source == 'moodle':
-        user = row.doc['from_user_name']
-        username = user
-        if row.doc['truncated']:
-            text = text[:-3]
-    else:
-        print '===== unknown source'
-        for k,v in row.doc.items():
-            print '    %s: %s' % (k, v)
-    # strip out our hashtag
-    text = re.sub(r'(?i)#ocl4ed', '', text)
-    # strip out retweets
-    text = re.sub(r'(?i)rt:?\b', '', text)
-    # strip out addressees
-    text = re.sub(r'@\S+', '', text)
-    # strip out URLs
-    text = re.sub(r'(?i)http(s?)://\S+', '', text)
-    # strip out ellipses
-    text = text.replace('...', ' ')
-    text = text.replace('. . .', ' ')
-    f.write(text.encode('utf-8'))
-    f.write("\n")
-f.close()
-
diff --git a/tools/user-outputs.py b/tools/user-outputs.py
deleted file mode 100644
index edc5b75f42ae5b43b2fd7e17bf70a8822d8b6c6e..0000000000000000000000000000000000000000
--- a/tools/user-outputs.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/python
-
-## update Google spreadsheet with user participation metrics
-#  derived from the WEnotes aggregate feed
-#
-# 2013      jim@OERfoundation.org
-
-from __future__ import print_function
-import re
-import os
-import sys
-import json
-import urllib2
-import ConfigParser
-import gdata.spreadsheet.service
-import couchdb
-
-VERSION = '0.0.2'
-
-if len(sys.argv) != 2:
-    sys.exit("Usage: user-outputs.py SPREADSHEET_KEY")
-gskey = sys.argv[1]
-
-options = json.load(open(os.path.join(
-    os.path.dirname(os.path.abspath(__file__)),
-    os.path.pardir,
-    'options.json'), 'rt'))
-
-couch = couchdb.Server(options['localcouch'])
-db = couch['mentions']
-
-# outputs is a dictionary indexed by user name
-#     of dictionaries of source: count
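-#     e.g. {'JaneDoe': {'feed': 3, 'wikieducator': 12}} (illustrative)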
-outputs = {}
-for row in db.view('user/postcount', group_level=2):
-    source = row.key[0]
-    user = row.key[1]
-    if user in outputs:
-        outputs[user][source] = row.value
-    else:
-        outputs[user] = {source: row.value}
-print(outputs)
-print('---------------------------------------------------')
-
-# get login credentials from ~/.wikieducator.rc
-config = ConfigParser.SafeConfigParser()
-config.read(os.path.expanduser('~/.wikieducator.rc'))
-try:
-    guser = config.get('google', 'user')
-    gpassword = config.get('google', 'password')
-except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
-    print("missing user/password in ~/.wikieducator.rc", file=sys.stderr)
-
-# read the existing spreadsheet
-gs = gdata.spreadsheet.service.SpreadsheetsService()
-gs.email = guser
-gs.password = gpassword
-gs.source = 'user_outputs'
-gs.ProgrammaticLogin()
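-# 'od6' is the gdata identifier for a spreadsheet's default (first) worksheet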
-gsws = 'od6'
-
-for entry in gs.GetListFeed(gskey, gsws).entry:
-    sr = dict(zip(entry.custom.keys(),
-        [value.text for value in entry.custom.values()]))
-
-    changed = False
-
-    # blogposts and WEnotes are indexed by wikiname
-    wikiname = sr['wikiname']
-    if wikiname in outputs:
-        uouts = outputs[wikiname]
-        if 'feed' in uouts:
-            sr['blogposts'] = str(uouts['feed'])
-            changed = True
-        if 'wikieducator' in uouts:
-            sr['wenotes'] = str(uouts['wikieducator'])
-            changed = True
-    # twitter
-    if sr['twitter'] is not None:
-        twitterhandle = sr['twitter'].lower()
-        if twitterhandle in outputs and 'twitter' in outputs[twitterhandle]:
-            sr['tweets'] = str(outputs[twitterhandle]['twitter'])
-            changed = True
-    # g+
-    if sr['gplus'] is not None:
-        mo = re.search(r'(?P<gpid>\d{20,21})', sr['gplus'])
-        if mo:
-            gpid = mo.group('gpid')
-            if gpid in outputs and 'g+' in outputs[gpid]:
-                sr['gposts'] = str(outputs[gpid]['g+'])
-                changed = True
-    if changed:
-        gs.UpdateRow(entry, sr)
-
diff --git a/tools/util.py b/tools/util.py
deleted file mode 100644
index cb403a28c960e65e81b3169f2e06973120f91268..0000000000000000000000000000000000000000
--- a/tools/util.py
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/usr/bin/python
-
-# Copyright 2012 Open Education Resource Foundation
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-import couchdb
-import hashlib
-import json
-
-# retrieve URL including authentication credentials from config JSON
-options = json.load(open('options.json', 'rt'))
-couch = couchdb.Server(options['url'])
-db = couch[options['db']]
-
-for id in db:
-    doc = db[id]
-    if doc['we_source'] == 'twitter' and doc['we_tag'] == 'oeru':
-        hashtags = doc['entities']['hashtags']
-        print id, hashlib.sha1(doc['text'].encode('utf-8')).hexdigest()
-        print " ",
-        for h in hashtags:
-            print h['text'],
-        print
-
-