#!/usr/bin/env python

# -*- coding: utf-8 -*-

import re
import sys
import os
import datetime
import PyRSS2Gen
import urllib
import xml.dom.minidom
from BeautifulSoup import BeautifulSoup

try:
    import cPickle as pickle
except:
    import pickle

# Configuration

# File name of the pickled package set kept from the previous run.
package_filename = "packages.pickle"
# File name of the generated RSS feed.
rss_filename     = "pypi.xml"
# Maximum number of packages listed in the RSS feed.
amount_items     = 20

# Don't change anything below here.

# Both files are stored in the directory of the script itself (taken from
# sys.argv[0]), so the result does not depend on the current working
# directory of the caller.
pathname = os.path.dirname(sys.argv[0])
path     = os.path.abspath(pathname)

# os.path.join picks the right separator and avoids a doubled one when
# the script directory is the filesystem root.
package_file = os.path.join(path, package_filename)
rss_file     = os.path.join(path, rss_filename)

# Load the pickled set of (package, description) tuples that the previous
# run of this script stored.  If the file is missing, unreadable or
# corrupt, we start from scratch with an empty set.
# NOTE(review): unpickling is only safe as long as this file is written
# by this script alone -- never point package_file at untrusted data.

try:
    # The 'with' block closes the handle even if read() fails.
    with open(package_file) as package_fh:
        f_content_alt = package_fh.read()
    old_packages = pickle.loads(f_content_alt)
except Exception:
    # Deliberately broad best-effort fallback; unlike a bare "except:"
    # it still lets KeyboardInterrupt and SystemExit propagate.
    old_packages = set()
 
# Fetch the HTML index page of the Python Package Index, which lists
# all available packages.

response = urllib.urlopen('http://pypi.python.org/pypi?:action=index')
try:
    content = response.read()
finally:
    # Release the connection even if reading the body fails.
    response.close()

soup          = BeautifulSoup(content)
curr_packages = set()

# Extract every package name together with its description from the
# table rows of that page and collect them as (package, description)
# tuples in 'curr_packages'.

# Compiled once, outside the loop; captures the package name from the
# link target "/pypi/<package>/<rest>".
package_link_re = re.compile(r'<a href="/pypi/(.*)/.*">')

rows_odd  = soup.findAll('tr', 'odd')
rows_even = soup.findAll('tr', 'even')
rows      = rows_odd + rows_even

for row in rows:
    cells = row.findAll('td')

    # A row without both a link cell and a description cell cannot be a
    # package entry; skip it instead of aborting the run with IndexError.
    if len(cells) < 2 or not cells[0].contents:
        continue

    link = cells[0].contents
    matches = package_link_re.findall(str(link[0]))
    if not matches:
        # First cell holds no package link -- not a package row.
        continue
    package = matches[0]

    _description = cells[1].contents
    description = ""
    if _description:
        # If there are problems with the encoding (or the first child
        # is not a plain string), we don't care about the description
        # and keep it empty.
        try:
            description = _description[0].decode('ascii')
        except Exception:
            description = ""

    curr_packages.add((package, description))


# The set difference leaves exactly those packages that were not known
# on the previous run.

new_packages = curr_packages - old_packages

# Known packages come first, the newly found ones are appended at the end.
# NOTE(review): both groups originate from sets, so the order of the
# entries inside each group is arbitrary.

list_new_packages = list(new_packages)
list_old_packages = list(old_packages) + list_new_packages

# The RSS feed only carries the last 'amount_items' entries of that list,
# reversed so that the most recently appended packages come first.

list_rss_packages = list_old_packages[-amount_items:][::-1]

# Only when new packages were found do we regenerate the RSS feed and
# the stored package set; otherwise both files are left untouched.

if new_packages:

    # One feed item per selected package: the title is the package name,
    # the link points to its PyPI page.
    item_list = [
        PyRSS2Gen.RSSItem(
            title = str(package[0]),
            link = "http://pypi.python.org/pypi/%s/" % package[0],
            description = str(package[1]),
        )
        for package in list_rss_packages
    ]

    rss = PyRSS2Gen.RSS2(
        title = "New Python Packages @ http://pypi.python.org/pypi",
        link  = "http://pypi.python.org/pypi?:action=index",
        items = item_list,
        description   = "The latest modules at the Python Package Index",
        # PyRSS2Gen renders datetimes as GMT, so it must be given UTC;
        # local time would shift the build date by the UTC offset.
        lastBuildDate = datetime.datetime.utcnow(),
    )

    # Not really needed for our RSS feed, but it is nice to have a
    # pretty-printed XML file that is also readable in a text editor.
    # The document gets its own name ('dom') so that the imported 'xml'
    # package is not rebound.

    rss_xml = rss.to_xml()
    dom = xml.dom.minidom.parseString(rss_xml)
    pretty_xml = dom.toprettyxml()

    # 'with' closes the file even if the write fails.
    with open(rss_file, "w") as f:
        f.write(pretty_xml)

    # We save the packages as a serialized set, so the next script run
    # can detect only the packages that are new by then.

    old_packages = set(list_old_packages)
    content_alt = pickle.dumps(old_packages)

    with open(package_file, mode="w") as f_content_alt:
        f_content_alt.write(content_alt)


