#!/usr/bin/env python
"""
This little script presents new heise-news-articles individually by title
and asks if it should download the corresponding mp3-file.
"""

import threading
import Queue
import os
import feedparser
from urllib import urlretrieve
#-----------------------------------------------------------------------------#
# Number of concurrent download worker threads.
n_threads = 10
# RDF newsticker feed to poll for new articles.
feed_url = "http://www.heise.de/newsticker/heise.rdf"
# Prefix of the mp3 download URL; the article id gets appended.
left_link = ("http://www.heise.de/fastbin/audio_download"
             "?meldung=http://www.heise.de/newsticker/meldung/")
# Archive of already-seen links: ~/.heise on Unix, HOMEPATH on Windows.
try:
    archive_filename = "%s/.heise" % os.environ["HOME"]
except KeyError:
    archive_filename = "%s%sheise_archive" % (os.environ["HOMEPATH"], os.sep)
#-----------------------------------------------------------------------------#
class Downloader(threading.Thread):
    """ Class for worker-threads that download files. Don't tell Marx!

    Each instance is a daemon thread that starts itself on construction
    and consumes (url, filename) pairs from the shared queue forever.
    """
    #-------------------------------------------------------------------------#
    def __init__(self, links_filenames):
        """links_filenames: Queue.Queue yielding (url, local_filename) tuples."""
        threading.Thread.__init__(self)
        # Daemon thread, so a hung download can't keep the interpreter
        # alive once the main thread has finished.
        self.setDaemon(True)
        self.links_filenames = links_filenames
        self.start()
    #-------------------------------------------------------------------------#
    def run(self):
        """Consume the queue forever, downloading each queued file."""
        while True:
            link, filename = self.links_filenames.get()
            # task_done() must be called even if the download raises;
            # otherwise links_filenames.join() in the main thread would
            # block forever on a single failed download.
            try:
                urlretrieve(link, filename)
            finally:
                self.links_filenames.task_done()
#-----------------------------------------------------------------------------#
class Archive(object):
    """Fetches the feed and tracks which article links were already seen.

    The set of previously seen links is persisted in `archive_filename`;
    everything in the current feed but not in the archive counts as new.
    """
    #-------------------------------------------------------------------------#
    def __init__(self):
        feed = feedparser.parse(feed_url)
        try:
            # "with" guarantees the archive file is closed even if
            # reading fails halfway through (the old explicit close()
            # leaked the handle on error).
            with open(archive_filename) as archive_file:
                self.old_links = [link.strip() for link in archive_file]
        except IOError:
            # No archive yet -- treat every feed entry as new.
            self.old_links = []
        self.entries_i = range(len(feed["entries"]))
        self.feed_links = [feed["entries"][entry_i]["link"].encode("utf-8")
                           for entry_i in self.entries_i]
        self.feed = feed
    #-------------------------------------------------------------------------#
    def get_new_entries(self):
        """Return ([links], [titles], [article_ids]) for unseen articles."""
        new_links = [link for link in self.feed_links
                     if link not in self.old_links]
        titles = [self.feed["entries"][entry_i]["title"].encode("utf-8")
                  for entry_i in self.entries_i
                  if self.feed["entries"][entry_i]["link"].encode("utf-8")
                  in new_links]
        # the article_id is in the link between "meldung/" and "/from"
        article_ids = [link.split("meldung/")[1].split("/from")[0]
                       for link in new_links]
        return new_links, titles, article_ids
    #-------------------------------------------------------------------------#
    def store(self):
        """Persist all current feed links so they are skipped next run."""
        # writelines() on a plain string wrote it char-by-char; a single
        # write() is equivalent and clearer.  "with" closes on error too.
        with open(archive_filename, "w") as archive_file:
            archive_file.write("\n".join(self.feed_links))
#-----------------------------------------------------------------------------#
def prepare_workers():
    """Create the shared download queue and spawn the worker pool.

    All `n_threads` Downloader threads share one queue; returning any
    single worker gives the caller access to that shared queue, so the
    first one is handed back.
    """
    queue = Queue.Queue()
    workers = [Downloader(queue) for _ in range(n_threads)]
    return workers[0]
#-----------------------------------------------------------------------------#
def start_download(link, title, id, downloader):
    """Sanitize the title into a filename and enqueue the mp3 download.

    Characters that are unsafe in filenames are stripped (spaces become
    underscores); the job is handed to the shared worker queue.
    """
    for bad, good in (("/", ""), (":", ""), (" ", "_"), ('"', ""), ("?", "")):
        title = title.replace(bad, good)
    filename = "heise_%s_%s.mp3" % (id, title)
    downloader.links_filenames.put((left_link + id, filename))
#-----------------------------------------------------------------------------#
if __name__ == "__main__":
    downloader
= prepare_workers()

    feed_archive
= Archive()
    links
, titles, ids = feed_archive.get_new_entries()

   
for link, title, id in zip(links, titles, ids):
        download_yn
= None
       
while download_yn != "y" and download_yn != "n" and download_yn != "c":
           
print title
            download_yn
= raw_input('Download mp3? (y/[n]/c)')
           
if download_yn == "":
                download_yn
= "n"
       
if download_yn == "y":
            start_download
(link, title, id, downloader)
       
if download_yn == "c":
           
break

   
if links:
       
print "Waiting for downloads to end..."
        downloader
.links_filenames.join()
        feed_archive
.store()
