Recipe 511444 revision 1 « ActiveState Code

#! /usr/bin/env python
# -*- coding: utf-8 -*-


__author__ = 'gian paolo ciceri <gp.ciceri@gmail.com>'
__version__ = '0.1'
__date__ = '20070401'
__credits__ = "queue and MT code was shamelessly stolen from pycurl example retriever-multi.py"

#
# Usage: python grabYahooDataMt.py -h 
#
#
# for selecting tickers and starting date it uses an input file of this format 
# <ticker> <fromdate as YYYYMMDD>
# like
# ^GSPC 19500103 # S&P 500
# ^N225 19840104 # Nikkei 225

import sys, threading, Queue, datetime
import urllib
from optparse import OptionParser


# this thread ask the queue for job and does it!
class WorkerThread(threading.Thread):
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        while 1:
            try:
                # fetch a job from the queue
                ticker, fromdate, todate = self.queue.get_nowait()
            except Queue.Empty:
                raise SystemExit
            if ticker[0] == "^": 
                tick = ticker[1:]
            else:
                tick = ticker
            filename = downloadTo + "%s_%s.csv" % (tick, todate)
            fp = open(filename, "wb")
            if options.verbose:
                print "last date asked:", todate, todate[0:4], todate[4:6], todate[6:8] 
                print "first date asked:", fromdate, fromdate[0:4], fromdate[4:6], fromdate[6:8]
            quote = dict()
            quote['s'] = ticker
            quote['d'] = str(int(todate[4:6]) - 1)
            quote['e'] = str(int(todate[6:8]))
            quote['f'] = str(int(todate[0:4]))
            quote['g'] = "d" 
            quote['a'] = str(int(fromdate[4:6]) - 1)
            quote['b'] = str(int(fromdate[6:8]))
            quote['c'] = str(int(fromdate[0:4]))
            #print quote
            params = urllib.urlencode(quote)
            params += "&ignore=.csv"

            url = "http://ichart.yahoo.com/table.csv?%s" % params
            if options.verbose:
                print "fetching:", url           
            try:
                f = urllib.urlopen(url)
                fp.write(f.read())
            except:
                import traceback
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
            fp.close()
            if options.verbose:
                print url, "...fetched"
            else:
                sys.stdout.write(".")
                sys.stdout.flush()



if __name__ == '__main__':

    # today is
    today = datetime.datetime.now().strftime("%Y%m%d")
    
    # parse arguments
    parser = OptionParser()
    parser.add_option("-f", "--file", dest="tickerfile", action="store", default = "./tickers.txt",
    				  help="read ticker list from file, it uses ./tickers.txt as default")
    parser.add_option("-c", "--concurrent", type="int", dest="connections", default = 10, action="store",
    				  help="# of concurrent connections")
    parser.add_option("-d", "--dir", dest="downloadTo", action="store", default = "./rawdata/",
    				  help="save date to this directory, it uses ./rawdata/ as default")
    				  
    parser.add_option("-t", "--todate", dest="todate", default = today, action="store",
    				  help="most recent date needed")
    parser.add_option("-v", "--verbose",
    					  action="store_true", dest="verbose")
    parser.add_option("-q", "--quiet",
    					  action="store_false", dest="verbose")					 
    (options, args) = parser.parse_args()

    
    tickerfile = options.tickerfile
    downloadTo = options.downloadTo
    connections =  options.connections
    today = options.todate
    

    # get input list
    try:
    	tickers = open(tickerfile).readlines()
    except:
    	parser.error("ticker file %s not found" % (tickerfile,))
    	raise SystemExit
    
    
    # build a queue with (ticker, fromdate, todate) tuples
    queue = Queue.Queue()
    for tickerRow in tickers:
    	#print tickerRow
    	tickerRow = tickerRow.strip()
    	if not tickerRow or tickerRow[0] == "#":
    		continue
    	tickerSplit = tickerRow.split()
    	# ticker, fromdate, todate
    	queue.put((tickerSplit[0], tickerSplit[1], today))

    

    
    # Check args
    assert queue.queue, "no Tickers given"
    numTickers = len(queue.queue)
    connections = min(connections, numTickers)
    assert 1 <= connections <= 255, "too much concurrent connections asked"

    
    if options.verbose:
    	print "----- Getting", numTickers, "Tickers using", connections, "simultaneous connections -----"
    
    
    # start a bunch of threads, passing them the queue of jobs to do
    threads = []
    for dummy in range(connections):
    	t = WorkerThread(queue)
    	t.start()
    	threads.append(t)
    
    
    # wait for all threads to finish
    for thread in threads:
    	thread.join()
    sys.stdout.write("\n")
    sys.stdout.flush()
       	
    # tell something to the user before exiting
    if options.verbose:	   
    	print "all threads are finished - goodbye."
Recipe 511444 revision 1

History

Accounts

Code Recipes

Feedback & Information

ActiveState