ActiveState Code

Recipe 576495: get a stock historical value from google finance


How to get a stock's historical values from Google Finance.

Python
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import urllib2
import re
from table_parser import *


"""
By bussiere bussiere @at gmail.com
thanks to :
http://simbot.wordpress.com/2006/05/17/html-tables-parsed-using-python/
Nigel Sim <nigel.sim @at gmail.com>
http://simbot.wordpress.com

"""

# Recipe metadata (author, contact, title, description, tags).
# None of the code visible in this file reads these names.
__Author__ ="bussiere"
__Email__ = "bussiere @at gmail.com"
__Titre__ = "get some value with google finance"
__Description__ = "get the historical value on google finance"
__Discussion__ = "A beginnig for a stock analyze program."
__Tags__ ="google fiance stock value historical"

def get_finance(value):
    """Fetch historical price data for a stock from Google Finance.

    The quote page for ``value`` is downloaded and searched for the link to
    the historical-prices page. That page is then searched for a CSV download
    link. If the CSV cannot be fetched, the prices table embedded in the
    historical page is fed to ``TableParser`` instead.

    Args:
        value: Stock symbol (query string) interpolated into the quote URL.

    Returns:
        The body of the CSV download when available; otherwise ``p.doc`` of
        the ``TableParser`` instance after parsing the prices table
        (presumably the parsed rows -- see table_parser for the exact shape).

    Raises:
        IndexError: If no historical-prices link is found on the quote page,
            or the CSV is unavailable and no prices table is present.
        urllib2.URLError: If the quote or historical page cannot be fetched.
    """
    # Build the quote-page URL for the requested symbol and download it.
    link = "http://finance.google.com/finance?q=%s" % value
    page = urllib2.urlopen(link).read()

    # Locate the relative link to the historical page. Matching up to (but
    # not including) the next double quote keeps the match inside the href
    # even when further quoted attributes follow on the same line.
    historical_links = re.findall(r'/finance/historical\?q[^"]*', page)
    if not historical_links:
        raise IndexError(
            "no historical-prices link found for %r" % (value,))
    histlink = "http://finance.google.com%s" % historical_links[0]
    hist = urllib2.urlopen(histlink).read()

    # Look for the CSV download link; the character class keeps the match
    # inside a single URL instead of running on to a later "csv" on the line.
    csv_links = re.findall(r'http://finance[^"\'\s<>]*csv', hist)
    try:
        # Raises IndexError when no CSV link exists, or a network error when
        # the download fails; either way we fall back to the HTML table.
        return urllib2.urlopen(csv_links[0]).read()
    except Exception:
        # Deliberately broad (best-effort fallback), but unlike a bare
        # ``except`` this lets KeyboardInterrupt and SystemExit through.
        tables = re.findall('<div id=prices>.*?</table>', hist, re.S)
        if not tables:
            raise IndexError(
                "no CSV link or prices table found for %r" % (value,))
        p = TableParser()
        p.feed(tables[0])
        return p.doc
    
    

def main(argv=None):
    """Command-line entry point: print the historical data for one symbol.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first).
            Defaults to ``sys.argv`` when omitted.

    Returns:
        ``2`` when no symbol was supplied (a usage message is written to
        stderr); otherwise ``None`` after printing the data.
    """
    # Imported locally so main() also works when this module is imported
    # rather than run as a script.
    import sys

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else "get_finance"
        sys.stderr.write("usage: %s SYMBOL\n" % prog)
        return 2
    # The symbol is the first command-line argument.
    print(get_finance(argv[1]))








if __name__ == "__main__":
    # Runs only when the file is executed as a script, not when imported.
    import sys
    # Call main() and use its return value as the process exit status.
    sys.exit(main())

Discussion

The beginning of a stock value analysis program.

Comments

  1. 1. At 3:17 a.m. on 15 sep 2008, brano sobotka said:

    Nice, but where does table_parser come from?

  2. 2. At 8:18 p.m. on 20 sep 2008, Michele Bertoldi said:

    I used the CSV output from http://ichart.finance.yahoo.com/table.csv; here is a multithreaded example:

    import urllib
    from datetime import datetime
    from threading import Thread
    from Queue import Queue
    
    base_url="http://ichart.finance.yahoo.com/table.csv?"
    
    def get_historical(symbols,start=None,end=None,threads=0):
        """Fetch historical quotes for one symbol or for several symbols.

        Args:
            symbols: A single symbol string, or an iterable of symbol strings.
            start: Optional start date, passed through to
                get_historical_single.
            end: Optional end date, passed through to get_historical_single.
            threads: Number of worker threads to download with. ``0`` (the
                default) downloads the symbols one after another.

        Returns:
            For a single symbol string, the result of get_historical_single.
            Otherwise a dict mapping each symbol to its result.

        Raises:
            Exception: Whatever get_historical_single raises. In threaded
                mode the first failure recorded by a worker is re-raised once
                all queued symbols have been processed.
        """
        if isinstance(symbols, str):
            return get_historical_single(symbols, start, end)

        quotes = {}
        if not threads:
            for sym in symbols:
                quotes[sym] = get_historical_single(sym, start, end)
            return quotes

        errors = []
        q = Queue()

        def quoter():
            # Worker loop: runs until the process exits (daemon thread).
            while True:
                sym, first, last = q.get()
                try:
                    quotes[sym] = get_historical_single(sym, first, last)
                except Exception as exc:
                    # Keep the worker alive so the rest of the queue is still
                    # drained; the failure is reported after join().
                    errors.append(exc)
                finally:
                    # Always acknowledge the item, otherwise q.join() below
                    # would block forever after a failed download.
                    q.task_done()

        for _ in range(threads):
            t = Thread(target=quoter)
            t.daemon = True
            t.start()
        for sym in symbols:
            q.put((sym, start, end))
        q.join()
        if errors:
            # Match the sequential mode, where a failure reaches the caller.
            raise errors[0]
        return quotes
    
    def get_historical_single(symbol,start=None,end=None):
        """Download and parse the daily quote CSV for one symbol.

        Args:
            symbol: Ticker symbol appended to ``base_url`` as the ``s``
                parameter.
            start: Optional object with ``month``, ``day`` and ``year``
                attributes (e.g. a datetime); sent as the ``a``/``b``/``c``
                parameters, with the month as ``month - 1``.
            end: Optional object of the same kind; sent as ``d``/``e``/``f``.

        Returns:
            A dict with these keys:
              'raw'      -- list of per-row dicts (date, open, high, low,
                            close, volume, adj_close), in file order
              'by_date'  -- the same row dicts keyed by their datetime
              'dates', 'opens', 'highs', 'lows', 'closes', 'volumes',
              'adjusted_closes' -- one list per column, in file order

        Raises:
            ValueError, IndexError: If a data row does not have the expected
                ``Date,Open,High,Low,Close,Volume,Adj Close`` layout.
        """
        full_url = base_url + "&s=" + symbol
        if start:
            full_url += "&a=%i&b=%i&c=%i" % (start.month - 1, start.day, start.year)
        if end:
            full_url += "&d=%i&e=%i&f=%i" % (end.month - 1, end.day, end.year)
        # g=d is sent unconditionally (presumably selects daily data).
        full_url += "&g=d"

        quotes = {'raw': [], 'by_date': {}}
        for column in ('dates', 'opens', 'highs', 'lows', 'closes',
                       'volumes', 'adjusted_closes'):
            quotes[column] = []

        response = urllib.urlopen(full_url)
        try:
            body = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()

        # The first line is the CSV header. Blank lines are skipped so the
        # last data row is kept whether or not the body ends with a newline.
        for quote_line in body.splitlines()[1:]:
            if not quote_line.strip():
                continue
            # quote_line structure: Date,Open,High,Low,Close,Volume,Adj Close
            splt_q = quote_line.split(',')
            date = datetime(*map(int, splt_q[0].split('-')))
            op = float(splt_q[1])
            hi = float(splt_q[2])
            lo = float(splt_q[3])
            close = float(splt_q[4])
            vol = int(splt_q[5])
            adj_close = float(splt_q[6])
            quote = dict(date=date, open=op, high=hi, low=lo, close=close,
                         volume=vol, adj_close=adj_close)
            quotes['raw'].append(quote)
            quotes['by_date'][date] = quote
            quotes['dates'].append(date)
            quotes['opens'].append(op)
            quotes['highs'].append(hi)
            quotes['lows'].append(lo)
            quotes['closes'].append(close)
            quotes['volumes'].append(vol)
            quotes['adjusted_closes'].append(adj_close)
        return quotes
    
    if __name__ == '__main__':
        # Demo: download quotes since 2005-01-01 for a few symbols using
        # four worker threads, then report how many rows each one returned.
        start_date = datetime(2005, 1, 1)
        symbols = ['F.MI', 'AAPL', 'IBM', 'GOOG']
        # get_historical names this parameter ``start``; passing
        # ``start_date=`` raises TypeError (unexpected keyword argument).
        quotes = get_historical(symbols, start=start_date, threads=4)
        for k in symbols:
            print('%s: %i quotes' % (k, len(quotes[k]['closes'])))
    
  3. 3. At 2:50 p.m. on 30 sep 2008, sebastien.renard said:

    Look at itrade (written in Python), which has lots of financial interfaces. It also supports proxies and handles connection timeouts.

    http://itrade.sf.net

Sign in to comment