Welcome, guest | Sign In | My Account | Store | Cart

How to get a stock's historical values from Google Finance

Python, 69 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import urllib2
import re
from table_parser import *


"""
By bussiere bussiere @at gmail.com
thanks to :
http://simbot.wordpress.com/2006/05/17/html-tables-parsed-using-python/
Nigel Sim <nigel.sim @at gmail.com>
http://simbot.wordpress.com

"""

# Recipe metadata kept as module-level string constants: author, contact
# e-mail, title ("Titre"), description, discussion and search tags.
# Nothing else in the visible code reads these values.
__Author__ ="bussiere"
__Email__ = "bussiere @at gmail.com"
__Titre__ = "get some value with google finance"
__Description__ = "get the historical value on google finance"
__Discussion__ = "A beginnig for a stock analyze program."
__Tags__ ="google fiance stock value historical"

def get_finance(value):
    """Return the historical price data Google Finance publishes for ``value``.

    The function loads the Google Finance page for ``value``, follows the
    first "/finance/historical?q..." link found in it, and then tries to
    download the CSV export linked from that historical page.  If the CSV
    cannot be located or downloaded, it falls back to feeding the
    ``<div id=prices>`` table of the historical page to ``TableParser``.

    Args:
        value: Ticker / query text interpolated verbatim into the ``q=``
            parameter of the Google Finance URL.

    Returns:
        The raw CSV text when the download succeeds, otherwise the ``doc``
        attribute of the ``TableParser`` instance (its exact shape is
        defined by the table_parser module, not here).

    Raises:
        IndexError: If the page contains no historical-data link, or if the
            fallback finds no prices table to parse.
        urllib2.URLError: If the quote page or the historical page cannot
            be fetched.
    """
    def read_url(url):
        # Always close the response, even if read() fails, so the
        # underlying socket is not leaked.
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    page = read_url("http://finance.google.com/finance?q=%s" % value)

    # Match only up to the closing quote of the attribute holding the link.
    # A greedy ".*" would run to the LAST quote on the line and drag any
    # following attributes into the URL.
    historical_paths = re.findall(r'/finance/historical\?q[^"\n]*(?=")', page)
    if not historical_paths:
        raise IndexError(
            "no historical-data link found on the Google Finance page "
            "for %r" % (value,))
    hist = read_url("http://finance.google.com%s" % historical_paths[0])

    csv_links = re.findall(r'http://finance.*csv', hist)
    try:
        # Preferred path: download the CSV export when a link is present.
        return read_url(csv_links[0])
    except Exception:
        # Deliberate best-effort: a missing link (IndexError) or any
        # download failure falls back to scraping the HTML table.
        # "except Exception" (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        tables = re.findall(r'<div id=prices>.*?</table>', hist, re.S)
        if not tables:
            raise IndexError(
                "neither a CSV link nor a prices table was found "
                "for %r" % (value,))
        parser = TableParser()
        parser.feed(tables[0])
        return parser.doc
    
    

def main(argv=None):
    """Command-line entry point: print the historical data for one symbol.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first,
            symbol second).  Defaults to ``sys.argv`` when omitted.

    Returns:
        0 after printing the data, or 2 when no symbol was supplied (a
        usage line is written to stderr in that case).  The value is
        suitable for passing to ``sys.exit``.
    """
    # Imported here so the function also works when the module is imported
    # rather than executed as a script.
    import sys

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        program = argv[0] if argv else "get_finance"
        sys.stderr.write("usage: %s SYMBOL\n" % program)
        return 2
    print(get_finance(argv[1]))
    return 0








if __name__ == "__main__":
    # Script entry point: run main() and hand its result to the
    # interpreter as the process exit status.
    import sys
    exit_status = main()
    sys.exit(exit_status)

The beginning of a stock value analysis program.

6 comments

brano sobotka 15 years, 7 months ago  # | flag

Nice, but where does table_parser come from?

Michele Bertoldi 15 years, 7 months ago  # | flag

I used the CSV output from http://ichart.finance.yahoo.com/table.csv. Here is a multithreaded example:

import urllib
from datetime import datetime
from threading import Thread
from Queue import Queue

base_url="http://ichart.finance.yahoo.com/table.csv?"

def get_historical(symbols, start=None, end=None, threads=0):
    """Fetch historical quotes for one symbol or for a collection of symbols.

    Args:
        symbols: A single symbol string, or an iterable of symbol strings.
        start: Optional start date, passed through to
            ``get_historical_single``.
        end: Optional end date, passed through to ``get_historical_single``.
        threads: Number of worker threads used to download in parallel.
            0 (the default) downloads the symbols one after another.

    Returns:
        For a single symbol string, whatever ``get_historical_single``
        returns.  Otherwise a dict mapping each symbol to its
        ``get_historical_single`` result.

    Raises:
        Whatever ``get_historical_single`` raises.  In threaded mode the
        first error recorded by a worker is re-raised in the calling
        thread once all queued symbols have been handled.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str
    if isinstance(symbols, string_types):
        return get_historical_single(symbols, start, end)

    quotes = {}
    if not threads:
        for sym in symbols:
            quotes[sym] = get_historical_single(sym, start, end)
        return quotes

    q = Queue()
    errors = []
    stop = object()  # sentinel telling a worker to exit

    def quoter():
        while True:
            sym = q.get()
            try:
                if sym is stop:
                    return
                quotes[sym] = get_historical_single(sym, start, end)
            except Exception as exc:
                # Record the failure instead of letting the thread die;
                # a dead worker would leave q.join() waiting forever.
                errors.append(exc)
            finally:
                # Must run for every item taken, or q.join() never returns.
                q.task_done()

    for _ in range(threads):
        t = Thread(target=quoter)
        t.daemon = True
        t.start()
    for sym in symbols:
        q.put(sym)
    q.join()
    # Release the workers so they do not linger blocked on q.get().
    for _ in range(threads):
        q.put(stop)
    if errors:
        raise errors[0]
    return quotes

def get_historical_single(symbol, start=None, end=None):
    """Download and parse the historical quote CSV for one symbol.

    The URL is built from the module-level ``base_url`` plus the symbol
    and, when given, the start/end dates.  Each data row is expected to be
    ``Date,Open,High,Low,Close,Volume,Adj Close`` with an ISO
    ``YYYY-MM-DD`` date; the first line of the response is treated as a
    header and skipped.

    Args:
        symbol: Ticker symbol appended to the URL as the ``s`` parameter.
        start: Optional object with ``month``/``day``/``year`` attributes
            (e.g. ``datetime``) giving the first date wanted.
        end: Optional object of the same kind giving the last date wanted.

    Returns:
        A dict with these keys:
            'raw'             list of per-row dicts (date, open, high, low,
                              close, volume, adj_close)
            'by_date'         the same row dicts keyed by their datetime
            'dates', 'opens', 'highs', 'lows', 'closes', 'volumes',
            'adjusted_closes' parallel per-column lists in response order

    Raises:
        ValueError / IndexError: If a row does not have the expected
            layout (for example when the server returns an error page).
        IOError: If the URL cannot be opened.
    """
    full_url = base_url + "&s=" + symbol
    # The month is sent as month-1 for both dates.
    if start:
        full_url += "&a=%i&b=%i&c=%i" % (start.month - 1, start.day, start.year)
    if end:
        full_url += "&d=%i&e=%i&f=%i" % (end.month - 1, end.day, end.year)
    full_url += "&g=d"  # presumably "daily" granularity -- TODO confirm

    quotes = {
        'raw': [],
        'by_date': {},
        'dates': [],
        'opens': [],
        'highs': [],
        'lows': [],
        'closes': [],
        'volumes': [],
        'adjusted_closes': [],
    }

    response = urllib.urlopen(full_url)
    try:
        payload = response.read()
    finally:
        # Close the connection even if the read fails.
        response.close()

    # Skip the header line; ignore blank lines so a missing (or extra)
    # trailing newline never costs us a data row.
    for quote_line in payload.splitlines()[1:]:
        if not quote_line.strip():
            continue
        # quote_line structure: Date,Open,High,Low,Close,Volume,Adj Close
        splt_q = quote_line.split(',')
        date = datetime(*(map(int, splt_q[0].split('-'))))
        op = float(splt_q[1])
        hi = float(splt_q[2])
        lo = float(splt_q[3])
        close = float(splt_q[4])
        volume = int(splt_q[5])
        adj_close = float(splt_q[6])
        quote = dict(date=date, open=op, high=hi, low=lo, close=close,
                     volume=volume, adj_close=adj_close)
        quotes['raw'].append(quote)
        quotes['by_date'][date] = quote
        quotes['dates'].append(date)
        quotes['opens'].append(op)
        quotes['highs'].append(hi)
        quotes['lows'].append(lo)
        quotes['closes'].append(close)
        quotes['volumes'].append(volume)
        quotes['adjusted_closes'].append(adj_close)
    return quotes

if __name__ == '__main__':
    # Demo: download quotes since 2005-01-01 for a few symbols using four
    # worker threads, then report how many rows each symbol returned.
    start_date = datetime(2005, 1, 1)
    symbols = ['F.MI', 'AAPL', 'IBM', 'GOOG']
    # get_historical's keyword is "start"; passing "start_date=" raises
    # TypeError (unexpected keyword argument).
    quotes = get_historical(symbols, start=start_date, threads=4)
    for k in symbols:
        print('%s: %i quotes' % (k, len(quotes[k]['closes'])))
sebastien.renard 15 years, 6 months ago  # | flag

Look at itrade (written in Python), which has lots of financial interfaces. It also supports proxies and handles connection timeouts.

http://itrade.sf.net

angelina carrera 13 years, 8 months ago  # | flag
Hi, what is the use of this function (def get_finance(value):)? Below, you are not calling the function anywhere. And where do you get table_parser from?

http://www.samestock.com

John Deere 12 years, 5 months ago  # | flag

Hi,

I just copied and pasted Michele Bertoldi's logic, but it gives me a SyntaxError on print '%s: %i quotes'%(k,len(quotes[k]['closes'])). Could anyone tell me why I am getting a SyntaxError?

Thanks, jd

Michele Bertoldi 11 years ago  # | flag

John, this is the correct version:

import urllib
from datetime import datetime
from threading import Thread
from Queue import Queue

base_url="http://ichart.finance.yahoo.com/table.csv?"

def get_historical(symbols, start=None, end=None, threads=0):
    """Fetch historical quotes for one symbol or for a collection of symbols.

    Args:
        symbols: A single symbol string, or an iterable of symbol strings.
        start: Optional start date, passed through to
            ``get_historical_single``.
        end: Optional end date, passed through to ``get_historical_single``.
        threads: Number of worker threads used to download in parallel.
            0 (the default) downloads the symbols one after another.

    Returns:
        For a single symbol string, whatever ``get_historical_single``
        returns.  Otherwise a dict mapping each symbol to its
        ``get_historical_single`` result.

    Raises:
        Whatever ``get_historical_single`` raises.  In threaded mode the
        first error recorded by a worker is re-raised in the calling
        thread once all queued symbols have been handled.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str
    if isinstance(symbols, string_types):
        return get_historical_single(symbols, start, end)

    quotes = {}
    if not threads:
        for sym in symbols:
            quotes[sym] = get_historical_single(sym, start, end)
        return quotes

    q = Queue()
    errors = []
    stop = object()  # sentinel telling a worker to exit

    def quoter():
        while True:
            sym = q.get()
            try:
                if sym is stop:
                    return
                quotes[sym] = get_historical_single(sym, start, end)
            except Exception as exc:
                # Record the failure instead of letting the thread die;
                # a dead worker would leave q.join() waiting forever.
                errors.append(exc)
            finally:
                # Must run for every item taken, or q.join() never returns.
                q.task_done()

    for _ in range(threads):
        t = Thread(target=quoter)
        t.daemon = True
        t.start()
    for sym in symbols:
        q.put(sym)
    q.join()
    # Release the workers so they do not linger blocked on q.get().
    for _ in range(threads):
        q.put(stop)
    if errors:
        raise errors[0]
    return quotes

def get_historical_single(symbol, start=None, end=None):
    """Download and parse the historical quote CSV for one symbol.

    The URL is built from the module-level ``base_url`` plus the symbol
    and, when given, the start/end dates.  Each data row is expected to be
    ``Date,Open,High,Low,Close,Volume,Adj Close`` with an ISO
    ``YYYY-MM-DD`` date; the first line of the response is treated as a
    header and skipped.

    Args:
        symbol: Ticker symbol appended to the URL as the ``s`` parameter.
        start: Optional object with ``month``/``day``/``year`` attributes
            (e.g. ``datetime``) giving the first date wanted.
        end: Optional object of the same kind giving the last date wanted.

    Returns:
        A dict with these keys:
            'raw'             list of per-row dicts (date, open, high, low,
                              close, volume, adj_close)
            'by_date'         the same row dicts keyed by their datetime
            'dates', 'opens', 'highs', 'lows', 'closes', 'volumes',
            'adjusted_closes' parallel per-column lists in response order

    Raises:
        ValueError / IndexError: If a row does not have the expected
            layout (for example when the server returns an error page).
        IOError: If the URL cannot be opened.
    """
    full_url = base_url + "&s=" + symbol
    # The month is sent as month-1 for both dates.
    if start:
        full_url += "&a=%i&b=%i&c=%i" % (start.month - 1, start.day, start.year)
    if end:
        full_url += "&d=%i&e=%i&f=%i" % (end.month - 1, end.day, end.year)
    full_url += "&g=d"  # presumably "daily" granularity -- TODO confirm

    quotes = {
        'raw': [],
        'by_date': {},
        'dates': [],
        'opens': [],
        'highs': [],
        'lows': [],
        'closes': [],
        'volumes': [],
        'adjusted_closes': [],
    }

    response = urllib.urlopen(full_url)
    try:
        payload = response.read()
    finally:
        # Close the connection even if the read fails.
        response.close()

    # Skip the header line; ignore blank lines so a missing (or extra)
    # trailing newline never costs us a data row.
    for quote_line in payload.splitlines()[1:]:
        if not quote_line.strip():
            continue
        # quote_line structure: Date,Open,High,Low,Close,Volume,Adj Close
        splt_q = quote_line.split(',')
        date = datetime(*(map(int, splt_q[0].split('-'))))
        op = float(splt_q[1])
        hi = float(splt_q[2])
        lo = float(splt_q[3])
        close = float(splt_q[4])
        volume = int(splt_q[5])
        adj_close = float(splt_q[6])
        quote = dict(date=date, open=op, high=hi, low=lo, close=close,
                     volume=volume, adj_close=adj_close)
        quotes['raw'].append(quote)
        quotes['by_date'][date] = quote
        quotes['dates'].append(date)
        quotes['opens'].append(op)
        quotes['highs'].append(hi)
        quotes['lows'].append(lo)
        quotes['closes'].append(close)
        quotes['volumes'].append(volume)
        quotes['adjusted_closes'].append(adj_close)
    return quotes

if __name__ == '__main__':
    # Demo run: fetch quotes from 2005-01-01 onward for four symbols with
    # four worker threads, then print the number of rows per symbol.
    symbols = ['F.MI', 'AAPL', 'IBM', 'GOOG']
    start_date = datetime(2005, 1, 1)
    quotes = get_historical(symbols, start=start_date, threads=4)
    for k in symbols:
        print('%s: %i quotes' % (k, len(quotes[k]['closes'])))