Fetching Intraday data from Google using Python
All credits to RMIKE
Here is an yet another interesting python tutorial to fetch intraday data using Google Finance API , store the data in csv format and also plot the intraday data as candlestick format. We are using plotly library for plotting candlestick charts and pandas to manage time-series data. Luckily found and interesting python code which fetches google intraday data and store in csv format. Done some little modification in the code (exchange added to the Google API) so that one can fetch data for any exchange.
In our example we try to retrieve the data (Date,Time,Symbol,Open,High,Low,Close,Volume data) for RCOM (Reliance Communication) and plot as candlesticks using plotly library. Download the sample RCOM CSV file fetched from Google Finance
Sample IPython Notebook using Plotly and pandas to plot Interactive Intraday Candlestick Charts using Google Finance API :
Here is an yet another interesting python tutorial to fetch intraday data using Google Finance API , store the data in csv format and also plot the intraday data as candlestick format. We are using plotly library for plotting candlestick charts and pandas to manage time-series data. Luckily found and interesting python code which fetches google intraday data and store in csv format. Done some little modification in the code (exchange added to the Google API) so that one can fetch data for any exchange.
In our example we try to retrieve the data (Date,Time,Symbol,Open,High,Low,Close,Volume data) for RCOM (Reliance Communication) and plot as candlesticks using plotly library. Download the sample RCOM CSV file fetched from Google Finance
Sample IPython Notebook using Plotly and pandas to plot Interactive Intraday Candlestick Charts using Google Finance API :
In [1]:
import requests.packages.urllib3
requests.packages.urllib3.disable_warnings()
In [2]:
import plotly
plotly.__version__
Out[2]:
'1.9.0'
Code to Fetch Google Intrday Data and Save in CSV Format
In [7]:
# Copyright (c) 2011, Mark Chenoweth
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
#
# - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import urllib,time,datetime
import pandas as pd
class Quote(object):
DATE_FMT = '%Y-%m-%d'
TIME_FMT = '%H:%M:%S'
def __init__(self):
self.symbol = ''
self.date,self.time,self.open_,self.high,self.low,self.close,self.volume = ([] for _ in range(7))
def append(self,dt,open_,high,low,close,volume):
self.date.append(dt.date())
self.time.append(dt.time())
self.open_.append(float(open_))
self.high.append(float(high))
self.low.append(float(low))
self.close.append(float(close))
self.volume.append(int(volume))
def to_csv(self):
return ''.join(["{0},{1},{2},{3:.2f},{4:.2f},{5:.2f},{6:.2f},{7}\n".format(self.symbol,
self.date[bar].strftime('%Y-%m-%d'),self.time[bar].strftime('%H:%M:%S'),
self.open_[bar],self.high[bar],self.low[bar],self.close[bar],self.volume[bar])
for bar in xrange(len(self.close))])
def write_csv(self,filename):
with open(filename,'w') as f:
f.write(self.to_csv())
def read_csv(self,filename):
self.symbol = ''
self.date,self.time,self.open_,self.high,self.low,self.close,self.volume = ([] for _ in range(7))
for line in open(filename,'r'):
symbol,ds,ts,open_,high,low,close,volume = line.rstrip().split(',')
self.symbol = symbol
dt = datetime.datetime.strptime(ds+' '+ts,self.DATE_FMT+' '+self.TIME_FMT)
self.append(dt,open_,high,low,close,volume)
return True
def __repr__(self):
return self.to_csv()
class GoogleIntradayQuote(Quote):
''' Intraday quotes from Google. Specify interval seconds and number of days '''
def __init__(self,symbol,interval_seconds=300,num_days=5):
super(GoogleIntradayQuote,self).__init__()
self.symbol = symbol.upper()
url_string = "http://www.google.com/finance/getprices?q={0}".format(self.symbol)
url_string += "&x=NSE&i={0}&p={1}d&f=d,o,h,l,c,v".format(interval_seconds,num_days)
csv = urllib.urlopen(url_string).readlines()
for bar in xrange(7,len(csv)):
if csv[bar].count(',')!=5: continue
offset,close,high,low,open_,volume = csv[bar].split(',')
if offset[0]=='a':
day = float(offset[1:])
offset = 0
else:
offset = float(offset)
open_,high,low,close = [float(x) for x in [open_,high,low,close]]
dt = datetime.datetime.fromtimestamp(day+(interval_seconds*offset))
self.append(dt,open_,high,low,close,volume)
if __name__ == '__main__':
q = GoogleIntradayQuote('RCOM',300,30)
#print q # print it out
q.write_csv('c://data//rcom.csv')
In [4]:
dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
df = pd.read_csv('c://data//rcom.csv',sep=',',header=None, parse_dates={'datetime': [1, 2]}, date_parser=dateparse)
df.columns = ['Datetime', 'Symbol','Open','High','Low','Close','Volume']
#df.index = df['Datetime']
#df.index.name = None
df.head(5)
Out[4]:
Datetime | Symbol | Open | High | Low | Close | Volume | |
---|---|---|---|---|---|---|---|
0 | 2015-10-14 09:20:00 | RCOM | 77.80 | 78.50 | 77.60 | 78.40 | 552244 |
1 | 2015-10-14 09:25:00 | RCOM | 78.40 | 79.05 | 78.30 | 78.85 | 546950 |
2 | 2015-10-14 09:30:00 | RCOM | 78.75 | 78.85 | 78.25 | 78.25 | 223054 |
3 | 2015-10-14 09:35:00 | RCOM | 78.30 | 78.50 | 78.25 | 78.35 | 125523 |
4 | 2015-10-14 09:40:00 | RCOM | 78.40 | 78.65 | 78.35 | 78.55 | 105811 |
In [5]:
from datetime import date
import plotly.plotly as py
from plotly.tools import FigureFactory as FF
from datetime import datetime
In [9]:
fig = FF.create_candlestick(df.Open, df.High, df.Low, df.Close, dates=df.index)
fig['layout'].update({
'title': 'RCOM Intraday Charts',
'yaxis': {'title': 'RCOM Stock'}})
py.iplot(fig, filename='finance/intraday-candlestick', validate=False)
The draw time for this plot will be slow for all clients.
Out[9]: