Welcome, guest | Sign In | My Account | Store | Cart

Download daily comics from comics.com and ucomics.com/gocomics.com, e.g. Peanuts, Dilbert, Calvin & Hobbes, etc.

Python, 167 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
'''
comixGetter.py - a Python script to download daily comics from ucomics.com and comics.com
author: sami
date: mar 07
todo: a. check if file already exists, b. save with correct file extension
'''

import httplib, re

# Parses an HTTP "Date" header such as "Fri, 22 Jun 2007 10:00:00 GMT".
# Groups: (1) day of month, (2) abbreviated month name, (3) year.
re_date = r'[a-zA-Z]+,\s+(\d+)\s+([a-zA-Z]+)\s+(\d+).+'

# Abbreviated month name -> two-digit month number.
dates = {
    'Jan': '01',
    'Feb': '02',
    'Mar': '03',
    'Apr': '04',
    'May': '05',
    'Jun': '06',
    'Jul': '07',
    'Aug': '08',
    'Sep': '09',
    'Oct': '10',
    'Nov': '11',
    'Dec': '12',
}

# One entry per comics.com strip:
#   (comic name, base path on the server, regex whose single group captures
#    the path of the strip image inside the comic's page).
comics_dot_com_info = (
    (
        'peanuts',
        '/comics/',
        r'.+(\/comics\/peanuts\/archive\/images\/peanuts\d+\.gif|\/comics\/peanuts\/archive\/images\/peanuts\d+\.jpg).+',
    ),
    (
        'dilbert',
        '/comics/',
        r'.+(\/comics\/dilbert\/archive\/images\/dilbert\d+\.gif|\/comics\/dilbert\/archive\/images\/dilbert\d+\.jpg).+',
    ),
    (
        'bignate',
        '/comics/',
        r'.+(\/comics\/bignate\/archive\/images\/bignate\d+\.gif|\/comics\/bignate\/archive\/images\/bignate\d+\.jpg).+',
    ),
    (
        'drabble',
        '/comics/',
        r'.+(\/comics\/drabble\/archive\/images\/drabble\d+\.gif|\/comics\/drabble\/archive\/images\/drabble\d+\.jpg).+',
    ),
    (
        'franknernest',
        '/comics/',
        r'.+(\/comics\/franknernest\/archive\/images\/franknernest\d+\.jpg).+',
    ),
    (
        'monty',
        '/comics/',
        r'.+(\/comics\/monty\/archive\/images\/monty\d+\.gif|\/comics\/monty\/archive\/images\/monty\d+\.jpg).+',
    ),
    (
        'wizardofid',
        '/creators/',
        r'.+(\/creators\/wizardofid\/archive\/images\/wizardofid\d+\.gif|\/creators\/wizardofid\/archive\/images\/wizardofid\d+\.jpg).+',
    ),
)

# One entry per ucomics.com strip:
#   (comic name, base path on the image server, file-name prefix).
ucomics_dot_com_info = (
    ('doonesbury', '/comics/db/', 'db'),
    ('calvin_and_hobbes', '/comics/ch/', 'ch'),
)

#===============================
#download_others - random comic downloads
#
#===============================
def download_others():
    """Download today's Archie strip from www.archiecomics.com.

    The site's front page is requested only to read the HTTP "Date" header,
    which supplies the day of month used in the image path
    (/pops_shop/dailycomics/image<DD>.gif).  The image is written to
    archie_<DD>_<Mon>_<YYYY>.gif in the current directory.

    Returns None.  Prints a message and returns early when the date header
    cannot be parsed or the image request does not answer with HTTP 200.
    """
    host = 'www.archiecomics.com'

    print("getting: Archie")

    cnxn = connect(host)
    try:
        cnxn.request("GET", '/')
        res = cnxn.getresponse()
        # getheader() returns None when the header is absent; fall back to an
        # empty string so re.match() reports "no match" instead of raising.
        p = re.match(re_date, res.getheader("date") or '')
    finally:
        cnxn.close()

    if p is None:
        print('continuing to next comic since i got no usable date header')
        return

    day, month, year = p.groups()
    comic_path = '/pops_shop/dailycomics/image' + day + '.gif'

    #reconnecting since archiecomics.com closes connection after sending response
    cnxn = connect(host)
    try:
        cnxn.request("GET", comic_path)
        res = cnxn.getresponse()
        igot = res.status, res.reason
        image = res.read()
    finally:
        cnxn.close()

    # res.status is an integer, so it must be compared with 200, not "200".
    if res.status != 200:
        print('continuing to next comic since i got: ')
        print(igot)
        return

    # NOTE: the file is always named .gif (see the known issues of this script).
    f = open('archie' + "_" + day + "_" + month + "_" + year + ".gif", "wb")
    try:
        f.write(image)
    finally:
        f.close()

    print("OK")

#===============================
#connect(server) - connect to server
#
#===============================             
def connect(server):
    """Return an httplib.HTTPConnection to *server* on port 80.

    connect() is called on the connection before it is returned, so the
    caller receives an already-connected object.
    """
    connection = httplib.HTTPConnection(server, port=80)
    connection.connect()
    return connection

#===============================
#download_ucomics_dot_com - download comics from ucomics/gocomics servers
#
#===============================
def download_ucomics_dot_com():
    """Download today's strip for every entry in ucomics_dot_com_info.

    images.ucomics.com is first asked for '/' only to read the HTTP "Date"
    header.  For each (name, base_path, prefix) entry the image path is
    <base_path><YYYY>/<prefix><YY><MM><DD>.gif, and the image is written to
    <name>_<DD>_<Mon>_<YYYY>.gif in the current directory.

    Returns None.  Prints a message and returns early when the date header
    cannot be parsed; a comic whose request does not answer with HTTP 200 is
    reported and skipped.
    """
    host = 'images.ucomics.com'

    cnxn = connect(host)
    try:
        cnxn.request("GET", '/')
        res = cnxn.getresponse()
        # getheader() returns None when the header is absent; fall back to an
        # empty string so re.match() reports "no match" instead of raising.
        p = re.match(re_date, res.getheader("date") or '')
    finally:
        cnxn.close()

    if p is None or p.group(2) not in dates:
        print('cannot download from ' + host + ' since i got no usable date header')
        return

    day, month, year = p.groups()
    # Two-digit year + two-digit month + day, e.g. "070622".
    stamp = year[2:4] + dates[month] + day

    for name, base_path, prefix in ucomics_dot_com_info:

        print("getting: " + name)

        comic_path = base_path + year + '/' + prefix + stamp + '.gif'

        #for ucomics.com, we need to reconnect everytime, server closes connection after sending a response
        cnxn = connect(host)
        try:
            cnxn.request("GET", comic_path)
            res = cnxn.getresponse()
            igot = res.status, res.reason
            image = res.read()
        finally:
            cnxn.close()

        # res.status is an integer, so it must be compared with 200, not "200".
        if res.status != 200:
            print('continuing to next comic since i got: ')
            print(igot)
            continue

        f = open(name + "_" + day + "_" + month + "_" + year + ".gif", "wb")
        try:
            f.write(image)
        finally:
            f.close()

        print("OK")

#===============================
#download_comics_dot_com - download comics from comics.com servers
#
#===============================
def download_comics_dot_com():
    """Download today's strip for every entry in comics_dot_com_info.

    For each (name, base_path, image_regex) entry the page
    <base_path><name>/ is fetched from comics.com, the first image path
    captured by image_regex is requested over the same connection, and the
    result is written to <name>_<DD>_<Mon>_<YYYY>.gif in the current
    directory (the date comes from the page response's HTTP "Date" header).

    Returns None.  A comic is reported and skipped when either request does
    not answer with HTTP 200, when the date header cannot be parsed, or when
    no image link is found in the page.
    """
    #Calling connect to url directly since for comics.com, the server does not close the connection itself after
    #sending a response
    cnxn = connect('comics.com')

    try:
        for name, base_path, image_re in comics_dot_com_info:

            print("getting: " + name)

            cnxn.request("GET", base_path + name + '/')
            res = cnxn.getresponse()
            igot = res.status, res.reason
            # Always read the body, even for a response we reject: the
            # connection is reused, and httplib refuses to hand out the next
            # response while the previous one is still unread.
            page = res.read()

            # res.status is an integer, so compare with 200, not "200".
            if res.status != 200:
                print('continuing to next comic since i got: ')
                print(igot)
                continue

            p = re.match(re_date, res.getheader("date") or '')
            # Same result as re.findall(...)[0], but gives None instead of
            # raising IndexError when the page has no matching image link.
            found = re.search(image_re, page)

            if p is None or found is None:
                print('continuing to next comic since i got: ')
                print('no usable date header or no image link in the page')
                continue

            cnxn.request("GET", found.group(1))
            res = cnxn.getresponse()
            igot = res.status, res.reason
            image = res.read()

            # Do not save an error page as if it were the comic image.
            if res.status != 200:
                print('continuing to next comic since i got: ')
                print(igot)
                continue

            # NOTE: the file is always named .gif, even when the matched image
            # is a .jpg (see the known issues of this script).
            f = open(name + "_" + p.group(1) + "_" + p.group(2) + "_" + p.group(3) + ".gif", "wb")
            try:
                f.write(image)
            finally:
                f.close()

            print("OK")
    finally:
        cnxn.close()

#===============================
#execution
#
#===============================
# Run each downloader once, in this order: comics.com, ucomics.com, then the
# one-off sites.
for _downloader in (download_comics_dot_com, download_ucomics_dot_com, download_others):
    _downloader()

Why use this: Instead of visiting each website and reading the comics one by one, use this script to download today's comics. To fetch a comic of your own choosing, add an entry for it (with its regular expression or file-name prefix) to "comics_dot_com_info" or "ucomics_dot_com_info".

Known issues: Always saves files with a .gif extension; will not work if the server changes its comic URL pattern (the regex will no longer match); does not check whether the file already exists.

Created by sami jan on Fri, 22 Jun 2007 (PSF)
Python recipes (4591)
sami jan's recipes (2)

Required Modules

Other Information and Tasks