# Welcome, guest | Sign In | My Account | Store | Cart
# -*- coding: utf-8 -*-
"""
Assembled by : Peter Arwanitis (spex66)

Task: Administrative job to run in my case 2300 jobs in a scheduled manner
Restriction: Don't start two jobs at same schedule on same server

Problems to solve that for:
* align list of projects into batch of jobs with distinct servers
* templated job creation
* create a crontab 
** to start all these jobs, batch by batch, every hour from a starting schedule
** respect the restriction that no jobs should be started on some days and at some hours
** thanks to the standard-library time, datetime and timedelta, that turns out to be easy in the end!

References:
* http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/410687
    Transposing a List of Lists with Different Lengths without Loosing Elements (by Zoran Isailovski)
* thanks to Gerhard Kalab for his excellent pycron (cause it runs on windows too :))
    http://www.kalab.com/freeware/pycron/pycron.htm

Targeted on windows and python23
"""

import os, time
from datetime import datetime, timedelta

# Root folder for everything reportcrontab() generates: the crontab file
# 'crontab_projects.txt' plus the 'cmds' and 'logs' subfolders.
report_ROOTDIR = r"/tmp/jobs"
# Inline job list parsed by groupProjectsByUniqueServer(): one job per line,
# written as the comma-separated fields jobname,server,param1,param2,param3.
# The parser skips blank lines and lines starting with '#'.
crontabBuilderData = """
# list of jobnames with paramaters to run on a specific server
jobname1,server1,param1,param2,param3
jobname2,server1,param1,param2,param3
jobname3,server1,param1,param2,param3
jobname4,server2,param1,param2,param3
jobname5,server2,param1,param2,param3
jobname6,server3,param1,param2,param3
jobname7,server4,param1,param2,param3
jobname8,server4,param1,param2,param3
"""

def groupProjectsByUniqueServer(data=None):
    """Parse the job list and align it into batches with distinct servers.

    Every non-empty, non-comment line of *data* is split on ',' into a job
    record (a list of stripped fields); the second field is the server name.
    The jobs are then arranged so that batch number i holds the i-th job of
    every server that has that many jobs, i.e. no server shows up twice in
    one batch. Servers keep the order of their first appearance in *data*,
    and the first batch is always the largest one.

    Parameters:
        data -- job list text, one 'jobname,server,param1,...' per line;
                lines whose first non-blank character is '#' are comments.
                Defaults to the module-level crontabBuilderData.

    Returns:
        A list of batches, each batch being a list of job records. An empty
        list is returned when *data* contains no jobs.
    """
    if data is None:
        data = crontabBuilderData  # or read this from file

    listOfJobs = []
    for line in data.split('\n'):
        # strip first, so indented comments and whitespace-only lines are skipped too
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        # strip every field: 'job, server1' must land in the same group as 'job,server1'
        listOfJobs.append([field.strip() for field in line.split(',')])

    print(listOfJobs)

    # group by servername, remembering the order in which servers first appear
    # so the resulting batches are deterministic
    serverOrder = []
    dictByServer = {}
    for job in listOfJobs:
        server = job[1]
        if server not in dictByServer:
            dictByServer[server] = []
            serverOrder.append(server)
        dictByServer[server].append(job)

    # align them that way, that each server comes up only one time in a batch of jobs
    # background: to minimize server load at the same time
    #
    # Explicit transposition of lists with different lengths. The former
    # map(lambda *row: ..., *lists) relied on Python 2 padding short lists
    # with None; Python 3 stops at the shortest list (dropping jobs) and an
    # empty job list made map() fail for lack of arguments.
    alignedJobs = []
    for server in serverOrder:
        for slot, job in enumerate(dictByServer[server]):
            if slot == len(alignedJobs):
                alignedJobs.append([])
            alignedJobs[slot].append(job)

    return alignedJobs
    
def _writeTextFile(path, text):
    # Write text to path and always close the handle again
    # (try/finally instead of 'with', the script still targets old Pythons).
    handle = open(path, 'w')
    try:
        handle.write(text)
    finally:
        handle.close()


def reportcrontab(alignedprojects, year, month, day=1, hour=0, excludedays=None, excludehours=None, rootdir=None):
    """Write one command file per job and a crontab that starts them.

    Each batch of *alignedprojects* gets its own hourly time slice, starting
    at year/month/day hour:00. Time slices falling on an excluded weekday or
    an excluded hour are skipped. For every job a '<slice>_<jobname>.cmd'
    file is written below '<rootdir>/cmds', and all crontab lines go to
    '<rootdir>/crontab_projects.txt'. A short summary is printed at the end.

    Parameters:
        alignedprojects -- iterable of batches; a batch is a sequence of
                           (jobname, server, param1, param2, param3) records.
                           The argument itself is not modified.
        year, month, day, hour -- first time slice (day defaults to 1,
                           hour to 0).
        excludedays     -- weekdays without job starts, Monday is 0 and
                           Sunday is 6 (default: none).
        excludehours    -- hours of the day without job starts (default: none).
        rootdir         -- output folder, defaults to report_ROOTDIR. It and
                           its 'cmds' and 'logs' subfolders are created when
                           missing.

    Raises:
        ValueError -- there are batches to schedule but the excludes cover
                      all seven weekdays or all 24 hours.
    """
    if excludedays is None:
        excludedays = []
    if excludehours is None:
        excludehours = []
    if rootdir is None:
        rootdir = report_ROOTDIR

    # work on a private copy: the caller's list stays intact and any iterable is accepted
    batches = list(alignedprojects)
    if batches:
        # without a single allowed slot the search for a free slice would never end
        if not [d for d in range(7) if d not in excludedays]:
            raise ValueError('excludedays covers every weekday, no job can be scheduled')
        if not [h for h in range(24) if h not in excludehours]:
            raise ValueError('excludehours covers every hour, no job can be scheduled')

    # start from that date at default 0 o'clock
    # remember start for some statistics
    startschedule = schedule = datetime(year, month, day, hour)

    # prepare the folders for job creation
    report_logdir = os.path.join(rootdir, 'logs')
    report_logfile = os.path.join(report_logdir, "%(scheduled_time)s_%(jobname)s.log")
    report_cmddir = os.path.join(rootdir, 'cmds')
    for folder in (report_logdir, report_cmddir):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    report_crontabname = os.path.join(rootdir, "crontab_projects.txt")
    report_crontabheader = """
    # THIS CONFIGURATION IS AUTOMATICALLY GENERATED!!!
    # this version is from: %s
    """ % (time.strftime('%Y-%m-%d/%H:%M:%S', time.localtime()))

    # 0 13 15 6 * "test.cmd"
    report_crontabtemplate = '''0 %(hour)s %(day)s %(month)s * "%(cmd)s"'''
    report_crontab = []

    # example template for commandfile generation
    report_commands = [
        "@echo SomeJobRunner --name %(jobname)s --server=%(server)s --p1 %(param1)s --p2 %(param2)s --p3 %(param3)s 1> " + report_logfile,
        r"c:",
        r"cd c:\python23",
        "python SomeJobRunner.py --name %(jobname)s --server=%(server)s --p1 %(param1)s --p2 %(param2)s --p3 %(param3)s 1>> " + report_logfile + " 2>&1",
        ]
    report_cmd_template = '\n'.join(report_commands)

    step = timedelta(hours=1)  # next hour, place your stepping here
    project_count = 0
    for actual in batches:  # from TOP, where the biggest batches are
        # move on to the next allowed time slice; the schedule has to advance
        # here, otherwise an excluded slot would be re-tested forever
        while schedule.weekday() in excludedays or schedule.hour in excludehours:
            schedule += step

        # build a characteristic prefix, it is the same for the whole batch
        scheduled_time = '%i%02i%02i_%02i' % (
            schedule.year,
            schedule.month,
            schedule.day,
            schedule.hour,
            )

        for (jobname, server, param1, param2, param3) in actual:
            project_count += 1
            jobcmddfile = os.path.join(report_cmddir, '%s_%s.cmd' % (scheduled_time, jobname))

            # explicit keywords for the command template
            values = {
                'scheduled_time': scheduled_time,
                'jobname': jobname,
                'server': server,
                'param1': param1,
                'param2': param2,
                'param3': param3,
                }

            # write complete batch out
            # hint: the replace is only essential on windows, cause % is reserved!
            _writeTextFile(jobcmddfile, (report_cmd_template % values).replace('%', '%%'))

            report_crontab.append(report_crontabtemplate % {
                'hour': schedule.hour,
                'day': schedule.day,
                'month': schedule.month,
                'cmd': jobcmddfile,
                })

        # jump to next time slice
        schedule += step

    _writeTextFile(report_crontabname, '\n'.join([report_crontabheader] + report_crontab))

    # a single pre-formatted string prints the same on Python 2 and 3
    print('%i projects scheduled starting from %s, ends up %s' % (project_count, startschedule, schedule))
    print('no schedules at hours (%s) and days of week (%s) # Monday is 0 and Sunday is 6' % (excludehours, excludedays))
    
def buildReportCrontab():
    """Build the crontab for the configured start date and exclusions."""

    # first day that gets jobs scheduled -- adjust here
    startYear, startMonth, startDay = 2007, 5, 26

    # slots that must stay free of jobs (Monday is 0 and Sunday is 6)
    blockedDays = [6]       # for example: keep free backup day
    blockedHours = [6, 7]   # for example: keep free administrative window

    # every batch holds jobs on distinct servers and gets an hour of its own
    jobBatches = groupProjectsByUniqueServer()
    reportcrontab(
        jobBatches,
        startYear,
        startMonth,
        startDay,
        excludedays=blockedDays,
        excludehours=blockedHours,
    )
    
if __name__ == "__main__":
    # run as a script: generate the command files and the crontab
    buildReportCrontab()

""" example crontab, no fun to make that for 2300 jobs :)

    # THIS CONFIGURATION IS AUTOMATICALLY GENERATED!!!
    # this version is from: 2007-05-28/09:25:21
    
0 0 26 5 * "/tmp/jobs/cmds/20070526_00_jobname7.cmd"
0 0 26 5 * "/tmp/jobs/cmds/20070526_00_jobname1.cmd"
0 0 26 5 * "/tmp/jobs/cmds/20070526_00_jobname4.cmd"
0 0 26 5 * "/tmp/jobs/cmds/20070526_00_jobname6.cmd"
0 1 26 5 * "/tmp/jobs/cmds/20070526_01_jobname8.cmd"
0 1 26 5 * "/tmp/jobs/cmds/20070526_01_jobname2.cmd"
0 1 26 5 * "/tmp/jobs/cmds/20070526_01_jobname5.cmd"
0 2 26 5 * "/tmp/jobs/cmds/20070526_02_jobname3.cmd"
"""

# History