import random
# Version 1.1 - Extended reporting. Donald 'Paddy' McCarthy 2004-04-25

def rand_jobs(jobs = 10001,                # number of jobs in a regression
              jobtime_spread = [90, 1,9],  # 90% take 1..4 mins, 1% take 5..20 mins
              jobtime_split = [5,21,41]):  # and 9% take 21..40 mins to complete
    'randomly generate jobs to a profile'
    assert sum(jobtime_spread)==100
    shortjobs  = [ random.randrange(1,jobtime_split[0])
                   for x in xrange(jobs*jobtime_spread[0]/100)]
    mediumjobs = [ random.randrange(jobtime_split[0],jobtime_split[1])
                   for x in xrange(jobs*jobtime_spread[1]/100)]
    longjobs   = [ random.randrange(jobtime_split[1],jobtime_split[2])
                   for x in xrange(jobs*jobtime_spread[2]/100)]
    return shortjobs + mediumjobs + longjobs

def sort_and_sow(job, bins):
    ''' sorts the jobs by time then puts them in bin[0..bins-1]
        in the order bin[0..bins..0..bins..]'''
    job.sort()
    bin = list()
    for i in range(bins):
        bin.append(job[i::2*bins] + job[2*bins-1-i::2*bins])
        bin[-1].sort()    # Sort makes more tests finish early
    return bin

def rev_sort_and_sow(job, bins):
    ''' reverse sorts the jobs by time then puts them in bin[0..bins-1]
        in the order bin[0..bins..0..bins..]'''
    job.sort()
    job.reverse()
    bin = list()
    for i in range(bins):
        bin.append(job[i::2*bins] + job[2*bins-1-i::2*bins])
        bin[-1].sort(); bin[-1].reverse()
    return bin

def sort_and_place(job, bins):
    ''' sorts the jobs by time then puts them in bin[0..bins-1]
        in the order bin[0..bins,0..bins,...]'''
    job.sort()
    bin = list()
    for i in range(bins):
        bin.append(job[i::bins])
    return bin

def rev_sort_and_place(job, bins):
    ''' reverse sorts the jobs by time then puts them in bin[0..bins-1]
        in the order bin[0..bins,0..bins,...]'''
    job.sort()
    job.reverse()
    bin = list()
    for i in range(bins):
        bin.append(job[i::bins])
    return bin

def rand_binning_avg_count(job, bins):
    'fill bins randomly, but with averaged items in a bin'
    random.shuffle(job)
    return [job[i::bins] for i in range(bins)]

def pure_rand(job, bins):
    'fill bins randomly'
    bin = [list() for i in range(bins)]
    random.shuffle(job)
    for j in job:
        bin[random.randrange(bins)] += [j]
    return bin

def binning_stats(binning, filler='', pr=False):
    'Prints stats on how the bins are filled'
    bin_times = [sum(bin) for bin in binning]
    target_time = sum(bin_times)/1.0/len(bin_times)
    regression_time = max(bin_times)    # longest bin sets the overall regression time
    closeness = test_completion_stats(binning, target_time)
    if pr:
        print "%-25s [50, 75, 87.5, 100]%% of tests finish after %s%% of Target time: %7.3f" % (
            filler, ["%5.1f" % c for c in closeness], target_time )
    return closeness

def time_accumulator(t):
    ' Do "time_accumulator.t = 0" before use'
    time_accumulator.t += t
    return time_accumulator.t

def test_completion_stats(binning, target_time):
    'How long for 50, 75, 87.5, and 100% of tests to complete (as a percentage of target_time)'
    # change run times to times of completion for each bin
    completion_bins = list()
    for bin in binning:
        time_accumulator.t = 0.0
        completion_bins.append( [time_accumulator(duration) for duration in bin] )
    # merge completion times for each bin
    completion_times = sum(completion_bins, [])
    completion_times.sort()
    # find the times for tests to complete
    end_time = completion_times[-1]     # last element
    num_tests = len(completion_times)
    num50 = num_tests*50/100-1
    num75 = num_tests*75/100-1
    num87 = num_tests*875/1000-1        # 87.5%
    time_to_50 = time_to_75 = time_to_87 = time_to_100 = 0
    for n,ct in enumerate(completion_times):
        if n==num50:    # found when 50% tests finished
            time_to_50 = ct
        if n==num75:    # found when 75% tests finished
            time_to_75 = ct
        if n==num87:    # found when 87.5% tests finished
            time_to_87 = ct
            break
    time_to_100 = end_time
    return [time_to_50*1.0/target_time*100,
            time_to_75*1.0/target_time*100,
            time_to_87*1.0/target_time*100,
            time_to_100*1.0/target_time*100]

def hm(mins):
    ' convert minutes to hours+minutes as string'
    return '%02i:%5.2f' % (int(mins / 60), mins-60*int(mins / 60))

if __name__ == "__main__":
    repititions = 100           # Number of regressions to run
    job_slots = 99              # number of LSF jobs I can run
    tests = 10001               # Number of tests in a regression
    jobtime_spread = [90, 9,1]  # y[0]% in first range below, ...y[2]% in third
    jobtime_split = [4,16,61]   # 1 to x[0]-1, x[0] to x[1]-1, x[1] to x[2]-1
    pr = False                  # Print as we go?
    '''
    repititions = 1             # Number of regressions to run
    job_slots = 2               # number of LSF jobs I can run
    tests = 20                  # Number of tests in a regression
    jobtime_spread = [90, 9,1]  # y[0]% in first range below, ...y[2]% in third
    jobtime_split = [4,16,61]   # 1 to x[0]-1, x[0] to x[1]-1, x[1] to x[2]-1
    pr = True                   # Print as we go?
    '''

    algorithms = [(sort_and_sow,           "sort_and_sow"),
                  (rev_sort_and_sow,       "rev_sort_and_sow"),
                  (sort_and_place,         "sort_and_place"),
                  (rev_sort_and_place,     "rev_sort_and_place"),
                  (rand_binning_avg_count, 'rand_binning_avg_count'),
                  (pure_rand,              'pure_rand'),
                  ]
    closeness = dict()      # for cumulative stats
    last_bin = dict()       # debug copy of last bins
    mean_target_time = 0.0  # Calculates theoretical minimum time for avg Regression

    for x in xrange(repititions):
        jobs = rand_jobs(jobtime_spread=jobtime_spread,
                         jobtime_split=jobtime_split, jobs=tests)
        mean_target_time += sum(jobs)*1.0/job_slots
        for algo, algo_name in algorithms:
            binning = algo(jobs[:], job_slots)
            last_bin[algo_name] = binning
            tmp = binning_stats(binning, algo_name, pr=pr)
            closeness[algo_name] = [ sum(percent) for percent in
                                     zip(closeness.get(algo_name,[0,0,0,0]), tmp) ]
        if pr: print ''
    mean_target_time /= repititions

    print ''
    print 'Results from %i regressions each of %i tests using %i job_slots' % (
        repititions, tests, job_slots)
    print '  Random test times profile is:'
    print '    %-3s from %2i to %2i minutes' % (
        `jobtime_spread[0]`+'%', 1, jobtime_split[0]-1)
    print '    %-3s from %2i to %2i minutes' % (
        `jobtime_spread[1]`+'%', jobtime_split[0], jobtime_split[1]-1)
    print '    %-3s from %2i to %2i minutes' % (
        `jobtime_spread[2]`+'%', jobtime_split[1], jobtime_split[2]-1)
    print '  Target time for a Regression run is %.1f minutes (%s)\n' % (
        mean_target_time, hm(mean_target_time))
    for algo, algo_name in algorithms:
        print "  %-23s [50, 75, 87.5, 100]%% tests finish in %s%% of Target time - %s" % (
            algo_name+':',
            ["%5.1f" % (c*1.0/repititions) for c in closeness[algo_name]],
            hm(mean_target_time*closeness[algo_name][-1]/100.0/repititions) )
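
# ---------------------------------------------------------------------------
# Illustration (not part of the original script): how the two deterministic
# dealing strategies split a small, made-up job list across two bins.  The
# eight-element job list and bin count below are hypothetical examples only,
# chosen so the result can be checked by hand.
#
#     >>> sort_and_place([3,1,4,1,5,9,2,6], 2)
#     [[1, 2, 4, 6], [1, 3, 5, 9]]     # bin totals 13 and 18
#     >>> sort_and_sow([3,1,4,1,5,9,2,6], 2)
#     [[1, 3, 4, 9], [1, 2, 5, 6]]     # bin totals 17 and 14
#
# The sow variant pairs a slice from the front of the sorted list with the
# matching slice from further back, which here gives more even bin totals.
# Both functions sort the passed list in place, which is why the main loop
# above always hands each algorithm a copy (jobs[:]).
# ---------------------------------------------------------------------------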