DESY Hbb Analysis Framework
submitCrab3MC.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 import os
4 import os.path
5 import urllib2
6 import importlib
7 import sys
8 from WMCore.Configuration import Configuration
9 from CRABClient.UserUtilities import getUsernameFromSiteDB
10 
# ANSI escape sequences used to colour the messages printed by this script.
# Append W after a coloured message to switch back to the normal style.
W, R, G, O, B, P = (
    '\033[0m',   # white (normal)
    '\033[31m',  # red
    '\033[32m',  # green
    '\033[33m',  # orange
    '\033[34m',  # blue
    '\033[35m',  # purple
)
18 
19 
# Three positional arguments are mandatory (CMSSW python config, samples file
# and campaign), so together with the script name sys.argv must hold at least
# four entries. Checking for fewer would let ARGS[3] below raise an IndexError
# instead of showing the usage message.
ARGSN = len(sys.argv)
if ARGSN < 4:
    print(R+"You need to provide the CMSSW python config, the samples file and the campaign, e.g Fall17, in this order."+W)
    print(R+"Optionally you can provide the max number of events (def. -1)"+W)
    # Non-zero status so that calling scripts can detect the failure.
    sys.exit(1)

# ---
# Some parameter steering
UNITS_PER_JOB = 1
TOTAL_EVENTS = -1   # default when no max number of events is given
TYPE = 'MC'
#CAMPAIGN = 'Moriond17/80x_moriond17_data03Feb2017_v1'

#CRABCMDOPTS = '--dryrun'
CRABCMDOPTS = ''

ARGS = sys.argv
PSET = ARGS[1]

# A config whose path contains 'nano' selects the NanoAOD workflow.
IS_NANO = 'nano' in PSET
if IS_NANO:
    print("Producing NanoAOD ntuples...")
else:
    print("Producing Ntuplizer ntuples...")


psetname, pset_ext = os.path.splitext(PSET)
SAMPLE = ARGS[2]


samplename, sample_ext = os.path.splitext(SAMPLE)
CAMPAIGN = ARGS[3] + '/' + psetname

# Optional fourth argument: the max number of events. Trailing extra
# arguments no longer cause it to be silently ignored.
if ARGSN >= 5:
    try:
        TOTAL_EVENTS = int(ARGS[4])
    except ValueError:
        print(R+"The max number of events must be an integer"+W)
        sys.exit(1)


if not (os.path.isfile(PSET) and pset_ext == '.py'):
    print(R+"The given python config does not exist or it is not a python file"+W)
    sys.exit(1)

if not (os.path.isfile(SAMPLE) and sample_ext == '.txt'):
    print(R+"The given sample list file does not exist or it is not a txt file"+W)
    sys.exit(1)
64 
# ---
# Some parameter steering
# PROCESS is the samples file name without directory and extension.
PROCESS = samplename.split('/')[-1]
# NOTE(review): the output area is hard-coded to one user; the commented
# line below derives it from SiteDB instead - confirm which one is wanted.
#MYPATH = '/store/user/%s/' % (getUsernameFromSiteDB())
MYPATH = '/store/user/rwalsh/'
BASEOUTDIR = MYPATH+'Analysis/Ntuples/' + TYPE + '/' + CAMPAIGN

# NOTE(review): the list is read from samples/mc/<PROCESS>.txt relative to
# the working directory, not from the SAMPLE path that was validated on the
# command line - confirm this is intended.
dataset_list = 'samples/mc/' + PROCESS + '.txt'
# The context manager closes the file even if reading fails.
with open(dataset_list, 'r') as f_datasets:
    datasets = f_datasets.readlines()

import FWCore.ParameterSet.Config as cms
#from ntuplizer_mc_765_summer_conferences_2016_v1 import process
# Module name of the config: everything before the first '.' of PSET.
pset = PSET.split('.')[0]
# Dynamic equivalent of the static import above (original author's remark
# on this line: "see why it does not work!").
process = importlib.import_module(pset).process
80 
81 
82 # _________________________________________________________________________
83 
def parse_dataset_line(line):
    """Parse one line of the samples file.

    All blanks are removed from the line first. A valid entry is either
    ``<dataset>`` or ``<dataset>,<cross section>`` where ``<dataset>`` starts
    with '/'. Fields after the second one are ignored.

    Returns a ``(dataset, cross_section)`` tuple, with ``cross_section`` as a
    float (1.0 when the line gives none), or ``None`` for lines that must be
    skipped: comments ('#'), empty lines and anything not starting with '/'.

    Raises ValueError if the cross-section field is not a number, so that a
    malformed samples file is noticed before anything is submitted.
    """
    entry = line.replace(" ", "").strip()
    # startswith() is also safe on an empty string, unlike entry[0].
    if not entry.startswith('/'):
        return None
    fields = entry.split(',')
    cross_section = float(fields[1]) if len(fields) > 1 else 1.
    return fields[0], cross_section


if __name__ == '__main__':

    from CRABAPI.RawCommand import crabCommand
    from CRABClient.ClientExceptions import ClientException
    from httplib import HTTPException

    from Analysis.Ntuplizer.crabConfig import crabConfig
    config = crabConfig()

# ====== GENERAL
    config.General.workArea += '_' + PROCESS

# ====== DATA
#    config.Data.splitting = 'Automatic'
#    config.Data.unitsPerJob = UNITS_PER_JOB
    config.Data.totalUnits = TOTAL_EVENTS
    config.Data.outLFNDirBase = BASEOUTDIR + '/'
#    config.Data.inputDBS = 'https://cmsweb.cern.ch/dbs/prod/phys03/DBSReader/'
#    config.Data.allowNonValidInputDataset = True # If dataset not valid yet, will run over valid files only

# ====== JOBTYPE
#    config.JobType.psetName = PSET
    config.JobType.numCores = 4
    config.JobType.maxMemoryMB = 10000
#    config.JobType.inputFiles = ['Fall15_25nsV2_MC_PtResolution_AK4PFPuppi.txt','Fall15_25nsV2_MC_PtResolution_AK4PFchs.txt','Fall15_25nsV2_MC_SF_AK4PFPuppi.txt','Fall15_25nsV2_MC_SF_AK4PFchs.txt']

    if IS_NANO:
        config.JobType.outputFiles = ['nano.root']

    # One CRAB task is submitted per valid line of the samples file.
    for line in datasets:
        parsed = parse_dataset_line(line)
        if parsed is None:
            continue
        dataset, cross_section = parsed

        print(" ".join((" oioi ", dataset)))

        # A dataset path reads /<name>/<conditions>/<tier>; entries without
        # all three components are skipped instead of raising IndexError.
        parts = dataset.split('/')
        if len(parts) < 4:
            print(R+"Skipping malformed dataset name " + dataset + W)
            continue
        dataset_name = parts[1]
        dataset_cond = parts[2]

        config.Data.inputDataset = dataset
        config.Data.outputDatasetTag = dataset_cond
        config.General.requestName = dataset_name
#       use if needed in private productions (modify accordingly)
#        processname = dataset_cond.split('_')
#        config.General.requestName += '_'+processname[0]+'-'+processname[1]+'_oldGT'
#        print config.General.requestName

        # Path of the temporary config dump; stays None in NanoAOD mode,
        # where the original config file is submitted as it is.
        psettmp = None
        if IS_NANO:
            config.JobType.psetName = PSET
        else:
            try:
                process.MssmHbb.CrossSection = cms.double(cross_section)
            except AttributeError:
                # Deliberately ignored: the attribute chain is missing in
                # this config, so there is no cross section to update.
                pass
            psettmp = pset+'_tmp.py'
            with open(psettmp, 'w') as f:
                f.write(process.dumpPython())
            config.JobType.psetName = psettmp

        outtext = "Submitting dataset " + dataset + "..."
        print(O+str(outtext)+W)

#        crabCommand('submit', config = config, *CRABCMDOPTS.split())
        try:
            crabCommand('submit', config = config)
        except HTTPException as hte:
            # Report the failure and carry on with the remaining datasets.
            print(R+"Failed submitting task: %s" % (getattr(hte, 'headers', hte),)+W)
        except ClientException as cle:
            print(R+"Failed submitting task: %s" % (cle,)+W)
        finally:
            # Never leave the temporary config behind, even if the
            # submission raised.
            if psettmp is not None and os.path.isfile(psettmp):
                os.remove(psettmp)

        print(O+"--------------------------------"+W)
        print("")
165 
166 # _________________________________________________________________________