#!/usr/bin/env python3
"""
.. module:: databaseObj
:synopsis: Contains Database class that represents the database of experimental results.
.. moduleauthor:: Veronika Magerl <v.magerl@gmx.at>
.. moduleauthor:: Andre Lessa <lessa.a.p@gmail.com>
.. moduleauthor:: Wolfgang Waltenberger <wolfgang.waltenberger@gmail.com>
.. moduleauthor:: Matthias Wolf <matthias.wolf@wot.at>
"""
from __future__ import print_function
import os
import hashlib
import pathlib
## sweet spot for numpy multi-threading is 2? More threads
## make some weaker machines freeze when building the pickle file.
## Anyhow, we parallelize at a higher level.
os.environ["OMP_NUM_THREADS"]="2"
import sys
import time
import copy
from smodels.experiment import datasetObj
from smodels.installation import cacheDirectory
from smodels.experiment.metaObj import Meta
from smodels.experiment.expResultObj import ExpResult
from smodels.experiment.exceptions import DatabaseNotFoundException
from smodels.tools.physicsUnits import TeV
from smodels.tools.stringTools import cleanWalk
from smodels.experiment.exceptions import SModelSExperimentError as SModelSError
from smodels.tools.smodelsLogging import logger
import logging
# Determine the installed scipy version; stays "" when it cannot be found.
scipyver = ""
try:
    from importlib.metadata import version
    scipyver = version("scipy")
except Exception:
    # importlib.metadata is unavailable or scipy is not registered there:
    # fall back to asking scipy itself.
    try:
        from scipy import __version__ as scipyver
    except Exception:
        pass
# The listed entries are version *prefixes*, so they have to be matched with
# startswith(); a plain membership test against the full version string
# (e.g. "1.8.0") never matches.
if not scipyver.startswith(("1.8.", "1.9.", "1.10.", "2.0.", "2.1.")):
    # fix for pickling different scipy versions (1.7.x vs 1.8.x)
    # so that databases pickled with scipy 1.8.x still work with scipy 1.7.x
    import scipy.spatial
    if not hasattr(scipy.spatial, "_qhull") and hasattr(scipy.spatial, "qhull"):
        sys.modules["scipy.spatial._qhull"] = scipy.spatial.qhull
try:
import cPickle as serializer
except ImportError as e:
import pickle as serializer
def _getSHA1(filename, chunksize=1 << 20):
    """
    Compute the SHA1 checksum of a file.

    The file is hashed in chunks, so that large pickle files do not have to
    be held in memory in one piece.

    :param filename: path of the file to hash
    :param chunksize: number of bytes read per iteration
    :returns: hexadecimal digest (string)
    """
    digest = hashlib.sha1()
    with open(filename, "rb") as handle:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF
        for chunk in iter(lambda: handle.read(chunksize), b""):
            digest.update(chunk)
    return digest.hexdigest()
class Database(object):
    """
    Database object. Holds a list of SubDatabases.
    Delegates all calls to SubDatabases.
    """

    def __init__(self, base=None, force_load=None, discard_zeroes=True,
                 progressbar=False, subpickle=True, combinationsmatrix=None):
        """
        :param base: path to the database, or pickle file (string), or http
            address. If None, "official", or "official_fastlim",
            use the official database for your code version
            (including fastlim results, if specified).
            If "latest", or "latest_fastlim", check for the latest database.
            Multiple databases may be specified using `+' as a delimiter.
        :param force_load: force loading the text database ("txt"),
            or binary database ("pcl"), dont force anything if None
        :param discard_zeroes: discard txnames with only zeroes as entries.
        :param progressbar: show a progressbar when building pickle file
            (needs the python-progressbar module)
        :param subpickle: produce small pickle files per exp result.
            Should only be used when working on the database.
        :param combinationsmatrix: an optional dictionary that contains info
            about combinable analyses, e.g. { "anaid1": ( "anaid2", "anaid3" ) }
            optionally specifying signal regions, e.g. { "anaid1:SR1":
            ( "anaid2:SR2", "anaid3" ) }
        """
        if base is None:
            # None is documented as "use the official database"; without this
            # the string operations below fail on None.
            base = "official"
        # createLinksToCombinationsMatrix() hands this down to the subs.
        self.combinationsmatrix = combinationsmatrix
        self.subs = []
        if "_fastlim" in base:  ## for backwards compatibility
            base = base.replace("_fastlim", "+fastlim")
        for name in base.split("+"):
            self.subs.append(SubDatabase(name, force_load, discard_zeroes,
                                         progressbar, subpickle,
                                         combinationsmatrix))

    @property
    def expResultList(self):
        """
        The combined list, compiled from the individual lists of the subs.
        Recomputed (merged) on every access.
        """
        if not self.subs:
            return []
        return self.mergeLists([sub.expResultList for sub in self.subs])

    def mergeLists(self, lists):
        """
        Merge several lists of experimental results into a single list.
        Results sharing analysis id and type of their first dataset are
        merged via mergeERs; results without datasets are dropped.

        :param lists: list of lists of ExpResult objects
        :returns: list of ExpResult objects
        """
        merged = {}
        for results in lists:
            for er in results:
                if len(er.datasets) == 0:  # skip empty entries
                    logger.warning(f"Analysis {er.globalInfo.id} has no datasets. Will remove it.")
                    continue
                key = er.globalInfo.id + er.datasets[0].getType()
                if key in merged:
                    merged[key] = self.mergeERs(merged[key], er)
                else:
                    merged[key] = er
        return list(merged.values())

    def mergeERs(self, o1, r2):
        """
        Merge the content of the experimental results o1 and r2.
        o1 is not modified: the merge is done on a deep copy of it.
        The globalInfo of r2 wins, datasets unknown to o1 are appended,
        and for datasets known to both, txnames of r2 overwrite or extend
        those of o1.

        :returns: the merged copy
        """
        r1 = copy.deepcopy(o1)
        r1.globalInfo = r2.globalInfo
        dids = [x.getID() for x in o1.datasets]
        for ds in r2.datasets:
            if ds.getID() not in dids:  ## completely new dataset
                r1.datasets.append(ds)
                continue
            ## known dataset: just overwrite the old txnames
            idx = dids.index(ds.getID())  ## ds index
            target = r1.datasets[idx].txnameList
            r1txnames = [x.txName for x in target]
            for txn in ds.txnameList:
                if txn.txName in r1txnames:
                    target[r1txnames.index(txn.txName)] = txn  ## overwrite
                else:
                    target.append(txn)  # a new txname
        return r1

    def createBinaryFile(self, filename=None):
        """
        Create a pcl file from all the subs.

        :param filename: name of the file to write; if None, the pathname
            of self.pcl_meta is used.
        """
        if filename is None:
            filename = self.pcl_meta.pathname
        logger.debug(" * create %s" % filename)
        # Collect everything before the file is opened (and truncated), so a
        # failure while merging does not leave an empty pickle file behind.
        meta = self.txt_meta
        results = self.expResultList
        particles = self.databaseParticles
        with open(filename, "wb") as f:
            logger.debug(" * load text database")
            logger.debug(" * write %s db version %s" %
                         (filename, self.databaseVersion))
            ## 4 is default protocol in python3.8, and highest protocol in 3.7
            ptcl = min(4, serializer.HIGHEST_PROTOCOL)
            serializer.dump(meta, f, protocol=ptcl)
            serializer.dump(results, f, protocol=ptcl)
            serializer.dump(particles, f, protocol=ptcl)
        logger.info("%s created." % (filename))

    def __str__(self):
        """ Summary: version, number of results per experiment and sqrts,
        number of datasets and txnames. """
        idList = "Database version: " + self.databaseVersion
        idList += "\n"
        idList += "-" * len(idList) + "\n"
        # the property merges the sub lists on every access: evaluate it once
        results = self.expResultList
        if results is None:
            idList += "no experimental results available! "
            return idList
        idList += "%d experimental results: " % len(results)
        atlas, cms = [], []
        datasets = 0
        txnames = 0
        s = {8: 0, 13: 0}
        for expRes in results:
            Id = expRes.globalInfo.getInfo('id')
            sqrts = expRes.globalInfo.getInfo('sqrts').asNumber(TeV)
            if sqrts not in s:
                s[sqrts] = 0
            s[sqrts] += 1
            datasets += len(expRes.datasets)
            for ds in expRes.datasets:
                txnames += len(ds.txnameList)
            if "ATLAS" in Id:
                atlas.append(expRes)
            if "CMS" in Id:
                cms.append(expRes)
        idList += "%d CMS, %d ATLAS, " % (len(cms), len(atlas))
        for sqrts in s:
            # %g instead of %d: a non-integer energy (e.g. 13.6) must not be
            # truncated; integer energies are printed as before.
            idList += "%d @ %g TeV, " % (s[sqrts], sqrts)
        idList = idList[:-2] + '\n'
        idList += "%d datasets, %d txnames.\n" % (datasets, txnames)
        return idList

    def __eq__(self, other):
        """ Two databases are equal if they hold the same number of subs
        and the subs are pairwise equal. """
        if type(other) != type(self):
            return False
        # zip() alone would silently ignore surplus subs on either side
        if len(self.subs) != len(other.subs):
            return False
        for x, y in zip(self.subs, other.subs):
            if x != y:
                return False
        return True

    def getExpResults(self, analysisIDs=['all'], datasetIDs=['all'], txnames=['all'],
                      dataTypes=['all'], useSuperseded=False, useNonValidated=False,
                      onlyWithExpected=False):
        """
        Returns a list of ExpResult objects.

        Each object refers to an analysisID containing one (for UL) or more
        (for Efficiency maps) dataset (signal region) and each dataset
        containing one or more TxNames. If analysisIDs is defined, returns
        only the results matching one of the IDs in the list. If dataTypes is
        defined, returns only the results matching a dataType in the list. If
        datasetIDs is defined, returns only the results matching one of the IDs
        in the list. If txname is defined, returns only the results matching
        one of the Tx names in the list.

        :param analysisIDs: list of analysis ids ([CMS-SUS-13-006,...]). Can
            be wildcarded with usual shell wildcards: * ? [<letters>]
            Furthermore, the centre-of-mass energy can be chosen
            as suffix, e.g. ":13*TeV". Note that the asterisk
            in the suffix is not a wildcard.
        :param datasetIDs: list of dataset ids ([ANA-CUT0,...]). Can be wildcarded
            with usual shell wildcards: * ? [<letters>]
        :param txnames: list of txnames ([TChiWZ,...]). Can be wildcarded with
            usual shell wildcards: * ? [<letters>]
        :param dataTypes: dataType of the analysis (all, efficiencyMap or upperLimit)
            Can be wildcarded with usual shell wildcards: * ? [<letters>]
        :param useSuperseded: deprecated; only triggers a warning here. The
            subs are always queried with useSuperseded=True.
        :param useNonValidated: If False, the results with validated = False
            will not be included
        :param onlyWithExpected: Return only those results that have expected values
            also. Note that this is trivially fulfilled for all efficiency maps.
        :returns: list of ExpResult objects (merged over all subs)
        """
        if useSuperseded:
            ss = ""
            if any("superseded" in sub.url for sub in self.subs):
                ss = " - which you seem to have already done"
            logger.warning("the useSuperseded flag is deprecated from smodels v2.1 onwards. if you wish to use superseded results, please simply add them to your database path%s, e.g. 'official+superseded'." % ss)
        ret = []
        for sub in self.subs:
            ret.append(sub.getExpResults(analysisIDs, datasetIDs, txnames,
                                         dataTypes, True, useNonValidated,
                                         onlyWithExpected))
        return self.mergeLists(ret)

    @property
    def databaseParticles(self):
        """
        Database particles: those of the first sub (None if there are no subs).
        """
        if not self.subs:
            return None
        return self.subs[0].databaseParticles  ## FIXME do sth smarter?

    @property
    def databaseVersion(self):
        """
        The version of the database, concatenation of the individual versions
        """
        r = [x.databaseVersion for x in self.subs]
        for i, ri in enumerate(r):  # avoid repetitions
            for j, rj in enumerate(r[i + 1:]):
                if ri in rj:
                    r[i + j + 1] = rj.replace(ri, "")
        return "+".join(r)

    @property
    def txt_meta(self):
        """
        The meta info of the text version: that of the first sub
        (None if there are no subs).
        """
        if not self.subs:
            return None
        return self.subs[0].txt_meta

    @property
    def pcl_meta(self):
        """
        The meta info of the binary version: that of the first sub that has
        one (None if no sub has one).
        """
        for x in self.subs:
            if hasattr(x, "pcl_meta"):
                return x.pcl_meta
        return None

    def createLinksToCombinationsMatrix(self):
        """ in all globalInfo objects, create a shallow link to the
            combinations matrix """
        # getattr: tolerate instances on which the attribute was never set
        matrix = getattr(self, "combinationsmatrix", None)
        for x in self.subs:
            if not hasattr(x, "combinationsmatrix") or x.combinationsmatrix is None:
                x.combinationsmatrix = matrix
            x.createLinksToCombinationsMatrix()

    def clearLinksToCombinationsMatrix(self):
        """ clear all shallow links to the combinations matrix """
        self.combinationsmatrix = None
        for x in self.subs:
            x.combinationsmatrix = None
            x.clearLinksToCombinationsMatrix()
[docs]class SubDatabase(object):
"""
SubDatabase object. Holds a list of ExpResult objects.
"""
def __init__(self, base=None, force_load=None, discard_zeroes=True,
             progressbar=False, subpickle=True, combinationsmatrix=None):
    """
    :param base: path to the database, or pickle file (string), or http
        address. If None, "official", or "official_fastlim",
        use the official database for your code version
        (including fastlim results, if specified).
        If "latest", or "latest_fastlim", check for the latest database.
        Multiple databases may be named, use "+" as delimiter.
        Order matters: Results with same name will overwritten
        according to sequence
    :param force_load: force loading the text database ("txt"),
        or binary database ("pcl"), dont force anything if None
    :param discard_zeroes: discard txnames with only zeroes as entries.
    :param progressbar: show a progressbar when building pickle file
        (needs the python-progressbar module)
    :param subpickle: produce small pickle files per exp result.
        Should only be used when working on the database.
    :param combinationsmatrix: an optional dictionary that contains info
        about combinable analyses, e.g. { "anaid1": ( "anaid2", "anaid3" ) }
        optionally specifying signal regions, e.g. { "anaid1:SR1":
        ( "anaid2:SR2", "anaid3" ) }
    :raises SModelSError: if force_load has an unrecognized value
    """
    if base is None:
        # None is documented as "use the official database"; without this
        # the string operations below fail on None.
        base = "official"
    self.url = base
    self.combinationsmatrix = combinationsmatrix
    self.source = ""
    if force_load is None and base.endswith(".pcl"):
        force_load = "pcl"
    # NOTE: source and force_load have to be set before checkPathName is
    # called, since checkPathName (and the fetch methods) overwrite them.
    self.force_load = force_load
    self.subpickle = subpickle
    obase = base  ## keep old name for more checks for 'latest'
    from smodels.installation import __dblabels__
    if base in __dblabels__:
        from smodels.installation import databasePath
        base = databasePath(base)
    base, pclfile = self.checkPathName(base, discard_zeroes)
    self.pcl_meta = Meta(pclfile)
    self.expResultList = []
    self.txt_meta = self.pcl_meta
    if not self.force_load == "pcl":
        self.txt_meta = Meta(base, discard_zeroes=discard_zeroes)
    self.progressbar = None
    if progressbar:
        try:
            import progressbar as P
            self.progressbar = P.ProgressBar(widgets=[
                "Building Database ", P.Percentage(),
                P.Bar(marker=P.RotatingMarker()), P.ETA()])
        except ImportError:
            logger.warning("progressbar requested, but python-progressbar is not installed.")
    if self.force_load == "txt":
        self._setParticles()
        self.loadTextDatabase()
        self.txt_meta.printFastlimBanner()
        return
    if self.force_load == "pcl":
        self.loadBinaryFile()
        self._setParticles()
        self.pcl_meta.printFastlimBanner()
        if "latest" in obase:
            # only the first character of the two version strings is compared
            from smodels import installation
            codeVersion = installation.version()
            pclVersion = self.pcl_meta.databaseVersion
            if codeVersion[0] != pclVersion[0]:
                logger.error("major versions of code and database differ! code=%s, database=%s" % (codeVersion[0], pclVersion[0]))
        return
    if self.force_load in [None, "none", "None"]:
        self.loadDatabase()
        self._setParticles()
        self.txt_meta.printFastlimBanner()
        return
    logger.error("when initialising database: force_load=%s is not "
                 "recognized. Valid values are: pcl, txt, None." % force_load)
    raise SModelSError()
def __eq__(self, other):
    """
    Compare two databases: same type, text meta info considered the same
    (txt_meta.sameAs), and pairwise equal lists of experimental results.
    """
    if type(self) != type(other):
        return False
    if not self.txt_meta.sameAs(other.txt_meta):
        return False
    mine, theirs = self.expResultList, other.expResultList
    if len(mine) != len(theirs):
        return False
    return not any(left != right for left, right in zip(mine, theirs))
def loadDatabase(self):
    """
    Load the database, using the binary file where possible.

    If no binary file exists, the text database is loaded and the binary
    file is created. If a binary file exists, it is either recreated (when
    needsUpdate() says so) or loaded.
    """
    picklePresent = os.path.exists(self.pcl_meta.pathname)
    if picklePresent and not self.needsUpdate():
        self.loadBinaryFile(lastm_only=False)
        return
    if not picklePresent:
        logger.info("Creating binary database ")
        logger.info("(this may take a few minutes, but it's done only once!)")
        self.loadTextDatabase()
    self.createBinaryFile()
def loadTextDatabase(self):
    """
    Load the text database into self.expResultList and link the results
    to the model and to the combinations matrix. Does nothing if a database
    version is known and results have already been loaded.
    """
    alreadyLoaded = self.txt_meta.databaseVersion and len(self.expResultList) > 0
    if alreadyLoaded:
        logger.debug("Asked to load database, but has already been loaded. Ignore.")
        return
    logger.info("Parsing text database at %s" % self.txt_meta.pathname)
    self.expResultList = self._loadExpResults()
    self.createLinksToModel()
    self.createLinksToCombinationsMatrix()
def createLinksToModel(self):
    """
    In all globalInfo objects, let _databaseParticles point to
    self.databaseParticles, unless it already holds something other than None.
    Does nothing if self.databaseParticles is missing or None.
    """
    particles = getattr(self, "databaseParticles", None)
    if particles is None:
        return
    for expResult in self.expResultList:
        info = expResult.globalInfo
        # missing attribute and None are treated alike
        if getattr(info, "_databaseParticles", None) is None:
            info._databaseParticles = self.databaseParticles
def createLinksToCombinationsMatrix(self):
    """
    In all globalInfo objects, let _combinationsmatrix point to
    self.combinationsmatrix, unless it already holds something other than
    None. Does nothing if self.combinationsmatrix is missing or None.
    """
    matrix = getattr(self, "combinationsmatrix", None)
    if matrix is None:
        return
    for expResult in self.expResultList:
        info = expResult.globalInfo
        # missing attribute and None are treated alike
        if getattr(info, "_combinationsmatrix", None) is None:
            info._combinationsmatrix = self.combinationsmatrix
def clearLinksToCombinationsMatrix(self):
    """ Delete the _combinationsmatrix attribute from every globalInfo
    object that has one. """
    for expResult in self.expResultList:
        info = expResult.globalInfo
        if hasattr(info, "_combinationsmatrix"):
            del info._combinationsmatrix
def removeLinksToModel(self):
    """ Delete the _databaseParticles attribute (the link to the model)
    from every globalInfo object that has one.
    Currently not used. """
    for expResult in self.expResultList:
        info = expResult.globalInfo
        if hasattr(info, "_databaseParticles"):
            del info._databaseParticles
def loadBinaryFile(self, lastm_only=False):
    """
    Load the binary (pickle) database file at self.pcl_meta.pathname.

    The file holds, in this order: the meta info, the list of experimental
    results and (optionally) the database particles.

    :param lastm_only: if true, only the meta info is read, the database
        itself is not.
    :returns: None if nothing was read (lastm_only with an mtime already
        known, or no pickle file present), else this object.
    :raises SModelSError: if the file is unreadable and the source
        ("http", "ftp", "pcl") cannot be rebuilt from text.
    """
    if lastm_only and self.pcl_meta.mtime:
        ## doesnt need to load database, and mtime is already loaded
        return None
    if not os.path.exists(self.pcl_meta.pathname):
        return None
    wantContent = not lastm_only
    try:
        with open(self.pcl_meta.pathname, "rb") as handle:
            started = time.time()
            # the unpickled meta object replaces self.pcl_meta; it is given
            # the path of the file it was actually read from
            picklePath = self.pcl_meta.pathname
            self.pcl_meta = serializer.load(handle)
            self.pcl_meta.pathname = picklePath
            forcedPcl = self.force_load == "pcl"
            if forcedPcl:
                self.txt_meta = self.pcl_meta
            if wantContent:
                if not forcedPcl and self.pcl_meta.needsUpdate(self.txt_meta):
                    logger.warning("Something changed in the environment."
                                   "Regenerating.")
                    self.createBinaryFile()
                    return self
                logger.info("loading binary db file %s format version %s" %
                            (self.pcl_meta.pathname, self.pcl_meta.format_version))
                if sys.version[0] == "2":
                    self.expResultList = serializer.load(handle)
                else:
                    self.expResultList = serializer.load(handle, encoding="latin1")
                elapsed = time.time() - started
                logger.info("Loaded database from %s in %.1f secs." %
                            (self.pcl_meta.pathname, elapsed))
                self.databaseParticles = None
                try:
                    self.databaseParticles = serializer.load(handle)
                except EOFError:
                    pass  ## a model does not *have* to be defined
                self.createLinksToModel()
                self.createLinksToCombinationsMatrix()
    except (EOFError, ValueError) as err:
        # unreadable pickle file: remove it
        os.unlink(self.pcl_meta.pathname)
        if lastm_only:
            self.pcl_meta.format_version = -1
            self.pcl_meta.mtime = 0
            return self
        logger.error("%s is not readable (%s)." %
                     (self.pcl_meta.pathname, str(err)))
        if self.source in ["http", "ftp", "pcl"]:
            logger.error("source cannot be rebuilt. supply a different path to the database in your ini file.")
            raise SModelSError()
        self.createBinaryFile()
    return self
def checkBinaryFile(self):
    """
    Report, via the logger, whether the binary db file needs an update.

    :returns: the result of needsUpdate()
    """
    outdated = self.needsUpdate()
    logger.debug("Checking binary db file.")
    pcl, txt = self.pcl_meta, self.txt_meta
    logger.debug("Binary file dates to %s(%d)" %
                 (time.ctime(pcl.mtime), pcl.filecount))
    logger.debug("Database dates to %s(%d)" %
                 (time.ctime(txt.mtime), txt.filecount))
    if not outdated:
        logger.info("Binary db file does not need an update.")
    else:
        logger.info("Binary db file needs an update.")
    return outdated
def needsUpdate(self):
    """
    Does the binary db file need an update?

    :returns: what pcl_meta.needsUpdate(txt_meta) reports after the meta
        info of the binary file has been read; True if reading or comparing
        raises IOError, DatabaseNotFoundException, TypeError or ValueError.
    """
    try:
        self.loadBinaryFile(lastm_only=True)
        outdated = self.pcl_meta.needsUpdate(self.txt_meta)
    except (IOError, DatabaseNotFoundException, TypeError, ValueError):
        # if we encounter a problem, we rebuild the database.
        return True
    return outdated
def createBinaryFile(self, filename=None):
    """
    Create a pcl file from the text database,
    potentially overwriting an old pcl file.

    Written to the file, in this order: self.txt_meta, self.expResultList,
    self.databaseParticles.

    :param filename: name of the file to write; if None, the pathname of
        self.pcl_meta is used.
    :raises SModelSError: if no txt_meta is defined
    """
    if self.txt_meta is None:
        logger.error("Trying to create database pickle, but no txt_meta defined.")
        raise SModelSError()
    logger.debug("database timestamp: %s, filecount: %s" %
                 (time.ctime(self.txt_meta.mtime), self.txt_meta.filecount))
    binfile = filename
    if binfile is None:
        binfile = self.pcl_meta.pathname
    ## make sure we have a model to pickle with the database!
    if getattr(self, 'databaseParticles', None) is None:
        self._setParticles(self._getParticles())
    logger.debug(" * create %s" % binfile)
    # Parse the text database *before* the output file is opened: opening
    # with "wb" truncates an existing pickle, so a failing parse would
    # otherwise leave an empty file behind.
    logger.debug(" * load text database")
    self.loadTextDatabase()
    with open(binfile, "wb") as f:
        logger.debug(" * write %s db version %s, format version %s, %s" %
                     (binfile, self.txt_meta.databaseVersion,
                      self.txt_meta.format_version, self.txt_meta.cTime()))
        ## 4 is default protocol in python3.8, and highest protocol in 3.7
        ptcl = min(4, serializer.HIGHEST_PROTOCOL)
        serializer.dump(self.txt_meta, f, protocol=ptcl)
        serializer.dump(self.expResultList, f, protocol=ptcl)
        serializer.dump(self.databaseParticles, f, protocol=ptcl)
    logger.info("%s created." % (binfile))
@property
def databaseVersion(self):
    """
    The database version, as stored in the meta info of the text database
    (self.txt_meta).
    """
    meta = self.txt_meta
    return meta.databaseVersion

@databaseVersion.setter
def databaseVersion(self, x):
    # keep the meta info of text and binary database in sync
    for meta in (self.txt_meta, self.pcl_meta):
        meta.databaseVersion = x
def inNotebook(self):
    """
    Are we running within a notebook? Has an effect on the
    progressbar we wish to use.

    :returns: True if get_ipython() exists and its config has an
        'IPKernelApp' entry, False otherwise.
    """
    try:
        cfg = get_ipython().config
        return 'IPKernelApp' in cfg.keys()
    except NameError:
        # get_ipython is not defined outside of IPython
        return False
@property
def base(self):
    """
    Path to the base directory: the pathname stored in the meta info of
    the text database.
    """
    meta = self.txt_meta
    return meta.pathname
def fetchFromScratch(self, path, store, discard_zeroes):
    """
    Fetch database from scratch, together with description.

    The json description is fetched from `path` and cached in `store`;
    the pickle file it refers to ("url") is downloaded to the cache
    directory, unless a file with matching sha1 checksum is already there.
    Sets self.force_load to "pcl".

    :param path: address of the json description
    :param store: filename to store json file.
    :param discard_zeroes: not used in this method
    :returns: tuple ( "./", <path of the pickle file in the cache> )
    :raises SModelSError: if the description cannot be fetched or parsed
    """
    def sizeof_fmt(num, suffix='B'):
        for unit in ['', 'K', 'M', 'G', 'T', 'P']:
            if abs(num) < 1024.:
                return "%3.1f%s%s" % (num, unit, suffix)
            num /= 1024.0
        return "%.1f%s%s" % (num, 'Yi', suffix)

    import requests
    try:
        r = requests.get(path, timeout=5)
    except requests.exceptions.RequestException as e:
        logger.error("Exception when trying to fetch database: %s" % e)
        logger.error("Consider supplying a different database path in the ini file (possibly a local one)")
        raise SModelSError()
    if r.status_code != 200:
        line = "Error %d: could not fetch '%s' from server: '%s'" % \
               (r.status_code, path, r.reason)
        logger.error(line)
        raise SModelSError(line)
    # Parse the description once, and validate it *before* it is cached:
    # an unusable description in the cache would be picked up again on the
    # next start.
    try:
        description = r.json()
    except ValueError:
        description = None
    if not isinstance(description, dict) or "url" not in description:
        logger.error("cannot parse json file %s." % path)
        raise SModelSError()
    ## its new so store the description
    with open(store, "w") as f:
        f.write(r.text)
    size = description["size"]
    cDir, defused = cacheDirectory(create=True, reportIfDefault=True)
    t0 = time.time()
    filename = os.path.join(cDir, description["url"].split("/")[-1])
    # the checksum is optional in the description
    expectedSha = description.get("sha1")
    if os.path.exists(filename) and expectedSha is not None:
        # if file exists and checksums match, we dont download
        if _getSHA1(filename) == expectedSha:
            ## seems it hasnt changed
            self.force_load = "pcl"
            return ("./", "%s" % filename)
    r2 = requests.get(description["url"], stream=True, timeout=(250, 2000))
    msg = "downloading the database from %s and caching in %s." % (path, cDir)
    if defused:
        msg += " If you want the pickled database file to be cached in a different location, set the environment variable SMODELS_CACHEDIR, e.g. to '/tmp'."
    logger.warning(msg)
    logger.info("need to fetch %s and store in %s. size is %s." %
                (description["url"], filename, sizeof_fmt(size)))
    with open(filename, "wb") as dump:
        import fcntl
        fcntl.lockf(dump, fcntl.LOCK_EX)
        if not self.inNotebook():  ## \r doesnt work in notebook
            print(" " + " "*51 + "<", end="\r")
        print("loading >", end="")
        # roughly 50 chunks, one progress dot each; never a chunk size of 0
        for x in r2.iter_content(chunk_size=max(1, int(size / 50))):
            dump.write(x)
            dump.flush()
            print(".", end="")
            sys.stdout.flush()
        if self.inNotebook():
            print("done.")
        else:
            print("")
        fcntl.lockf(dump, fcntl.LOCK_UN)
    if expectedSha is not None:
        sha = _getSHA1(filename)
        testsha = expectedSha
        if sha != testsha:
            logger.error(f"error: downloaded file has different checksum {sha}!={testsha}. This should not happen. Contact the smodels-developers <smodels-developers@lists.oeaw.ac.at>")
    logger.info("fetched %s in %d secs." % (r2.url, time.time()-t0))
    logger.debug("store as %s" % filename)
    self.force_load = "pcl"
    return ("./", "%s" % filename)
def fetchFromServer(self, path, discard_zeroes):
    """
    Make sure a pickle file for the database at `path` is in the cache
    directory, (re)downloading it via fetchFromScratch where needed.

    Sets self.source ("http" or "ftp") and, when a cached pickle file is
    used, self.force_load ("pcl").

    :param path: address of the json description of the database
    :param discard_zeroes: passed on to fetchFromScratch
    :returns: tuple ( <directory>, <path of the pickle file> )
    """
    import json
    import requests
    self.source = "http"
    if "ftp://" in path:
        self.source = "ftp"
    cDir = cacheDirectory(create=True)
    store = os.path.join(cDir, path.replace(":", "_").replace("/", "_").replace(".", "_"))
    logger.debug("need to fetch from server: %s and store to %s" % (path, store))
    if not os.path.isfile(store):
        ## completely new! fetch the description and the db!
        return self.fetchFromScratch(path, store, discard_zeroes)
    try:
        with open(store, "r") as f:
            jsn = json.load(f)
        filename = os.path.join(cDir, jsn["url"].split("/")[-1])
    except (ValueError, KeyError, TypeError):
        # the cached description is unusable: fetch everything anew
        # instead of failing on every start
        return self.fetchFromScratch(path, store, discard_zeroes)
    r = None
    status = -1  # reported if the server could not be reached at all
    try:
        r = requests.get(path, timeout=2)
        status = r.status_code
    except requests.exceptions.RequestException:
        pass
    if status != 200:
        logger.warning("Error %d: could not fetch %s from server." %
                       (status, path))
        if not os.path.isfile(filename):
            logger.error("Cant find a local copy of the pickle file. Exit.")
            sys.exit()
        logger.warning("I do however have a local copy of the file at %s. I work with that." % filename)
        self.force_load = "pcl"
        return (cDir, filename)
    if not os.path.exists(filename):
        return self.fetchFromScratch(path, store, discard_zeroes)
    stats = os.stat(filename)
    if abs(stats.st_size - jsn["size"]) > 4096:
        ## size doesnt match (4096 is to allow for slightly different file
        ## sizes reported by the OS). redownload!
        return self.fetchFromScratch(path, store, discard_zeroes)
    # the sha1 checksum is deliberately not verified here, b/c its slowish
    if r.json()["lastchanged"] > jsn["lastchanged"]:
        ## has changed! redownload everything!
        return self.fetchFromScratch(path, store, discard_zeroes)
    if not os.path.isfile(filename):
        return self.fetchFromScratch(path, store, discard_zeroes)
    self.force_load = "pcl"
    return ("./", filename)
def checkPathName(self, path, discard_zeroes):
    """
    Check the path name and determine base directory and pickle file name.

    If path starts with http or ftp, the description file and the database
    are fetched (fetchFromServer). A leading "file://" is stripped.
    Sets self.source to "pcl" or "txt" in the corresponding branches.

    :returns: tuple ( base directory, pickle file name )
    :raises SModelSError: for a .pcl path that does not exist while
        force_load is "pcl", or that exists but is not a file
    :raises DatabaseNotFoundException: if the path does not exist
    """
    logger.debug('Try to set the path for the database to: %s', path)
    if path.startswith(("http://", "https://", "ftp://")):
        return self.fetchFromServer(path, discard_zeroes)
    if path.startswith("file://"):
        path = path[7:]
    resolved = os.path.realpath(path)
    if os.path.isfile(resolved):
        return (os.path.dirname(resolved), resolved)
    if resolved.endswith(".pcl"):
        self.source = "pcl"
        if os.path.exists(resolved):
            # exists, but isfile() above said it is not a regular file
            logger.error("Supplied a pcl filename, but %s is not a file." % resolved)
            raise SModelSError()
        if self.force_load == "pcl":
            logger.error("File not found: %s" % resolved)
            raise SModelSError()
        logger.info("File not found: %s. Will generate." % resolved)
        return (os.path.dirname(resolved), resolved)
    dbdir = resolved + '/'
    if not os.path.exists(dbdir):
        logger.error('%s is no valid path!' % dbdir)
        raise DatabaseNotFoundException("Database not found")
    meta = Meta(dbdir, discard_zeroes=discard_zeroes)
    self.source = "txt"
    return (dbdir, dbdir + meta.getPickleFileName())
def __str__(self):
    """ Summary: version, number of results per experiment and sqrts,
    number of datasets and txnames. """
    idList = "Database version: " + self.databaseVersion
    idList += "\n"
    idList += "-" * len(idList) + "\n"
    if self.expResultList is None:
        idList += "no experimental results available! "
        return idList
    idList += "%d experimental results: " % len(self.expResultList)
    atlas, cms = [], []
    datasets = 0
    txnames = 0
    s = {8: 0, 13: 0}
    for expRes in self.expResultList:
        Id = expRes.globalInfo.getInfo('id')
        sqrts = expRes.globalInfo.getInfo('sqrts').asNumber(TeV)
        if sqrts not in s:
            s[sqrts] = 0
        s[sqrts] += 1
        datasets += len(expRes.datasets)
        for ds in expRes.datasets:
            txnames += len(ds.txnameList)
        if "ATLAS" in Id:
            atlas.append(expRes)
        if "CMS" in Id:
            cms.append(expRes)
    idList += "%d CMS, %d ATLAS, " % (len(cms), len(atlas))
    for sqrts in s:
        # %g instead of %d: a non-integer energy (e.g. 13.6) must not be
        # truncated; integer energies are printed as before.
        idList += "%d @ %g TeV, " % (s[sqrts], sqrts)
    idList = idList[:-2] + '\n'
    idList += "%d datasets, %d txnames.\n" % (datasets, txnames)
    return idList
def _setParticles(self, databaseParticles=None):
    """
    Set the databaseParticles attribute.

    If databaseParticles is given (truthy), it is used. If the attribute
    is None afterwards, fall back to the final states defined in
    defaultFinalStates.py.

    :param databaseParticles: Model object containing the final state particles
        used in the database.
    """
    # If not yet defined, set the attribute to None:
    if not hasattr(self, 'databaseParticles'):
        self.databaseParticles = None
    # If input is given, use it to set the databaseParticles attribute:
    if databaseParticles:
        logger.debug("Setting database particles from %s" % str(databaseParticles))
        self.databaseParticles = databaseParticles
    # If still None, fallback to default:
    if self.databaseParticles is None:
        # use the module logger: logging.debug() would go to the root
        # logger and implicitly configure it
        logger.debug("databaseParticles not found. Using default state.")
        from smodels.experiment.defaultFinalStates import finalStates
        self.databaseParticles = finalStates
def _getParticles(self, particlesFile='databaseParticles.py'):
    """
    Load the particle objects used in the database.
    The particles file is searched for in the database folder
    (self.txt_meta.pathname).

    :param particlesFile: name of the python file defining `finalStates`
    :returns: the `finalStates` object of that file; None if the file does
        not exist or does not define `finalStates`.
    """
    fulldir = os.path.join(self.txt_meta.pathname, particlesFile)
    if not os.path.isfile(fulldir):
        return None
    from importlib import import_module
    # make the database folder importable, without piling up duplicates
    if self.txt_meta.pathname not in sys.path:
        sys.path.append(self.txt_meta.pathname)
    pFile = os.path.splitext(particlesFile)[0]
    logger.debug("Loading database particles from: %s" % fulldir)
    modelFile = import_module(pFile, package='smodels')
    if not hasattr(modelFile, 'finalStates'):
        # the format string needs a %s placeholder, else the formatting
        # itself raises a TypeError
        logger.error("Model definition (finalStates) not found in %s" % fulldir)
        return None
    # set model name to file location:
    modelFile.finalStates.label = os.path.basename(fulldir)
    return modelFile.finalStates
def _loadExpResults(self):
    """
    Walk the database folder and create an ExpResult object (via
    createExpResult) for every folder that contains a globalInfo.txt.
    Folders within .git, and folders named validation or orig are skipped.

    :returns: list of ExpResult objects
    """
    # Try to load particles from databaseParticles.py
    self._setParticles(self._getParticles())
    walked = sorted((root, files)
                    for root, _, files in cleanWalk(self.txt_meta.pathname))
    roots = []
    for root, files in walked:
        skipped = ("/.git/" in root
                   or root.endswith("/validation")
                   or root.endswith("/orig"))
        if skipped or 'globalInfo.txt' not in files:
            continue
        roots.append(root)
    if self.progressbar:
        self.progressbar.maxval = len(roots)
        self.progressbar.start()
    resultsList = []
    for position, root in enumerate(roots):
        if self.progressbar:
            self.progressbar.update(position)
        expres = self.createExpResult(root)
        if expres:
            resultsList.append(expres)
    if not resultsList:
        logger.warning("Zero results loaded.")
    if self.progressbar:
        self.progressbar.finish()
    return resultsList
def createExpResult(self, root):
    """
    Create the experimental result for the folder `root`, from its pickle
    file if that is usable, else from the text files.

    The pickle file (a hidden file within root) is used only if force_load
    is not "txt", the file exists, and its meta info does not need an
    update with respect to the text meta info. A result created from text
    is written to its pickle file if self.subpickle is set. If the contact
    of the result mentions fastlim, self.txt_meta.hasFastLim is set.

    :param root: folder of the experimental result
    :returns: the ExpResult object
    """
    txtmeta = Meta(root, discard_zeroes=self.txt_meta.discard_zeroes,
                   hasFastLim=None, databaseVersion=self.databaseVersion)
    pclfile = "%s/.%s" % (root, txtmeta.getPickleFileName())
    logger.debug("Creating %s, pcl=%s" % (root, pclfile))
    expres = None
    try:
        if self.force_load != "txt" and os.path.exists(pclfile):
            with open(pclfile, "rb") as handle:
                logger.debug("Loading: %s" % pclfile)
                ## read meta from pickle
                pclmeta = serializer.load(handle)
                if pclmeta.needsUpdate(txtmeta):
                    logger.debug("we cannot use expres from pickle file %s" % pclfile)
                    logger.debug("txt meta %s" % txtmeta)
                    logger.debug("pcl meta %s" % pclmeta)
                    logger.debug("pcl meta needs update %s" % pclmeta.needsUpdate(txtmeta))
                else:
                    logger.debug("we can use expres from pickle file %s" % pclfile)
                    expres = serializer.load(handle)
    except IOError as err:
        logger.error("exception %s" % err)
    if not expres:  ## create from text file
        expres = ExpResult(root, discard_zeroes=self.txt_meta.discard_zeroes,
                           databaseParticles=self.databaseParticles)
        if self.subpickle and expres:
            expres.writePickle(self.databaseVersion)
    if expres:
        contact = expres.globalInfo.getInfo("contact")
        if contact and "fastlim" in contact.lower():
            self.txt_meta.hasFastLim = True
    return expres
def getExpResults(self, analysisIDs=['all'], datasetIDs=['all'], txnames=['all'],
                  dataTypes=['all'], useSuperseded=False, useNonValidated=False,
                  onlyWithExpected=False):
    """
    Returns a list of ExpResult objects.

    Each object refers to an analysisID containing one (for UL) or more
    (for Efficiency maps) dataset (signal region) and each dataset
    containing one or more TxNames. If analysisIDs is defined, returns
    only the results matching one of the IDs in the list. If dataTypes is
    defined, returns only the results matching a dataType in the list. If
    datasetIDs is defined, returns only the results matching one of the IDs
    in the list. If txname is defined, returns only the results matching
    one of the Tx names in the list.

    The returned ExpResult objects are new containers that share globalInfo,
    dataInfo and the txname objects with the stored results. As a side
    effect, a string-valued ``validated`` field of an inspected txname is
    lower-cased in place.

    :param analysisIDs: list of analysis ids ([CMS-SUS-13-006,...]). Can
                        be wildcarded with usual shell wildcards: * ? [<letters>]
                        Furthermore, the centre-of-mass energy can be chosen
                        as suffix, e.g. ":13*TeV". Note that the asterisk
                        in the suffix is not a wildcard.
                        A single string is accepted in place of a list.
    :param datasetIDs: list of dataset ids ([ANA-CUT0,...]). Can be wildcarded
                        with usual shell wildcards: * ? [<letters>]
                        A single string is accepted in place of a list;
                        None is treated as [None].
    :param txnames: list of txnames ([TChiWZ,...]). Can be wildcarded with
                        usual shell wildcards: * ? [<letters>]
                        A single string is accepted in place of a list.
    :param dataTypes: dataType of the analysis (all, efficiencyMap or upperLimit)
                        Can be wildcarded with usual shell wildcards: * ? [<letters>]
                        A single string is accepted in place of a list.
    :param useSuperseded: If False, the supersededBy results will not be included
                        (deprecated)
    :param useNonValidated: If False, the results with validated = False
                        will not be included
    :param onlyWithExpected: Return only those results that have expected values
             also. Note that this is trivially fulfilled for all efficiency maps.
    :raises ValueError: if an analysis id carries a ':' but no energy value
                        after it (e.g. "CMS*:" or "CMS*:TeV")
    :returns: list of ExpResult objects (always a list, possibly empty)
    """
    import fnmatch

    # NOTE: the list defaults in the signature are kept for interface
    # compatibility; they are only ever compared or rebound, never mutated.

    def matchesAny(value, patterns):
        """ True if value matches at least one of the shell-style patterns. """
        return any(fnmatch.fnmatch(value, pattern) for pattern in patterns)

    def parseAnalysisPattern(patternString):
        """ Split 'id[:energy]' into (idPattern, sqrts); sqrts is None
            when no energy suffix is given. """
        parts = patternString.split(':')  # assuming 0 or 1 colons
        if len(parts) < 2:
            return parts[0], None
        # Accepted suffixes: ":13", ":13*TeV", ":13TeV", ":13 TeV"
        energy = parts[1]
        if energy.endswith('TeV'):
            energy = energy[:-3]
        if energy[-1:] in (" ", "*"):
            energy = energy[:-1]
        if not energy:
            raise ValueError("analysis id pattern '%s' has an empty "
                             "centre-of-mass energy suffix" % patternString)
        # NOTE(security): eval() runs caller-supplied text as Python code.
        # Anything that is not a valid '<number>*TeV' expression raises here.
        # Do not pass untrusted strings as analysisIDs.
        return parts[0], eval(energy + "*TeV")

    def analysisSelected(analysisID, sqrts):
        """ True if id (and energy, when requested) match any filter. """
        for idPattern, energy in analysisFilters:
            if not fnmatch.fnmatch(analysisID, idPattern):
                continue
            if energy is not None and sqrts != energy:
                continue
            return True
        return False

    # Accept a bare string wherever a list of patterns is expected.
    if isinstance(analysisIDs, str):
        analysisIDs = [analysisIDs]
    if isinstance(datasetIDs, str):
        datasetIDs = [datasetIDs]
    if isinstance(txnames, str):
        txnames = [txnames]
    if isinstance(dataTypes, str):
        dataTypes = [dataTypes]
    if datasetIDs is None:
        datasetIDs = [None]

    # Parse the loop-invariant filters once instead of once per result.
    analysisFilters = None
    if analysisIDs != ['all']:
        analysisFilters = [parseAnalysisPattern(p) for p in analysisIDs]
    datasetIDPatterns = None
    if datasetIDs != ['all']:
        datasetIDPatterns = [str(p) for p in datasetIDs]

    expResultList = []
    for expResult in self.expResultList:
        superseded = None
        if hasattr(expResult.globalInfo, 'supersededBy'):
            superseded = expResult.globalInfo.supersededBy.replace(" ", "")
        if superseded and (not useSuperseded):
            continue

        analysisID = expResult.globalInfo.getInfo('id')
        sqrts = expResult.globalInfo.getInfo('sqrts')
        # Skip analysis not containing any of the required ids:
        if analysisFilters is not None and not analysisSelected(analysisID, sqrts):
            continue

        newExpResult = ExpResult()
        newExpResult.path = expResult.path
        newExpResult.globalInfo = expResult.globalInfo
        newExpResult.datasets = []
        newExpResult.origdatasets = expResult.datasets

        for dataset in expResult.datasets:
            if dataTypes != ['all'] and \
                    not matchesAny(dataset.dataInfo.dataType, dataTypes):
                continue
            # Datasets without a dataId attribute are never filtered by id.
            if datasetIDPatterns is not None and hasattr(dataset.dataInfo, 'dataId'):
                if not matchesAny(str(dataset.dataInfo.dataId), datasetIDPatterns):
                    continue

            newDataSet = datasetObj.DataSet(dataset.path, dataset.globalInfo,
                    False, discard_zeroes=self.txt_meta.discard_zeroes)
            newDataSet.dataInfo = dataset.dataInfo
            newDataSet.txnameList = []
            for txname in dataset.txnameList:
                # Normalise string flags in place so the checks below are
                # case-insensitive.
                if isinstance(txname.validated, str):
                    txname.validated = txname.validated.lower()
                if (txname.validated not in [True, False, "true", "false", "n/a", "tbd", None, "none"]):
                    logger.error("value of validated field '%s' in %s unknown." % (txname.validated, expResult))
                if txname.validated in [None, "none"]:  ## FIXME after 1.1.1 this becomes a warning msg?
                    logger.debug("validated is None in %s/%s/%s. Please set to True, False, N/A, or tbd." % \
                        ( expResult.globalInfo.id, dataset.dataInfo.dataId, txname ) )
                if txname.validated not in [ None, True, "true", "n/a", "tbd" ] and (not useNonValidated ):
                    continue
                if txnames != ['all'] and not matchesAny(txname.txName, txnames):
                    continue
                if onlyWithExpected and dataset.dataInfo.dataType == \
                        "upperLimit" and not txname.txnameDataExp:
                    continue
                newDataSet.txnameList.append(txname)
            # Skip data set not containing any of the required txnames:
            if not newDataSet.txnameList:
                continue
            newExpResult.datasets.append(newDataSet)
        # Skip analysis not containing any of the required txnames:
        if not newExpResult.getTxNames():
            continue
        expResultList.append(newExpResult)
    return expResultList
def updateBinaryFile(self):
    """
    Write the binary db file, but only if necessary.

    Asks self.needsUpdate() whether a rewrite is required and, if so,
    delegates to self.createBinaryFile(). The decision is logged at
    debug level either way.
    """
    if not self.needsUpdate():
        logger.debug("Binary db file does not need an update.")
        return
    logger.debug("Binary db file needs an update.")
    self.createBinaryFile()
class ExpResultList(object):
    """
    Thin container around a list of ExpResult objects, meant for printout.
    """

    def __init__(self, expResList):
        """
        Keep a reference to the given results.

        :param expResList: list of ExpResult objects; stored as-is (not
                           copied) in the attribute ``expResultList``
        """
        self.expResultList = expResList
if __name__ == "__main__":
    # Run as a script, this checks and/or writes dbX.pcl files.
    import argparse
    from smodels.tools.smodelsLogging import setLogLevel

    argparser = argparse.ArgumentParser(
        description='simple script to check and/or write dbX.pcl files')
    # All on/off switches share the same shape; register them in the
    # original order so the generated help text is unchanged.
    switches = (
        ('-c', '--check', 'check binary db file'),
        ('-t', '--time', 'time reading db'),
        ('-r', '--read', 'read binary db file'),
        ('-w', '--write', 'force writing binary db file'),
        ('-u', '--update', 'update binary db file, if necessary'),
        ('-d', '--debug', 'debug mode'),
    )
    for shortFlag, longFlag, helpText in switches:
        argparser.add_argument(shortFlag, longFlag, help=helpText,
                               action='store_true')
    argparser.add_argument('-D', '--database', help='directory name of database',
                           default="../../../smodels-database/")
    args = argparser.parse_args()

    logger.setLevel(level=logging.INFO)
    if args.debug:
        setLogLevel(level=logging.DEBUG)

    if args.write:
        # Forced write: load from the text files, dump the binary, and stop.
        db = Database(args.database, force_load="txt")
        db.createBinaryFile()
        sys.exit()

    db = Database(args.database)
    if args.update:
        db.updateBinaryFile()
    if args.check:
        db.checkBinaryFile()
    if args.time:
        # Time the binary load first, then the text load.
        started = time.time()
        binaryDb = db.loadBinaryFile(lastm_only=False)
        afterBinary = time.time()
        print("Time it took reading binary db file: %.1f s." % (afterBinary - started))
        textDb = db.loadTextDatabase()
        afterText = time.time()
        print("Time it took reading text file: %.1f s." % (afterText - afterBinary))
    if args.read:
        db = db.loadBinaryFile(lastm_only=False)
        for result in db.getExpResults():
            print(result)