#!/usr/bin/env python3
"""
.. module:: databaseObj
:synopsis: Contains Database class that represents the database of experimental results.
.. moduleauthor:: Veronika Magerl <v.magerl@gmx.at>
.. moduleauthor:: Andre Lessa <lessa.a.p@gmail.com>
.. moduleauthor:: Wolfgang Waltenberger <wolfgang.waltenberger@gmail.com>
.. moduleauthor:: Matthias Wolf <matthias.wolf@wot.at>
"""
from __future__ import print_function
import os
import hashlib
import pathlib
import sys
import time
import copy
import io
from smodels.experiment import datasetObj
from smodels.installation import cacheDirectory
from smodels.experiment.metaObj import Meta
from smodels.experiment.expResultObj import ExpResult
from smodels.experiment.expSMSDict import ExpSMSDict
from smodels.experiment.exceptions import DatabaseNotFoundException
from smodels.base.physicsUnits import TeV
from smodels.experiment.expAuxiliaryFuncs import cleanWalk
from smodels.experiment.exceptions import SModelSExperimentError as SModelSError
from smodels.base.smodelsLogging import logger
import logging
os.environ["OMP_NUM_THREADS"] = "2"
scipyver = ""
try:
from importlib.metadata import version
scipyver = version("scipy")
except Exception as e:
try:
from scipy import __version__ as scipyver
except Exception as e:
pass
if scipyver not in ["1.8.", "1.9.", "1.10.", "2.0.", "2.1."]:
# fix for pickling different scipy versions (1.7.x vs 1.8.x)
# so that databases pickled with scipy 1.8.x still work with scipy 1.7.x
import scipy.spatial
if not hasattr(scipy.spatial, "_qhull") and hasattr(scipy.spatial, "qhull"):
sys.modules["scipy.spatial._qhull"] = scipy.spatial.qhull
try:
import cPickle as serializer
except ImportError as e:
import pickle as serializer
def _getSHA1(filename):
return hashlib.sha1(pathlib.Path(filename).read_bytes()).hexdigest()
# some mechanism to remove lock files if the download got interrupted
import atexit
lockfiles = set()
def removeLockFiles( lockfiles ):
""" remove cruft lockfiles """
for l in lockfiles:
if os.path.exists ( l ):
try:
os.unlink ( l )
except FileNotFoundError as e:
pass
    lockfiles.clear()  # clear the shared set in place; rebinding the parameter would have no effect
atexit.register ( removeLockFiles, lockfiles )
class Database(object):
"""
Database object. Holds a list of SubDatabases and the ExpSMS map.
Delegates all calls to SubDatabases.
"""
def __init__(self, base=None, force_load=None,
progressbar=False, subpickle=True,
combinationsmatrix=None):
"""
:param base: path to the database, or pickle file (string), or http
address. If None, "official", or "official_fastlim",
use the official database for your code version
(including fastlim results, if specified).
If "latest", or "latest_fastlim", check for the latest database.
Multiple databases may be specified using '+' as a delimiter.
:param force_load: force loading the text database ("txt"),
or binary database ("pcl"), dont force anything if None
:param progressbar: show a progressbar when building pickle file
(needs the python-progressbar module)
:param subpickle: produce small pickle files per exp result.
Should only be used when working on the database.
:param combinationsmatrix: an optional dictionary that contains info
about combinable analyses, e.g. { "anaid1": ( "anaid2", "anaid3" ) }
optionally specifying signal regions, e.g. { "anaid1:SR1":
( "anaid2:SR2", "anaid3" ) }
"""
self.subs = []
if "_fastlim" in base: # for backwards compatibility
base = base.replace("_fastlim", "+fastlim")
sstrings = base.split("+")
for ss in sstrings:
self.subs.append(SubDatabase(ss, force_load,
progressbar, subpickle,
combinationsmatrix))
# Compute SMS dict with all results
self._allExpSMSDict = ExpSMSDict(self.expResultList)
# Filter results using the default options
# (i.e. remove non-validated results,...)
self.selectExpResults()
@property
def expResultList(self):
"""
        The combined list of results, compiled from the
        active results in each subdatabase.
"""
if len(self.subs) == 0:
return []
else:
lists = [x.expResultList for x in self.subs]
return self.mergeLists(lists)
    def mergeLists(self, lists):
""" small function, merges lists of ERs """
D = {}
for tmp in lists:
for t in tmp:
if len(t.datasets) == 0: # skip empty entries
logger.warning(f"Analysis {t.globalInfo.id} has no datasets. Will remove it.")
continue
anaid = t.globalInfo.id + t.datasets[0].getType()
if anaid not in D:
D[anaid] = t
else:
D[anaid] = self.mergeERs(D[anaid], t)
return list(D.values())
    def mergeERs(self, o1, r2):
        """ merge the content of the experimental results o1 and r2 """
r1 = copy.deepcopy(o1)
r1.globalInfo = r2.globalInfo
dids = [x.getID() for x in o1.datasets]
for ds in r2.datasets:
if not ds.getID() in dids: # completely new dataset
r1.datasets.append(ds)
else: # just overwrite the old txnames
idx = dids.index(ds.getID()) # ds index
r2txs = ds.txnameList
r1txnames = [x.txName for x in r1.datasets[idx].txnameList]
for txn in r2txs:
if txn.txName in r1txnames:
tidx = r1txnames.index(txn.txName) # overwrite
r1.datasets[idx].txnameList[tidx] = txn
else:
# a new txname
r1.datasets[idx].txnameList.append(txn)
return r1
    def createBinaryFile(self, filename=None):
""" create a pcl file from all the subs """
## make sure we have a model to pickle with the database!
logger.debug(" * create %s" % filename)
if filename == None:
filename = self.pcl_meta.pathname
with open(filename, "wb") as f:
logger.debug(" * load text database")
logger.debug(" * write %s db version %s" %
(filename, self.databaseVersion))
ptcl = min(4, serializer.HIGHEST_PROTOCOL)
## 4 is default protocol in python3.8, and highest protocol in 3.7
serializer.dump(self.txt_meta, f, protocol=ptcl)
serializer.dump(self.expResultList, f, protocol=ptcl)
serializer.dump(self.databaseParticles, f, protocol=ptcl)
logger.info("%s created." % (filename))
def __str__(self):
# r = [ str(x) for x in self.subs ]
# return "+".join(r)
idList = "Database version: " + self.databaseVersion
idList += "\n"
idList += "-" * len(idList) + "\n"
if self.expResultList == None:
idList += "no experimental results available! "
return idList
idList += "%d experimental results: " % \
len(self.expResultList)
atlas, cms = [], []
datasets = 0
txnames = 0
s = {8: 0, 13: 0}
for expRes in self.expResultList:
Id = expRes.globalInfo.getInfo('id')
sqrts = expRes.globalInfo.getInfo('sqrts').asNumber(TeV)
if not sqrts in s.keys():
s[sqrts] = 0
s[sqrts] += 1
datasets += len(expRes.datasets)
for ds in expRes.datasets:
txnames += len(ds.txnameList)
if "ATLAS" in Id:
atlas.append(expRes)
if "CMS" in Id:
cms.append(expRes)
idList += "%d CMS, %d ATLAS, " % (len(cms), len(atlas))
for sqrts in s.keys():
idList += "%d @ %d TeV, " % (s[sqrts], sqrts)
# idList += expRes.globalInfo.getInfo('id') + ', '
idList = idList[:-2] + '\n'
idList += "%d datasets, %d txnames.\n" % (datasets, txnames)
return idList
def __eq__(self, other):
if type(other) != type(self):
return False
for x, y in zip(self.subs, other.subs):
if x != y:
return False
return True
    def getExpResults(self, analysisIDs=['all'], datasetIDs=['all'], txnames=['all'],
dataTypes=['all'], useNonValidated=False,
onlyWithExpected=False):
"""
        Selects (filters) the results within the database that satisfy the restrictions set by the arguments and returns the corresponding results.
"""
self.selectExpResults(analysisIDs=analysisIDs, datasetIDs=datasetIDs,
txnames=txnames, dataTypes=dataTypes,
useNonValidated=useNonValidated,
onlyWithExpected=onlyWithExpected)
return self.expResultList[:]
    def selectExpResults(self, analysisIDs=['all'], datasetIDs=['all'], txnames=['all'],
dataTypes=['all'], useNonValidated=False,
onlyWithExpected=False):
"""
        Selects (filters) the results within the database that satisfy the restrictions set by the arguments and updates the centralized SMS dictionary.
:param analysisIDs: list of analysis ids ([CMS-SUS-13-006,...]). Can
be wildcarded with usual shell wildcards: * ? [<letters>]
Furthermore, the centre-of-mass energy can be chosen
as suffix, e.g. ":13*TeV". Note that the asterisk
in the suffix is not a wildcard.
:param datasetIDs: list of dataset ids ([ANA-CUT0,...]). Can be wildcarded
with usual shell wildcards: * ? [<letters>]
:param txnames: list of txnames ([TChiWZ,...]). Can be wildcarded with
usual shell wildcards: * ? [<letters>]
:param dataTypes: dataType of the analysis (all, efficiencyMap or upperLimit)
Can be wildcarded with usual shell wildcards: * ? [<letters>]
:param useNonValidated: If False, the results with validated = False
will not be included
:param onlyWithExpected: Return only those results that have expected values
also. Note that this is trivially fulfilled for all efficiency maps.
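        Example (sketch; the patterns below are illustrative)::

            db.selectExpResults(analysisIDs=["CMS-*", "ATLAS-SUSY-2018-*:13*TeV"],
                                txnames=["T1*", "TChiWZ"],
                                dataTypes=["efficiencyMap"])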
"""
for sub in self.subs:
sub.setActiveExpResults(analysisIDs, datasetIDs, txnames, dataTypes,
useNonValidated, onlyWithExpected)
# Update SMS map
expDict = self._allExpSMSDict
self.expSMSDict = expDict.filter(self.expResultList)
    def getExpSMS(self):
"""
Returns all the SMS present in the selected experimental results
"""
return list(self.expSMSDict._smsDict.keys())
@property
def databaseParticles(self):
"""
Database particles, a list, one entry per sub
"""
r = [x.databaseParticles for x in self.subs]
return r[0] # FIXME do sth smarter?
@property
def databaseVersion(self):
"""
The version of the database, concatenation of the individual versions
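        Example (sketch): sub versions ["2.3.0", "2.3.0fastlim"] are reported as
        "2.3.0+fastlim"; a sub without a version string is shown as "unknown".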
"""
r = [x.databaseVersion for x in self.subs]
for i, ri in enumerate(r): # avoid repetitions
if ri == None:
r[i] = "unknown"
for j, rj in enumerate(r[i+1:]):
if ri in rj:
r[i+j+1] = rj.replace(ri, "")
return "+".join(r)
@property
def txt_meta(self):
"""
The meta info of the text version, a merger of the original ones
"""
r = [x.txt_meta for x in self.subs]
ret = r[0]
return ret
@property
def pcl_meta(self):
"""
        The meta info of the binary (pickle) version, a merger of the original ones
"""
ret = None
r = []
for x in self.subs:
if hasattr(x, "pcl_meta"):
r.append(x.pcl_meta)
        if r:
            ret = r[0]
return ret
    def createLinksToCombinationsMatrix(self):
""" in all globalInfo objects, create a shallow link to the
combinations matrix """
for x in self.subs:
if not hasattr(x, "combinationsmatrix") or x.combinationsmatrix == None:
x.combinationsmatrix = self.combinationsmatrix
x.createLinksToCombinationsMatrix()
    def clearLinksToCombinationsMatrix(self):
""" clear all shallow links to the combinations matrix """
self.combinationsmatrix = None
for x in self.subs:
x.combinationsmatrix = None
x.clearLinksToCombinationsMatrix()
class SubDatabase(object):
"""
SubDatabase object. Holds a list of ExpResult objects.
"""
def __init__(self, base=None, force_load=None,
progressbar=False, subpickle=True, combinationsmatrix=None):
"""
:param base: path to the database, or pickle file (string), or http
address. If None, "official", or "official_fastlim",
use the official database for your code version
(including fastlim results, if specified).
If "latest", or "latest_fastlim", check for the latest database.
Multiple databases may be named, use "+" as delimiter.
                     Order matters: results with the same name will be overwritten
                     according to the sequence in which the databases are given.
:param force_load: force loading the text database ("txt"),
or binary database ("pcl"), dont force anything if None
:param progressbar: show a progressbar when building pickle file
(needs the python-progressbar module)
:param subpickle: produce small pickle files per exp result.
Should only be used when working on the database.
:param combinationsmatrix: an optional dictionary that contains info
about combinable analyses, e.g. { "anaid1": ( "anaid2", "anaid3" ) }
optionally specifying signal regions, e.g. { "anaid1:SR1":
( "anaid2:SR2", "anaid3" ) }
"""
self.url = base
self.combinationsmatrix = combinationsmatrix
self.source = ""
if force_load == None and base.endswith(".pcl"):
force_load = "pcl"
self.force_load = force_load
self.subpickle = subpickle
obase = base # keep old name for more checks for 'latest'
from smodels.installation import __dblabels__
if base in __dblabels__:
from smodels.installation import databasePath
base = databasePath(base)
base, pclfile = self.checkPathName(base)
self.pcl_meta = Meta(pclfile)
self._allExpResults = []
self._activeResults = []
self.txt_meta = self.pcl_meta
if not self.force_load == "pcl":
self.txt_meta = Meta(base)
self.progressbar = None
if progressbar:
try:
import progressbar as P
self.progressbar = P.ProgressBar(widgets=["Building Database ", P.Percentage(),
P.Bar(marker=P.RotatingMarker()), P.ETA()])
except ImportError as e:
logger.warning("progressbar requested, but python-progressbar is not installed.")
if self.force_load == "txt":
self._setParticles()
self.loadTextDatabase()
self.txt_meta.printFastlimBanner()
return
if self.force_load == "pcl":
self.loadBinaryFile()
self._setParticles()
self.pcl_meta.printFastlimBanner()
if "latest" in obase:
from smodels import installation
codeVersion = installation.version()
pclVersion = self.pcl_meta.databaseVersion
if codeVersion[0] != pclVersion[0]:
logger.error("major versions of code and database differ! code=%s, database=%s" % (codeVersion[0], pclVersion[0]))
return
if self.force_load in [None, "none", "None"]:
self.loadDatabase()
self._setParticles()
self.txt_meta.printFastlimBanner()
return
logger.error("when initialising database: force_load=%s is not "
"recognized. Valid values are: pcl, txt, None." % force_load)
raise SModelSError()
def __eq__(self, other):
""" compare two databases """
if type(self) != type(other):
return False
if not self.txt_meta.sameAs(other.txt_meta):
return False
if len(self.expResultList) != len(other.expResultList):
return False
for (myres, otherres) in zip(self.expResultList, other.expResultList):
if myres != otherres:
return False
return True
@property
def expResultList(self):
"""
The list of active results.
"""
return self._activeResults[:]
@expResultList.setter
def expResultList(self,value):
"""
If a results list is defined for the database,
store it in _allExpResults and reset the active list.
"""
self._allExpResults = value
self._activeResults = self._allExpResults[:]
    def loadDatabase(self):
        """ If no binary file is available, load the text database and
        create the binary file. If a binary file is available, check
        whether it needs an update; if so, regenerate it, otherwise load it.
        """
if not os.path.exists(self.pcl_meta.pathname):
logger.info("Creating binary database ")
logger.info("(this may take a few minutes, but it's done only once!)")
self.loadTextDatabase()
self.createBinaryFile()
else:
if self.needsUpdate():
self.createBinaryFile()
else:
self.loadBinaryFile(lastm_only=False)
    def loadTextDatabase(self):
        """ simply loads the text database """
if self.txt_meta.databaseVersion and len(self.expResultList) > 0:
logger.debug("Asked to load database, but has already been loaded. Ignore.")
return
logger.info("Parsing text database at %s" % self.txt_meta.pathname)
self.expResultList = self._loadExpResults()
self.createLinksToModel()
self.createLinksToCombinationsMatrix()
    def createLinksToModel(self):
""" in all globalInfo objects, create links to self.databaseParticles """
if not hasattr(self, "databaseParticles"):
return
if type(self.databaseParticles) == type(None):
return
for ctr, er in enumerate(self.expResultList):
if not hasattr(er.globalInfo, "_databaseParticles"):
er.globalInfo._databaseParticles = self.databaseParticles
elif type(er.globalInfo._databaseParticles) == type(None):
er.globalInfo._databaseParticles = self.databaseParticles
    def createLinksToCombinationsMatrix(self):
""" in all globalInfo objects, create links to self.combinationsmatrix """
if not hasattr(self, "combinationsmatrix"):
return
if self.combinationsmatrix is None:
return
for er in self.expResultList:
if not hasattr(er.globalInfo, "_combinationsmatrix"):
er.globalInfo._combinationsmatrix = self.combinationsmatrix
elif er.globalInfo._combinationsmatrix is None:
er.globalInfo._combinationsmatrix = self.combinationsmatrix
    def clearLinksToCombinationsMatrix(self):
for er in self.expResultList:
if hasattr(er.globalInfo, "_combinationsmatrix"):
del er.globalInfo._combinationsmatrix
    def removeLinksToModel(self):
""" remove the links of globalInfo._databaseParticles to the model.
Currently not used. """
for er in self.expResultList:
if hasattr(er.globalInfo, "_databaseParticles"):
del er.globalInfo._databaseParticles
    def loadBinaryFile(self, lastm_only=False):
"""
        Load the binary database file (meta information and, unless lastm_only is set, the full list of results).
:param lastm_only: if true, the database itself is not read.
:returns: database object, or None, if lastm_only == True.
"""
if lastm_only and self.pcl_meta.mtime:
## doesnt need to load database, and mtime is already
## loaded
return None
if not os.path.exists(self.pcl_meta.pathname):
return None
try:
with open(self.pcl_meta.pathname, "rb") as f:
t0 = time.time()
pclfilename = self.pcl_meta.pathname
self.pcl_meta = serializer.load(f)
self.pcl_meta.pathname = pclfilename
if self.force_load == "pcl":
self.txt_meta = self.pcl_meta
if not lastm_only:
if not self.force_load == "pcl" and self.pcl_meta.needsUpdate(self.txt_meta):
logger.warning("Something changed in the environment."
"Regenerating.")
self.createBinaryFile()
return self
logger.info("loading binary db file %s format version %s" %
(self.pcl_meta.pathname, self.pcl_meta.format_version))
if sys.version[0] == "2":
self.expResultList = serializer.load(f)
else:
self.expResultList = serializer.load(f, encoding="latin1")
t1 = time.time()-t0
logger.info("Loaded database from %s in %.1f secs." %
(self.pcl_meta.pathname, t1))
self.databaseParticles = None
try:
self.databaseParticles = serializer.load(f)
except EOFError as e:
pass # a model does not *have* to be defined
self.createLinksToModel()
self.createLinksToCombinationsMatrix()
except(EOFError, ValueError) as e:
os.unlink(self.pcl_meta.pathname)
if lastm_only:
self.pcl_meta.format_version = -1
self.pcl_meta.mtime = 0
return self
logger.error("%s is not readable (%s)." %
(self.pcl_meta.pathname, str(e)))
if self.source in ["http", "ftp", "pcl"]:
logger.error("source cannot be rebuilt. supply a different path to the database in your ini file.")
raise SModelSError()
self.createBinaryFile()
# self.txt_meta = self.pcl_meta
return self
    def checkBinaryFile(self):
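        """ Check whether the binary db file is up to date with the text
        database; logs the relevant timestamps and returns the result of
        needsUpdate(). """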
nu = self.needsUpdate()
logger.debug("Checking binary db file.")
logger.debug("Binary file dates to %s(%d)" %
(time.ctime(self.pcl_meta.mtime), self.pcl_meta.filecount))
logger.debug("Database dates to %s(%d)" %
(time.ctime(self.txt_meta.mtime), self.txt_meta.filecount))
if nu:
logger.info("Binary db file needs an update.")
else:
logger.info("Binary db file does not need an update.")
return nu
    def needsUpdate(self):
""" does the binary db file need an update? """
try:
self.loadBinaryFile(lastm_only=True)
# logger.error( "needs update?" )
return(self.pcl_meta.needsUpdate(self.txt_meta))
except(IOError, DatabaseNotFoundException, TypeError, ValueError):
# if we encounter a problem, we rebuild the database.
return True
    def createBinaryFile(self, filename=None):
""" create a pcl file from the text database,
potentially overwriting an old pcl file. """
## make sure we have a model to pickle with the database!
if self.txt_meta == None:
logger.error("Trying to create database pickle, but no txt_meta defined.")
raise SModelSError()
logger.debug("database timestamp: %s, filecount: %s" %
(time.ctime(self.txt_meta.mtime), self.txt_meta.filecount))
binfile = filename
if binfile == None:
binfile = self.pcl_meta.pathname
if not hasattr(self, 'databaseParticles') or \
type(self.databaseParticles) == type(None):
self._setParticles(self._getParticles())
logger.debug(" * create %s" % binfile)
with open(binfile, "wb") as f:
logger.debug(" * load text database")
self.loadTextDatabase()
logger.debug(" * write %s db version %s, format version %s, %s" %
(binfile, self.txt_meta.databaseVersion,
self.txt_meta.format_version, self.txt_meta.cTime()))
# ptcl = serializer.HIGHEST_PROTOCOL
ptcl = min(4, serializer.HIGHEST_PROTOCOL) # 4 is default protocol in python3.8, and highest protocol in 3.7
serializer.dump(self.txt_meta, f, protocol=ptcl)
serializer.dump(self.expResultList, f, protocol=ptcl)
serializer.dump(self.databaseParticles, f, protocol=ptcl)
logger.info("%s created." % (binfile))
@property
def databaseVersion(self):
"""
The version of the database, read from the 'version' file.
"""
return self.txt_meta.databaseVersion
@databaseVersion.setter
def databaseVersion(self, x):
self.txt_meta.databaseVersion = x
self.pcl_meta.databaseVersion = x
    def inNotebook(self):
"""
Are we running within a notebook? Has an effect on the
progressbar we wish to use.
"""
try:
cfg = get_ipython().config
if 'IPKernelApp' in cfg.keys():
return True
else:
return False
except NameError:
return False
@property
def base(self):
"""
This is the path to the base directory.
"""
return self.txt_meta.pathname
    def lockFile ( self, filename : os.PathLike ):
""" lock the file <filename>
"""
lockfile = os.path.join ( os.path.dirname ( filename ),
".lock_"+ os.path.basename ( filename ) )
ctr = 0
while ( ctr < 5 ):
ctr+=1
if not os.path.exists ( lockfile ):
f=open ( lockfile, "wt" )
f.write ( f"# this is a temporary lockfile created {time.asctime()}\n" )
f.write ( f"# meant to prevent multiple, parallel downloads of\n" )
f.write ( f"# {filename}\n" )
f.close()
lockfiles.add ( lockfile )
return True
# we have a lockfile. lets see how old
s = os.stat ( lockfile )
t = ( time.time() - s.st_mtime ) / 60. # time in minutes
# if older than 3 hours, then disregard lockfile
if t > 180:
lockfiles.add ( lockfile )
return True
time.sleep ( ctr )
logger.error ( f"File {filename} is locked, probably because another process is already downloading. Remove {lockfile} if you feel it is safe to retry." )
sys.exit()
"""
import fcntl # does not work on all file systems
fcntl.lockf( handle, fcntl.LOCK_EX)
"""
    def unlockFile ( self, filename : os.PathLike ):
""" unlock the file <filename>
"""
lockfile = os.path.join ( os.path.dirname ( filename ),
".lock_"+ os.path.basename ( filename ) )
if lockfile in lockfiles:
lockfiles.remove( lockfile )
if os.path.exists ( lockfile ):
try:
os.unlink ( lockfile )
except FileNotFoundError as e:
pass
#import fcntl # does not work on all filesystems
#fcntl.lockf(handle, fcntl.LOCK_UN)
    def fetchFromScratch(self, path, store):
""" fetch database from scratch, together with
description.
:param store: filename to store json file.
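        The description file is a small json dictionary; a sketch of the fields
        this method relies on (all values are illustrative)::

            {"url": "https://example.org/db/official230.pcl",
             "size": 123456789,
             "sha1": "0f1e2d3c...",
             "lastchanged": 1700000000}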
"""
def sizeof_fmt(num, suffix='B'):
for unit in ['', 'K', 'M', 'G', 'T', 'P']:
if abs(num) < 1024.:
return "%3.1f%s%s" % (num, unit, suffix)
num /= 1024.0
return "%.1f%s%s" % (num, 'Yi', suffix)
import requests
try:
r = requests.get(path, timeout=5)
except requests.exceptions.RequestException as e:
logger.error("Exception when trying to fetch database: %s" % e)
logger.error("Consider supplying a different database path in the ini file (possibly a local one)")
raise SModelSError()
if r.status_code != 200:
line = "Error %d: could not fetch '%s' from server: '%s'" % \
(r.status_code, path, r.reason)
logger.error(line)
raise SModelSError(line)
## its new so store the description
with open(store, "w") as f:
f.write(r.text)
if not "url" in r.json().keys():
logger.error("cannot parse json file %s." % path)
raise SModelSError()
size = r.json()["size"]
cDir, defused = cacheDirectory(create=True, reportIfDefault=True)
t0 = time.time()
filename = os.path.join(cDir, r.json()["url"].split("/")[-1])
if os.path.exists(filename):
# if file exists and checksums match, we dont download
if "sha1" in r.json():
sha = _getSHA1(filename)
if sha == r.json()["sha1"]:
## seems it hasnt changed
self.force_load = "pcl"
return ("./", "%s" % filename)
r2 = requests.get(r.json()["url"], stream=True, timeout=(250, 2000))
# filename= os.path.join ( cDir, r2.url.split("/")[-1] )
msg = "downloading the database from %s and caching in %s." % (path, cDir)
if defused:
msg += " If you want the pickled database file to be cached in a different location, set the environment variable SMODELS_CACHEDIR, e.g. to '/tmp'."
logger.warning(msg)
logger.info("need to fetch %s and store in %s. size is %s." %
(r.json()["url"], filename, sizeof_fmt(size)))
self.lockFile ( filename )
with open(filename, "wb") as dump:
if not self.inNotebook(): # \r doesnt work in notebook
print(" " + " "*51 + "<", end="\r")
print("loading >", end="")
for x in r2.iter_content(chunk_size=int(size / 50)):
dump.write(x)
dump.flush()
print(".", end="")
sys.stdout.flush()
if self.inNotebook():
print("done.")
else:
print("")
dump.close()
self.unlockFile ( filename )
sha = _getSHA1(filename)
testsha = r.json()["sha1"]
if sha != testsha:
logger.error(f"error: downloaded file has different checksum {sha}!={testsha}. This should not happen. Contact the smodels-developers <smodels-developers@lists.oeaw.ac.at>")
# sys.exit()
logger.info("fetched %s in %d secs." % (r2.url, time.time()-t0))
logger.debug("store as %s" % filename)
self.force_load = "pcl"
return ("./", "%s" % filename)
    def fetchFromServer(self, path):
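        """ Fetch the database description from the server and download the
        pickled database if needed; falls back to a cached local copy when the
        server cannot be reached.
        :returns: (base directory, pickle file name)
        """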
import requests
import time
import json
self.source = "http"
if "ftp://" in path:
self.source = "ftp"
cDir = cacheDirectory(create=True)
store = os.path.join(cDir, path.replace(":", "_").replace("/", "_").replace(".", "_"))
logger.debug("need to fetch from server: %s and store to %s" % (path, store))
if not os.path.isfile(store):
## completely new! fetch the description and the db!
return self.fetchFromScratch(path, store)
with open(store, "r") as f:
jsn = json.load(f)
filename = os.path.join(cDir, jsn["url"].split("/")[-1])
class _: # pseudo class for pseudo requests
def __init__(self): self.status_code = -1
r = _()
try:
r = requests.get(path, timeout=2)
except requests.exceptions.RequestException as e:
pass
if r.status_code != 200:
logger.warning("Error %d: could not fetch %s from server." %
(r.status_code, path))
if not os.path.isfile(filename):
logger.error("Cant find a local copy of the pickle file. Exit.")
sys.exit()
logger.warning("I do however have a local copy of the file at %s. I work with that." % filename)
self.force_load = "pcl"
return (cDir, filename)
#return ( cDir, os.path.basename ( filename ) )
if not os.path.exists(filename):
return self.fetchFromScratch(path, store)
stats = os.stat(filename)
if abs(stats.st_size - jsn["size"]) > 4096:
## size doesnt match (4096 is to allow for slightly different file
## sizes reported by the OS). redownload!
return self.fetchFromScratch(path, store)
"""
# dont do this b/c its slowish
if "sha1" in r.json():
t0 = time.time()
sha = _getSHA1 ( filename )
print ( "it took", time.time()-t0 )
if sha != r.json()["sha1"]:
return self.fetchFromScratch ( path, store )
"""
if r.json()["lastchanged"] > jsn["lastchanged"]:
## has changed! redownload everything!
return self.fetchFromScratch(path, store)
if not os.path.isfile(filename):
return self.fetchFromScratch(path, store)
self.force_load = "pcl"
return ("./", filename)
    def checkPathName(self, path):
"""
        Checks the path name and returns the base directory and the pickle
        file name. If the path starts with http or ftp, fetch the description
        file and the database first.
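        Examples of accepted values (illustrative)::

            "/path/to/smodels-database/"        # a text database directory
            "/path/to/database.pcl"             # a pickle file
            "http://example.org/database.json"  # fetched via fetchFromServer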
"""
logger.debug('Try to set the path for the database to: %s', path)
if path.startswith(("http://", "https://", "ftp://")):
return self.fetchFromServer(path)
if path.startswith(("file://")):
path = path[7:]
tmp = os.path.realpath(path)
if os.path.isfile(tmp):
base = os.path.dirname(tmp)
return (base, tmp)
if tmp[-4:] == ".pcl":
self.source = "pcl"
if not os.path.exists(tmp):
if self.force_load == "pcl":
logger.error("File not found: %s" % tmp)
raise SModelSError()
logger.info("File not found: %s. Will generate." % tmp)
base = os.path.dirname(tmp)
return (base, tmp)
logger.error("Supplied a pcl filename, but %s is not a file." % tmp)
raise SModelSError()
path = tmp + '/'
if not os.path.exists(path):
logger.error('%s is no valid path!' % path)
raise DatabaseNotFoundException("Database not found")
m = Meta(path)
self.source = "txt"
return (path, path + m.getPickleFileName())
def __str__(self):
idList = "Database version: " + self.databaseVersion
idList += "\n"
idList += "-" * len(idList) + "\n"
if self.expResultList == None:
idList += "no experimental results available! "
return idList
idList += "%d experimental results: " % \
len(self.expResultList)
atlas, cms = [], []
datasets = 0
txnames = 0
s = {8: 0, 13: 0}
for expRes in self.expResultList:
Id = expRes.globalInfo.getInfo('id')
sqrts = expRes.globalInfo.getInfo('sqrts').asNumber(TeV)
if not sqrts in s.keys():
s[sqrts] = 0
s[sqrts] += 1
datasets += len(expRes.datasets)
for ds in expRes.datasets:
txnames += len(ds.txnameList)
if "ATLAS" in Id:
atlas.append(expRes)
if "CMS" in Id:
cms.append(expRes)
idList += "%d CMS, %d ATLAS, " % (len(cms), len(atlas))
for sqrts in s.keys():
idList += "%d @ %d TeV, " % (s[sqrts], sqrts)
# idList += expRes.globalInfo.getInfo('id') + ', '
idList = idList[:-2] + '\n'
idList += "%d datasets, %d txnames.\n" % (datasets, txnames)
return idList
def _setParticles(self, databaseParticles=None):
"""
Set the databaseParticles attribute.
If databaseParticles is None and the self.databaseParticles is None,
try to use the particles stored in the first ExpResult
        in the database (ExpResult.globalInfo._databaseParticles).
If not found, fallback to the final states defined in defaultFinalStates.py.
:param databaseParticles: Model object containing the final state particles
used in the database.
"""
#If not yet defined, set the attribute to None:
if not hasattr(self, 'databaseParticles'):
self.databaseParticles = None
#If input is given, use it to set the databaseParticles attribute:
if databaseParticles:
logger.debug("Setting database particles from %s" % str(databaseParticles))
self.databaseParticles = databaseParticles
#If still None, fallback to default:
if self.databaseParticles is None:
logging.debug("databaseParticles not found. Using default state.")
from smodels.experiment.defaultFinalStates import finalStates
self.databaseParticles = finalStates
def _getParticles(self, particlesFile='databaseParticles.py'):
"""
Load the particle objects used in the database.
The particles are searched for in the database folder.
If not found, the default particles will be loaded.
"""
fulldir = os.path.join(self.txt_meta.pathname, particlesFile)
if os.path.isfile(fulldir):
from importlib import import_module
sys.path.append(self.txt_meta.pathname)
pFile = os.path.splitext(particlesFile)[0]
logger.debug("Loading database particles from: %s" % fulldir)
modelFile = import_module(pFile, package='smodels')
if not hasattr(modelFile, 'finalStates'):
logger.error("Model definition (finalStates) not found in" % fulldir)
else:
#set model name to file location:
modelFile.finalStates.label = os.path.basename(fulldir)
return modelFile.finalStates
return None
def _loadExpResults(self):
"""
Checks the database folder and generates a list of ExpResult objects for
each (globalInfo.txt,sms.py) pair.
:returns: list of ExpResult objects
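        Expected layout (sketch; the folder names are illustrative)::

            <database>/13TeV/CMS/CMS-SUS-19-006/globalInfo.txt
            <database>/13TeV/CMS/CMS-SUS-19-006/orig/        # skipped
            <database>/13TeV/CMS/CMS-SUS-19-006/validation/  # skipped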
"""
#Try to load particles from databaseParticles.py
self._setParticles(self._getParticles())
folders = []
#for root, _, files in os.walk(self.txt_meta.pathname):
# for root, _, files in cleanWalk(self._base):
for root, _, files in cleanWalk(self.txt_meta.pathname):
folders.append((root, files))
folders.sort()
roots = []
for root, files in folders:
if "/.git/" in root:
continue
if root[-11:] == "/validation":
continue
if root[-5:] == "/orig":
continue
if not 'globalInfo.txt' in files:
continue
else:
roots.append(root)
if self.progressbar:
self.progressbar.maxval = len(roots)
self.progressbar.start()
resultsList = []
for ctr, root in enumerate(roots):
if self.progressbar:
self.progressbar.update(ctr)
expres = self.createExpResult(root)
if expres:
resultsList.append(expres)
if not resultsList:
logger.warning("Zero results loaded.")
if self.progressbar:
self.progressbar.finish()
return resultsList
    def createExpResult(self, root):
""" create, from pickle file or text files """
txtmeta = Meta(root,
hasFastLim=None, databaseVersion=self.databaseVersion)
pclfile = "%s/.%s" % (root, txtmeta.getPickleFileName())
logger.debug("Creating %s, pcl=%s" % (root, pclfile))
expres = None
try:
# logger.info( "%s exists? %d" % ( pclfile,os.path.exists( pclfile ) ) )
if not self.force_load == "txt" and os.path.exists(pclfile):
# logger.info( "%s exists" % ( pclfile ) )
with open(pclfile, "rb") as f:
logger.debug("Loading: %s" % pclfile)
## read meta from pickle
pclmeta = serializer.load(f)
if not pclmeta.needsUpdate(txtmeta):
logger.debug("we can use expres from pickle file %s" % pclfile)
expres = serializer.load(f)
else:
logger.debug("we cannot use expres from pickle file %s" % pclfile)
logger.debug("txt meta %s" % txtmeta)
logger.debug("pcl meta %s" % pclmeta)
logger.debug("pcl meta needs update %s" % pclmeta.needsUpdate(txtmeta))
except IOError as e:
logger.error("exception %s" % e)
if not expres: # create from text file
expres = ExpResult(root,
databaseParticles=self.databaseParticles)
if self.subpickle and expres:
expres.writePickle(self.databaseVersion)
if expres:
contact = expres.globalInfo.getInfo("contact")
if contact and "fastlim" in contact.lower():
self.txt_meta.hasFastLim = True
return expres
    def setActiveExpResults(self, analysisIDs=['all'], datasetIDs=['all'], txnames=['all'],
dataTypes=['all'], useNonValidated=False,
onlyWithExpected=False):
"""
Filter the experimental results and store them in activeResults.
:param analysisIDs: list of analysis ids ([CMS-SUS-13-006,...]). Can
be wildcarded with usual shell wildcards: * ? [<letters>]
Furthermore, the centre-of-mass energy can be chosen
as suffix, e.g. ":13*TeV". Note that the asterisk
in the suffix is not a wildcard.
:param datasetIDs: list of dataset ids ([ANA-CUT0,...]). Can be wildcarded
with usual shell wildcards: * ? [<letters>]
:param txnames: list of txnames ([TChiWZ,...]). Can be wildcarded with
usual shell wildcards: * ? [<letters>]
:param dataTypes: dataType of the analysis (all, efficiencyMap or upperLimit)
Can be wildcarded with usual shell wildcards: * ? [<letters>]
:param useNonValidated: If False, the results with validated = False
will not be included
:param onlyWithExpected: Return only those results that have expected values
also. Note that this is trivially fulfilled for all efficiency maps.
        The selection is stored in self._activeResults; nothing is returned.
"""
self._activeResults = self.getExpResults(analysisIDs, datasetIDs, txnames,
dataTypes, useNonValidated,
onlyWithExpected)
    def getExpResults(self, analysisIDs=['all'], datasetIDs=['all'], txnames=['all'],
dataTypes=['all'], useNonValidated=False,
onlyWithExpected=False):
"""
Returns a list of ExpResult objects.
Each object refers to an analysisID containing one (for UL) or more
(for Efficiency maps) dataset (signal region) and each dataset
containing one or more TxNames. If analysisIDs is defined, returns
only the results matching one of the IDs in the list. If dataTypes is
defined, returns only the results matching a dataType in the list. If
datasetIDs is defined, returns only the results matching one of the IDs
in the list. If txname is defined, returns only the results matching
one of the Tx names in the list.
:param analysisIDs: list of analysis ids ([CMS-SUS-13-006,...]). Can
be wildcarded with usual shell wildcards: * ? [<letters>]
Furthermore, the centre-of-mass energy can be chosen
as suffix, e.g. ":13*TeV". Note that the asterisk
in the suffix is not a wildcard.
:param datasetIDs: list of dataset ids ([ANA-CUT0,...]). Can be wildcarded
with usual shell wildcards: * ? [<letters>]
:param txnames: list of txnames ([TChiWZ,...]). Can be wildcarded with
usual shell wildcards: * ? [<letters>]
:param dataTypes: dataType of the analysis (all, efficiencyMap or upperLimit)
Can be wildcarded with usual shell wildcards: * ? [<letters>]
:param useNonValidated: If False, the results with validated = False
will not be included
:param onlyWithExpected: Return only those results that have expected values
also. Note that this is trivially fulfilled for all efficiency maps.
:returns: list of ExpResult objects or the ExpResult object if the list
contains only one result
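        Example (sketch; the ids and txnames are illustrative)::

            results = db.getExpResults(analysisIDs=["CMS-SUS-13-006", "ATLAS-*:8TeV"],
                                       txnames=["TChiWZ"],
                                       dataTypes=["upperLimit"])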
"""
if type(analysisIDs) == str:
analysisIDs = [analysisIDs]
if type(datasetIDs) == str:
datasetIDs = [datasetIDs]
if type(txnames) == str:
txnames = [txnames]
if type(dataTypes) == str:
dataTypes = [dataTypes]
import fnmatch
expResultList = []
for expResult in self._allExpResults:
analysisID = expResult.globalInfo.getInfo('id')
sqrts = expResult.globalInfo.getInfo('sqrts')
# Skip analysis not containing any of the required ids:
if analysisIDs != ['all']:
hits = False
for patternString in analysisIDs:
# Extract centre-of-mass energy
# Assuming 0 or 1 colons.
pattern = patternString.split(':')
hits = fnmatch.filter([analysisID], pattern[0])
if len(pattern) > 1:
# Parse suffix
# Accepted Strings: ":13", ":13*TeV", ":13TeV", ":13 TeV"
# Everything else will yield an error at the unum-conversion (eval())
if pattern[1].endswith('TeV'):
pattern[1] = pattern[1][:-3]
if pattern[1][-1] in [" ", "*"]:
pattern[1] = pattern[1][:-1]
pattern[1] += "*TeV"
if sqrts != eval(pattern[1]):
hits = False
if hits:
break
# continue
if not hits:
continue
newExpResult = ExpResult()
newExpResult.path = expResult.path
newExpResult.globalInfo = expResult.globalInfo
newExpResult.datasets = []
newExpResult.origdatasets = expResult.datasets
for dataset in expResult.datasets:
if dataTypes != ['all']:
hits = False
for pattern in dataTypes:
hits = fnmatch.filter([dataset.dataInfo.dataType], pattern)
if hits:
break
#continue
if not hits:
continue
if hasattr(dataset.dataInfo, 'dataId') and datasetIDs != ['all']:
hits = False
if datasetIDs == None:
datasetIDs = [None]
for pattern in datasetIDs:
hits = fnmatch.filter([str(dataset.dataInfo.dataId)], str(pattern))
if hits:
break
# continue
if not hits:
continue
newDataSet = datasetObj.DataSet(dataset.path, dataset.globalInfo,
False)
newDataSet.dataInfo = dataset.dataInfo
newDataSet.txnameList = []
for txname in dataset.txnameList:
validated = txname.validated
if type(validated) == str:
validated = validated.lower()
if (validated not in [True, False, "true", "false", "n/a", "tbd", None, "none"]):
logger.error("value of validated field '%s' in %s unknown." % (validated, expResult))
if validated in [None, "none"]: # FIXME after 1.1.1 this becomes a warning msg?
logger.debug("validated is None in %s/%s/%s. Please set to True, False, N/A, or tbd." %
(expResult.globalInfo.id, dataset.dataInfo.dataId, txname))
if validated not in [None, True, "true", "n/a", "tbd"] and (not useNonValidated):
continue
if txnames != ['all']:
#Replaced by wildcard-evaluation below (2018-04-06 mat)
hits = False
for pattern in txnames:
hits = fnmatch.filter([txname.txName], pattern)
if hits: # one match is enough
break
if not hits:
continue
if onlyWithExpected and dataset.dataInfo.dataType == \
"upperLimit" and not txname.txnameDataExp:
continue
newDataSet.txnameList.append(txname)
# Skip data set not containing any of the required txnames:
if not newDataSet.txnameList or newDataSet.txnameList == []:
continue
newExpResult.datasets.append(newDataSet)
# Skip analysis not containing any of the required txnames:
if not newExpResult.getTxNames():
continue
expResultList.append(newExpResult)
return expResultList
    def updateBinaryFile(self):
        """ write a binary db file, but only if
        necessary. """
if self.needsUpdate():
logger.debug("Binary db file needs an update.")
self.createBinaryFile()
else:
logger.debug("Binary db file does not need an update.")
if __name__ == "__main__":
import argparse
from smodels.base.smodelsLogging import setLogLevel
""" Run as a script, this checks and/or writes dbX.pcl files """
argparser = argparse.ArgumentParser(description='simple script to check \
and/or write dbX.pcl files')
argparser.add_argument('-c', '--check', help='check binary db file',
action='store_true')
argparser.add_argument('-t', '--time', help='time reading db',
action='store_true')
argparser.add_argument('-r', '--read', help='read binary db file',
action='store_true')
argparser.add_argument('-w', '--write', help='force writing binary db file',
action='store_true')
argparser.add_argument('-u', '--update', help='update binary db file, if necessary',
action='store_true')
argparser.add_argument('-d', '--debug', help='debug mode',
action='store_true')
argparser.add_argument('-D', '--database', help='directory name of database',
default="../../../smodels-database/")
args = argparser.parse_args()
logger.setLevel(level=logging.INFO)
if args.debug:
setLogLevel(level=logging.DEBUG)
if args.write:
db = Database(args.database, force_load="txt")
db.createBinaryFile()
sys.exit()
db = Database(args.database)
if args.update:
db.updateBinaryFile()
if args.check:
db.checkBinaryFile()
if args.time:
t0 = time.time()
expResult = db.loadBinaryFile(lastm_only=False)
t1 = time.time()
print("Time it took reading binary db file: %.1f s." % (t1-t0))
txtdb = db.loadTextDatabase()
t2 = time.time()
print("Time it took reading text file: %.1f s." % (t2-t1))
if args.read:
db = db.loadBinaryFile(lastm_only=False)
listOfExpRes = db.expResultList
for expResult in listOfExpRes:
print(expResult)