Top

ScanLHA.MergeLHA module

Merges multiple HDF files into one file.

#!/usr/bin/env python3
"""
Merges multiple HDF files into one file.
"""
from pandas import HDFStore, DataFrame
from glob import glob
import sys
from os import getenv
__pdoc__ = {}
# NOTE: this text is also printed by Merge() as its usage message, so it has
# to match the argument order and environment variable the code really uses.
__pdoc__['Merge'] = """
Merges multiple HDF files into one file.

Usage: MergeLHA file1.h5 file2.h5 [...] mergedfile.h5

File names may be specified using patterns compatible with python.glob (e.g. '*.h5').

Note that it is not possible to merge different `ScanLHA.config.Config` instances i.e. the `Config`
instance of the first file is used.


The default H5 tree is 'results' (default of `ScanLHA.scan.Scan.save`).
For changing the path set the environment variable `export LHAPATH='/yourpath'`.
"""

def Merge():
    """Concatenate the HDF files named on the command line into one file.

    Reads ``sys.argv``: every argument except the last one is a glob pattern
    selecting input files; the last argument is the output file. The tree
    that is read from and written to is taken from the environment variable
    ``LHAPATH`` and falls back to ``'results'``.

    The first config attribute found among the inputs is stored on the
    output; a warning is printed for every input whose config differs
    (the ``'scatterplot'`` entry is ignored in that comparison).

    Exits with status 1 if fewer than two file arguments are given.
    """
    if len(sys.argv) < 3:
        print('No valid filenames given!')
        print(__pdoc__['Merge'])
        sys.exit(1)

    # Local import so this block does not depend on the file-level imports.
    from pandas import concat

    infiles = [f for pattern in sys.argv[1:-1] for f in glob(pattern)]
    outfile = sys.argv[-1]

    LHAPATH = getenv('LHAPATH') or 'results'

    print("Will concatenate into {}.".format(outfile))

    frames = []
    store_conf = None
    for f in infiles:
        print('Reading %s ...' % f)
        # Inputs are only read; the context manager closes the file even if
        # one of the steps below raises.
        with HDFStore(f, mode='r') as tmp_store:
            attrs = tmp_store.get_storer(LHAPATH).attrs
            try:
                tmp_conf = attrs.config
            except AttributeError:
                tmp_conf = None
                print('No config attribute found in {}'.format(f))
            else:
                if not store_conf:
                    store_conf = tmp_conf

            # Compare configs only when both exist; 'scatterplot' is copied
            # over first so that it never counts as a difference.
            if store_conf and tmp_conf is not None:
                if 'scatterplot' in store_conf:
                    tmp_conf['scatterplot'] = store_conf['scatterplot']
                if store_conf != tmp_conf:
                    print('Warning: merge file with different config {}'.format(f))

            # Read the same tree the attributes came from, not a fixed name.
            tmp_df = tmp_store[LHAPATH]
            try:
                tmp_df['scan_seed'] = attrs.seed
                tmp_df['scan_parallel'] = attrs.parallel
            except AttributeError:
                # Best effort: files without these attributes are merged
                # without the extra bookkeeping columns.
                pass
            frames.append(tmp_df)

    # concat() rejects an empty list; keep writing an empty frame when no
    # input file matched.
    df = concat(frames, ignore_index=True) if frames else DataFrame()

    with HDFStore(outfile) as store:
        store[LHAPATH] = df
        store.get_storer(LHAPATH).attrs.config = store_conf

Functions

def Merge(

)

Merges multiple HDF files into one file.

Usage: MergeLHA file1.h5 file2.h5 [...] mergedfile.h5

File names may be specified using patterns compatible with python.glob (e.g. '*.h5').

Note that it is not possible to merge different ScanLHA.config.Config instances i.e. the Config instance of the first file is used.

The default H5 tree is 'results' (default of ScanLHA.scan.Scan.save). For changing the path set the environment variable export LHAPATH='/yourpath'.

def Merge():
    """Concatenate the HDF files named on the command line into one file.

    Reads ``sys.argv``: every argument except the last one is a glob pattern
    selecting input files; the last argument is the output file. The tree
    that is read from and written to is taken from the environment variable
    ``LHAPATH`` and falls back to ``'results'``.

    The first config attribute found among the inputs is stored on the
    output; a warning is printed for every input whose config differs
    (the ``'scatterplot'`` entry is ignored in that comparison).

    Exits with status 1 if fewer than two file arguments are given.
    """
    if len(sys.argv) < 3:
        print('No valid filenames given!')
        print(__pdoc__['Merge'])
        sys.exit(1)

    # Local import so this block does not depend on the file-level imports.
    from pandas import concat

    infiles = [f for pattern in sys.argv[1:-1] for f in glob(pattern)]
    outfile = sys.argv[-1]

    LHAPATH = getenv('LHAPATH') or 'results'

    print("Will concatenate into {}.".format(outfile))

    frames = []
    store_conf = None
    for f in infiles:
        print('Reading %s ...' % f)
        # Inputs are only read; the context manager closes the file even if
        # one of the steps below raises.
        with HDFStore(f, mode='r') as tmp_store:
            attrs = tmp_store.get_storer(LHAPATH).attrs
            try:
                tmp_conf = attrs.config
            except AttributeError:
                tmp_conf = None
                print('No config attribute found in {}'.format(f))
            else:
                if not store_conf:
                    store_conf = tmp_conf

            # Compare configs only when both exist; 'scatterplot' is copied
            # over first so that it never counts as a difference.
            if store_conf and tmp_conf is not None:
                if 'scatterplot' in store_conf:
                    tmp_conf['scatterplot'] = store_conf['scatterplot']
                if store_conf != tmp_conf:
                    print('Warning: merge file with different config {}'.format(f))

            # Read the same tree the attributes came from, not a fixed name.
            tmp_df = tmp_store[LHAPATH]
            try:
                tmp_df['scan_seed'] = attrs.seed
                tmp_df['scan_parallel'] = attrs.parallel
            except AttributeError:
                # Best effort: files without these attributes are merged
                # without the extra bookkeeping columns.
                pass
            frames.append(tmp_df)

    # concat() rejects an empty list; keep writing an empty frame when no
    # input file matched.
    df = concat(frames, ignore_index=True) if frames else DataFrame()

    with HDFStore(outfile) as store:
        store[LHAPATH] = df
        store.get_storer(LHAPATH).attrs.config = store_conf