""" Classes for reading (TODO: and writing) STARDB files. """ from __future__ import division from __future__ import print_function from __future__ import with_statement import numpy as np import os, sys, gzip, bz2 import utils from byte2human import byte2human from utils import prod, xz_file_size import isotope import re from logged import Logged from lzma import LZMAFile class StarDB(Logged): """ Class for reading STARDB binary files. Compressed files with .gz will be automatically uncompressed. This may fail, however, if the file is bigger than 2GB or 4GB. Compressed files with .bz2 will be automatically uncompressed. This is currently rather inefficient because to determine the file size the entire stream need to be read first. FIELD_TYPES: 0 UNDEFINED Undefined 1 BYTE Byte 2 INT Integer 3 LONG Longword integer 4 FLOAT Floating point 5 DOUBLE Double-precision floating 6 COMPLEX Complex floating 7 STRING String 8 STRUCT Structure [NOT ALLOWED] 9 DCOMPLEX Double-precision complex 10 POINTER Pointer 11 OBJREF Object reference 12 UINT Unsigned Integer 13 ULONG Unsigned Longword Integer 14 LONG64 64-bit Integer 15 ULONG64 Unsigned 64-bit Integer """ sys_is_le = sys.byteorder == "little" native_byteorder = "<" if sys_is_le else ">" abundance_type_names = ('isomere','isotope','element','mass number (isobar)','neutron number (isotone)') abundance_class_names = ('all (raw)','rad (raw + decays)','dec (stable subset of radiso)') abundance_unit_names = ('mass (g)','mass (solar)','mol','mass fraction (X)','mol fraction (YPS)','log epsion','[ ]','production factor (10^[])','log [Y/Si] + 6') abundance_total_names = ('initial (total, not normalized, fallback is missing mass)','ejecta') abundance_data_names = ('all ejecta (SN ejecta + wind)','SN ejecta, no wind','wind only','ejecta including fallback and wind') abundance_sum_names = ('mass fraction','number fraction') flagnames = ('parameter','property') typenames = ( 'UNDEFINED', 'BYTE', 'INT', 'LONG', 'FLOAT', 'DOUBLE', 'COMPLEX', 'STRING', 'STRUCT', 'DCOMPLEX', 'POINTER', 'OBJREF', 'UINT', 'ULONG', 'LONG64', 'ULONG64') class SignatureError(Exception): """ Exception raised when signature could not be read. """ def __init__(self, filename): """ Store file name that causes error. """ self.filename = filename def __str__(self): """ Return error message. """ return "Error reading signature from file {:s}."\ .format(self.filename) class VersionError(Exception): """ Exception raised when version mismatch. """ def __init__(self): """ Just set up. """ def __str__(self): """ Return error message. """ return "Version Error.".format() class IntegrityError(Exception): """ Exception raised when file integrity seems broken. """ def __init__(self): """ Just set up. """ def __str__(self): """ Return error message. """ return "File Integrity Error.".format() class DataError(Exception): """ Exception raised when data seems faulty. """ def __init__(self): """ Just set up. """ def __str__(self): """ Return error message. """ return "Data seems faulty.".format() def __init__(self, filename, extension = None): """ Initialize data fields and open file. Optionally the byte order can be specified. The default is big endian. """ self.setup_logger(silent = False) self.extension = extension self.open(filename) self.logger.info('Loading {:s}'.format( self.filename)) s = 'File size: {}'.format(byte2human(self.filesize)) if self.compressed: s += ' (compressed)' self.logger.info(s + '.') self.byteorder = self._check_signature() self.swapbyteorder = self.byteorder != self.native_byteorder if self.swapbyteorder: self.logger.info('Swapping endian.') self.dtype_i8 = np.dtype(np.int64).newbyteorder(self.byteorder) self.dtype_u8 = np.dtype(np.uint64).newbyteorder(self.byteorder) self.dtype_f8 = np.dtype(np.float64).newbyteorder(self.byteorder) self.dtypes = np.zeros(15, dtype = np.object) self.dtypes[[5,13,14]] = [self.dtype_f8, self.dtype_i8, self.dtype_u8] self._load() self.close() self.close_logger(timing = 'Data loaded in') def open(self, filename): """ Open the file. """ self.filename = os.path.expandvars(os.path.expanduser(filename)) if not os.path.exists(self.filename): fn = self.filename + '.gz' if os.path.exists(fn): self.filename = fn else: fn = self.filename + '.bz2' if os.path.exists(fn): self.filename = fn else: raise IOError("File not found.") if self.filename.endswith('.gz'): self.compressed = True self.compress_mode = 'gz' self.file = gzip.open(self.filename,'rb') pos = self.file.myfileobj.tell() self.file.myfileobj.seek(-4, os.SEEK_END) self.filesize = np.ndarray(1, dtype = "')) if len(np.argwhere(self.typenames[self.fieldtypes[ifield]] == np.array(['DOUBLE','LONG64','ULONG64']))) == 0: self.logger.error('data type not yet supported') self.logger.error('only supporting 8-byte scalar data types') raise self.VersionError() self.abu_Z = self.read_uin(self.nabu) if self.version < 10100: self.abu_A = np.ndarray(nabu,dtype=np.uint64) self.abu_E = np.ndarray(nabu,dtype=np.uint64) else: self.abu_A = self.read_uin(self.nabu) self.abu_E = self.read_uin(self.nabu) if self.abundance_type == 0: self.ions = np.array([isotope.Ion(Z=int(self.abu_Z[i]), A=int(self.abu_A[i]), E=int(self.abu_E[i])) for i in xrange(self.nabu)]) elif self.abundance_type == 1: self.ions = np.array([isotope.Ion(Z=int(self.abu_Z[i]), A=int(self.abu_A[i])) for i in xrange(self.nabu)]) elif self.abundance_type == 2: self.ions = np.array([isotope.Ion(Z=int(self.abu_Z[i])) for i in xrange(self.nabu)]) elif self.abundance_type == 3: self.ions = np.array([isotope.Ion(A=int(self.abu_A[i])) for i in xrange(self.nabu)]) elif self.abundance_type == 4: self.ions = np.array([isotope.Ion(N=int(self.abu_A[i])) for i in xrange(self.nabu)]) else: self.logger.error('anundance type not defined.') raise self.DataError() self.field_data = self.read_stu(self.nstar, self.fieldnames, self.fieldtypes) l1 = max(len(x) for x in self.fieldnames) l2 = 0 l3 = 0 nvalues = np.zeros(self.nfield, dtype=np.uint64) values = np.ndarray((self.nfield, self.nstar), dtype = np.float64) ivalues = np.ndarray((self.nfield, self.nstar), dtype=np.uint64) re_len = re.compile('^[A-Z]([0-9]+)',flags=re.I) for ifield in xrange(self.nfield): # values v = self.field_data[self.fieldnames[ifield]] vs = v.argsort() vv,vu,vx = np.unique(v, return_index=True, return_inverse=True) nv = len(vv) values[ifield,0:nv] = vv nvalues[ifield] = nv vx = np.insert(vx,0,-1) for iv in xrange(nv): ivalues[ifield,vs[vx[iv]+1:vx[iv+1]+1]] = iv # output formatting flen = int(re_len.findall(self.fieldformats[ifield])[0]) l2 = max(l2,flen) l3 = max(l3,len('{:d}'.format(nv))) nvalues_max = max(nvalues) values = values[:,0:nvalues_max] # convert to python formats self.field_formats = self.fieldformats.copy() for i in xrange(self.nfield): self.field_formats[i] = self.fieldformats[i][1:] if self.fieldformats[0][0] == 'F': self.field_formats[i] += 'F' elif self.fieldformats[0][0] == 'I': self.field_formats[i] += 'D' else: self.logger.error('Format type not supported.') raise AttributeError() xpar = np.argwhere(self.fieldflags == 0) if len(xpar) > 0: self.logger.info(''.ljust(58,"-")) self.logger.info('PARAMETER RANGES:') for ip in xpar.flat: fmax = max(values[ip,0:nvalues[ip]]) fmin = min(values[ip,0:nvalues[ip]]) line=(self.fieldnames[ip]+': ', ("{:"+self.field_formats[ip]+"}").format(fmin), ("{:"+self.field_formats[ip]+"}").format(fmax), "{:d}".format(int(nvalues[ip]))) format="{{:<{:d}s}} {{:>{:d}s}} ... {{:>{:d}s}} ({{:>{:d}s}} values)".format(l1+2,l2,l2,l3) self.logger.info(format.format(*line)) xprop = np.argwhere(self.fieldflags != 0) if len(xprop) > 0: self.logger.info(''.ljust(58,"-")) self.logger.info('PROPERTY RANGES:') for ip in xprop.flat: fmax = max(values[ip,0:nvalues[ip]]) fmin = min(values[ip,0:nvalues[ip]]) line=(self.fieldnames[ip]+': ', ("{:"+self.field_formats[ip]+"}").format(fmin), ("{:"+self.field_formats[ip]+"}").format(fmax), "{:d}".format(int(nvalues[ip]))) format="{{:<{:d}s}} {{:>{:d}s}} ... {{:>{:d}s}} ({{:>{:d}s}} values)".format(l1+2,l2,l2,l3) self.logger.info(format.format(*line)) if len(xpar) > 0: self.logger.info(''.ljust(58,"-")) self.logger.info('PARAMETER VALUES:') for ip in xpar.flat: self.logger.info(self.fieldnames[ip]+':') flen = int(re_len.findall(self.fieldformats[ifield])[0]) s = '' f = " {:"+self.field_formats[ip]+"}" for id in xrange(nvalues[ip]): if len(s) >= 50: self.logger.info(s) s = '' s += f.format(values[ip,id]) self.logger.info(s) maxpropvalues = 100 if len(xprop) > 0: self.logger.info(''.ljust(58,"-")) self.logger.info('PROPERTY VALUES:') for ip in xprop.flat: self.logger.info(self.fieldnames[ip]+':') if nvalues[ip] > maxpropvalues: self.logger.info('(more than {:d} values)'.format(maxpropvalues)) else: flen = int(re_len.findall(self.fieldformats[ifield])[0]) s = '' f = " {:"+self.field_formats[ip]+"}" for id in xrange(nvalues[ip]): if len(s) >= 50: self.logger.info(s) s = '' s += f.format(values[ip,id]) self.logger.info(s) self.logger.info(''.ljust(58,"-")) self.abu_data = self.read_dbl((self.nabu, self.nstar)) self.nvalues = nvalues self.values = values self.indices = ivalues if len(np.nonzero(self.abu_data.sum(0)==0)[0]) > 0: self.logger.error('found zero data sets.') raise self.DataError() self.read_tail()