FSUReader: scanning an fsu file now estimates the best batch size

This commit is contained in:
Ryan Tang 2024-06-19 16:55:38 -04:00
parent 0e8c92a266
commit b9d1a76b7e
5 changed files with 90 additions and 257 deletions

View File

@ -20,6 +20,8 @@ struct FileInfo{
};
#define NMINARG 5
//^#############################################################
//^#############################################################
int main(int argc, char **argv) {
@ -27,13 +29,12 @@ int main(int argc, char **argv) {
printf("=========================================\n");
printf("=== *.fsu Events Builder ===\n");
printf("=========================================\n");
if (argc < 6) {
if (argc < NMINARG) {
printf("Incorrect number of arguments:\n");
printf("%s [timeWindow] [withTrace] [verbose] [batchSize] [inFile1] [inFile2] .... \n", argv[0]);
printf("%s [timeWindow] [withTrace] [verbose] [inFile1] [inFile2] .... \n", argv[0]);
printf(" timeWindow : in ns, -1 = no event building \n");
printf(" withTrace : 0 for no trace, 1 for trace \n");
printf(" verbose : > 0 for debug \n");
printf(" batchSize : the size of hit in a batch \n");
printf(" Output file name is contructed from inFile1 \n");
printf("\n");
printf(" Example: %s 0 0 0 10000 '\\ls -1 *001*.fsu'\n", argv[0]);
@ -48,10 +49,10 @@ int main(int argc, char **argv) {
long timeWindow = atoi(argv[1]);
bool traceOn = atoi(argv[2]);
unsigned int debug = atoi(argv[3]);
unsigned int batchSize = atoi(argv[4]);
int nFile = argc - 5;
unsigned int batchSize = 2* DEFAULT_HALFBUFFERSIZE;
int nFile = argc - NMINARG + 1;
TString inFileName[nFile];
for( int i = 0 ; i < nFile ; i++){ inFileName[i] = argv[i+5];}
for( int i = 0 ; i < nFile ; i++){ inFileName[i] = argv[i + NMINARG -1];}
/// Form outFileName;
TString outFileName = inFileName[0];
@ -69,7 +70,6 @@ int main(int argc, char **argv) {
printf(" Time Window = %ld ns = %.1f us\n", timeWindow, timeWindow/1000.);
printf(" Include Trace = %s\n", traceOn ? "Yes" : "No");
printf(" Debug level = %d\n", debug);
printf(" Batch size = %d events/file\n", batchSize);
printf(" Max multiplity = %d hits/event (hard coded)\n", MAX_MULTI);
printf("========================================= Grouping files\n");
@ -80,15 +80,18 @@ int main(int argc, char **argv) {
FSUReader * readerA = new FSUReader(inFileName[0].Data(), 1, 1);
readerA->ScanNumBlock(0,0);
FileInfo fileInfo = {inFileName[0].Data(), readerA->GetSN() * 1000 + readerA->GetFileOrder(), readerA->GetHitCount()};
if( readerA->GetOptimumBatchSize() > batchSize ) batchSize = readerA->GetOptimumBatchSize();
FileInfo fileInfo = {inFileName[0].Data(), readerA->GetSN() * 1000 + readerA->GetFileOrder(), readerA->GetTotalHitCount()};
fileList.push_back(fileInfo);
totalHitCount += readerA->GetHitCount();
totalHitCount += readerA->GetTotalHitCount();
for( int i = 1; i < nFile; i++){
FSUReader * readerB = new FSUReader(inFileName[i].Data(), 1, 1);
readerB->ScanNumBlock(0,0);
totalHitCount += readerB->GetHitCount();
fileInfo = {inFileName[i].Data(), readerB->GetSN() * 1000 + readerB->GetFileOrder(), readerB->GetHitCount()};
if( readerB->GetOptimumBatchSize() > batchSize ) batchSize = readerB->GetOptimumBatchSize();
totalHitCount += readerB->GetTotalHitCount();
fileInfo = {inFileName[i].Data(), readerB->GetSN() * 1000 + readerB->GetFileOrder(), readerB->GetTotalHitCount()};
if( readerA->GetSN() == readerB->GetSN() ){
fileList.push_back(fileInfo);
@ -105,6 +108,7 @@ int main(int argc, char **argv) {
delete readerA;
printf("======================= total Hit Count : %llu\n", totalHitCount);
printf(">>>>>>>>>>>>>>>>>>>>>>>>>> Batch size : %d events/file\n", batchSize);
for( size_t i = 0; i < fileGroupList.size(); i++){
printf("group ----- %ld \n", i);
@ -171,7 +175,7 @@ int main(int argc, char **argv) {
for( size_t j = 0; j < fileGroupList[i].size(); j++){
fList.push_back( fileGroupList[i][j].fileName );
}
reader[i] = new FSUReader(fList, 600, debug);
reader[i] = new FSUReader(fList, 1024, debug); // 1024 is the maximum event / agg.
hitList[i] = reader[i]->ReadBatch(batchSize, debug );
reader[i]->PrintHitListInfo(&hitList[i], "hitList-" + std::to_string(reader[i]->GetSN()));
ID[i] = 0;
@ -266,7 +270,7 @@ int main(int argc, char **argv) {
tEnd = events.back().timestamp;
hitProcessed += events.size();
if( hitProcessed % (traceOn ? 100 : 10000) == 0 ) printf("hit Porcessed %llu/%llu hit....%.2f%%\n\033[A\r", hitProcessed, totalHitCount, hitProcessed*100./totalHitCount);
if( hitProcessed % (traceOn ? 10000 : 10000) == 0 ) printf("hit Porcessed %llu/%llu hit....%.2f%%\n\033[A\r", hitProcessed, totalHitCount, hitProcessed*100./totalHitCount);
multi = events.size() ;
if( events.size() >= MAX_MULTI ) {
@ -354,7 +358,7 @@ int main(int argc, char **argv) {
printf(" first timestamp = %20llu ns\n", tStart);
printf(" last timestamp = %20llu ns\n", tEnd);
printf(" total data duration = %.2f sec = %.2f min\n", tDuration_sec, tDuration_sec/60.);
printf("==============> saved to %s \n", outFileName.Data());
printf("========================================> saved to %s \n", outFileName.Data());
TMacro info;
info.AddLine(Form("tStart= %20llu ns",tStart));
@ -366,6 +370,8 @@ int main(int argc, char **argv) {
for( int i = 0; i < nGroup; i++) delete reader[i];
delete [] reader;
printf("####################################### end of %s\n", argv[0]);
return 0;
}

View File

@ -66,15 +66,15 @@ int main(int argc, char **argv) {
FSUReader * readerA = new FSUReader(inFileName[0], 1, 1);
readerA->ScanNumBlock(0,0);
FileInfo fileInfo = {inFileName[0], readerA->GetSN() * 1000 + readerA->GetFileOrder(), readerA->GetHitCount()};
FileInfo fileInfo = {inFileName[0], readerA->GetSN() * 1000 + readerA->GetFileOrder(), readerA->GetTotalHitCount()};
fileList.push_back(fileInfo);
totalHitCount += readerA->GetHitCount();
totalHitCount += readerA->GetTotalHitCount();
for( int i = 1; i < nFile; i++){
FSUReader * readerB = new FSUReader(inFileName[i], 1, 1);
readerB->ScanNumBlock(0,0);
totalHitCount += readerB->GetHitCount();
fileInfo = {inFileName[i], readerB->GetSN() * 1000 + readerB->GetFileOrder(), readerB->GetHitCount()};
totalHitCount += readerB->GetTotalHitCount();
fileInfo = {inFileName[i], readerB->GetSN() * 1000 + readerB->GetFileOrder(), readerB->GetTotalHitCount()};
if( readerA->GetSN() == readerB->GetSN() ){
fileList.push_back(fileInfo);

View File

@ -3,7 +3,7 @@
#include <algorithm>
#include <filesystem>
// #include "AggSeparator.h"
#define DEFAULT_HALFBUFFERSIZE 500000
class FSUReader{
@ -60,21 +60,13 @@ class FSUReader{
return hit[id];
}
void ClearHitCount() {hitCount = 0;}
ulong GetHitCount() const{return hitCount;}
void ClearTotalHitCount() {totalHitCount = 0;}
ulong GetTotalHitCount() const{return totalHitCount;}
std::vector<Hit> ReadBatch(unsigned int batchSize = 1000000, bool verbose = false); // output the sorted Hit
// std::string SaveHit(std::vector<Hit> hitList, bool isAppend = false);
// std::string SaveHit2NewFile(std::string saveFolder = "./", std::string indexStr = "");
// void SortAndSaveTS(unsigned int batchSize = 1000000, bool verbose = false);
// off_t GetTSFileSize() const {return tsFileSize;}
//TODO
//void SplitFile(unsigned long hitSizePreFile);
void PrintHit(ulong numHit = -1, ulong startIndex = 0) {
for( ulong i = startIndex; i < std::min(numHit, hitCount); i++){
for( ulong i = startIndex; i < std::min(numHit, totalHitCount); i++){
printf("%10zu ", i); hit[i].Print();
}
}
@ -101,8 +93,7 @@ class FSUReader{
}
}
//void SaveAsCAENCoMPASSFormat();
unsigned long GetOptimumBatchSize() const {return optBufferSize;}
private:
@ -129,7 +120,7 @@ class FSUReader{
std::vector<unsigned int> blockPos;
std::vector<unsigned int > blockTimeStamp;
unsigned long hitCount;
unsigned long totalHitCount;
std::vector<Hit> hit;
@ -139,8 +130,15 @@ class FSUReader{
off_t tsFileSize;
//track the minimum timestamp (tmin) of every chunk of DEFAULT_HALFBUFFERSIZE hits; nMillion is the index of the current chunk
unsigned short nMillion;
std::vector<unsigned long> tmin;
unsigned long optBufferSize;
};
//^==============================================================
inline FSUReader::~FSUReader(){
delete data;
@ -148,6 +146,7 @@ inline FSUReader::~FSUReader(){
}
//^==============================================================
inline FSUReader::FSUReader(){
inFile = nullptr;
data = nullptr;
@ -161,6 +160,7 @@ inline FSUReader::FSUReader(){
}
//^==============================================================
inline FSUReader::FSUReader(std::string fileName, uInt dataSize, int verbose){
inFile = nullptr;
data = nullptr;
@ -174,6 +174,7 @@ inline FSUReader::FSUReader(std::string fileName, uInt dataSize, int verbose){
OpenFile(fileName, dataSize, verbose);
}
//^==============================================================
inline FSUReader::FSUReader(std::vector<std::string> fileList, uInt dataSize, int verbose){
inFile = nullptr;
data = nullptr;
@ -188,6 +189,7 @@ inline FSUReader::FSUReader(std::vector<std::string> fileList, uInt dataSize, in
}
//^==============================================================
inline void FSUReader::OpenFile(std::string fileName, uInt dataSize, int verbose){
/// File format must be YYY...Y_runXXX_AAA_BBB_TT_CCC.fsu
@ -223,9 +225,14 @@ inline void FSUReader::OpenFile(std::string fileName, uInt dataSize, int verbose
blockPos.clear();
blockTimeStamp.clear();
hitCount = 0;
totalHitCount = 0;
hit.clear();
nMillion = 0;
tmin.clear();
tmin.push_back(-1);
optBufferSize = 2*DEFAULT_HALFBUFFERSIZE;
//check if the file is *.fsu or *.fsu.X
size_t found = fileName.find_last_of('.');
std::string ext = fileName.substr(found + 1);
@ -279,6 +286,7 @@ inline void FSUReader::OpenFile(std::string fileName, uInt dataSize, int verbose
}
//^==============================================================
inline int FSUReader::ReadNextBlock(bool traceON, int verbose, uShort saveData){
if( inFile == NULL ) return -1;
if( feof(inFile) || filePos >= inFileSize) {
@ -334,13 +342,22 @@ inline int FSUReader::ReadNextBlock(bool traceON, int verbose, uShort saveData){
return -20;
}
unsigned int eventCout = 0;
for( int ch = 0; ch < data->GetNChannel(); ch++){
if( data->NumEventsDecoded[ch] == 0 ) continue;
hitCount += data->NumEventsDecoded[ch];
eventCout += data->NumEventsDecoded[ch];
totalHitCount += data->NumEventsDecoded[ch];
if( totalHitCount / DEFAULT_HALFBUFFERSIZE > nMillion ) {
nMillion ++;
tmin.push_back(-1);
}
int start = data->GetDataIndex(ch) - data->NumEventsDecoded[ch] + 1;
if( start < 0 ) start = start + data->GetDataSize();
for( int i = start; i < start + data->NumEventsDecoded[ch]; i++ ){
int k = i % data->GetDataSize();
if( data->GetTimestamp(ch, k) < tmin[nMillion] ) tmin[nMillion] = data->GetTimestamp(ch, k);
}
if( saveData ){
int start = data->GetDataIndex(ch) - data->NumEventsDecoded[ch] + 1;
@ -375,6 +392,7 @@ inline int FSUReader::ReadNextBlock(bool traceON, int verbose, uShort saveData){
return 0;
}
//^==============================================================
inline int FSUReader::ReadBlock(unsigned int ID, int verbose){
if( totNumBlock == 0 )return -1;
if( ID >= totNumBlock )return -1;
@ -392,6 +410,7 @@ inline int FSUReader::ReadBlock(unsigned int ID, int verbose){
}
//^==============================================================
inline void FSUReader::SortHit(int verbose){
if( verbose) printf("\nQuick Sort hit array according to time...");
std::sort(hit.begin(), hit.end(), [](const Hit& a, const Hit& b) {
@ -400,6 +419,7 @@ inline void FSUReader::SortHit(int verbose){
if( verbose) printf(".......done.\n");
}
//^==============================================================
inline void FSUReader::ScanNumBlock(int verbose, uShort saveData){
if( inFile == nullptr ) return;
if( feof(inFile) ) return;
@ -423,8 +443,8 @@ inline void FSUReader::ScanNumBlock(int verbose, uShort saveData){
totNumBlock = blockID;
if(verbose) {
printf("\nScan complete: number of data Block : %lu\n", totNumBlock);
printf( " number of hit : %lu", hitCount);
if( hitCount > 1e6 ) printf(" = %.3f million", hitCount/1e6);
printf( " number of hit : %lu", totalHitCount);
if( totalHitCount > 1e6 ) printf(" = %.3f million", totalHitCount/1e6);
printf("\n");
if( saveData )printf( " size of the hit array : %lu\n", hit.size());
@ -445,14 +465,32 @@ inline void FSUReader::ScanNumBlock(int verbose, uShort saveData){
//check if the total hit count == hit.size();
if( saveData ){
if( hitCount != hit.size()){
if( totalHitCount != hit.size()){
printf("!!!!!! the Data::dataSize is not big enough. !!!!!!!!!!!!!!!\n");
}else{
SortHit(verbose+1);
}
}
//check the time structure (tmin of each chunk) and enlarge the optimum batch size when the chunks are out of time order
if( nMillion > 0 ){
// printf("------------ time structure\n");
// printf("%5s | %15s\n", "mil.", "t-min");
for( int i = 0; i < nMillion; i++){
// printf("%5d | %15lu", i, tmin[i]);
if( i > 0 && tmin[i] < tmin[i-1] ) {
// printf("<----");
if( i > 1 && tmin[i] < tmin[i-2]) optBufferSize += 2*DEFAULT_HALFBUFFERSIZE;
}
// printf("\n");
}
}
// printf(" recommanded batch size : %lu\n", optBufferSize);
}
//^==============================================================
inline std::vector<Hit> FSUReader::ReadBatch(unsigned int batchSize, bool verbose){
// printf("%s sn:%d. filePos : %lu\n", __func__, sn, ftell(inFile));
@ -513,6 +551,8 @@ inline std::vector<Hit> FSUReader::ReadBatch(unsigned int batchSize, bool verbos
if( t0_A >= t0_B) {
printf("\033[0;31m!!!!!!!!!!!!!!!!! %s | Need to increase the batch size. \033[0m\n", __func__);
printf("t0_A : %15lu\n", t0_A);
printf("t0_B : %15lu\n", t0_B);
return std::vector<Hit> ();
}
@ -580,216 +620,3 @@ inline std::vector<Hit> FSUReader::ReadBatch(unsigned int batchSize, bool verbos
}
/*
inline void FSUReader::SortAndSaveTS(unsigned int batchSize, bool verbose){
int count = 0;
std::vector<Hit> hitList_A ;
do{
if( verbose ) printf("***************************************************\n");
int res = 0;
do{
res = ReadNextBlock(true, 0, 3);
}while ( hit.size() < batchSize && res == 0);
SortHit();
uLong t0_B = hit.at(0).timestamp;
uLong t1_B = hit.back().timestamp;
if( verbose ) {
printf(" hit in memeory : %7zu | %u | %lu \n", hit.size(), filePos, inFileSize);
printf("t0 : %15lu\n", t0_B);
printf("t1 : %15lu\n", t1_B);
}
if( count == 0 ) {
hitList_A = hit; // copy hit
}else{
uLong t0_A = hitList_A.at(0).timestamp;
uLong t1_A = hitList_A.back().timestamp;
ulong ID_A = 0;
ulong ID_B = 0;
if( t0_A > t0_B) {
printf("Need to increase the batch size. \n");
return;
}
if( t1_A > t0_B) { // need to sort between two hitList
if( verbose ) {
printf("############# need to sort \n");
printf("=========== sume of A + B : %zu \n", hitList_A.size() + hit.size());
}
std::vector<Hit> hitTemp;
for( size_t j = 0; j < hitList_A.size() ; j++){
if( hitList_A[j].timestamp < t0_B ) continue;
if( ID_A == 0 ) ID_A = j;
hitTemp.push_back(hitList_A[j]);
}
hitList_A.erase(hitList_A.begin() + ID_A, hitList_A.end() );
if( verbose ) {
printf("----------------- ID_A : %lu, Drop\n", ID_A);
PrintHitListInfo(hitList_A, "hitList_A");
}
for( size_t j = 0; j < hit.size(); j++){
if( hit[j].timestamp > t1_A ) {
ID_B = j;
break;
}
hitTemp.push_back(hit[j]);
}
std::sort(hitTemp.begin(), hitTemp.end(), [](const Hit& a, const Hit& b) {
return a.timestamp < b.timestamp;
});
hit.erase(hit.begin(), hit.begin() + ID_B );
if( verbose ) {
PrintHitListInfo(hitTemp, "hitTemp");
printf("----------------- ID_B : %lu, Drop\n", ID_B);
PrintHitListInfo(hit, "hit");
printf("=========== sume of A + B + Temp : %zu \n", hitList_A.size() + hit.size() + hitTemp.size());
printf("----------------- refill hitList_A \n");
}
ulong ID_Temp = 0;
for( size_t j = 0; j < hitTemp.size(); j++){
hitList_A.push_back(hitTemp[j]);
if( hitList_A.size() >= batchSize ) {
ID_Temp = j+1;
break;
}
}
hitTemp.erase(hitTemp.begin(), hitTemp.begin() + ID_Temp );
for( size_t j = 0 ; j < hit.size(); j ++){
hitTemp.push_back(hit[j]);
}
SaveHit(hitList_A, count <= 1 ? false : true);
if( verbose ) {
PrintHitListInfo(hitList_A, "hitList_A");
PrintHitListInfo(hitTemp, "hitTemp");
printf("----------------- replace hitList_A by hitTemp \n");
}
hitList_A.clear();
hitList_A = hitTemp;
hit.clear();
if( verbose ) {
PrintHitListInfo(hitList_A, "hitList_A");
printf("===========================================\n");
}
}else{ // save hitList_A, replace hitList_A
SaveHit(hitList_A, count <= 1? false : true);
hitList_A.clear();
hitList_A = hit;
if( verbose ) PrintHitListInfo(hitList_A, "hitList_A");
}
}
ClearHitList();
count ++;
}while(filePos < inFileSize);
SaveHit(hitList_A, count <= 1 ? false : true);
printf("================= finished.\n");
}
*/
/*
inline std::string FSUReader::SaveHit(std::vector<Hit> hitList, bool isAppend){
std::string outFileName;
if( fileList.empty() ) {
outFileName = fileName + ".ts" ;
}else{
outFileName = fileList[0] + ".ts" ;
}
uint64_t hitSize = hitList.size();
FILE * outFile ;
if( isAppend ) {
outFile = fopen(outFileName.c_str(), "rb+"); //read/write bineary
rewind(outFile);
fseek( outFile, 4, SEEK_CUR);
uint64_t org_hitSize;
fread(&org_hitSize, 8, 1, outFile);
rewind(outFile);
fseek( outFile, 4, SEEK_CUR);
org_hitSize += hitSize;
fwrite(&org_hitSize, 8, 1, outFile);
fseek(outFile, 0, SEEK_END);
}else{
outFile = fopen(outFileName.c_str(), "wb"); //overwrite binary
uint32_t header = 0xAA000000;
header += sn;
fwrite( &header, 4, 1, outFile );
fwrite( &hitSize, 8, 1, outFile);
}
for( ulong i = 0; i < hitSize; i++){
if( i% 10000 == 0 ) printf("Saving %lu/%lu Hit (%.2f%%)\n\033[A\r", i, hitSize, i*100./hitSize);
uint16_t flag = hitList[i].ch + (hitList[i].pileUp << 8) ;
if( DPPType == DPPTypeCode::DPP_PSD_CODE ) flag += ( 1 << 15);
if( hitList[i].traceLength > 0 ) flag += (1 << 14);
// fwrite( &(hit[i].ch), 1, 1, outFile);
fwrite( &flag, 2, 1, outFile);
fwrite( &(hitList[i].energy), 2, 1, outFile);
if( DPPType == DPPTypeCode::DPP_PSD_CODE ) fwrite( &(hitList[i].energy2), 2, 1, outFile);
fwrite( &(hitList[i].timestamp), 6, 1, outFile);
fwrite( &(hitList[i].fineTime), 2, 1, outFile);
if( hitList[i].traceLength > 0 ) fwrite( &(hitList[i].traceLength), 2, 1, outFile);
for( uShort j = 0; j < hitList[i].traceLength; j++){
fwrite( &(hitList[i].trace[j]), 2, 1, outFile);
}
}
off_t tsFileSize = ftello(outFile); // unsigned int = Max ~4GB
fclose(outFile);
printf("Saved to %s, size: ", outFileName.c_str());
if( tsFileSize < 1024 ) {
printf(" %ld Byte", tsFileSize);
}else if( tsFileSize < 1024*1024 ) {
printf(" %.2f kB", tsFileSize/1024.);
}else if( tsFileSize < 1024*1024*1024){
printf(" %.2f MB", tsFileSize/1024./1024.);
}else{
printf(" %.2f GB", tsFileSize/1024./1024./1024.);
}
printf("\n");
return outFileName;
}
*/

View File

@ -81,7 +81,7 @@ int main(int argc, char **argv) {
tempInfo.fileName = inFileName[i];
tempInfo.readerID = i;
tempInfo.SN = reader[i]->GetSN();
tempInfo.hitCount = reader[i]->GetHitCount();
tempInfo.hitCount = reader[i]->GetTotalHitCount();
tempInfo.fileSize = reader[i]->GetFileByteSize();
tempInfo.tick2ns = reader[i]->GetTick2ns();
tempInfo.DPPType = reader[i]->GetDPPType();
@ -208,7 +208,7 @@ int main(int argc, char **argv) {
}else{
group[gpID].hitID = 0;
uShort rID = group[gpID].readerIDList[group[gpID].currentID];
group[gpID].hitCount = reader[rID]->GetHitCount();
group[gpID].hitCount = reader[rID]->GetTotalHitCount();
printf("-----> go to the next file, %s \n", fileInfo[rID].fileName.c_str() );
}
}

View File

@ -110,7 +110,7 @@ int main(int argc, char **argv) {
tempInfo.fileName = outFileName;
tempInfo.readerID = i;
tempInfo.SN = reader[i]->GetSN();
tempInfo.hitCount = reader[i]->GetHitCount();
tempInfo.hitCount = reader[i]->GetTotalHitCount();
tempInfo.fileSize = reader[i]->GetTSFileSize();
tempInfo.tick2ns = reader[i]->GetTick2ns();
tempInfo.DPPType = reader[i]->GetDPPType();