# Author: Pengfei Ding (dingpf@fnal.gov)
# Created on 2017-08-14 for Data Management tutorial for DUNE
# This file is to be used during the hands-on session in the tutorial.
# The commands are meant to be run in order in one interactive shell:
# later slides reuse the variables exported by earlier ones.

#####################################################################
### Slide 9
### Setups
#####################################################################

# On GPVM (e.g. dunegpvm01.fnal.gov)
# setup UPS etc.
source /cvmfs/dune.opensciencegrid.org/products/dune/setup_dune.sh

# Getting a valid Kerberos certificate and VOMS proxy
kx509
voms-proxy-init --noregen -rfc -voms dune:/dune/Role=Analysis

# Setup fife_utils, current version is v3_1_0
setup fife_utils

# set experiment name
export EXPERIMENT=dune

# setup ROOT (not needed by data management itself, but we will use
# ROOT in this tutorial to show how to use files interactively).
setup root v6_08_06d -f Linux64bit+2.6-2.12 -q e14:nu:prof

#####################################################################
### Slide 10
### Access file in dCache (I) - copy files to scratch
#####################################################################

# Create a directory in scratch area for this tutorial
export SCRATCH_DIR="/pnfs/dune/scratch/users/${USER}/tutorial"
ifdh mkdir_p "${SCRATCH_DIR}"

# Write files to scratch dCache (best to have files written in local
# disk or BlueArc first and then copy to the scratch area with ifdh
# or xrootd)

# create four 5MB dummy files, these files will be used for
# demonstration of data handling. You do not need to create the dummy
# files. You can use files of your own.
# 5242880 bytes = 5 * 1024 * 1024.
for i in 0 1 2 3; do
  head -c 5242880 /dev/urandom > ~/"dummy_${USER}_${i}.bin"
done

# copy files into scratch dCache with "ifdh cp".
# The [0-3] glob is deliberately left unquoted so the shell expands it
# to the four dummy files created above.
ifdh cp -D ~/dummy_"${USER}"_[0-3].bin "${SCRATCH_DIR}"

# To explore other options available with "ifdh cp", just type "ifdh".
#####################################################################
### Slide 11
### Access file in dCache (II) - delete files in scratch
#####################################################################

# delete files with "ifdh rm"
ifdh rm "${SCRATCH_DIR}/dummy_${USER}_0.bin"
for i in 1 2 3; do
  ifdh rm "${SCRATCH_DIR}/dummy_${USER}_${i}.bin"
done

# Copy files to scratch dCache using xrootd
xrdcp ~/dummy_"${USER}"_[0-3].bin "${SCRATCH_DIR}"

# or (alternative form of the same copy, using an explicit xrootd URI;
# the URI is kept as one quoted word so no whitespace can end up inside it)
xrdcp ~/dummy_"${USER}"_*.bin \
  "root://fndca1.fnal.gov:1094//pnfs/fnal.gov/usr/dune/scratch/users/${USER}/tutorial"

# note that one should convert the path to scratch dCache to URI
# recognized by xrootd:
# e.g. from: /pnfs/dune/scratch/users/${USER}/dummy_${USER}_1.bin
#      to:   root://fndca1.fnal.gov:1094//pnfs/fnal.gov/usr/dune/scratch/users/${USER}/dummy_${USER}_1.bin

#####################################################################
### Slide 12
### Access file in dCache (III) - streaming with xrootd
#####################################################################

# Converting the path to xrootd URI using ifdhc.
# The path must include the experiment component (/pnfs/dune/...), in
# line with every other scratch path used in this tutorial.
ifdh getUrl "/pnfs/dune/scratch/users/${USER}" root

# copy a root file to scratch dCache (you can use your own root file,
# this file is used only for demonstrating streaming root file with
# xrootd in ROOT).
# Copy the hsimple.root file from the ROOT tutorials directory into the
# tutorial scratch directory; it is only used to demonstrate streaming.
ifdh cp -D "${ROOTSYS}/tutorials/hsimple.root" "${SCRATCH_DIR}"

# access the file in dCache via xrootd in ROOT.
# The URI must point at the directory the file was copied to above,
# i.e. ${SCRATCH_DIR} (/pnfs/dune/scratch/users/${USER}/tutorial) with
# the /pnfs/dune prefix rewritten to the xrootd form.
root -l "root://fndca1.fnal.gov:1094//pnfs/fnal.gov/usr/dune/scratch/users/${USER}/tutorial/hsimple.root"

# art can also take the xrootd URI

#####################################################################
### Slide 13
### Store files to persistent/tape-backed area (I)
### - declare a SAM dataset with files in scratch area
#####################################################################

# choose a dataset name, better to be user, purpose and time specific
# (the timestamp is yymmddHHMM).
export TUTORIAL_DATASET="${USER}_tutorial_$(date +%y%m%d%H%M)_01"

# Add a SAM dataset for files in dCache scratch area
sam_add_dataset -n "${TUTORIAL_DATASET}" -d "${SCRATCH_DIR}"

# Instead of the "-d" option, it can take "-f" option followed by a
# text file containing a list of paths to files

# NOTE: sam_add_dataset will change the filename with UUID prefix.
ls "${SCRATCH_DIR}"

# List files in the dataset
samweb list-definition-files "${TUTORIAL_DATASET}"

#####################################################################
### Slide 14
### Store files to persistent/tape-backed area (II)
### - clone the dataset to persistent/tape-backed area
#####################################################################

# If the files under the scratch area are worth keeping for a longer
# time, they can be added to SAM first with sam_add_dataset, followed by
# copying to the persistent or tape-backed area.
# create a destination directory in the persistent area first
export PERSISTENT_DIR="/pnfs/dune/persistent/users/${USER}/tutorial"
ifdh mkdir_p "${PERSISTENT_DIR}"

# Copy the dataset to persistent area with sam_clone_dataset
sam_clone_dataset -n "${TUTORIAL_DATASET}" -d "${PERSISTENT_DIR}"

# Advanced tips for cloning large dataset:
# "sam_clone_dataset" has "--njobs" option to launch multiple jobs to do
# the cloning. "launch_clone_jobs" can launch grid jobs to do the cloning.

#####################################################################
### Slide 15
### Store files to persistent/tape-backed area (III)
### - remove replicas in the scratch area
#####################################################################

# check file locations, you will see two locations.
# DUMMY_01 holds the first file name listed for the dataset; it is
# reused by the later slides.
DUMMY_01=$(samweb list-definition-files "${TUTORIAL_DATASET}" | head -n 1)
samweb locate-file "${DUMMY_01}"

# Remove replicas of the dataset files in the scratch area
sam_unclone_dataset -n "${TUTORIAL_DATASET}" -d "${SCRATCH_DIR}"

# List ${SCRATCH_DIR} to check if files are still there.
# List the scratch directory to check whether the files are still there.
ls "${SCRATCH_DIR}"

# check the file locations again, you will see only one location left
samweb locate-file "${DUMMY_01}"

#####################################################################
### Slide 16
### Store files to persistent/tape-backed area (IV)
### - validate dataset and dealing with missing files
#####################################################################

# Validate dataset, that is to check if each file in a dataset exists
# in the storage volume
sam_validate_dataset -n "${TUTORIAL_DATASET}"

# Let's move one file in the dataset and run "sam_validate_dataset".
# FPATH is the second ':'-separated field of the locate-file output,
# used below as the directory that currently holds the file.
FPATH=$(samweb locate-file "${DUMMY_01}" | cut -d ':' -f 2)
ifdh mv "${FPATH}/${DUMMY_01}" "${PERSISTENT_DIR}/"
sam_validate_dataset -n "${TUTORIAL_DATASET}"

# When there is a file missing, one can either replace the file with
# a backup copy; or use "--prune" option to remove the file from the
# dataset; otherwise there will be errors when using SAM record for
# file access.
sam_validate_dataset -n "${TUTORIAL_DATASET}" --prune

# Let's list the files in the dataset again
samweb list-definition-files "${TUTORIAL_DATASET}"

#####################################################################
### Slide 17
### Store files to persistent/tape-backed area (V)
### - retire dataset
#####################################################################

# This will delete the dataset definition in SAM, retire all files
# contained in the dataset and delete them from disk, but if the files
# are the last copy, they will not be deleted. The "-j" ("--just_say")
# option allows you to see what will be done without actually doing it.
sam_retire_dataset -n "${TUTORIAL_DATASET}" -j

# You can use "--keep_files" option if you don't want to delete the
# files.
sam_retire_dataset -n "${TUTORIAL_DATASET}" --keep_files

# Once the dataset has been retired, you can revert the file names for
# the last copy of files with sam_revert_names
sam_revert_names -d "${PERSISTENT_DIR}"