#!/usr/bin/env bash
# Remove selected runs from the per-study SRA metadata tables.
#
# Usage: <script> <study> <run>[,<run>...]
#   $1  study accession (e.g. SRP157044)
#   $2  comma delimited SRR's to remove from metadata
#
# Environment:
#   RECOUNT_DATA_SOURCES  optional override of the data_sources root
#                         (default: /datascope/recount03/release/human/data_sources)
#
# For each of the five metadata tables of the study (sra, recount_qc,
# recount_project, recount_seq_qc, recount_pred) the script:
#   1. copies the live table into the current directory as
#      <table>.<study>.MD.gz, unless that backup already exists;
#   2. rewrites the live table from the backup, dropping every line that
#      contains one of the requested runs as a delimited field.
# The backup is always the filter input, so re-running with a different run
# list is not cumulative: the result is "backup minus the current list".
#
# External tools used: pcat (must decompress the .gz table to stdout), rsync,
# grep, gzip.

# Print a diagnostic to stderr.
err() {
  printf '%s\n' "$*" >&2
}

# Print the command-line synopsis to stderr.
usage() {
  err "Usage: ${0##*/} <study_accession> <comma_delimited_runs_to_remove>"
}

# Entry point.
# Arguments: $1 - study accession, $2 - comma-delimited runs to remove
# Returns:   0 on success, 2 on bad arguments, 1 on copy/filter failure
main() {
  local srp=${1:-}
  #comma delimited SRR's to remove from metadata
  local samps2remove=${2:-}
  if [[ -z "$srp" || -z "$samps2remove" ]]; then
    usage
    return 2
  fi

  # Directory bucket = last two characters of the accession (the whole
  # accession when it is two characters or shorter), e.g.
  #/datascope/recount03/release/human/data_sources/sra/gene_sums/44/SRP157044/sra.gene_sums.SRP157044.G026.gz
  local lo=$srp
  if ((${#srp} > 2)); then
    lo=${srp: -2}
  fi

  local ds=sra
  local root=${RECOUNT_DATA_SOURCES:-/datascope/recount03/release/human/data_sources}
  local p="$root/$ds"

  # Disabled sanity check kept from the original script:
  #metadata check first
  #known_nsamps=$(pcat $p/metadata/sra.recount_project.MD.gz | fgrep " $srp " | wc -l)
  #echo "$p/metadata/sra.recount_project.MD.gz has $known_nsamps samples"
  #pcat $p/metadata/sra.recount_project.MD.gz | fgrep " $srp " | cut -f 2 | sed 's#^#\t#' | sed 's#$#\t#' > known_samples.txt

  local m="$p/metadata/$lo/$srp/$ds"

  # Field delimiter expected on both sides of the run accession.
  # NOTE(review): this is a single space, exactly as in the script as found;
  # if the MD tables are tab-delimited this must be a tab instead - confirm.
  local sep=' '

  # Split the run list on commas and whitespace. read returns non-zero because
  # the input holds no NUL terminator; the array is still filled.
  local -a srrs=()
  IFS=$', \t\n' read -r -d '' -a srrs <<<"$samps2remove" || true

  # One fixed-string pattern per run, so a single grep pass removes all of
  # them. (Filtering one run per pass from the unchanged backup would leave
  # only the last run removed.)
  local -a grep_args=()
  local srr
  for srr in "${srrs[@]}"; do
    [[ -n "$srr" ]] || continue
    grep_args+=(-e "${sep}${srr}${sep}")
  done
  if ((${#grep_args[@]} == 0)); then
    err "no runs to remove in '$samps2remove'"
    usage
    return 2
  fi

  local t fname bfname
  local -a status=()
  for t in "$ds" recount_qc recount_project recount_seq_qc recount_pred; do
    fname="${m}.${t}.${srp}.MD.gz"
    bfname="${t}.${srp}.MD.gz"
    #nsamps=$(pcat $fname | tail -n+2 | wc -l)

    # Take the backup once; never overwrite an existing one.
    if [[ ! -f "$bfname" ]]; then
      if ! rsync -av "$fname" "$bfname"; then
        err "failed to back up $fname to $bfname; $fname left untouched"
        return 1
      fi
    fi

    pcat "$bfname" | grep -F -v "${grep_args[@]}" | gzip >"$fname"
    status=("${PIPESTATUS[@]}")
    # grep exits 1 when it selects no lines, which is not an error here.
    if ((status[0] != 0 || status[1] > 1 || status[2] != 0)); then
      err "failed to rewrite $fname (original preserved in $bfname)"
      return 1
    fi
  done
}

main "$@"