# Rewrite the gene_id attribute of the M023 mouse exon GTF, rebuild a second
# GTF in the original record order, and cross-check the result with a sorted diff.
#
# NOTE(review): this snippet arrived with every "<...>" span stripped out. That
# removed the input file name and the "while($line=<IN>)" loop header from both
# BEGIN blocks and fused the three commands onto one line. The loops are
# reconstructed below. The lost file names cannot be recovered from the text,
# so they are taken from the environment -- confirm them against the original
# notes before running.
#
# pcat is assumed to stream the decompressed GTF to stdout -- TODO confirm.

# ---------------------------------------------------------------------------
# Step 1: swap in the gene_id looked up by (exon_id, recount_exon_id).
#
# GID_MAP  tab-separated table: gene_id <TAB> exon_id <TAB> recount_exon_id.
#          The original file name was lost, so it must be supplied.
# Output   exon_sums/mouse.exon_sums.M023.updated_gids.gtf
# Records with no table entry are reported on stderr and kept unchanged.
# ---------------------------------------------------------------------------
: "${GID_MAP:?set GID_MAP to the gene_id/exon_id/recount_exon_id TSV}"

pcat exon_sums/mouse.exon_sums.M023.gtf.gz | GID_MAP="$GID_MAP" perl -ne '
    BEGIN {
        # Load the lookup once: "exon_id|recount_exon_id" -> gene_id.
        open(my $map_fh, "<", $ENV{GID_MAP})
            or die "cannot open $ENV{GID_MAP}: $!\n";
        while (my $line = <$map_fh>) {
            chomp($line);
            my ($gid, $eid, $rid) = split(/\t/, $line);
            $h{$eid."|".$rid} = $gid;
        }
        close($map_fh);
    }
    chomp;
    my $f = $_;

    # Header/comment lines pass through untouched.
    if ($f =~ /^#/) { print "$f\n"; next; }

    # List-context matches yield undef on failure instead of a stale $1.
    # The ENSMU prefix keeps the first pattern from matching inside
    # the recount_exon_id attribute.
    my ($eid) = $f =~ /exon_id\s+"(ENSMU[^"]+)"/;
    my ($rid) = $f =~ /recount_exon_id\s+"([^"]+)"/;

    my $gid = (defined($eid) && defined($rid)) ? $h{$eid."|".$rid} : undef;
    if (!defined($gid)) {
        # Despite the wording of the message, the record is kept as-is.
        print STDERR "missing gid for $f, skipping\n";
        print "$f\n";
        next;
    }

    $f =~ s/gene_id\s+"[^"]+"/gene_id "$gid"/;
    print "$f\n";
' > exon_sums/mouse.exon_sums.M023.updated_gids.gtf

# ---------------------------------------------------------------------------
# Step 2: emit the records of NEW_GTF in the order of the original GTF,
# matching records by recount_exon_id.
#
# NEW_GTF  GTF whose records are re-emitted. The original file name was lost;
#          it is presumably the same file the diff in step 3 compares
#          against, which is used as the default -- TODO confirm.
# Output   exon_sums/mouse.exon_sums.M023.gtf.updated2
# Stderr   written to ./missing (original records with no counterpart, which
#          are dropped from the output).
# ---------------------------------------------------------------------------
NEW_GTF=${NEW_GTF:-mouse.exon_sums.M023.gtf.new}

pcat exon_sums/mouse.exon_sums.M023.gtf.gz | NEW_GTF="$NEW_GTF" perl -ne '
    BEGIN {
        # Group the replacement records by recount_exon_id.
        open(my $new_fh, "<", $ENV{NEW_GTF})
            or die "cannot open $ENV{NEW_GTF}: $!\n";
        while (my $line = <$new_fh>) {
            chomp($line);
            my ($rid) = $line =~ /recount_exon_id\s+"([^"]+)"/;
            # Lines without the attribute (e.g. headers) cannot be keyed;
            # skip them rather than filing them under a stale capture.
            next if !defined($rid);
            push(@{$h{$rid}}, $line);
        }
        close($new_fh);
    }
    chomp;
    my $f = $_;

    # Header/comment lines of the original are passed through.
    if ($f =~ /^#/) { print "$f\n"; next; }

    my ($rid) = $f =~ /recount_exon_id\s+"([^"]+)"/;

    # Treat an exhausted list like a missing key: popping an empty array
    # would otherwise write a blank line into the GTF.
    if (!defined($rid) || !$h{$rid} || !@{$h{$rid}}) {
        print STDERR "missing gid for $f, skipping\n";
        next;
    }

    # pop: records sharing a recount_exon_id come out in the reverse of
    # their order in NEW_GTF (harmless for the sorted comparison below).
    my $newline = pop(@{$h{$rid}});
    print "$newline\n";
' > exon_sums/mouse.exon_sums.M023.gtf.updated2 2>missing

# ---------------------------------------------------------------------------
# Step 3: order-insensitive comparison of the new GTF and the rebuilt file.
# The result is written to a file literally named "diff"; an empty file means
# the two inputs contain the same set of lines.
# ---------------------------------------------------------------------------
diff <(LC_ALL=C sort mouse.exon_sums.M023.gtf.new) <(LC_ALL=C sort exon_sums/mouse.exon_sums.M023.gtf.updated2) > diff