# Re-label the exons of mouse.exon_sums.M023.gtf.gz.old_mislabeled_exons using
# the exon records in ../gencode.vM23.primary_assembly.annotation.gtf.gz.exons.
#
# Records are matched on a "recount exon id": columns 1, 4, 5 and 7 of the
# annotation line joined with "|". The input file carries the same key in its
# recount_exon_id "..." attribute.
#
# NOTE(review): `pcat` is not defined here; it is assumed to stream the
# (decompressed) file to stdout -- confirm. The annotation ".exons" file is
# opened by perl as plain text, so it must not be compressed.

# Step 1: replace every input record with an annotation record that has the
# same recount exon id, appending the recount_exon_id attribute to it.
# Comment lines (leading "#") pass through unchanged. Records with no
# annotation line left for their id are reported on stderr (file: missing).
pcat mouse.exon_sums.M023.gtf.gz.old_mislabeled_exons | perl -ne '
    BEGIN {
        my $annot = "../gencode.vM23.primary_assembly.annotation.gtf.gz.exons";
        open(my $in, "<", $annot) or die "cannot open $annot: $!\n";
        while (my $line = <$in>) {
            chomp($line);
            my @f = split(/\t/, $line, -1);
            my $rid = join("|", @f[0, 3, 4, 6]);
            # Several annotation lines can share one key; keep them all.
            push(@{ $h{$rid} }, $line);
        }
        close($in);
    }
    chomp;
    my $f = $_;
    if ($f =~ /^#/) { print "$f\n"; next; }
    my ($rid) = $f =~ /recount_exon_id\s+"([^"]+)"/;
    # Also treat an exhausted bucket as missing: pop on an empty array would
    # otherwise emit a record with an empty annotation body.
    if (!defined($rid) || !$h{$rid} || !@{ $h{$rid} }) {
        print STDERR "missing gid for $f, skipping\n";
        next;
    }
    my $newline = pop(@{ $h{$rid} });
    print "$newline recount_exon_id \"$rid\";\n";
' > mouse.exon_sums.M023.gtf.updated3 2> missing

# Step 2: same as step 1, but additionally remove the exon_id "..." attribute
# from the annotation record and put its bare value at the front of the last
# (attribute) column. Diagnostics go to the file missing2.
pcat mouse.exon_sums.M023.gtf.gz.old_mislabeled_exons | perl -ne '
    BEGIN {
        my $annot = "../gencode.vM23.primary_assembly.annotation.gtf.gz.exons";
        open(my $in, "<", $annot) or die "cannot open $annot: $!\n";
        while (my $line = <$in>) {
            chomp($line);
            my @f = split(/\t/, $line, -1);
            my $rid = join("|", @f[0, 3, 4, 6]);
            push(@{ $h{$rid} }, $line);
        }
        close($in);
    }
    chomp;
    my $f = $_;
    if ($f =~ /^#/) { print "$f\n"; next; }
    my ($rid) = $f =~ /recount_exon_id\s+"([^"]+)"/;
    if (!defined($rid) || !$h{$rid} || !@{ $h{$rid} }) {
        print STDERR "missing gid for $f, skipping\n";
        next;
    }
    my $newline = pop(@{ $h{$rid} });
    # Only rewrite when an exon_id is actually present, so a failed match
    # can never reuse a stale capture.
    if ($newline =~ /exon_id\s+"([^"]+)"/) {
        my $eid = $1;
        $newline =~ s/\s+exon_id\s+"\Q$eid\E";\s+/ /;
        $newline =~ s/\t([^\t]+)$/\t$eid $1/;
    }
    print "$newline recount_exon_id \"$rid\";\n";
' > mouse.exon_sums.M023.gtf.updated4 2> missing2

# Step 3: flatten updated4 into a tab-separated table: all leading columns,
# then gene_id, exon_id and recount_exon_id taken from the last column.
# NOTE(review): /exon_id\s+"..."/ also matches inside recount_exon_id, and
# step 2 strips the exon_id "..." attribute, so the "exon id" column here is
# expected to equal the recount id. Kept as in the original command; confirm
# whether the bare leading exon id was intended instead.
perl -ne '
    chomp;
    my @f = split(/\t/, $_, -1);
    my $i = pop(@f);
    my ($rid) = $i =~ /recount_exon_id\s+"([^"]+)"/;
    my ($eid) = $i =~ /exon_id\s+"([^"]+)"/;
    my ($gid) = $i =~ /gene_id\s+"([^"]+)"/;
    # A missing attribute yields an empty cell rather than a stale capture.
    for my $v ($rid, $eid, $gid) { $v = "" unless defined($v); }
    print join("\t", @f) . "\t$gid\t$eid\t$rid\n";
' < mouse.exon_sums.M023.gtf.updated4 > mouse.exon_sums.M023.gtf.updated4.cut

# Step 4: count the records whose text contains <TAB>exon<TAB> per gene_id
# in the original (mislabeled) file.
pcat mouse.exon_sums.M023.gtf.gz.old_mislabeled_exons |
    fgrep $'\texon\t' |
    perl -ne '
        chomp;
        my ($gid) = /gene_id\s+"([^"]+)"/;
        $gid = "" unless defined($gid);
        print "$gid\n";
    ' |
    LC_ALL=C sort |
    uniq -c > mouse.exon_sums.M023.gtf.gz.old_mislabeled_exons.exonsPERgene