prune_alignments.pl

prune_alignments.pl

#!/usr/bin/perl -w use strict; use Bio::AlignIO; my $usage = <<END; The script will accept a MFA multiple sequence alignment and remove the specified sequences. Usage: ./prune_alignment.pl aln_file species1 species2 speciesn END ; my $infile = shift or die $usage; my @to_remove = @ARGV; @to_remove == 0 ? die "No sequences to remove, it is pointless to go on!\n" : print STDERR "I will delete @to_remove" and sleep 1; my $in = Bio::AlignIO->new(-format => 'fasta', -file => $infile); my $out = Bio::AlignIO->new(-format => 'fasta', -file => ">$infile.truncated"); my $aln = $in->next_aln; for my $seq ($aln->each_seq) { print STDERR $seq->id,"\n"; for my $name (@to_remove) { if ($seq->id =~ /$name$/) { $aln->remove_seq($seq); print STDERR $seq->id, " has been removed from the alignment \n"; } } } $out->write_aln($aln); # fix the IDs system "perl -i -pe 's/\\/\\d+-\\d+//' $infile.truncated"; print STDERR "\nThe new alignment file has been written to $infile.truncated\n";