prune_alignments.pl

#!/usr/bin/perl -w                                                                                                                                                                
use strict;
use Bio::AlignIO;


my $usage = <<END;
The script will accept a MFA multiple sequence alignment and remove the specified sequences.

Usage: ./prune_alignment.pl aln_file species1 species2 speciesn

END
;

my $infile = shift or die $usage;
my @to_remove = @ARGV;

@to_remove == 0 ? die "No sequences to remove, it is pointless to go on!\n"
: print STDERR "I will delete @to_remove" and sleep 1;

my $in  = Bio::AlignIO->new(-format => 'fasta', -file => $infile);
my $out = Bio::AlignIO->new(-format => 'fasta', -file => ">$infile.truncated");

my $aln = $in->next_aln;
for my $seq ($aln->each_seq) {
  print STDERR $seq->id,"\n";
  for my $name (@to_remove) {
    if ($seq->id =~ /$name$/) {
      $aln->remove_seq($seq);
      print STDERR $seq->id, " has been removed from the alignment \n";
    }
  }
}

$out->write_aln($aln);

# fix the IDs                                                                                                                                                                     
system "perl -i -pe 's/\\/\\d+-\\d+//' $infile.truncated";

print STDERR "\nThe new alignment file has been written to $infile.truncated\n";