#!/usr/bin/perl -w
use strict;
use Bio::AlignIO;
my $usage = <<END;
The script will accept a MFA multiple sequence alignment and remove the specified sequences.
Usage: ./prune_alignment.pl aln_file species1 species2 speciesn
END
;
my $infile = shift or die $usage;
my @to_remove = @ARGV;
@to_remove == 0 ? die "No sequences to remove, it is pointless to go on!\n"
: print STDERR "I will delete @to_remove" and sleep 1;
my $in = Bio::AlignIO->new(-format => 'fasta', -file => $infile);
my $out = Bio::AlignIO->new(-format => 'fasta', -file => ">$infile.truncated");
my $aln = $in->next_aln;
for my $seq ($aln->each_seq) {
print STDERR $seq->id,"\n";
for my $name (@to_remove) {
if ($seq->id =~ /$name$/) {
$aln->remove_seq($seq);
print STDERR $seq->id, " has been removed from the alignment \n";
}
}
}
$out->write_aln($aln);
# fix the IDs
system "perl -i -pe 's/\\/\\d+-\\d+//' $infile.truncated";
print STDERR "\nThe new alignment file has been written to $infile.truncated\n";