package Bio::FastaGrep; =pod =head1 NAME Bio::FastaGrep =head1 SYNOPSIS use Bio::FastaGrep qw/fastagrep/; foreach (fastagrep( qr/\bIPI00580177\b/, '~/data-ext/resources/fasta/FF.ipi.CHICK.*.fasta')){ my ($f,$i,$h,$s) = @$_; # file, index, header, sequence print "$f:$i:$h\n$s\n"; } =head1 DESCRIPTION Bio::FastaGrep does a grep-link function on fasta files. There are a few simple options that you can supply, along with a pattern and a list of files/fileglobs. The module exports nothing by default but can export fastagrep. Usage of fastagrep is: =over 4 =item fastagrep @options, $pattern, @filepatterns =back Options are described below. Pattern is a perl regular expression. Filepatterns is one or more strings to give to perl's glob function. =head1 OPTIONS =over 4 =item -S | -H | -A =back The default is -H, which means that only the header of each record is evaluated with the supplied pattern. -S means that the sequence is evaluated and -A mean all is evaluated. =over 4 =item -n => $char =back The -n option allows the user to specify a difference newline (default is "\\n"). This is quite important! You can specify 'n', 'r' or 'nr', leaving out the backslashes if you like as fastagrep will correct this automatically. =head1 AUTHOR Jimi-Carlo Bukowski-Wills =head1 SEE ALSO File::Grep Bio::DB::Fasta Bio::SearchIO =cut use strict; use warnings; use IO::File; use Carp; use base 'Exporter'; our @EXPORT_OK = (qw/fastagrep/); use subs qw/fastagrep/; sub fastagrep { my $mode = '-H'; my $newline = "\n"; local $/ = $newline.'>'; while($_[0] =~ /^-\w+/){ if($_[0] eq '-n'){ shift @_; $newline = shift @_; $newline =~ s/n/\n/g; $newline =~ s/r/\r/g; } else { $mode = shift @_; } } my $pattern = shift @_; my @results = (); while(@_){ my $glob = shift @_; foreach my $fn(glob $glob){ my $fh = new IO::File($fn) or croak "Could not read $fn $!"; my $i = 0; while(<$fh>){ chomp; my ($h,$s) = split /$newline/, $_, 2; $s =~ s/\s+//gs; $h =~ s/^>// unless $i; if( ($mode eq '-H' && $h =~ /$pattern/) || ($mode eq '-S' && $s =~ /$pattern/) || ($mode eq '-A' && $h."\n".$s =~ /$pattern/) ){ push @results, [$fn,$i,$h,$s]; } $i ++; } } } return @results; }