#!/usr/bin/env perl
# This code is written by Shantanoo Mahajan.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 4. Neither the name of the University nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

use LWP::Simple;
use Data::Dumper;
use Text::CSV;

our $VERSION = 1.00;

# parse_data($data)
#
# Extracts the lines of the <BLOCKQUOTE><PRE> ... </PRE></BLOCKQUOTE> section
# of an HTML page and strips the markup from them.
#
#   $data   - page content as one string; undef is treated as an empty page.
#   returns - reference to an array with one element per line of the section,
#             in page order. The line holding the closing marker is kept too
#             (it is an empty string when the marker stands alone on its line).
#
# NOTE(review): the opening-tag literals were missing from the patterns in the
# damaged copy of this file (it contained `s///i`, a range start that could
# only match a newline, and an anchor pattern without its opening tag). The
# `<BLOCKQUOTE><PRE>`, `<B>` and `<A ...>` literals below are reconstructed
# from the closing tags that survived -- confirm against the real page markup.
sub parse_data {
    my ($data) = (@_);
    my @ret;
    return \@ret unless defined $data;

    # Accept both LF and CRLF line endings so the end-of-line anchor on the
    # closing marker still matches pages served with "\r\n".
    my @lines = split /\r?\n/, $data;

    # Explicit per-call state instead of the `..` flip-flop operator: the
    # flip-flop keeps its state between calls of a named sub, so a page
    # without a closing marker would leak "inside the section" into the
    # next page that is parsed.
    my $in_block = 0;
    foreach my $line (@lines) {
        if (!$in_block && $line =~ m{^<BLOCKQUOTE><PRE>}) {
            $in_block = 1;
        }
        next unless $in_block;

        # Decide whether this line closes the section before the marker is
        # stripped; like `..`, this is also checked on the opening line.
        my $is_last = $line =~ m{</PRE></BLOCKQUOTE>$};

        $line =~ s{<BLOCKQUOTE><PRE>}{}i;
        $line =~ s{</PRE></BLOCKQUOTE>}{}i;
        $line =~ s{</?B>}{}gi;                      # bold markup
        $line =~ s{<A\b[^>]*>(.*?)</A>}{$1}gi;      # keep only the link text
        push @ret, $line;

        $in_block = 0 if $is_last;
    }
    return \@ret;
}
# get_details($data)
#
# Turns the list of text lines of one record into a hash.
#
#   $data   - reference to an array of lines: the first line is the name,
#             the following lines up to (not including) the first line that
#             starts with "Tel" form the address, and every remaining line
#             is read as "Field: value".
#   returns - hash reference with the keys `name`, `address` (address lines
#             joined with ", ") and one key per "Field: value" line, named
#             after the text before the first colon.
#
# The caller's array is not modified.
sub get_details {
    my ($data) = (@_);
    my @lines = @{ $data || [] };   # work on a copy
    my $ret = {};

    $ret->{name} = shift(@lines);

    # Collect address lines. Stopping when the list runs out is essential:
    # without it a record that has no "Tel" line (or no lines at all) would
    # loop forever.
    my @address;
    while (@lines && $lines[0] !~ /^Tel/) {
        push @address, shift(@lines);
    }
    $ret->{address} = join(', ', @address);

    foreach my $line (@lines) {
        # Limit of 2: split only at the first colon so values that contain
        # a colon themselves (URLs, times, ...) are kept whole.
        my ($field, $value) = split(/\s*:\s*/, $line, 2);
        next unless defined $field && length $field;   # blank line
        $ret->{$field} = $value;
    }
    return($ret);
}
# Driver: downloads the detail page for every id from 1 to $last_id, extracts
# the record with parse_data()/get_details() and prints it as one CSV row on
# standard output, preceded by a header row.

# Strictures are enabled from this point so that they cover the driver code.
use strict;
use warnings;

my $last_id  = 8353;
my $base_url = 'http://www.european-patent-office.org/cgi-bin/cgiwrap/vi00n006/reps/detail.pl.cgi?id=';

# binary => 1: names and addresses may contain non-ASCII bytes, which
# Text::CSV can otherwise refuse to combine.
my $csv = Text::CSV->new({ binary => 1 })
    or die "Cannot create Text::CSV object\n";

$csv->combine("Name","Address","Telephone","Email","Fax")
    or die "Cannot build CSV header row\n";
print $csv->string(), "\n";

for my $i (1 .. $last_id) {
    # LWP::Simple::get returns undef when the request fails.
    my $data = get($base_url . $i);
    unless (defined $data) {
        warn "id $i: download failed, skipping\n";
        next;
    }

    my $lines = parse_data($data);
    unless (@$lines) {
        warn "id $i: no record found in page, skipping\n";
        next;
    }

    my $tmp = get_details($lines);
    my $status = $csv->combine($tmp->{name},$tmp->{address},$tmp->{Tel},$tmp->{Email},$tmp->{Fax});
    unless ($status) {
        warn "id $i: could not encode record as CSV, skipping\n";
        next;
    }
    print $csv->string(), "\n";
}