Perl - parsowanie XML
: 27 stycznia 2013, 18:52
Witam.
Mam problem ze skryptem napisanym w języku programowania perl.
Gdy próbuję wykonać skrypt poleceniem
zwracany jest błąd:
Błąd występuje zarówno przy dużym (350 MB) jak i bardzo małym (około 250 KB) pliku map.osm.
Kod skryptu convert.pl:
Wynik polecenia:
P.S.
Skrypt służy do konwertorownia danych w formacie XML map OpenStreetMap do formatu tekstowego.
System operacyjny: Debian Squeeze i Perl5
Z góry dziękuję za pomoc.
Mam problem ze skryptem napisanym w języku programowania perl.
Gdy próbuję wykonać skrypt poleceniem
Kod: Zaznacz cały
perl convert.pl map.osm
Kod: Zaznacz cały
Out of memory!
Kod skryptu convert.pl:
Kod: Zaznacz cały
use XML::Parser;
my $Filename = shift();
# Temporary data
my (%MainAttr,$Type,%Tags, @WaySegments);
# Stats
my %AllTags;
# Stored data
my (@Nodes, @Segments, @Ways, %Stats);
# Processing stage
#----------------------------------------------
my $P = new XML::Parser(Handlers => {Start => \&DoStart, End => \&DoEnd, Char => \&DoChar});
$P->parsefile($Filename);
printf STDERR "Creating output files\n";
# Combine way data into segments
#----------------------------------------------
if(open(WAYS,">ways.txt")){
foreach my $Way (@Ways){
#printf WAYS "Way: %s,%s\n", $Way->{"segments"}, $Way->{"name"};
my @SubSegments = split(/,/,$Way->{"segments"});
$Stats{"empty ways"}++ if(scalar(@SubSegments) < 1);
printf WAYS "Copying keys: %s to segments %s\n",
join(",",keys(%$Way)),
join(",",@SubSegments);
# Each segment in a way inherits the way's attributes
foreach my $Segment(@SubSegments){
foreach my $Key(keys(%$Way)){
$Segments[$Segment]{$Key} = $Way->{$Key}
}
}
}
close WAYS;
}
# Main output (segments)
#----------------------------------------------
if(open(OSM, ">osm.txt")){
foreach my $Segment(@Segments){
my $From = $Segment->{"from"};
my $To = $Segment->{"to"};
$Stats{"segments without endpoints"}++ if($From == 0 or $To == 0);
printf OSM "%f,%f,%f,%f,%s,%s,%s\n",
$Nodes[$From]{"lat"},
$Nodes[$From]{"lon"},
$Nodes[$To]{"lat"},
$Nodes[$To]{"lon"},
$Segment->{"class"},
$Segment->{"name"},
$Segment->{"highway"};
}
close OSM;
}
# Secondary output (named points)
#----------------------------------------------
if(open(POINTS, ">points.txt")){
foreach my $Node(@Nodes){
$Stats{"Nodes with zero lat/long"}++ if($Node->{"lat"} == 0 and $Node->{"lon"} == 0);
if($Node->{"name"} || $Node->{"amenity"} || $Node->{"class"}){
printf POINTS "%f,%f,%s,%s,%s\n",
$Node->{"lat"},
$Node->{"lon"},
$Node->{"name"},
$Node->{"amenity"},
$Node->{"class"};
}
}
close POINTS;
}
# Statistics output
#----------------------------------------------
if(open(STATS, ">stats.txt")){
foreach(sort {$AllTags{$b} <=> $AllTags{$a}} keys(%AllTags)){
printf STATS "* %d %s\n", $AllTags{$_}, $_;
}
printf STATS "\n\nStats:\n";
foreach(keys(%Stats)){
printf STATS "* %d %s\n", $Stats{$_}, $_;
}
}
printf STDERR "Done\n";
exit;
# Function is called whenever an XML tag is started
#----------------------------------------------
sub DoStart()
{
my ($Expat, $Name, %Attr) = @_;
if($Name eq "node"){
undef %Tags;
%MainAttr = %Attr;
$Type = "n";
}
if($Name eq "segment"){
undef %Tags;
%MainAttr = %Attr;
$Type = "s";
}
if($Name eq "way"){
undef %Tags;
undef @WaySegments;
%MainAttr = %Attr;
$Type = "w";
}
if($Name eq "tag"){
# TODO: protect against id,from,to,lat,long,etc. being used as tags
$Tags{$Attr{"k"}} = $Attr{"v"};
$AllTags{$Attr{"k"}}++;
$Stats{"tags"}++;
}
if($Name eq "seg"){
push(@WaySegments, $Attr{"id"});
}
}
# Function is called whenever an XML tag is ended
#----------------------------------------------
sub DoEnd(){
my ($Expat, $Element) = @_;
if($Element eq "node"){
my $ID = $MainAttr{"id"};
$Nodes[$ID]{"lat"} = $MainAttr{"lat"};
$Nodes[$ID]{"lon"} = $MainAttr{"lon"};
foreach(keys(%Tags)){
$Nodes[$ID]{$_} = $Tags{$_};
}
$Stats{"named nodes"}++ if($Nodes[$ID]{"name"});
$Stats{"tagged nodes"}++ if($MainAttr{"tags"});
$Stats{"nodes"}++;
#print "Node:".join(",",keys(%Tags))."\n" if(scalar(keys(%Tags))>0);
}
if($Element eq "segment"){
my $ID = $MainAttr{"id"};
$Segments[$ID]{"from"} = $MainAttr{"from"};
$Segments[$ID]{"to"} = $MainAttr{"to"};
foreach(keys(%Tags)){
$Segments[$ID]{$_} = $Tags{$_};
}
$Stats{"tagged segments"}++ if($MainAttr{"tags"});
$Stats{"segments"}++;
}
if($Element eq "way"){
my $ID = $MainAttr{"id"};
$Ways[$ID]{"segments"} = join(",",@WaySegments);
foreach(keys(%Tags)){
$Ways[$ID]{$_} = $Tags{$_};
}
$Stats{"Ways"}++;
}
}
# Function is called whenever text is encountered in the XML file
#----------------------------------------------
sub DoChar(){
my ($Expat, $String) = @_;
}
Kod: Zaznacz cały
ulimit -a
core file size (blocks, -c) 0
data seg size (kbytes, -d) unlimited
scheduling priority (-e) 0
file size (blocks, -f) unlimited
pending signals (-i) 16382
max locked memory (kbytes, -l) 64
max memory size (kbytes, -m) unlimited
open files (-n) 1024
pipe size (512 bytes, -p) 8
POSIX message queues (bytes, -q) 819200
real-time priority (-r) 0
stack size (kbytes, -s) 8192
cpu time (seconds, -t) unlimited
max user processes (-u) unlimited
virtual memory (kbytes, -v) unlimited
file locks (-x) unlimited
Skrypt służy do konwertorownia danych w formacie XML map OpenStreetMap do formatu tekstowego.
System operacyjny: Debian Squeeze i Perl5
Z góry dziękuję za pomoc.