#!/usr/bin/perl
##
# dump_wordpress.pl - dump wordpress db -> multimarkdown files
##
# Copyright (C) 2016 by attila
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
# PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
##
use strict;
use warnings;
use Getopt::Std;
use IO::File;
use DBI;

our $VERBOSE = 0;
our $VERSION = '0.1.0';

# c.f. Getopt::Std pod
$Getopt::Std::STANDARD_HELP_VERSION = 1;

# invoked by Getopt::Std for --version
sub VERSION_MESSAGE {
	print STDERR qq|dump_wordpress.pl v.$VERSION\n|;
}

# invoked by Getopt::Std for --help, and by usage() below
sub HELP_MESSAGE {
	print STDERR <<__HeLP__;
purpose: dump the posts in a wordpress db as multimarkdown files
usage: dump_wordpress.pl [-v] [-d db] [-D dsn] [-T table] [-u user] [-p pass] outdir
bool opts:
    -v          verbose
opts with args:
    -d dbname   database name (default: wordpress)
    -D dsn      full DBI data source (default: dbi:mysql:dbname=<dbname>)
    -T table    posts table (default: wp_posts)
    -u user     database user
    -p pass     database password
usage notes:
    One file named <post_name>.md is written into outdir per post;
    outdir is created if it does not exist.
__HeLP__
}

# print an optional error plus the help text, then exit
# (status 1 if an error message was given, 0 otherwise)
sub usage {
	print STDERR "$0: ERROR: @_\n" if @_;
	HELP_MESSAGE();
	exit(@_ ? 1 : 0);
}

# emit a diagnostic on stderr when -v was given
sub debug {
	warn("# @_\n") if $VERBOSE;
}

# state shared between the subs below and the main program
our %opts;
our $outdir;				# directory the .md files go in
our $dbh;				# DBI database handle
our %posts;				# post id -> output name already used
our $rels_table = 'wp_term_relationships';
our $taxo_table = 'wp_term_taxonomy';
our $terms_table = 'wp_terms';

# suck tags for a post id# out of mysql
#
# Returns the sorted, de-duplicated list of term names attached to
# the post through a taxonomy row of type 'post_tag'.  One joined
# query replaces the former query-per-row walk over the three tables.
sub get_tags {
	my($id) = @_;
	my $sql = qq{select t.name from ${rels_table} r }.
	    qq{join ${taxo_table} x }.
	    q{on x.term_taxonomy_id = r.term_taxonomy_id }.
	    qq{join ${terms_table} t on t.term_id = x.term_id }.
	    q{where r.object_id=? and x.taxonomy='post_tag'};
	# prepare_cached: this runs once per post, prepare only once
	my $stmt = $dbh->prepare_cached($sql)
	    or die "prepare($sql): $DBI::errstr";
	# method calls do not interpolate in strings, so concatenate
	$stmt->execute($id)
	    or die "execute($sql,id=$id): " . $stmt->errstr;
	my %tags;
	while (defined(my $r = $stmt->fetchrow_arrayref())) {
		next unless defined($r->[0]);
		$tags{$r->[0]}++;
	}
	$stmt->finish();
	return sort(keys(%tags));
}

# mung vaguely htmlish content into markdown where possible
#
# NOTE(review): the HTML tag patterns below were reconstructed from a
# copy of this file in which the markup inside the regexes had been
# stripped; confirm them against the upstream source.
sub markdownify {
	my($html) = @_;
	my $md = defined($html) ? $html : '';
	$md =~ s,\r,\n,gs;
	# <hN>title</hN> -> N hashes on either side of the title
	$md =~ s{<h([1-6])[^>]*>(.*?)</h\1>}
		{("#" x $1) . " $2 " . ("#" x $1)}gsei;
	# <a href="url">text</a> -> [text](url)
	$md =~ s{<a\s+href=(["'])(.*?)\1[^>]*>(.*?)</a>}
		{'[' . $3 . '](' . $2 . ')'}gsei;
	$md =~ s{<code>(.*?)</code>}{`$1`}gsi;
	# opening (with or without attributes) and closing pre tags
	$md =~ s{</?pre(?:\s[^>]*)?>}{```}gsi;
	$md =~ s{<p>}{}gsi;
	$md =~ s{</p>}{\n}gsi;
	$md =~ s{</?(?:i|em)>}{*}gsi;
	$md =~ s{</?(?:b|strong)>}{**}gsi;
	$md =~ s{&nbsp;}{ }gsi;
	$md =~ s{&gt;}{>}gsi;
	$md =~ s{&lt;}{<}gsi;
	# &amp; goes last so that "&amp;lt;" ends up as "&lt;", not "<"
	$md =~ s{&amp;}{&}gsi;
	return $md;
}

# strip all leading and trailing whitespace
sub strip {
	my($str) = @_;
	$str = '' unless defined($str);
	$str =~ s/\A\s+|\s+\z//g;
	return $str;
}

# process a single post
#
# $data is one row of the posts table as a hash ref.  Writes
# "$outdir/<name>.md": a block of "Key: value" metadata lines, a
# blank line, then the post content run through markdownify().
sub handle_post {
	my($data) = @_;
	unless (-d $outdir) {
		mkdir($outdir) or die "mkdir($outdir): $!";
	}
	my $id = int($data->{'ID'});
	my @tags = get_tags($id);
	my $name = strip($data->{'post_name'});
	# an empty name would give ".md", shared by every such post
	$name = "post-$id" if $name eq '';
	my $outfile;
	if ($name =~ /^(\d+)-revision$/) {
		my $parent = $1;
		if (!exists($posts{$parent})) {
			warn("#$id '$name' refers to nonexistent #$parent\n");
		} else {
			# name revisions after their parent post, taking
			# the first unused <parent>-revision-N
			my $base = $posts{$parent} . '-revision';
			my $n = 0;
			do {
				++$n;
				$name = sprintf(q{%s-%d},$base,$n);
				$outfile = join('/',$outdir,"${name}.md");
			} while (-f $outfile);
		}
	}
	$outfile ||= join('/',$outdir,"${name}.md");
	# remember the name used for this id: rows arrive in ascending ID
	# order, so a later revision row can look its parent up here
	$posts{$id} = $name;
	debug("writing post id $id to $outfile");
	# explicit mode argument: the file name is never parsed for one
	my $fh = IO::File->new($outfile,'w') or die "$outfile: $!";
	# [ column, metadata key, sprintf format ], in output order
	my @meta = (
		[ 'post_title',		'Title',		'%s' ],
		[ 'post_name',		'name',			'%s' ],
		[ 'post_date',		'Date',			'%s' ],
		[ 'post_modified',	'Edit',			'%s' ],
		[ 'ID',			'Wordpress ID',		'%d' ],
		[ 'post_status',	'Wordpress Status',	'%s' ],
	);
	my $out = '';
	for my $m (@meta) {
		my($key,$label,$fmt) = @$m;
		my $val = defined($data->{$key}) ? $data->{$key} : '';
		$out .= sprintf(qq{$label: $fmt\n},$val);
	}
	$out .= sprintf(qq{Tags: %s\n},join(",",@tags)) if @tags;
	$out .= "\n";
	$out .= markdownify($data->{'post_content'})."\n";
	$fh->print($out) or die "write($outfile): $!";
	# buffered write errors only show up at close time
	$fh->close() or die "close($outfile): $!";
}

getopts('vd:D:T:u:p:', \%opts) or usage('invalid options');
usage('no directory given') unless @ARGV;
$outdir = shift(@ARGV);
usage('too many arguments given') if @ARGV;

$VERBOSE = $opts{'v'} || 0;
my $dbname = $opts{'d'} || 'wordpress';
my $dsn = $opts{'D'} || qq{dbi:mysql:dbname=$dbname};
my $table = $opts{'T'} || 'wp_posts';
my $user = $opts{'u'};
my $pass = $opts{'p'};
# -u is optional; keep undef out of the messages below
my $who = defined($user) ? $user : '(no user given)';

# RaiseError makes DBI die on failure by itself; PrintError is off so
# each error is not reported twice
$dbh = DBI->connect($dsn, $user, $pass,
		    { RaiseError => 1, PrintError => 0 })
    or die "could not connect to db at $dsn as $who: $DBI::errstr";
debug("connected to $dsn as $who");

my $posts_sql = qq{select * from $table order by ID asc};
my $posts_q = $dbh->prepare($posts_sql)
    or die "prepare '$posts_sql': $DBI::errstr";
debug("prepared: $posts_sql");
$posts_q->execute()
    or die "execute '$posts_sql': " . $posts_q->errstr;
while (defined(my $row = $posts_q->fetchrow_hashref())) {
	handle_post($row);
}
$posts_q->finish();
$dbh->disconnect();
exit(0);

##
# Local variables:
# mode: perl
# tab-width: 8
# perl-indent-level: 8
# perl-continued-statement-offset: 4
# indent-tabs-mode: t
# comment-column: 40
# End:
##