#!/usr/bin/ruby
# frozen_string_literal: true

# doc2doku
# Takes raw antiword output and converts it, to some extent, to DokuWiki
# format (including interlacing footnotes into the text).
#
# Usage: doc2doku FILE
# The converted text is written to standard output.

# A line matching this separates the document body from the footnote section.
DELIMITER = /\-{23}\n/

# Typographic punctuation (the cp1252 "smart" characters) mapped to ASCII.
PATTERNS = {
  "—" => "---",
  "–" => "--",
  "‘" => "'",
  "’" => "'",
  "“" => '"',
  "”" => '"',
  "…" => "..."
}.freeze

# Line-classification predicates used while laying out the output.
class String
  # Truthy when the line is a DokuWiki heading (starts and ends with "=").
  def title?
    self =~ /^=.*=$/
  end

  # Truthy when the line is a DokuWiki quote line (starts with ">").
  def quote?
    self =~ /^>/
  end

  # True when the line contains nothing but whitespace.
  def blank?
    strip.empty?
  end

  # Truthy when the line is a DokuWiki table row (starts with "|" or "^").
  def table?
    self =~ /^[|^]/
  end
end

# Collects the footnotes listed after the first DELIMITER line.
#
# A line containing "[N] text" starts footnote N; the non-blank lines that
# follow it are continuation lines and are appended to it, separated by a
# single space.
#
# Returns a Hash of footnote number => text, or nil when the input contains
# no DELIMITER line at all.
def extract_footnotes(lines)
  footnotes = nil
  current = nil
  lines.each do |line|
    if footnotes.nil?
      footnotes = {} if line =~ DELIMITER
    elsif line =~ /\[(\d+)\]\s(.*)$/
      # "$" rather than "\n" so a final line without a newline still counts.
      current = Regexp.last_match(1).to_i
      footnotes[current] = Regexp.last_match(2)
    elsif current && !line.blank? && line !~ DELIMITER
      footnotes[current] = "#{footnotes[current]} #{line.strip}"
    end
  end
  footnotes
end

# Replaces every "[N]" reference that has a known footnote with the DokuWiki
# inline form "((footnote text))". References without a matching footnote are
# left untouched. Returns a new Array; the input lines are not modified.
def insert_footnotes(lines, footnotes)
  lines.map do |line|
    # Block form: the footnote text is inserted literally (no backslash
    # interpretation) and is not re-scanned, so a note that mentions its own
    # number cannot cause an endless loop.
    line.gsub(/\[(\d{1,3})\]/) do |reference|
      note = footnotes[Regexp.last_match(1).to_i]
      note ? "((#{note}))" : reference
    end
  end
end

# Returns a copy of +line+ with every PATTERNS key replaced by its ASCII value.
def fix_punctuation(line)
  PATTERNS.inject(line) { |text, (broken, fixed)| text.gsub(broken, fixed) }
end

# Turns indentation into markup: a line starting with four whitespace
# characters becomes a quote ("> text"), a line starting with three becomes a
# heading ("======text======"). Returns a new Array.
def mark_quotes_and_titles(lines)
  lines.map do |line|
    case line
    when /^\s{4}(.*)$/
      "> #{Regexp.last_match(1)}\n"
    when /^\s{3}(.*\S.*)$/
      # \S is required so a whitespace-only line (\s also matches the
      # newline) is not turned into an empty heading.
      "======#{Regexp.last_match(1)}======\n"
    else
      line
    end
  end
end

# Decides whether an empty line must be emitted between two adjacent lines.
# None is needed when either line is blank (we never need two), or when both
# are table rows, or when both are quote lines.
def needs_separator?(line, following)
  return false if line.blank? || following.blank?
  return false if line.table? && following.table?
  return false if line.quote? && following.quote?

  true
end

# Joins the lines up to (not including) the first DELIMITER line into the
# final text, inserting separator lines where needs_separator? asks for them.
def render(lines)
  output = []
  lines.each_with_index do |line, index|
    break if line =~ DELIMITER

    output << (line.end_with?("\n") ? line : "#{line}\n")
    following = lines[index + 1]
    output << "\n" if following && needs_separator?(line, following)
  end
  output.join
end

# Converts antiword output (an Array of lines) to DokuWiki text.
#
# lines     - Array of Strings, one per input line, newline included.
# footnotes - Hash of footnote number => text, or nil for "no footnote
#             section"; extracted from +lines+ when omitted.
#
# Returns the converted document as a single String.
def doc2doku(lines, footnotes = extract_footnotes(lines))
  lines = insert_footnotes(lines, footnotes) if footnotes
  lines = lines.map { |line| fix_punctuation(line) }
  render(mark_quotes_and_titles(lines))
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.empty?
    warn "usage: #{File.basename($PROGRAM_NAME)} FILE"
  else
    input = File.readlines(ARGV[0])
    footnotes = extract_footnotes(input)
    # Diagnostics go to stderr so they never end up inside the wiki text.
    warn "No footnotes found." unless footnotes
    print doc2doku(input, footnotes)
  end
end