From 854931b9973c459f0bd5a0fbe5f80319fa3f03aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karel=20Pi=C4=8Dman?= Date: Thu, 14 Jun 2018 12:54:51 +0200 Subject: [PATCH] Code clean up --- extra/xapian_indexer.rb | 123 ++++++---------------------------------- 1 file changed, 18 insertions(+), 105 deletions(-) diff --git a/extra/xapian_indexer.rb b/extra/xapian_indexer.rb index 3af77788..8ab2677f 100644 --- a/extra/xapian_indexer.rb +++ b/extra/xapian_indexer.rb @@ -2,7 +2,7 @@ # encoding: utf-8 # -# Redmine Xapian is a Redmine plugin to allow attachments searches by content. +# Redmine plugin for Document Management System "Features" # # Copyright © 2010 Xabier Elkano # Copyright © 2011-18 Karel Pičman @@ -21,10 +21,12 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -################################################################################################ +require 'optparse' + +######################################################################################################################## # BEGIN Configuration parameters # Configure the following parameters (most of them can be configured through the command line): -################################################################################################ +######################################################################################################################## # Redmine installation directory $redmine_root = '/opt/redmine' @@ -33,77 +35,32 @@ $redmine_root = '/opt/redmine' $files = 'dmsf' # scriptindex binary path -$scriptindex = '/usr/bin/scriptindex' +$scriptindex = '/usr/bin/scriptindex' # omindex binary path -$omindex = '/usr/bin/omindex' +$omindex = '/usr/bin/omindex' # Directory containing xapian databases for omindex (Attachments indexing) -$dbrootpath = '/var/tmp/dmsf-index' +$dbrootpath = '/var/tmp/dmsf-index' # Verbose output, values of 0 no verbose, greater than 0 verbose output -$verbose = 0 +$verbose = 0 # Define stemmed languages to index attachments Eg. [ 'english', 'italian', 'spanish' ] -# Available languages are danish dutch english finnish french german german2 hungarian italian kraaij_pohlmann lovins -# norwegian porter portuguese romanian russian spanish swedish turkish: +# Available languages are danish, dutch, english, finnish, french, german, german2, hungarian, italian, kraaij_pohlmann, +# lovins, norwegian, porter, portuguese, romanian, russian, spanish, swedish and turkish. $stem_langs = ['english'] -# Temporary directory for indexing, it can be tmpfs -$tempdir = '/tmp' - -# Binaries for text conversion -$pdftotext = '/usr/bin/pdftotext -enc UTF-8' -$antiword = '/usr/bin/antiword' -$catdoc = '/usr/bin/catdoc' -$xls2csv = '/usr/bin/xls2csv' -$catppt = '/usr/bin/catppt' -$unzip = '/usr/bin/unzip -o' -$unrtf = '/usr/bin/unrtf -t text 2>/dev/null' - -################################################################################################ +######################################################################################################################## # END Configuration parameters -################################################################################################ +######################################################################################################################## $environment = File.join($redmine_root, 'config/environment.rb') $databasepath = nil $env = 'production' $retryfailed = nil -MIME_TYPES = { - 'application/pdf' => 'pdf', - 'application/rtf' => 'rtf', - 'application/msword' => 'doc', - 'application/vnd.ms-excel' => 'xls', - 'application/vnd.ms-powerpoint' => 'ppt,pps', - 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx', - 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'xlsx', - 'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pptx', - 'application/vnd.openxmlformats-officedocument.presentationml.slideshow' => 'ppsx', - 'application/vnd.oasis.opendocument.spreadsheet' => 'ods', - 'application/vnd.oasis.opendocument.text' => 'odt', - 'application/vnd.oasis.opendocument.presentation' => 'odp', - 'application/javascript' => 'js' -}.freeze - -FORMAT_HANDLERS = { - 'pdf' => $pdftotext, - 'doc' => $catdoc, - 'xls' => $xls2csv, - 'ppt,pps' => $catppt, - 'docx' => $unzip, - 'xlsx' => $unzip, - 'pptx' => $unzip, - 'ppsx' => $unzip, - 'ods' => $unzip, - 'odt' => $unzip, - 'odp' => $unzip, - 'rtf' => $unrtf -}.freeze - -require 'optparse' - -VERSION = '0.1' +VERSION = '0.2'.freeze optparse = OptionParser.new do |opts| opts.banner = 'Usage: xapian_indexer.rb [OPTIONS...]' @@ -112,10 +69,11 @@ optparse = OptionParser.new do |opts| opts.separator('') opts.separator('') opts.separator('Options:') - opts.on('-s', '--stemming_lang a,b,c', Array,'Comma separated list of stemming languages for indexing') { |s| $stem_langs = s } + opts.on('-s', '--stemming_lang a,b,c', Array, + 'Comma separated list of stemming languages for indexing') { |s| $stem_langs = s } opts.on('-v', '--verbose', 'verbose') {$verbose += 1} - opts.on('-e', '--environment ENV', 'Rails ENVIRONMENT (development, testing or production), default production') { |e| $env = e} - opts.on('-t', '--temp-dir PATH', 'Temporary directory for indexing'){ |t| $tempdir = t } + opts.on('-e', '--environment ENV', + 'Rails ENVIRONMENT (development, testing or production), default production') { |e| $env = e} opts.on('-V', '--version', 'show version and exit') { puts VERSION; exit} opts.on('-h', '--help', 'show help and exit') { puts opts; exit } opts.on('-R', '--retry-failed', 'retry files which omindex failed to extract text') { $retryfailed = 1 } @@ -130,49 +88,6 @@ optparse.parse! ENV['RAILS_ENV'] = $env -STATUS_SUCCESS = 1 -STATUS_FAIL = -1 - -ADD_OR_UPDATE = 1 -DELETE = 0 - -class IndexingError < StandardError; end - -def supported_mime_type(entry) - mtype = Redmine::MimeType.of(entry) - MIME_TYPES.include?(mtype) || Redmine::MimeType.is_type?('text', mtype) -end - -def convert_to_text(fpath, type) - text = nil - return text if !File.exist?(FORMAT_HANDLERS[type].split(' ').first) - case type - when 'pdf' - text = `#{FORMAT_HANDLERS[type]} #{fpath} -` - when /(xlsx|docx|odt|pptx)/i - system "#{$unzip} -d #{$tempdir}/temp #{fpath} > /dev/null", :out=>'/dev/null' - case type - when 'xlsx' - fout = "#{$tempdir}/temp/xl/sharedStrings.xml" - when 'docx' - fout = "#{$tempdir}/temp/word/document.xml" - when 'odt' - fout = "#{$tempdir}/temp/content.xml" - when 'pptx' - fout = "#{$tempdir}/temp/docProps/app.xml" - end - begin - text = File.read(fout) - FileUtils.rm_rf("#{$tempdir}/temp") - rescue Exception => e - log "Error: #{e.to_s} reading #{fout}", true - end - else - text = `#{FORMAT_HANDLERS[type]} #{fpath}` - end - return text -end - def log(text, error = false) if error $stderr.puts text @@ -199,8 +114,6 @@ rescue LoadError exit 1 end -include Rails.application.routes.url_helpers - log "Redmine environment [RAILS_ENV=#{$env}] correctly loaded ..." # Indexing documents