Code clean up
This commit is contained in:
parent
0869ffb46a
commit
854931b997
@ -2,7 +2,7 @@
|
||||
|
||||
# encoding: utf-8
|
||||
#
|
||||
# Redmine Xapian is a Redmine plugin to allow attachments searches by content.
|
||||
# Redmine plugin for Document Management System "Features"
|
||||
#
|
||||
# Copyright © 2010 Xabier Elkano
|
||||
# Copyright © 2011-18 Karel Pičman <karel.picman@kontron.com>
|
||||
@ -21,10 +21,12 @@
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
################################################################################################
|
||||
require 'optparse'
|
||||
|
||||
########################################################################################################################
|
||||
# BEGIN Configuration parameters
|
||||
# Configure the following parameters (most of them can be configured through the command line):
|
||||
################################################################################################
|
||||
########################################################################################################################
|
||||
|
||||
# Redmine installation directory
|
||||
$redmine_root = '/opt/redmine'
|
||||
@ -45,65 +47,20 @@ $dbrootpath = '/var/tmp/dmsf-index'
|
||||
$verbose = 0
|
||||
|
||||
# Define stemmed languages to index attachments Eg. [ 'english', 'italian', 'spanish' ]
|
||||
# Available languages are danish dutch english finnish french german german2 hungarian italian kraaij_pohlmann lovins
|
||||
# norwegian porter portuguese romanian russian spanish swedish turkish:
|
||||
# Available languages are danish, dutch, english, finnish, french, german, german2, hungarian, italian, kraaij_pohlmann,
|
||||
# lovins, norwegian, porter, portuguese, romanian, russian, spanish, swedish and turkish.
|
||||
$stem_langs = ['english']
|
||||
|
||||
# Temporary directory for indexing, it can be tmpfs
|
||||
$tempdir = '/tmp'
|
||||
|
||||
# Binaries for text conversion
|
||||
$pdftotext = '/usr/bin/pdftotext -enc UTF-8'
|
||||
$antiword = '/usr/bin/antiword'
|
||||
$catdoc = '/usr/bin/catdoc'
|
||||
$xls2csv = '/usr/bin/xls2csv'
|
||||
$catppt = '/usr/bin/catppt'
|
||||
$unzip = '/usr/bin/unzip -o'
|
||||
$unrtf = '/usr/bin/unrtf -t text 2>/dev/null'
|
||||
|
||||
################################################################################################
|
||||
########################################################################################################################
|
||||
# END Configuration parameters
|
||||
################################################################################################
|
||||
########################################################################################################################
|
||||
|
||||
$environment = File.join($redmine_root, 'config/environment.rb')
|
||||
$databasepath = nil
|
||||
$env = 'production'
|
||||
$retryfailed = nil
|
||||
|
||||
MIME_TYPES = {
|
||||
'application/pdf' => 'pdf',
|
||||
'application/rtf' => 'rtf',
|
||||
'application/msword' => 'doc',
|
||||
'application/vnd.ms-excel' => 'xls',
|
||||
'application/vnd.ms-powerpoint' => 'ppt,pps',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'xlsx',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pptx',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.slideshow' => 'ppsx',
|
||||
'application/vnd.oasis.opendocument.spreadsheet' => 'ods',
|
||||
'application/vnd.oasis.opendocument.text' => 'odt',
|
||||
'application/vnd.oasis.opendocument.presentation' => 'odp',
|
||||
'application/javascript' => 'js'
|
||||
}.freeze
|
||||
|
||||
FORMAT_HANDLERS = {
|
||||
'pdf' => $pdftotext,
|
||||
'doc' => $catdoc,
|
||||
'xls' => $xls2csv,
|
||||
'ppt,pps' => $catppt,
|
||||
'docx' => $unzip,
|
||||
'xlsx' => $unzip,
|
||||
'pptx' => $unzip,
|
||||
'ppsx' => $unzip,
|
||||
'ods' => $unzip,
|
||||
'odt' => $unzip,
|
||||
'odp' => $unzip,
|
||||
'rtf' => $unrtf
|
||||
}.freeze
|
||||
|
||||
require 'optparse'
|
||||
|
||||
VERSION = '0.1'
|
||||
VERSION = '0.2'.freeze
|
||||
|
||||
optparse = OptionParser.new do |opts|
|
||||
opts.banner = 'Usage: xapian_indexer.rb [OPTIONS...]'
|
||||
@ -112,10 +69,11 @@ optparse = OptionParser.new do |opts|
|
||||
opts.separator('')
|
||||
opts.separator('')
|
||||
opts.separator('Options:')
|
||||
opts.on('-s', '--stemming_lang a,b,c', Array,'Comma separated list of stemming languages for indexing') { |s| $stem_langs = s }
|
||||
opts.on('-s', '--stemming_lang a,b,c', Array,
|
||||
'Comma separated list of stemming languages for indexing') { |s| $stem_langs = s }
|
||||
opts.on('-v', '--verbose', 'verbose') {$verbose += 1}
|
||||
opts.on('-e', '--environment ENV', 'Rails ENVIRONMENT (development, testing or production), default production') { |e| $env = e}
|
||||
opts.on('-t', '--temp-dir PATH', 'Temporary directory for indexing'){ |t| $tempdir = t }
|
||||
opts.on('-e', '--environment ENV',
|
||||
'Rails ENVIRONMENT (development, testing or production), default production') { |e| $env = e}
|
||||
opts.on('-V', '--version', 'show version and exit') { puts VERSION; exit}
|
||||
opts.on('-h', '--help', 'show help and exit') { puts opts; exit }
|
||||
opts.on('-R', '--retry-failed', 'retry files which omindex failed to extract text') { $retryfailed = 1 }
|
||||
@ -130,49 +88,6 @@ optparse.parse!
|
||||
|
||||
ENV['RAILS_ENV'] = $env
|
||||
|
||||
STATUS_SUCCESS = 1
|
||||
STATUS_FAIL = -1
|
||||
|
||||
ADD_OR_UPDATE = 1
|
||||
DELETE = 0
|
||||
|
||||
class IndexingError < StandardError; end
|
||||
|
||||
def supported_mime_type(entry)
|
||||
mtype = Redmine::MimeType.of(entry)
|
||||
MIME_TYPES.include?(mtype) || Redmine::MimeType.is_type?('text', mtype)
|
||||
end
|
||||
|
||||
def convert_to_text(fpath, type)
|
||||
text = nil
|
||||
return text if !File.exist?(FORMAT_HANDLERS[type].split(' ').first)
|
||||
case type
|
||||
when 'pdf'
|
||||
text = `#{FORMAT_HANDLERS[type]} #{fpath} -`
|
||||
when /(xlsx|docx|odt|pptx)/i
|
||||
system "#{$unzip} -d #{$tempdir}/temp #{fpath} > /dev/null", :out=>'/dev/null'
|
||||
case type
|
||||
when 'xlsx'
|
||||
fout = "#{$tempdir}/temp/xl/sharedStrings.xml"
|
||||
when 'docx'
|
||||
fout = "#{$tempdir}/temp/word/document.xml"
|
||||
when 'odt'
|
||||
fout = "#{$tempdir}/temp/content.xml"
|
||||
when 'pptx'
|
||||
fout = "#{$tempdir}/temp/docProps/app.xml"
|
||||
end
|
||||
begin
|
||||
text = File.read(fout)
|
||||
FileUtils.rm_rf("#{$tempdir}/temp")
|
||||
rescue Exception => e
|
||||
log "Error: #{e.to_s} reading #{fout}", true
|
||||
end
|
||||
else
|
||||
text = `#{FORMAT_HANDLERS[type]} #{fpath}`
|
||||
end
|
||||
return text
|
||||
end
|
||||
|
||||
def log(text, error = false)
|
||||
if error
|
||||
$stderr.puts text
|
||||
@ -199,8 +114,6 @@ rescue LoadError
|
||||
exit 1
|
||||
end
|
||||
|
||||
include Rails.application.routes.url_helpers
|
||||
|
||||
log "Redmine environment [RAILS_ENV=#{$env}] correctly loaded ..."
|
||||
|
||||
# Indexing documents
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user