Fulltext search improvement
This commit is contained in:
parent
33c3f8df11
commit
2e8b51bb37
12
README.md
12
README.md
@ -162,17 +162,18 @@ Before installing ensure that the Redmine instance is stopped.
|
||||
### Fulltext search (optional)
|
||||
If you want to use fulltext search features, you must setup file content indexing.
|
||||
|
||||
It is necessary to index DMSF files with omega before searching attemts to recieve some output:
|
||||
It is necessary to index DMSF files with omega before searching attempts to receive some output:
|
||||
|
||||
omindex -s english --url / --db {path to index database from configuration} {path to storage from configuration}
|
||||
1. Change the configuration part of redmine_dmsf/extra/xapian_indexer.rb file according to your environment.
|
||||
2. Run `ruby redmine_dmsf/extra/xapian_indexer.rb -f`
|
||||
|
||||
This command must be run on regular basis (e.g. from cron)
|
||||
|
||||
Example of cron job (once per hour at 8th minute):
|
||||
|
||||
8 * * * * root /usr/bin/omindex -s english --url / --db /var/tmp/dmsf_index /opt/redmine/files/dmsf
|
||||
`8 * * * * root /usr/bin/ruby redmine_dmsf/extra/xapian_indexer.rb -f`
|
||||
|
||||
Use omindex -h for help.
|
||||
See redmine_dmsf/extra/xapian_indexer.rb for help.
|
||||
|
||||
Uninstalling DMSF
|
||||
-----------------
|
||||
@ -187,7 +188,8 @@ After these steps re-start your instance of Redmine.
|
||||
Contributing
|
||||
------------
|
||||
|
||||
If you've added something, why not share it. Fork the repository (github.com/danmunn/redmine_dmsf), make the changes and send a pull request to the maintainers.
|
||||
If you've added something, why not share it. Fork the repository (github.com/danmunn/redmine_dmsf),
|
||||
make the changes and send a pull request to the maintainers.
|
||||
|
||||
Changes with tests, and full documentation are preferred.
|
||||
|
||||
|
||||
@ -29,6 +29,8 @@ class DmsfFile < ActiveRecord::Base
|
||||
unloadable
|
||||
|
||||
include RedmineDmsf::Lockable
|
||||
|
||||
attr_accessor :event_description
|
||||
|
||||
belongs_to :project
|
||||
belongs_to :folder, :class_name => 'DmsfFolder', :foreign_key => 'dmsf_folder_id'
|
||||
@ -295,19 +297,23 @@ class DmsfFile < ActiveRecord::Base
|
||||
end
|
||||
|
||||
if !options[:titles_only] && $xapian_bindings_available
|
||||
database = nil
|
||||
begin
|
||||
database = Xapian::Database.new(Setting.plugin_redmine_dmsf['dmsf_index_database'].strip)
|
||||
rescue
|
||||
database = nil
|
||||
begin
|
||||
lang = Setting.plugin_redmine_dmsf['dmsf_stemming_lang'].strip
|
||||
databasepath = File.join(
|
||||
Setting.plugin_redmine_dmsf['dmsf_index_database'].strip, lang)
|
||||
database = Xapian::Database.new(databasepath)
|
||||
rescue Exception => e
|
||||
Rails.logger.warn 'REDMAIN_XAPIAN ERROR: Xapian database is not properly set or initiated or is corrupted.'
|
||||
Rails.logger.warn e.message
|
||||
end
|
||||
|
||||
unless database.nil?
|
||||
if database
|
||||
enquire = Xapian::Enquire.new(database)
|
||||
|
||||
query_string = tokens.join(' ')
|
||||
qp = Xapian::QueryParser.new()
|
||||
stemmer = Xapian::Stem.new(Setting.plugin_redmine_dmsf['dmsf_stemming_lang'].strip)
|
||||
stemmer = Xapian::Stem.new(lang)
|
||||
qp.stemmer = stemmer
|
||||
qp.database = database
|
||||
|
||||
@ -372,6 +378,7 @@ class DmsfFile < ActiveRecord::Base
|
||||
end
|
||||
|
||||
if (allowed && project_included)
|
||||
dmsf_file.event_description = dochash['sample']
|
||||
results.push(dmsf_file)
|
||||
results_count += 1
|
||||
end
|
||||
@ -383,7 +390,7 @@ class DmsfFile < ActiveRecord::Base
|
||||
end
|
||||
|
||||
[results, results_count]
|
||||
end
|
||||
end
|
||||
|
||||
def display_name
|
||||
if self.name.length > 50
|
||||
|
||||
@ -362,4 +362,8 @@ tr.gray .icon-file.application-x-gzip { background-image: url(../images/filetype
|
||||
label.required:after {
|
||||
content: " *";
|
||||
color: #bb0000;
|
||||
}
|
||||
}
|
||||
|
||||
/* Search results */
|
||||
dt.dmsf-file { background-image: url(../../../images/document.png); }
|
||||
dt.dmsf-folder { background-image: url(../../../images/folder.png); }
|
||||
588
extra/xapian_indexer.rb
Normal file
588
extra/xapian_indexer.rb
Normal file
@ -0,0 +1,588 @@
|
||||
#!/usr/bin/ruby -W0
|
||||
# encoding: UTF-8
|
||||
|
||||
# Copied from https://github.com/xelkano/redmine_xapian
|
||||
|
||||
################################################################################################
|
||||
# BEGIN Configuration parameters
|
||||
# Configure the following parameters (most of them can be configured through the command line):
|
||||
################################################################################################
|
||||
|
||||
# Redmine installation directory
|
||||
$redmine_root = "/var/www/redmine"
|
||||
|
||||
# DMSF document location $redmine_root/$dmsf_root
|
||||
$dmsf_root = 'files/dmsf'
|
||||
|
||||
# scriptindex binary path
|
||||
$scriptindex = "/usr/bin/scriptindex"
|
||||
|
||||
# omindex binary path
|
||||
$omindex = "/usr/bin/omindex"
|
||||
|
||||
# Directory containing xapian databases for omindex (Attachments indexing)
|
||||
$dbrootpath = "/var/tmp/dmsf-index"
|
||||
|
||||
# Verbose output, values of 0 no verbose, greater than 0 verbose output
|
||||
$verbose = 0
|
||||
|
||||
# Define stemmed languages to index attachments Ej [ 'english', 'italian', 'spanish' ]
|
||||
# Repository database will be always indexed in english
|
||||
# Available languages are danish dutch english finnish french german german2 hungarian italian kraaij_pohlmann lovins norwegian porter portuguese romanian russian spanish swedish turkish:
|
||||
$stem_langs = ['english']
|
||||
|
||||
#Project identifiers that will be indexed Ej [ 'prj_id1', 'prj_id2' ]
|
||||
$projects = [ 'prj_id1', 'prj_id2' ]
|
||||
|
||||
# Temporary directory for indexing, it can be tmpfs
|
||||
$tempdir = "/tmp"
|
||||
|
||||
# Binaries for text conversion
|
||||
$pdftotext = "/usr/bin/pdftotext -enc UTF-8"
|
||||
$antiword = "/usr/bin/antiword"
|
||||
$catdoc = "/usr/bin/catdoc"
|
||||
$xls2csv = "/usr/bin/xls2csv"
|
||||
$catppt = "/usr/bin/catppt"
|
||||
$unzip = "/usr/bin/unzip -o"
|
||||
$unrtf = "/usr/bin/unrtf -t text 2>/dev/null"
|
||||
|
||||
|
||||
################################################################################################
|
||||
# END Configuration parameters
|
||||
################################################################################################
|
||||
|
||||
$environment = File.join($redmine_root, "config/environment.rb")
|
||||
$project=nil
|
||||
$databasepath=nil
|
||||
$repositories=nil
|
||||
$onlyfiles=nil
|
||||
$onlyrepos=nil
|
||||
$env='production'
|
||||
$userch=nil
|
||||
$resetlog=nil
|
||||
|
||||
MIME_TYPES = {
|
||||
'application/pdf' => 'pdf',
|
||||
'application/rtf' => 'rtf',
|
||||
'application/msword' => 'doc',
|
||||
'application/vnd.ms-excel' => 'xls',
|
||||
'application/vnd.ms-powerpoint' => 'ppt,pps',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' => 'xlsx',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation' => 'pptx',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.slideshow' => 'ppsx',
|
||||
'application/vnd.oasis.opendocument.spreadsheet' => 'ods',
|
||||
'application/vnd.oasis.opendocument.text' => 'odt',
|
||||
'application/vnd.oasis.opendocument.presentation' => 'odp'
|
||||
}.freeze
|
||||
|
||||
|
||||
FORMAT_HANDLERS = {
|
||||
'pdf' => $pdftotext,
|
||||
'doc' => $catdoc,
|
||||
'xls' => $xls2csv,
|
||||
'ppt,pps' => $catppt,
|
||||
'docx' => $unzip,
|
||||
'xlsx' => $unzip,
|
||||
'pptx' => $unzip,
|
||||
'ppsx' => $unzip,
|
||||
'ods' => $unzip,
|
||||
'odt' => $unzip,
|
||||
'odp' => $unzip,
|
||||
'rtf' => $unrtf
|
||||
}.freeze
|
||||
|
||||
require 'optparse'
|
||||
|
||||
|
||||
VERSION = "0.1"
|
||||
SUPPORTED_SCM = %w( Subversion Darcs Mercurial Bazaar Git Filesystem )
|
||||
|
||||
|
||||
optparse = OptionParser.new do |opts|
|
||||
opts.banner = "Usage: xapian_indexer.rb [OPTIONS...]"
|
||||
opts.separator("")
|
||||
opts.separator("Index redmine files and repositories")
|
||||
opts.separator("")
|
||||
#opts.on("-k", "--key KEY", "use KEY as the Redmine repository key") { |k| $key = k}
|
||||
opts.separator("")
|
||||
opts.separator("Options:")
|
||||
opts.on("-p", "--projects a,b,c",Array, "Comma separated list of projects to index") { |p| $projects=p }
|
||||
opts.on("-s", "--stemming_lang a,b,c",Array,"Comma separated list of stemming languages for indexing") { |s| $stem_langs=s }
|
||||
#opts.on("-t", "--test", "only show what should be done") {$test = true}
|
||||
#opts.on("-f", "--force", "force reindex") {$force = true}
|
||||
opts.on("-v", "--verbose", "verbose") {$verbose += 1}
|
||||
opts.on("-f", "--files", "Only index Redmine attachments") {$onlyfiles = 1}
|
||||
opts.on("-r", "--repositories", "Only index Redmine repositories") {$onlyrepos = 1}
|
||||
opts.on("-e", "--environment ENV", "Rails ENVIRONMENT (development, testing or production), default production") {|e| $env = e}
|
||||
opts.on("-t", "--temp-dir PATH", "Temporary directory for indexing"){ |t| $tempdir = t }
|
||||
#opts.on("-c", "--revision REV", "Use revision as base"){ |c| $userch = c }
|
||||
opts.on("-x", "--resetlog", "Reset index log"){ $resetlog = 1 }
|
||||
opts.on("-V", "--version", "show version and exit") {puts VERSION; exit}
|
||||
opts.on("-h", "--help", "show help and exit") {puts opts; exit }
|
||||
#opts.on("-q", "--quiet", "no log") {$quiet = true}
|
||||
opts.separator("")
|
||||
opts.separator("Examples:")
|
||||
opts.separator(" xapian_indexer.rb -f -s english,italian -v")
|
||||
opts.separator(" xapian_indexer.rb -p project_id -x -t /tmpfs -v")
|
||||
opts.separator("")
|
||||
|
||||
opts.summary_width = 25
|
||||
end
|
||||
optparse.parse!
|
||||
|
||||
ENV['RAILS_ENV'] = $env
|
||||
|
||||
|
||||
STATUS_SUCCESS = 1
|
||||
STATUS_FAIL = -1
|
||||
|
||||
ADD_OR_UPDATE = 1
|
||||
DELETE = 0
|
||||
|
||||
MAIN_REPOSITORY_IDENTIFIER = 'main'
|
||||
|
||||
|
||||
|
||||
class IndexingError < StandardError; end
|
||||
|
||||
|
||||
#@rwdb = Xapian::WritableDatabase.new(databasepath, Xapian::DB_CREATE_OR_OVERWRITE)
|
||||
|
||||
|
||||
def indexing(repository)
|
||||
$repository=repository
|
||||
Rails.logger.info("Fetch changesets: %s - %s" % [$project.name,(repository.identifier or MAIN_REPOSITORY_IDENTIFIER)])
|
||||
log("- Fetch changesets: #{$project.name} - #{$repository.identifier}", :level=>1)
|
||||
$repository.fetch_changesets
|
||||
$repository.reload.changesets.reload
|
||||
|
||||
latest_changeset = $repository.changesets.find(:first)
|
||||
return if not latest_changeset
|
||||
|
||||
Rails.logger.debug("Latest revision: %s - %s - %s" % [
|
||||
$project.name,
|
||||
($repository.identifier or MAIN_REPOSITORY_IDENTIFIER),
|
||||
latest_changeset.revision])
|
||||
#latest_indexed = Indexinglog.find_by_repository_id_and_status( repository.id, STATUS_SUCCESS, :first)
|
||||
latest_indexed = Indexinglog.where("repository_id=#{$repository.id} AND status=#{STATUS_SUCCESS}").last
|
||||
Rails.logger.debug "Debug latest_indexed " + latest_indexed.inspect
|
||||
begin
|
||||
$indexconf = Tempfile.new( "index.conf", $tempdir )
|
||||
$indexconf.write "url : field boolean=Q unique=Q\n"
|
||||
$indexconf.write "body : index truncate=400 field=sample\n"
|
||||
$indexconf.write "date: field=date\n"
|
||||
$indexconf.close
|
||||
if not latest_indexed
|
||||
Rails.logger.debug "DEBUG: repo #{$repository.identifier} not indexed, indexing all"
|
||||
log("\t>repo #{$repository.identifier} not indexed, indexing all", :level=>1)
|
||||
indexing_all($repository)
|
||||
else
|
||||
Rails.logger.debug "DEBUG: repo #{$repository.identifier} indexed, indexing diff"
|
||||
log("\t>repo #{$repository.identifier} already indexed, indexing only diff", :level=>1)
|
||||
indexing_diff($repository, latest_indexed.changeset, latest_changeset)
|
||||
end
|
||||
$indexconf.unlink
|
||||
rescue IndexingError => e
|
||||
add_log($repository, latest_changeset, STATUS_FAIL, e.message)
|
||||
else
|
||||
add_log($repository, latest_changeset, STATUS_SUCCESS)
|
||||
Rails.logger.info("Successfully indexed: %s - %s - %s" % [
|
||||
$project.name,
|
||||
($repository.identifier or MAIN_REPOSITORY_IDENTIFIER),
|
||||
latest_changeset.revision])
|
||||
end
|
||||
end
|
||||
|
||||
def supported_mime_type(entry)
|
||||
mtype=Redmine::MimeType.of(entry)
|
||||
included = false
|
||||
included = MIME_TYPES.include?(mtype) || mtype.split('/').first.eql?("text") unless mtype.nil?
|
||||
#puts "Mtype for #{entry} is #{mtype} returning: #{included.inspect}"
|
||||
return included
|
||||
end
|
||||
|
||||
def add_log(repository, changeset, status, message=nil)
|
||||
log = Indexinglog.where("repository_id=#{repository.id}").last
|
||||
if not log
|
||||
log = Indexinglog.new
|
||||
log.repository = repository
|
||||
log.changeset = changeset
|
||||
log.status = status
|
||||
log.message = message if message
|
||||
log.save!
|
||||
Rails.logger.info("New log for repo %s saved!"% [repository.identifier] )
|
||||
log("\t>New log for repo #{repository.identifier} saved!", :level=>1)
|
||||
else
|
||||
log.changeset_id=changeset.id
|
||||
log.status=status
|
||||
log.message = message if message
|
||||
log.save!
|
||||
Rails.logger.info("Log for repo %s updated!"% [repository.identifier] )
|
||||
log("\t>Log for repo #{repository.identifier} updated!", :level=>1)
|
||||
end
|
||||
end
|
||||
|
||||
def update_log(repository, changeset, status, message=nil)
|
||||
log = Indexinglog.where("repository_id=#{repository.id}").last
|
||||
if log
|
||||
log.changeset_id=changeset.id
|
||||
log.status=status if status
|
||||
log.message = message if message
|
||||
log.save!
|
||||
Rails.logger.info("Log for repo %s updated!"% [repository.identifier] )
|
||||
log("\t>Log for repo #{repository.identifier} updated!", :level=>1)
|
||||
end
|
||||
end
|
||||
|
||||
def delete_log(repository)
|
||||
Indexinglog.delete_all("repository_id=#{repository.id}")
|
||||
Rails.logger.info("Log for repo %s removed!"% [repository.identifier] )
|
||||
log("\t>Log for repo #{repository.identifier} removed!", :level=>1)
|
||||
end
|
||||
|
||||
|
||||
def indexing_all(repository)
|
||||
def walk(repository, identifier, entries)
|
||||
Rails.logger.debug "DEBUG: walk entries size: " + entries.size.inspect
|
||||
return if entries.size < 1
|
||||
entries.each do |entry|
|
||||
Rails.logger.debug "DEBUG: walking into: " + entry.lastrev.time.inspect
|
||||
if entry.is_dir?
|
||||
walk(repository, identifier, repository.entries(entry.path, identifier))
|
||||
elsif entry.is_file?
|
||||
add_or_update_index(repository, identifier, entry.path, entry.lastrev, ADD_OR_UPDATE, MIME_TYPES[Redmine::MimeType.of(entry.path)] ) if supported_mime_type(entry.path)
|
||||
#if !Redmine::MimeType.is_type?('text', entry.path) then
|
||||
# mtype=Redmine::MimeType.of(entry.path)
|
||||
# log("Non text entry #{mtype.inspect} #{entry.path}", :level=>1)
|
||||
#end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Rails.logger.info("Indexing all: %s" % [
|
||||
(repository.identifier or MAIN_REPOSITORY_IDENTIFIER)])
|
||||
if repository.branches
|
||||
repository.branches.each do |branch|
|
||||
Rails.logger.debug("Walking in branch: %s - %s" % [
|
||||
(repository.identifier or MAIN_REPOSITORY_IDENTIFIER), branch])
|
||||
walk(repository, branch, repository.entries(nil, branch))
|
||||
end
|
||||
else
|
||||
Rails.logger.debug("Walking in branch: %s - %s" % [
|
||||
(repository.identifier or MAIN_REPOSITORY_IDENTIFIER), "[NOBRANCH]"])
|
||||
walk(repository, nil, repository.entries(nil, nil))
|
||||
end
|
||||
if repository.tags
|
||||
repository.tags.each do |tag|
|
||||
Rails.logger.debug("Walking in tag: %s - %s" % [
|
||||
(repository.identifier or MAIN_REPOSITORY_IDENTIFIER), tag])
|
||||
walk(repository, tag, repository.entries(nil, tag))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def indexing_diff(repository, diff_from, diff_to)
|
||||
def walk(repository, identifier, changesets)
|
||||
Rails.logger.debug "DEBUG: walking into " + changesets.inspect
|
||||
return if not changesets or changesets.size <= 0
|
||||
changesets.sort! { |a, b| a.id <=> b.id }
|
||||
|
||||
actions = Hash::new
|
||||
# SCM actions
|
||||
# * A - Add
|
||||
# * M - Modified
|
||||
# * R - Replaced
|
||||
# * D - Deleted
|
||||
changesets.each do |changeset|
|
||||
Rails.logger.debug "DEBUG: changeset changes for #{changeset.id} " + changeset.filechanges.inspect
|
||||
next unless changeset.filechanges
|
||||
changeset.filechanges.each do |change|
|
||||
if change.action == 'D'
|
||||
actions[change.path] = DELETE
|
||||
#entry = repository.entry(change.path, identifier)
|
||||
#add_or_update_index(repository, identifier, entry, DELETE )
|
||||
else
|
||||
actions[change.path] = ADD_OR_UPDATE
|
||||
#entry = repository.entry(change.path, identifier)
|
||||
#add_or_update_index(repository, identifier, entry, ADD_OR_UPDATE )
|
||||
end
|
||||
end
|
||||
end
|
||||
return unless actions
|
||||
actions.each do |path, action|
|
||||
entry = repository.entry(path, identifier)
|
||||
if (!entry.nil? and entry.is_file?) or action == DELETE
|
||||
log("Error indexing path: #{path.inspect}, action: #{action.inspect}, identifier: #{identifier.inspect}", :level=>1) if entry.nil? and action != DELETE
|
||||
Rails.logger.debug "DEBUG: entry to index " + entry.inspect
|
||||
lastrev=nil
|
||||
lastrev=entry.lastrev unless entry.nil?
|
||||
add_or_update_index(repository, identifier, path, lastrev, action, MIME_TYPES[Redmine::MimeType.of(path)] ) if supported_mime_type(path) or action == DELETE
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if diff_from.id >= diff_to.id
|
||||
Rails.logger.info("Already indexed: %s (from: %s to %s)" % [
|
||||
(repository.identifier or MAIN_REPOSITORY_IDENTIFIER),diff_from.id, diff_to.id])
|
||||
log("\t>Already indexed: #{repository.identifier} (from #{diff_from.id} to #{diff_to.id})", :level=>1)
|
||||
return
|
||||
end
|
||||
|
||||
Rails.logger.info("Indexing diff: %s (from: %s to %s)" % [
|
||||
(repository.identifier or MAIN_REPOSITORY_IDENTIFIER),diff_from.id, diff_to.id])
|
||||
Rails.logger.info("Indexing all: %s" % [
|
||||
(repository.identifier or MAIN_REPOSITORY_IDENTIFIER)])
|
||||
if repository.branches
|
||||
repository.branches.each do |branch|
|
||||
Rails.logger.debug("Walking in branch: %s - %s" % [(repository.identifier or MAIN_REPOSITORY_IDENTIFIER), branch])
|
||||
walk(repository, branch, repository.latest_changesets("", branch, diff_to.id - diff_from.id)\
|
||||
.select { |changeset| changeset.id > diff_from.id and changeset.id <= diff_to.id})
|
||||
|
||||
end
|
||||
else
|
||||
Rails.logger.debug("Walking in branch: %s - %s" % [(repository.identifier or MAIN_REPOSITORY_IDENTIFIER), "[NOBRANCH]"])
|
||||
walk(repository, nil, repository.latest_changesets("", nil, diff_to.id - diff_from.id)\
|
||||
.select { |changeset| changeset.id > diff_from.id and changeset.id <= diff_to.id})
|
||||
end
|
||||
if repository.tags
|
||||
repository.tags.each do |tag|
|
||||
Rails.logger.debug("Walking in tag: %s - %s" % [(repository.identifier or MAIN_REPOSITORY_IDENTIFIER), tag])
|
||||
walk(repository, tag, repository.latest_changesets("", tag, diff_to.id - diff_from.id)\
|
||||
.select { |changeset| changeset.id > diff_from.id and changeset.id <= diff_to.id})
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def generate_uri(repository, identifier, path)
|
||||
return url_for(:controller => 'repositories',
|
||||
:action => 'entry',
|
||||
:id => $project,
|
||||
:repository_id => repository.identifier,
|
||||
:rev => identifier,
|
||||
:path => repository.relative_path(path),
|
||||
:only_path => true)
|
||||
end
|
||||
|
||||
def print_and_flush(str)
|
||||
print str
|
||||
$stdout.flush
|
||||
end
|
||||
|
||||
|
||||
def convert_to_text(fpath, type)
|
||||
text = nil
|
||||
return text if !File.exists?(FORMAT_HANDLERS[type].split(' ').first)
|
||||
case type
|
||||
when "pdf"
|
||||
#fout=fpath.chomp(File.extname(fpath)) + ".txt"
|
||||
text=`#{FORMAT_HANDLERS[type]} #{fpath} -`
|
||||
#begin
|
||||
# text=File.read(fout)
|
||||
# File.unlink(fout)
|
||||
#rescue Exception => e
|
||||
# log("\tError: #{e.to_s} reading #{fout}", :level=>1)
|
||||
#end
|
||||
when /(xlsx|docx|odt|pptx)/i
|
||||
system "#{$unzip} -d #{$tempdir}/temp #{fpath} > /dev/null", :out=>'/dev/null'
|
||||
fout= "#{$tempdir}/temp/" + "xl/sharedStrings.xml" if type.eql?("xlsx")
|
||||
fout= "#{$tempdir}/temp/" + "word/document.xml" if type.eql?("docx")
|
||||
fout= "#{$tempdir}/temp/" + "content.xml" if type.eql?("odt")
|
||||
fout= "#{$tempdir}/temp/" + "docProps/app.xml" if type.eql?("pptx")
|
||||
begin
|
||||
text=File.read(fout)
|
||||
FileUtils.rm_rf("#{$tempdir}/temp")
|
||||
rescue Exception => e
|
||||
log("\tError: #{e.to_s} reading #{fout}", :level=>1)
|
||||
end
|
||||
else
|
||||
text = `#{FORMAT_HANDLERS[type]} #{fpath}`
|
||||
end
|
||||
return text
|
||||
end
|
||||
|
||||
def add_or_update_index(repository, identifier, path, lastrev, action, type)
|
||||
uri = generate_uri(repository, identifier, path)
|
||||
return unless uri
|
||||
text=nil
|
||||
if Redmine::MimeType.is_type?('text', path) #type eq 'txt'
|
||||
text = repository.cat(path, identifier)
|
||||
else
|
||||
fname = path.split( '/').last.tr(' ', '_')
|
||||
bstr = nil
|
||||
bstr = repository.cat(path, identifier)
|
||||
File.open( "#{$tempdir}/#{fname}", 'wb+') do | bs |
|
||||
bs.write(bstr)
|
||||
end
|
||||
text = convert_to_text( "#{$tempdir}/#{fname}", type) if File.exists?("#{$tempdir}/#{fname}") and !bstr.nil?
|
||||
File.unlink("#{$tempdir}/#{fname}")
|
||||
end
|
||||
#return delete_doc(uri) unless text
|
||||
#return unless text
|
||||
Rails.logger.debug "generated uri: " + uri.inspect
|
||||
#log("\t>Generated uri: #{uri.inspect}", :level=>1)
|
||||
Rails.logger.debug "Mime type text" if Redmine::MimeType.is_type?('text', path)
|
||||
log("\t>Indexing: #{path}", :level=>1)
|
||||
begin
|
||||
itext = Tempfile.new( "filetoindex.tmp", $tempdir )
|
||||
itext.write("url=#{uri.to_s}\n")
|
||||
if action != DELETE then
|
||||
sdate = lastrev.time || Time.at(0).in_time_zone
|
||||
itext.write("date=#{sdate.to_s}\n")
|
||||
body=nil
|
||||
text.force_encoding('UTF-8')
|
||||
text.each_line do |line|
|
||||
#Rails.logger.debug "inspecting line: " + line.inspect
|
||||
if body.blank?
|
||||
itext.write("body=#{line}")
|
||||
body=1
|
||||
else
|
||||
itext.write("=#{line}")
|
||||
end
|
||||
end
|
||||
#print_and_flush "A" if $verbose
|
||||
else
|
||||
#print_and_flush "D" if $verbose
|
||||
Rails.logger.debug "DEBUG path: %s should be deleted" % [path]
|
||||
end
|
||||
itext.close
|
||||
Rails.logger.debug "TEXT #{itext.path} generated "
|
||||
#@rwdb.close #Closing because of use of scriptindex
|
||||
Rails.logger.debug "DEBUG index cmd: #{$scriptindex} -s #{$user_stem_lang} #{$databasepath} #{$indexconf.path} #{itext.path}"
|
||||
system_or_raise("#{$scriptindex} -s english #{$databasepath} #{$indexconf.path} #{itext.path} " )
|
||||
itext.unlink
|
||||
Rails.logger.info ("New doc added to xapian database")
|
||||
rescue Exception => e
|
||||
Rails.logger.error("ERROR text not indexed beacause an error #{e.to_s}")
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
|
||||
#include RepositoryIndexer
|
||||
|
||||
def log(text, options={})
|
||||
level = options[:level] || 0
|
||||
dtext=Time.now.asctime.to_s + ": #{text}"
|
||||
puts dtext unless $quiet or level > $verbose
|
||||
exit 1 if options[:exit]
|
||||
end
|
||||
|
||||
def system_or_raise(command)
|
||||
raise "\"#{command}\" failed" unless system command ,:out=>'/dev/null'
|
||||
end
|
||||
|
||||
def find_project(prt)
|
||||
#puts "find project for #{prt}"
|
||||
scope = Project.active.has_module(:repository)
|
||||
project = nil
|
||||
project = scope.find_by_identifier(prt)
|
||||
Rails.logger.debug "DEBUG: project found: #{project.inspect}"
|
||||
raise ActiveRecord::RecordNotFound unless project
|
||||
rescue ActiveRecord::RecordNotFound
|
||||
log("- ERROR project #{prt} not found", :level => 1)
|
||||
@project=project
|
||||
end
|
||||
|
||||
def create_dir(path)
|
||||
$rdo=0
|
||||
begin
|
||||
Dir.mkdir(path)
|
||||
sleep 1
|
||||
rescue SystemCallError
|
||||
$rdo=1
|
||||
end
|
||||
$rdo
|
||||
end
|
||||
|
||||
log("- Trying to load Redmine environment <<#{$environment}>>...", :level => 1)
|
||||
|
||||
begin
|
||||
require $environment
|
||||
rescue LoadError
|
||||
puts "\n\tRedmine #{$environment} cannot be loaded!! Be sure the redmine installation directory is correct!\n"
|
||||
puts "\tEdit script and correct path\n\n"
|
||||
exit 1
|
||||
end
|
||||
|
||||
include Rails.application.routes.url_helpers
|
||||
|
||||
log("- Redmine environment [RAILS_ENV=#{$env}] correctly loaded ...", :level => 1)
|
||||
|
||||
|
||||
if $test
|
||||
log("- Running in test mode ...")
|
||||
end
|
||||
|
||||
|
||||
# Indexing files
|
||||
if not $onlyrepos then
|
||||
if not File.exist?($omindex) then
|
||||
log("- ERROR! #{$omindex} does not exist, exiting...")
|
||||
exit 1
|
||||
end
|
||||
$stem_langs.each do | lang |
|
||||
$filespath=File.join($redmine_root, $dmsf_root)
|
||||
if not File.directory?($filespath) then
|
||||
log("- ERROR accessing #{$filespath}, exiting ...")
|
||||
exit 1
|
||||
end
|
||||
dbpath=File.join($dbrootpath,lang)
|
||||
if not File.directory?(dbpath)
|
||||
log("- ERROR! #{dbpath} does not exist, creating ...")
|
||||
if not create_dir(dbpath) then
|
||||
log("- ERROR! #{dbpath} can not be created!, exiting ...")
|
||||
exit 1
|
||||
end
|
||||
end
|
||||
log("- Indexing files under #{$filespath} with omindex stemming in #{lang} ...", :level=>1)
|
||||
system_or_raise ("#{$omindex} -s #{lang} --db #{dbpath} #{$filespath} --url / > /dev/null")
|
||||
end
|
||||
log("- Redmine files indexed ...", :level=>1)
|
||||
end
|
||||
|
||||
# Indexing repositories
|
||||
if not $onlyfiles then
|
||||
if not File.exist?($scriptindex) then
|
||||
log("- ERROR! #{$scriptindex} does not exist, exiting...")
|
||||
exit 1
|
||||
end
|
||||
$databasepath = File.join( $dbrootpath.rstrip, "repodb" )
|
||||
if not File.directory?($databasepath)
|
||||
log("Db directory #{$databasepath} does not exist, creating...")
|
||||
begin
|
||||
Dir.mkdir($databasepath)
|
||||
sleep 1
|
||||
rescue SystemCallError
|
||||
log("ERROR! #{$databasepath} can not be created!, exiting ...")
|
||||
exit 1
|
||||
end
|
||||
end
|
||||
$project=nil
|
||||
$projects.each do |proj|
|
||||
begin
|
||||
scope = Project.active.has_module(:repository)
|
||||
$project = scope.find_by_identifier(proj)
|
||||
raise ActiveRecord::RecordNotFound unless $project
|
||||
log("- Indexing repositories for #{$project.name} ...", :level=>1)
|
||||
$repositories = $project.repositories.select { |repository| repository.supports_cat? }
|
||||
$repositories.each do |repository|
|
||||
if repository.identifier.nil? then
|
||||
log("\t>Ignoring repo id #{repository.id}, repo has undefined identifier", :level=>1)
|
||||
else
|
||||
if !$userch.nil? then
|
||||
changeset=Changeset.where("revision='#{$userch}' and repository_id='#{repository.id}'").first
|
||||
update_log(repository,changeset,nil,nil) unless changeset.nil?
|
||||
end
|
||||
delete_log(repository) if ($resetlog)
|
||||
indexing(repository)
|
||||
end
|
||||
end
|
||||
rescue ActiveRecord::RecordNotFound
|
||||
log("- ERROR project identifier #{proj} not found, ignoring...", :level => 1)
|
||||
Rails.logger.error "Project identifier #{proj} not found "
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
exit 0
|
||||
Loading…
x
Reference in New Issue
Block a user