#! /usr/bin/env python3

#       This program is free software; you can redistribute it and/or modify
#       it under the terms of the GNU General Public License as published by
#       the Free Software Foundation; either version 3 of the License, or
#       (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful,
#       but WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#       GNU General Public License for more details.
#
#       You should have received a copy of the GNU General Public License
#       along with this program; if not, write to the Free Software
#       Foundation, Inc.

import optparse
import os
import queue
import shutil
import sys
import threading
import time

# Check if we are working in the source tree or from an installed package
# When in the source tree, adjust the python path accordingly for importation of
# djvubind modules from source.
loc = os.path.realpath(__file__)
loc = os.path.dirname(loc)
loc = os.path.join(loc+'/', '../djvubind')
loc = os.path.normpath(loc)
if os.path.isdir(loc):
    sys.path.insert(0, os.path.dirname(loc))

import djvubind.encode
import djvubind.ocr
import djvubind.organizer
import djvubind.utils


class ThreadAnalyze(threading.Thread):
    def __init__(self, q):
        threading.Thread.__init__(self)
        self.queue = q

        self.quit = False

    def run(self):
        while not self.quit:
            try:
                # Process the page
                page = self.queue.get()
                page.is_bitonal()
                page.get_dpi()
            except queue.Empty:
                self.quit = True
            except:
                msg = 'wrn: Analysis failure on {0}.'.format(os.path.split(page.path)[1])
                msg = djvubind.utils.color(msg, 'red')
                print(msg, file=sys.stderr)
                sys.exit(1)
            finally:
                self.queue.task_done()

class ThreadOCR(threading.Thread):
    def __init__(self, q, ocr):
        threading.Thread.__init__(self)
        self.queue = q
        self.ocr = ocr

        self.quit = False

    def run(self):
        while not self.quit:
            try:
                # Process the page
                page = self.queue.get()
                boxing = self.ocr.analyze(page.path)
                page.text = djvubind.ocr.translate(boxing)
            except queue.Empty:
                self.quit = True
            except:
                msg = 'wrn: OCR failure on {0} - This page will have no OCR content.'.format(os.path.split(page.path)[1])
                msg = djvubind.utils.color(msg, 'red')
                print(msg, file=sys.stderr)
            finally:
                self.queue.task_done()


class Project:
    """
    Abstraction of the entire project.  This should make things like status
    reports, clean exits on errors, and access to information a little easier.
    """

    def __init__(self, opts):
        self.get_config(opts)

        self.out = os.path.abspath('book.djvu')

        self.book = djvubind.organizer.Book()
        self.enc = djvubind.encode.Encoder(self.opts)
        #self.ocr = djvubind.ocr.OCR(self.opts)
        self.ocr = djvubind.ocr.engine(self.opts['ocr_engine'], self.opts['tesseract_options'])

    def add_file(self, filename, type='page'):
        """
        Adds a file to the project.
        type can be 'page', 'cover_front', 'cover_back', 'metadata', or 'bookmarks'.
        """

        # Check that type is valid and file exists.
        if type not in ['page', 'cover_front', 'cover_back', 'metadata', 'bookmarks']:
            msg = 'err: Project.add_file(): type "{0}" is unknown.'.format(type)
            print(msg, file=sys.stderr)
            sys.exit(1)
        if not os.path.isfile(filename):
            msg = 'err: Project.add_file(): "{0}" does not exist or is not a file.'.format(filename)
            print(msg, file=sys.stderr)
            sys.exit(1)

        # Hand the files over to self.book to manage.
        if type == 'page':
            self.book.insert_page(filename)
        else:
            self.book.suppliments[type] = filename

        return None

    def analyze(self):
        """
        Retrieve and store information about each image (dpi, bitonal, etc.).
        """

        # Create queu and populate with pages to process
        q = queue.Queue()
        for i in self.book.pages:
            q.put(i)

        # Create threads to process the pages in queue
        print('  Spawning {0} processing threads.'.format(self.opts['cores']))
        for i in range(self.opts['cores']):
            p = ThreadAnalyze(q)
            p.daemon = True
            p.start()

        # Wait for everything to digest.  Note that we don't simply call q.join()
        # because it blocks, preventing something like ctrl-c from killing the program.
        pagecount = len(self.book.pages)
        while not q.empty():
            try:
                time.sleep(3)
                # Report completion percentage.
                # N.b., this is perfect, since queue.qsize() isn't completely reliable in a threaded
                # environment, but it will do well enough to give the user and idea of where we are.
                position = ( (pagecount - q.qsize() - self.opts['cores']) / pagecount ) * 100
                print('  {0:.2f}% completed.       '.format(position), end='\r')
            except KeyboardInterrupt:
                print('')
                sys.exit(1)
        q.join()

        return None

    def bind(self):
        """
        Fully encodes all images into a single djvu file.  This includes adding
        known ocr information, covers, metadata, etc.
        """

        self.enc.enc_book(self.book, self.out)

        return None

    def get_config(self, opts):
        """
        Retrives configuration options set in the user's config file.  Options
        passed through the command line (already in 'opts') should be
        translated and overwrite config file options.
        """

        # Set default options
        self.opts = {'cores':-1,
                     'ocr':True,
                     'ocr_engine':'tesseract',
                     'cuneiform_options':'',
                     'tesseract_options':'',
                     'bitonal_encoder':'cjb2',
                     'color_encoder':'csepdjvu',
                     'c44_options':'',
                     'cjb2_options':'-lossy',
                     'cpaldjvu_options':'',
                     'csepdjvu_options':'',
                     'minidjvu_options':'--match -pages-per-dict 100',
                     'title_start':False,
                     'title_exclude':{},
                     'title_uppercase':False,
                     'win_path':'C:\\Program Files\\DjVuZone\\DjVuLibre\\'}

        # Load the global config file first
        if not sys.platform.startswith('win'):
            filename = '/etc/djvubind/config'
            if os.path.isfile(filename):
                config_opts = djvubind.utils.parse_config(filename)
                self.opts.update(config_opts)

        # Load the options from the user's config file, if it exists.
        if sys.platform.startswith('win'):
            filename = os.path.expanduser('~\\Application Data\\djvubind\\config')
        else:
            filename = os.path.expanduser('~/.config/djvubind/config')
        filename = os.path.normpath(filename)
        if os.path.isfile(filename):
            config_opts = djvubind.utils.parse_config(filename)
            self.opts.update(config_opts)
        else:
            if os.path.isfile('/etc/djvubind/config'):
                conf_dir = os.path.expanduser('~/.config/djvubind')
                if not os.path.isdir(conf_dir):
                    os.makedirs(conf_dir)
                shutil.copy('/etc/djvubind/config', filename)
                config_opts = djvubind.utils.parse_config(filename)
                self.opts.update(config_opts)
            else:
                msg = 'msg: Project.get_config(): No user config file found ({0}).'.format(filename)
                msg = msg + '\n' + 'A sample config file is included in the source (docs/config).'
                print(msg, file=sys.stderr)


        # Set cetain variables to the proper type
        self.opts['cores'] = int(self.opts['cores'])
        self.opts['ocr'] = (self.opts['ocr'] == 'True')

        # Overwrite or create values for certain command line options
        if opts.no_ocr:
            self.opts['ocr'] = False
        if opts.ocr_engine is not None:
            self.opts['ocr_engine'] = opts.ocr_engine
        if opts.tesseract_options is not None:
            self.opts['tesseract_options'] = opts.tesseract_options
        if opts.cuneiform_options is not None:
            self.opts['cuneiform_options'] = opts.cuneiform_options
        if opts.title_start:
            self.opts['title_start'] = opts.title_start
        for special in opts.title_exclude:
            if ':' in special:
                special = special.split(':')
                self.opts['title_exclude'][special[0]] = special[1]
            else:
                self.opts['title_exclude'][special] = None
        self.opts['title_uppercase'] = opts.title_uppercase
        self.opts['verbose'] = opts.verbose
        self.opts['quiet'] = opts.quiet

        # Detect number of cores if not manually set already
        if self.opts['cores'] == -1:
            self.opts['cores'] = djvubind.utils.cpu_count()

        # Update windows PATH so that we can find the executable we need.
        if sys.platform.startswith('win'):
            if self.opts['win_path'] != '':
                os.environ['PATH'] = '{0};{1}'.format(self.opts['win_path'], os.environ['PATH'])

        if self.opts['verbose']:
            print('Executing with these parameters:')
            print(self.opts)
            print('')

        return None

    def get_ocr(self):
        """
        Performs optical character analysis on all images, excluding covers.
        """

        if not self.opts['ocr']:
            print('  OCR is disabled and will be skipped.')
            return None

        # Create queu and populate with pages to process
        q = queue.Queue()
        for i in self.book.pages:
            q.put(i)

        # Create threads to process the pages in queue
        print('  Spawning {0} processing threads.'.format(self.opts['cores']))
        for i in range(self.opts['cores']):
            p = ThreadOCR(q, self.ocr)
            p.daemon = True
            p.start()

        # Wait for everything to digest.  Note that we don't simply call q.join()
        # because it blocks, preventing something like ctrl-c from killing the
        # program.
        pagecount = len(self.book.pages)
        while not q.empty():
            try:
                time.sleep(3)
                # Report completion percentage.
                # N.b., this is not perfect, since queue.qsize() isn't completely reliable in a threaded
                # environment, but it will do well enough to give the user and idea of where we are.
                position = ( (pagecount - q.qsize() - self.opts['cores']) / pagecount ) * 100
                print('  {0:.2f}% completed.       '.format(position), end='\r')
            except KeyboardInterrupt:
                print('')
                sys.exit(1)
        q.join()

        return None


if __name__ == '__main__':
    version  = '1.2.1'
    version += ' ( $Revision: 4e797677f1bc $)'.replace('$', '')

    # Command line parsing
    usage = "usage: %prog [options] directory"
    description = "djvubind is designed to facilitate creating high-quality djvu files, including positional ocr, metadata, and bookmarks."
    parser = optparse.OptionParser(usage, version=version, description=description)
    parser.set_defaults(quiet=False, verbose=False,
                        no_ocr=False, ocr_engine=None, tesseract_options=None, cuneiform_options=None,
                        cover_front='cover_front.jpg', cover_back='cover_back.jpg',
                        metadata='metadata', bookmarks='bookmarks',
                        title_start=False, title_exclude=[], title_uppercase=False)
    parser.add_option("--cover-front", dest="cover_front", help="Specifies an alternate front cover image.  By default, '%default' is used if present.")
    parser.add_option("--cover-back", dest="cover_back", help="Specifies an alternate back cover image.  By default, '%default' is used if present.")
    parser.add_option("--metadata", dest="metadata", help="Specifies an alternate metadata file.  By default, '%default' is used if present.")
    parser.add_option("--bookmarks", dest="bookmarks", help="Specifies an alternate bookmarks file.  By default, '%default' is used if present.")
    parser.add_option("--no-ocr", action="store_true", dest="no_ocr", help="Images will not be processed for text content.")
    parser.add_option("--ocr-engine", dest="ocr_engine", help="Select which ocr engine to use (cuneiform|tesseract).  By default, '%default' is used.")
    parser.add_option("--tesseract-options", dest="tesseract_options", help="Additional command line options to pass to tesseract.")
    parser.add_option("--cuneiform-options", dest="cuneiform_options", help="Additional command line options to pass to cuneiform.")
    parser.add_option("--title-start", dest="title_start", help="The image filename that is page 1.  Pages before this will titled with roman numerals.")
    parser.add_option("--title-exclude", action="append", dest="title_exclude", help="An image that should be excluded from page numbering.  An alternate title can be provided after a colon (e.g. page_01.tif:cover).")
    parser.add_option("--title-uppercase", action="store_true", dest="title_uppercase", help="Use uppercase roman numerals instead of lowercase.")
    parser.add_option("-q", "--quiet", action="store_true", dest="quiet")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose")
    (options, args) = parser.parse_args(sys.argv)

    if options.quiet:
        sys.stdout = open(os.devnull, 'w')
    if options.verbose:
        print('djvubind version {0} on {1}'.format(version, sys.platform))

    # Sanity checks on command line arguments and options
    if len(args) == 2:
        if not os.path.isdir(args[1]):
            print('The argument ({0}) is not a directory.'.format(args[1]), file=sys.stderr)
            sys.exit(1)
    elif (len(args) > 2):
        print('Too many arguments, check your command syntax.', file=sys.stderr)
        sys.exit(1)

    # Project needs to be initialized before doing dependency checks, since the
    # configuration file may supply PATH updates for Window environments.
    proj = Project(options)

    # Dependency check
    # N.B. checks for ocr engines *should* take place in ocr.OCR(), since which
    # ones are needed requires knowledge of config preferences, bitonal/nonbitonal, etc.
    # Likewise for encoders other than djvulibre tools (albeit in encode.Encode())
    deps = ['cpaldjvu', 'cjb2', 'djvm', 'djvused', 'identify']
    for dep in deps:
        if (not djvubind.utils.is_executable(dep)):
            print('err: __main__: external dependency ({0}) cannot be found.'.format(dep), file=sys.stderr)
            sys.exit(1)

    # Increment the file name if a previous book.djvu already exists.
    i = 0
    while os.path.isfile(proj.out):
        i = i + 1
        proj.out = 'book(' + str(i) + ').djvu'
        proj.out = os.path.abspath(proj.out)

    # Change to working directory if necessary
    if len(args) == 2:
        os.chdir(args[1])

    # Add files to the project
    print('{0} Collecting files to be processed.'.format(djvubind.utils.color('*', 'green')))
    counter = djvubind.utils.counter(start=1, roman=True)
    if os.path.isfile(options.cover_front):
        proj.add_file(options.cover_front, 'cover_front')
    if os.path.isfile(options.cover_back):
        proj.add_file(options.cover_back, 'cover_back')
    if os.path.isfile(options.metadata):
        proj.add_file(options.metadata, 'metadata')
    if os.path.isfile(options.bookmarks):
        proj.add_file(options.bookmarks, 'bookmarks')
    for filename in djvubind.utils.list_files():
        ext = filename.split('.')[-1]
        ext = ext.lower()
        if (ext in ['tif', 'tiff', 'pnm', 'pbm', 'pgm', 'ppm']) and (filename not in [options.cover_front, options.cover_back]):
            proj.add_file(filename, 'page')
            if proj.opts['title_start'] is not False:
                filename = os.path.basename(filename)
                if proj.opts['title_start'] == filename:
                    counter = djvubind.utils.counter(start=1)
                if filename in proj.opts['title_exclude']:
                    proj.book.pages[-1].title = proj.opts['title_exclude'][filename]
                else:
                    proj.book.pages[-1].title = next(counter)
                    if proj.opts['title_uppercase']:
                        proj.book.pages[-1].title = proj.book.pages[-1].title.upper()

    # Check that titles are not being specified without a starting page
    if (proj.opts['title_start'] is False) and (proj.opts['title_exclude'] != {}):
        msg = 'err: --title-exclude may only be used with --title-start.'
        msg = djvubind.utils.color(msg, 'red')
        print(msg, file=sys.stderr)
        sys.exit(1)

    # Check for duplicate titles
    titles = {}
    for key in proj.book.pages:
        value = key.title
        if value in titles and value is not None:
            titles[value] += 1
        else:
            titles[value] = 1
    dups = [key for key in titles if titles[key]>1]
    if len(dups):
        msg = 'err: The same title cannot be used for multiple pages. This is a limitation of djvused. Duplicated titles are listed below:'
        msg = djvubind.utils.color(msg, 'red')
        print(msg, file=sys.stderr)
        for dup in dups:
            print(dup, file=sys.stderr)
            sys.exit(1)

    if len(proj.book.pages) == 0:
        print('  No files found to bind.')
        sys.exit(0)
    else:
        print('  Binding a total of {0} file(s).'.format(len(proj.book.pages)))

    print('{0} Analyzing image information.'.format(djvubind.utils.color('*', 'green')))
    proj.analyze()
    proj.book.get_dpi()

    print('{0} Performing optical character recognition.'.format(djvubind.utils.color('*', 'green')))
    proj.get_ocr()

    #proj.book.save_report()

    print('{0} Encoding all information to {1}.'.format(djvubind.utils.color('*', 'green'), proj.out))
    proj.bind()


