#!/usr/bin/env python
#-*- coding:utf-8 -*-

# (c) 2008 João S. O. Bueno
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

#

#
#  Projeto LDP-BR
#
# script:   muda-iniciais
#
# objetivo: Altera o estilo de maiúsculas de strings em .po
#
# 27/06/08 - Versão 1.0 - Criado por Joao S. O. Bueno (gwidion@mpc.com.br)
# 28/06/08 - Versão 1.1 - Adicionado o suporte a dicionário
#                         (sugestão Fernando Boaglio)
# 28/06/08 - Versão 1.2 - correções de bugs por Joao S. O. Bueno (gwidion@mpc.com.br)
# 29/06/08 - Versão 1.3 - refatoração do código principal para facilitar 
#                         leitura e correção de bugs (João)

import sys, os

# Program version; interpolated into the string passed to OptionParser(version=...).
VERSION = "1.3"

class MyDict(dict):
    """
    Dictionary whose item access never raises KeyError.

    ``d[key]`` returns the stored value, or an empty string when the key
    is absent. ``d[key, default]`` (a tuple subscript) returns the stored
    value for ``key``, or ``default`` when the key is absent.
    """

    def __getitem__(self, key):
        fallback = ""
        if isinstance(key, tuple):
            # A tuple subscript is read as (real key, fallback value).
            key, fallback = key[0], key[1]
        return dict.get(self, key, fallback)
            
# Characters treated as whitespace.
# NOTE(review): not referenced anywhere in the visible code - confirm before removing.
WHITESPACE = (" ", "\t", "\n", "\r")

class Msgstr(object):
    """
    Text of one ``msgstr`` entry of a .po file, with repeated capitals removed.

    Receives the block of .po lines that make up a msgstr and:
    - decides whether the message text in the block needs to be changed;
    - makes the block of lines available, changed or not
      (``get_changed_lines``);
    - makes a counter of the changes available (``chars_changed``).

    Lines may be UTF-8 encoded byte strings (decoded for processing and
    re-encoded on output) or unicode strings (processed as they are).

    Attributes set by the constructor:
    po_lines      -- the lines exactly as received
    lines         -- text between the outermost double quotes of each line
    line_wrappers -- ``%`` format strings that rebuild each line from its text
    changed_lines -- the texts after processing (a copy of ``lines`` when
                     the message is left alone)
    chars_changed -- number of characters that were lower-cased
    changeable    -- False once the message is judged not to be in title case
    changed       -- True when at least one character was changed
    """
    # Class-level settings, shared by every instance; Changer assigns them
    # before the files are processed.
    MNEMONICCHAR = u"_"
    set_of_capitalized_words = set()

    def __init__(self, po_lines):
        """Split ``po_lines`` into wrappers and text, then process the text."""
        self.po_lines = po_lines
        self._extract_message_text()
        self.changeable = True
        self.change_lines()
        self.changed = bool(self.chars_changed)

    def get_changed_lines(self):
        """Return the complete .po lines rebuilt around the processed texts."""
        return [wrapper % line
                for wrapper, line in zip(self.line_wrappers, self.changed_lines)]

    def change_lines(self):
        """
        Fill ``changed_lines`` and ``chars_changed`` from ``lines``.

        When any line shows that the message is not changeable, every
        line is restored to its original text and the counter is reset.
        """
        self.changed_lines = []
        self.chars_changed = 0
        line_num = 0
        for line in self.lines:
            changed_line = self._change_line(line_num, line)
            if not self.changeable:
                # _change_line gave up on the message; nothing else to do.
                break
            self.changed_lines.append(changed_line)
            # The first letter of the message is always kept, so only
            # lines that have content count towards the line number.
            if changed_line:
                line_num += 1
        if not self.changeable:
            self.changed_lines = list(self.lines)
            self.chars_changed = 0

    def _at_word_start(self, position, in_line):
        """
        Tell whether the character at ``position`` begins a word.

        A word begins at an alphabetic character that is the first of the
        line, follows whitespace, or follows a mnemonic marker that is
        itself not preceded by a letter. A marker in the middle of a word
        ("E_xcluir") does not start a new word.
        """
        if not in_line[position].isalpha():
            return False
        if position == 0:
            return True
        previous = in_line[position - 1]
        if previous.isspace():
            return True
        if previous == self.MNEMONICCHAR:
            return position < 2 or not in_line[position - 2].isalpha()
        return False

    def _next_word(self, position, in_line):
        """Return the whitespace-delimited token starting at ``position``."""
        tokens = in_line[position:].split(None, 1)
        if not tokens:
            return u""
        return tokens[0]

    def _is_protected_word(self, word):
        """Tell whether ``word``, with or without mnemonic marker, is in the dictionary."""
        protected = self.set_of_capitalized_words
        return (word in protected
                or word.replace(self.MNEMONICCHAR, u"") in protected)

    def _change_line(self, line_num, line):
        """
        Return ``line`` with the capitalized initials of its words lower-cased.

        The first word of the message, dictionary words and all-caps words
        keep their capitals. If the line shows that the message is not in
        title case, ``changeable`` is set to False and the line is returned
        untouched.

        line_num -- number of non-empty message lines seen before this one
        line     -- message text (UTF-8 byte string or unicode string)
        """
        if not self.changeable:
            return line
        encoded = not isinstance(line, type(u""))
        if encoded:
            in_line = line.decode("utf-8")
        else:
            in_line = line
        out_line_buffer = []
        at_first_word = True

        for position, char in enumerate(in_line):
            this_char_changeable = False
            if self._at_word_start(position, in_line):
                this_char_changeable = True
                if line_num == 0 and at_first_word:
                    if not char.isupper():
                        # Message does not start with a capital: leave it alone.
                        self.changeable = False
                        return line
                    this_char_changeable = False
                next_word = self._next_word(position, in_line)

                if self._is_protected_word(next_word):
                    this_char_changeable = False
                elif len(next_word) >= 2 and self._isupper(next_word):
                    # Acronyms and other all-caps words are kept.
                    this_char_changeable = False
                elif char.islower():
                    this_char_changeable = False
                    if len(next_word) > 3 and next_word != "para":
                        # A long lower-case word means the message does not
                        # use repeated capitals. The preposition "para" may
                        # be lower-case even in title-case strings; there
                        # may be other words in this category.
                        self.changeable = False
                        return line
                at_first_word = False

            lowered = char.lower()
            if this_char_changeable and lowered != char:
                # The line that does the actual work.
                out_line_buffer.append(lowered)
                self.chars_changed += 1
            else:
                out_line_buffer.append(char)

        result = u"".join(out_line_buffer)
        if encoded:
            return result.encode("utf-8")
        return result

    def _isupper(self, word):
        """Tell whether ``word`` is all upper-case, ignoring mnemonic markers."""
        word = word.replace(self.MNEMONICCHAR, u"")
        return word.isupper()

    def _extract_message_text(self):
        """Fill ``lines`` and ``line_wrappers`` from ``po_lines``."""
        self.lines = []
        self.line_wrappers = []
        for line in self.po_lines:
            wrapper, text = self.grab_quoted(line)
            self.lines.append(text)
            self.line_wrappers.append(wrapper)

    def get_text(self):
        """Return the original message text, all lines joined."""
        return "".join(self.lines)

    def get_changed_text(self):
        """Return the processed message text, all lines joined."""
        return "".join(self.changed_lines)

    def grab_quoted(self, line):
        """
        Separate the text between the outermost double quotes of ``line``.

        Returns ``(wrapper, text)`` where ``wrapper % text`` rebuilds the
        line. A line without a quoted segment yields an empty text and a
        wrapper that reproduces the line unchanged.
        """
        start = line.find('"')
        end = line.rfind('"')
        if start == -1 or start == end:
            return line.replace("%", "%%") + "%s", ""
        # Literal percent signs outside the quotes must survive the
        # "%" formatting done in get_changed_lines.
        prefix = line[:start + 1].replace("%", "%%")
        suffix = line[end:].replace("%", "%%")
        return prefix + "%s" + suffix, line[start + 1:end]

    text = property(get_text)
    changed_text = property(get_changed_text)

class Changer(object):
    """
    Copy a .po file from an input stream to an output stream, rewriting
    the msgstrs that use repeated capitals.

    - Receives the input and output streams.
    - Reads the input line by line.
    - Gathers the blocks of lines that make up each msgstr and hands
      them to a Msgstr object for processing.
    - When the msgstr was changed:
        - optionally confirms the change interactively;
        - optionally updates the flags line so the entry is marked fuzzy.
    - Otherwise writes the block unchanged.
    - Writes all lines to the output stream.
    - Prints information to stderr, at the requested verbosity level.
    """
    def __init__(self, **kwargs):
        """
        Keyword arguments (all optional):
        in_stream   -- input file stream
        out_stream  -- output file stream
        fuzzy       -- mark changed entries as fuzzy (default True)
        interactive -- confirm every change with the user (default False)
        verbosity   -- 0 to 3 (default 1)
        mnemonic    -- accelerator marker character (default "_")
        dict_file   -- file with words that keep their capitals; when
                       omitted, DictLoaderSet's own default file is used
        """
        object.__init__(self)
        options = MyDict(kwargs)
        self.message_stream = sys.stderr

        self.in_stream = options["in_stream"]
        self.out_stream = options["out_stream"]

        self.fuzzy = options["fuzzy", True]
        self.interactive = options["interactive", False]
        self.verbosity = options["verbosity", 1]

        # The settings below are class variables of Msgstr: they affect
        # every Msgstr instance created during this run of the program.
        # FIXME: should be executed only once for all files.
        mnemonic = options["mnemonic", "_"]
        if not isinstance(mnemonic, type(u"")):
            mnemonic = mnemonic.decode("utf-8")
        Msgstr.MNEMONICCHAR = mnemonic

        dict_file = options["dict_file", None]
        if dict_file is None:
            # Passing None through would make open() raise TypeError.
            Msgstr.set_of_capitalized_words = DictLoaderSet()
        else:
            Msgstr.set_of_capitalized_words = DictLoaderSet(dict_file)

    def ask_user(self, new, current):
        """Show both versions of a message; return True if the user accepts."""
        # FIXME: if the user aborts we should save the changes already
        # made - interactive editing can be tedious.
        try:
            read_answer = raw_input
        except NameError:
            read_answer = input
        sys.stdout.write("Original:\n%s \n\nSugestão:\n%s\n\n" % (current, new))
        answer = read_answer("Alterar? [S/n]")
        return answer.lower() in ("", "s", "y")

    def change_fuzzy_line(self, index, out_buffer):
        """
        Mark the entry whose preceding line is ``out_buffer[index]`` as fuzzy.

        When that line is a flags comment ("#, ...") the fuzzy flag is
        appended to it, unless already present. Otherwise a new
        "#, fuzzy" line is inserted right after it. An ``index`` outside
        the buffer (an entry at the very top of the file gives -1) inserts
        the new line at the nearest valid position.
        """
        if 0 <= index < len(out_buffer):
            line = out_buffer[index]
            if line.startswith("#,"):
                flags = [flag.strip() for flag in line[2:].split(",")]
                if "fuzzy" not in flags:
                    body = line.rstrip("\r\n")
                    ending = line[len(body):] or "\n"
                    out_buffer[index] = body + ", fuzzy" + ending
                return
        out_buffer.insert(max(index + 1, 0), "#, fuzzy\n")

    def commit_msgstr(self, out_buffer, msgstr_buffer, fuzzy_line_position, line=""):
        """
        Process one gathered msgstr block and append the result to ``out_buffer``.

        out_buffer          -- list of output lines, extended in place
        msgstr_buffer       -- the lines of the msgstr
        fuzzy_line_position -- index in ``out_buffer`` of the line that
                               precedes the entry (see change_fuzzy_line)
        line                -- optional line to append after the block
        """
        # Creating the Msgstr does all the processing as well.
        msgstr = Msgstr(msgstr_buffer)
        if msgstr.changed and (not self.interactive or
                               self.ask_user(msgstr.changed_text, msgstr.text)):
            out_buffer += msgstr.get_changed_lines()
            self.msgstrs_changed += 1
            self.chars_changed += msgstr.chars_changed
            if self.fuzzy:
                self.change_fuzzy_line(fuzzy_line_position, out_buffer)
            if self.verbosity == 2:
                self.message_stream.write("Linha %d:\nMensagem: \n %sAlterada para:\n %s \n\n\n" % (len(out_buffer), msgstr.text, msgstr.changed_text))
        else:
            out_buffer += msgstr_buffer
        if line:
            out_buffer.append(line)

    def do_it(self):
        """
        Process the whole input stream and write the result to the output stream.

        Sets ``chars_changed`` and ``msgstrs_changed`` to the totals for
        the file.
        """
        # Total changes in the file.
        self.chars_changed = 0
        self.msgstrs_changed = 0

        # True while the lines being read are part of a msgstr.
        in_msgstr = False
        msgstr_buffer = []

        # Index, inside out_buffer, of the line that precedes the current entry.
        fuzzy_line_position = 0

        # Accumulates the copied and the changed output lines.
        out_buffer = []
        for line in self.in_stream.readlines():
            if in_msgstr:
                if line.startswith("msgstr") or line[0] in ("\'", "\""):
                    msgstr_buffer.append(line)
                    continue
                in_msgstr = False
                self.commit_msgstr(out_buffer, msgstr_buffer, fuzzy_line_position)
                # The line that ended the msgstr is handled below like
                # any other line outside a msgstr.

            if line.startswith("msgstr"):
                in_msgstr = True
                msgstr_buffer = [line]
                continue

            if line.startswith("msgid") and not line.startswith("msgid_plural"):
                # msgid_plural belongs to the entry already started by
                # msgid, so it must not move the fuzzy position.
                fuzzy_line_position = len(out_buffer) - 1
                if (fuzzy_line_position >= 0 and
                        out_buffer[fuzzy_line_position].startswith("msgctxt")):
                    fuzzy_line_position -= 1
            out_buffer.append(line)

        # The file may end without a line after the last msgstr.
        if in_msgstr:
            self.commit_msgstr(out_buffer, msgstr_buffer, fuzzy_line_position)

        self.out_stream.write("".join(out_buffer))
        if self.verbosity >= 3:
            self.message_stream.write("".join(out_buffer))
        if self.verbosity >= 1:
            self.message_stream.write("   Alterados %d caracteres em %d mensagens\n\n" %  (self.chars_changed, self.msgstrs_changed))

    def close_streams(self):
        """
        Close the output and input streams.

        The process-wide standard streams are only flushed, never closed,
        so they remain usable for the next file.
        """
        standard = (sys.stdin, sys.stdout, sys.stderr)
        for stream in (self.out_stream, self.in_stream):
            if any(stream is std for std in standard):
                if stream is not sys.stdin:
                    stream.flush()
            else:
                stream.close()


class DictLoaderSet(set):
    """
    Set of words loaded from a dictionary file, one word per line.

    Each line is decoded with ``encoding`` and stripped of surrounding
    whitespace; blank lines are ignored. If the file cannot be opened or
    read, a warning is written to stderr and the set stays as loaded so
    far (empty when the file could not be opened).
    """
    DEFAULT_FILE = "pt_BR-palavras-em-maiusculo.dic"

    def __init__(self, file_name=DEFAULT_FILE, encoding="iso-8859-1"):
        """
        file_name -- path of the dictionary file; None selects the default file
        encoding  -- character encoding of the file
        """
        set.__init__(self)
        if file_name is None:
            file_name = self.DEFAULT_FILE
        try:
            # Binary mode: the lines are decoded explicitly below.
            dict_file = open(file_name, "rb")
            try:
                for line in dict_file:
                    word = line.decode(encoding).strip()
                    if word:
                        self.add(word)
            finally:
                dict_file.close()
        except IOError as error:
            sys.stderr.write ("""Aviso: não foi possível abrir o arquivo de dicionários de maiúsculas "%s":\n%s\nProsseguindo com dicionário em branco\n\n""" % (file_name, error))

   
def command_line_mode():
    """
    Command line entry point.

    Parses the options, validates the combination of input and output
    files and runs a Changer over every input file. When the output is
    the input file itself, the result is first written to "<file>#";
    afterwards the original is renamed to "<file>~" and the result takes
    its place. The "~" backup is removed when --no-backup is given.

    Exits through SystemExit when the options are invalid (status 2, via
    OptionParser.error) or when no input file is given (status 0, after
    printing the help text).
    """
    from optparse import OptionParser

    # "%prog" is replaced by optparse with the name of the running script.
    usage = u"""%prog [opções] [caminho_para/pt_BR.po outro_caminho_para/pt_BR.po]"""

    #FIXME: the formatting OptionParser imposes on the description gets in
    #the way a little: there is no way to force line breaks in it.
    description= u"""Elimina a maiúsculas repetidas das mensagens de um arquivo .po,
    colocando-o em conformidade com as regras da ABL, como acordado no projeto LDP-BR.
    Use "-" como nome de arquivo para processar <stdin>. Esse script faz parte do projeto
    LDP-BR.  Autor - João S. O. Bueno, 2008, licenciado sob a GPL v.3.0
"""
    parser = OptionParser(usage=usage, description=description, version="%%prog v%s" % VERSION )

    parser.add_option("-n", "--no-fuzzy", dest="fuzzy",
                      action="store_false",
                      default=True, 
                      help = u"Não marca as mensagens alteradas pelo programa como fuzzy. (Por padrão elas são marcadas.)")

    parser.add_option("-i", "--interactive", dest="interactive",
                      action="store_true",
                      default=False,
                      help=u"Confirma cada mudança de mensagem")

    parser.add_option("-m", "--mnemonic", dest="mnemonic",
                      action="store",
                      default="_",
                      help=u"Caractére que representa os mnemônicos aceleradores (por padrão \"_\")")

    parser.add_option("--dry-run", dest="dry_run",
                      action="store_true",
                      default=False,
                      help=u"Apenas exibe o que seria alterado, não grava alterações")

    parser.add_option("--output-file", "-o", dest="output",
                      default=[], action="append",
                      help=u"Arquivo destino. Por padrão é o próprio arquivo de entrada. Se houver mais de um arquivo de entrada, deve haver um número correspondente de opções '-o'. Use o nome '-' para <stdout>")

    parser.add_option("--no-backup", dest="no_backup",
                      action="store_true",
                      default=False,
                      help=u"Se especificado, não cria uma cópia de segurança do arquivo .po"
                     )
    parser.add_option("--verbosity", "-v", dest="verbosity", type="int",
                      default=1,
                      help=u"Nível de verbosidade (0-3), padrão: 1"
                     )
    parser.add_option("--dict", dest="dict_file",
                      default="pt_BR-palavras-em-maiusculo.dic",
                      help=u"Arquivo com dicionário de palávras maiúsculas. (1 por linha, codificação iso-8859-1)"
                     )
    parser.add_option("--quiet", "-q", dest="quiet",
                      action="store_true",
                      default=False,
                      help=u"Não gera saída (equivale a --verbosity=0)"
                     )
    options, files_to_change = parser.parse_args()

    if options.quiet:
        options.verbosity = 0

    # parser.error() prints the message and terminates the program itself.
    if options.interactive and "-" in files_to_change:
        parser.error("Não é possível confirmar as alterações interativamente se <stdin> é usado como arquivo de entrada")
    if "-" in files_to_change and len(files_to_change) > 1:
        parser.error("Não é possível combinar outros arquivos com <stdin> (\"-\") ")

    # By default every file is rewritten in place.
    output_files = files_to_change
    if options.output:
        if "-" in options.output and len(options.output) == 1:
            output_files = ["-"] * len(files_to_change)
        elif len(options.output) != len(files_to_change):
            parser.error("Número de arquivos de saída difere do número de arquivos de entrada!")
        else:
            output_files = options.output

    if not files_to_change:
        parser.print_help()
        sys.exit(0)

    #### the action starts here:
    changer = Changer(fuzzy=options.fuzzy, 
                      interactive=options.interactive,
                      verbosity=options.verbosity,
                      mnemonic=options.mnemonic,
                      dict_file=options.dict_file)
    for input_file, output_file in zip(files_to_change, output_files):
        in_place = False
        new_output = None

        if input_file == "-":
            in_stream = sys.stdin
        else:
            in_stream = open(input_file, "rt")

        if options.dry_run:
            # Discard the output on any platform.
            out_stream = open(os.devnull, "wt")
        elif output_file == "-":
            out_stream = sys.stdout
        elif input_file == output_file:
            # If any of these file operations fails, let the program
            # abort - Python's standard messages should be enough for
            # the user to sort the situation out.
            in_place = True
            new_output = output_file + "#"
            if os.path.exists(new_output):
                os.unlink(new_output)
            out_stream = open(new_output, "wt")
        else:
            out_stream = open(output_file, "wt")
        changer.in_stream = in_stream
        changer.out_stream = out_stream

        if options.verbosity >= 1:
            sys.stderr.write("Lendo de %s e escrevendo em %s\n" % (input_file, output_file))

        try:
            changer.do_it()
        finally:
            changer.close_streams()

        if in_place:
            # Keep the original as "<file>~" and move the result into place.
            new_input = input_file + "~"
            if os.path.exists(new_input):
                os.unlink(new_input)
            os.rename(input_file, new_input)
            os.rename(new_output, input_file)
            if options.no_backup:
                os.unlink(new_input)
    
# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":
    command_line_mode()