" File: AutoFenc.vim " Brief: Tries to automatically detect file encoding. " Author: Petr Zemek " Version: 1.6-dev " " Description: " A Vim plugin that tries to automatically detect and set file encoding when " opening a file. See https://github.com/s3rvac/AutoFenc for more details. " " License: " Copyright (C) 2009-2016 Petr Zemek " " This program is free software: you can redistribute it and/or modify it " under the terms of the GNU General Public License as published by the Free " Software Foundation, either version 2 of the License, or (at your option) " any later version. " " This program is distributed in the hope that it will be useful, but WITHOUT " ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or " FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for " more details. " " You should have received a copy of the GNU General Public License " along with this program. If not, see . " Check if the plugin was already loaded. Also, die gracefully if the used Vim " version is too old. if exists('autofenc_loaded') || v:version < 700 finish endif " Make the loaded variable actually useful by including the version number. let autofenc_loaded = '1.6-dev' " This plugin uses line continuations. if &cpo =~ 'C' let s:oldcpo = &cpo set cpo-=C endif "------------------------------------------------------------------------------- " Checks whether the selected variable (first parameter) is already set and " if not, it sets it to the value of the second parameter. "------------------------------------------------------------------------------- function s:CheckAndSetVar(var, value) if !exists(a:var) exec 'let '.a:var.' = '.string(a:value) endif endfunction " Configuration options initialization (see the script description for more " information). call s:CheckAndSetVar('g:autofenc_enable', 1) call s:CheckAndSetVar('g:autofenc_emit_messages', 0) call s:CheckAndSetVar('g:autofenc_max_file_size', 10485760) call s:CheckAndSetVar('g:autofenc_disable_for_files_matching', '^[-_a-zA-Z0-9]\+://') call s:CheckAndSetVar('g:autofenc_disable_for_file_types', []) call s:CheckAndSetVar('g:autofenc_autodetect_bom', (&fileencodings !~# 'ucs-bom')) call s:CheckAndSetVar('g:autofenc_autodetect_html', 1) call s:CheckAndSetVar('g:autofenc_autodetect_html_filetypes', '^\(html.*\|xhtml\|aspperl\|aspvbs\|cf\|dtml\|gsp\|jsp\|mason\|php\|plp\|smarty\|spyce\|webmacro\)$') call s:CheckAndSetVar('g:autofenc_autodetect_xml', 1) call s:CheckAndSetVar('g:autofenc_autodetect_xml_filetypes', '^\(xml\|xquery\|xsd\|xslt\?\|ant\|dsl\|mxml\|svg\|wsh\|xbl\)$') call s:CheckAndSetVar('g:autofenc_autodetect_css', 1) call s:CheckAndSetVar('g:autofenc_autodetect_css_filetypes', '^\(css\|sass\)$') call s:CheckAndSetVar('g:autofenc_autodetect_comment', 1) call s:CheckAndSetVar('g:autofenc_autodetect_commentexpr', '\c^.*\<\(\(\(file\)\?en\)\?coding\|charset\)\s*[:=]\?\s*\<\zs[-A-Za-z0-9_]\+') call s:CheckAndSetVar('g:autofenc_autodetect_num_of_lines', 5) call s:CheckAndSetVar('g:autofenc_autodetect_ext_prog', 1) call s:CheckAndSetVar('g:autofenc_ext_prog_path', 'enca') call s:CheckAndSetVar('g:autofenc_ext_prog_args', '-i') call s:CheckAndSetVar('g:autofenc_ext_prog_unknown_fenc', '???') call s:CheckAndSetVar('g:autofenc_enc_blacklist', '') "------------------------------------------------------------------------------- " Normalizes the selected encoding and returns it, so it can be safely used as " a new encoding. This function should be called before a new encoding is set. "------------------------------------------------------------------------------- function s:NormalizeEncoding(enc) let nenc = tolower(a:enc) " The recent versions of the TOhtml runtime plugin have some nice charset " to encoding functions which even allow user overrides. Use them if " available. let nenc2 = "" silent! let nenc2 = tohtml#EncodingFromCharset(nenc) if nenc2 != "" return nenc2 " If the TOhtml function is unavailable, at least handle some canonical " encoding names in Vim. elseif nenc =~ 'iso[-_]8859-1' return 'latin1' elseif nenc =~ 'iso[-_]8859-2' return 'latin2' elseif nenc ==? 'gb2312' return 'cp936' " GB2312 imprecisely means CP936 in HTML elseif nenc =~ '\(cp\|win\(dows\)\?\)-125\d' return 'cp125'.nenc[strlen(nenc)-1] elseif nenc == 'utf8' return 'utf-8' elseif g:autofenc_emit_messages && nenc !~ '^\%(8bit-\|2byte-\)\?\%(latin[12]\|utf-8\|utf-16\%(le\)\?\|ucs-[24]\%(le\)\?\|iso-8859-\d\{1,2}\|cp\d\{3,4}\)$' echomsg 'AutoFenc: Detected unrecognized charset, trying fenc='.nenc endif return nenc endfunction "------------------------------------------------------------------------------- " Sets the selected file encoding. Returns 1 if the file was reloaded, " 0 otherwise. "------------------------------------------------------------------------------- function s:SetFileEncoding(enc) let nenc = s:NormalizeEncoding(a:enc) " Check whether we're not trying to set the current file encoding. if nenc != "" && nenc !=? &fenc if exists('&autochdir') && &autochdir " Other autocmds may have changed the window's working directory; " when 'autochdir' is set, the :edit will reset that, so " temporarily disable the setting. let old_autochdir = &autochdir set noautochdir endif try " Set the file encoding and reload it, keeping any user-specified " fileformat, and keeping any bad bytes in case the header is wrong " (this won't let the user save if a conversion error happened on " read). exec 'edit ++enc='.nenc.' ++ff='.&ff.' ++bad=keep' finally if exists('old_autochdir') let &autochdir = old_autochdir endif endtry " The file was reloaded. return 1 else " The file was not reloaded. return 0 endif endfunction "------------------------------------------------------------------------------- " Tries to detect a BOM (byte order mark) at the beginning of the file to " detect multibyte encodings. If there is a BOM, it returns the appropriate " encoding, otherwise the empty string is returned. "------------------------------------------------------------------------------- function s:BOMEncodingDetection() " The implementation of this function is based on a part of the " FencsView.vim plugin by Ming Bai " (http://www.vim.org/scripts/script.php?script_id=1708). " Get the first line of the file. let file_content = readfile(expand('%:p'), 'b', 1) if file_content == [] " Empty file return '' endif let first_line = file_content[0] " Check whether it contains BOM and if so, return appropriate encoding. " Note: If the index is out of bounds, ahx is set to '' automatically. let ah1 = first_line[0] let ah2 = first_line[1] let ah3 = first_line[2] let ah4 = first_line[3] " TODO: I don't know why but if there is a NUL byte, the char2nr() function " transforms it to a newline (0x0A) instead of 0x00... let a1 = char2nr(ah1) == 0x0A ? 0x00 : char2nr(ah1) let a2 = char2nr(ah2) == 0x0A ? 0x00 : char2nr(ah2) let a3 = char2nr(ah3) == 0x0A ? 0x00 : char2nr(ah3) let a4 = char2nr(ah4) == 0x0A ? 0x00 : char2nr(ah4) if a1.a2.a3.a4 == 0x00.0x00.0xfe.0xff return 'utf-32' elseif a1.a2.a3.a4 == 0xff.0xfe.0x00.0x00 return 'utf-32le' elseif a1.a2.a3 == 0xef.0xbb.0xbf return 'utf-8' elseif a1.a2 == 0xfe.0xff return 'utf-16' elseif a1.a2 == 0xff.0xfe return 'utf-16le' endif " There was no legal BOM. return '' endfunction "------------------------------------------------------------------------------- " Tries the HTML way of encoding detection of the current file and returns the " detected encoding (or the empty string, if the encoding was not detected). "------------------------------------------------------------------------------- function s:HTMLEncodingDetection() " This method is based on the meta tag in the head of the HTML document " () " Store the actual position in the file and move to the very first line in " the file. let curpos=getpos('.') keepjumps 1 let enc = '' let charset_line = search('\c skipping" endif return '' endif " Get full path of the currently edited file. let file_path = expand('%:p') " Create the complete external program command by appending program " arguments and the current file path to the external program. let ext_prog_cmd = g:autofenc_ext_prog_path.' '.g:autofenc_ext_prog_args.' '.s:SafeShellescape(file_path) " Run it to get the encoding. let enc = system(ext_prog_cmd) if v:shell_error != 0 " An error occurred. if g:autofenc_emit_messages echomsg "AutoFenc: Command `".ext_prog_cmd."` failed with error code ".v:shell_error." -> skipping" endif return '' endif " Remove trailing newline from the output (system() removes any \r from " the result automatically). let enc = substitute(enc, '\n', '', '') if enc != g:autofenc_ext_prog_unknown_fenc " The encoding was (probably) detected successfully. return enc endif return '' endfunction "------------------------------------------------------------------------------- " Tries to detect encoding of the current file via several ways (according " to the configuration) and returns it. If the encoding was not detected " successfully, it returns the empty string - this can happen because: " - the file is in unknown encoding " - the file is not stored anywhere (e.g. a new file was opened) " - autodetection is disabled for this file (either the file is too large " or autodetection is disabled for this file, see configuration) "------------------------------------------------------------------------------- function s:DetectFileEncoding() " Check whether the autodetection should be performed (see function " description for more information). let file_path = expand('%:p') let file_size = getfsize(file_path) if file_path == '' || \ file_size > g:autofenc_max_file_size || file_size < 0 || \ file_path =~ g:autofenc_disable_for_files_matching || \ index(g:autofenc_disable_for_file_types, &ft, 0, 1) != -1 return '' endif " BOM encoding detection. if g:autofenc_autodetect_bom let enc = s:BOMEncodingDetection() if enc != '' return enc endif endif " HTML encoding detection. if g:autofenc_autodetect_html && &filetype =~? g:autofenc_autodetect_html_filetypes let enc = s:HTMLEncodingDetection() if enc != '' return enc endif endif " XML encoding detection. if g:autofenc_autodetect_xml && &filetype =~? g:autofenc_autodetect_xml_filetypes let enc = s:XMLEncodingDetection() if enc != '' return enc endif endif " CSS encoding detection. if g:autofenc_autodetect_css && &filetype =~? g:autofenc_autodetect_css_filetypes let enc = s:CSSEncodingDetection() if enc != '' return enc endif endif " Comment encoding detection. if g:autofenc_autodetect_comment let enc = s:CommentEncodingDetection() if enc != '' return enc endif endif " External program encoding detection. if g:autofenc_autodetect_ext_prog let enc = s:ExtProgEncodingDetection() if enc != '' return enc endif endif " The encoding was not detected. return '' endfunction "------------------------------------------------------------------------------- " Main plugin function. Tries to autodetect the correct file encoding and sets " the detected one (if any). If the ASCII encoding is detected, it does nothing " so allow Vim to set its internal encoding instead. "------------------------------------------------------------------------------- function s:DetectAndSetFileEncoding() let enc = s:DetectFileEncoding() " Don't call again on the nested trigger from the edit. let b:autofenc_done = enc if (enc != '') && (enc !=? 'ascii') && \ (g:autofenc_enc_blacklist == '' || enc !~? g:autofenc_enc_blacklist) if s:SetFileEncoding(enc) if g:autofenc_emit_messages echomsg "AutoFenc: Detected [".enc."] from file, loaded with fenc=".&fenc endif endif endif endfunction " Set the detected file encoding. if g:autofenc_enable augroup AutoFenc au! " We need to check that we're not in the middle of a reload due to this " plugin otherwise can recurse forever. But unlet the variable to allow " re-detection on the next read of this buffer if it is just unloaded. au BufRead * nested \ if !exists('b:autofenc_done') | \ if v:cmdarg !~ '++enc' | \ call s:DetectAndSetFileEncoding() | \ endif | \ else | \ unlet b:autofenc_done | \ endif augroup END endif " Restore line continuations (and the rest of &cpo) when done. if exists('s:oldcpo') let &cpo = s:oldcpo unlet s:oldcpo endif " vim: noet