indent-region-excluding-pre-with-html

EmacsでePubファイルジェネレーターのようなものを作っていたのですが、(indent-region (point-min) (point-max))した時にpreタグ内までインデントされてしまったので、それを回避するための関数を書きました。indent-regionでは、indent-region-functionが設定されていると、その関数が呼ばれるようになっています。

まずは、モードフック時にバッファーローカルなindent-region-functionを設定します。

(defun set-indent-region-function-for-html ()
  "Choose the buffer-local `indent-region-function' for HTML-like buffers.

Meant to be run from a mode hook.  The buffer counts as HTML-like when
the formatted `mode-name' contains \"HTML\", or when it contains \"nXML\"
and `rng-current-schema-file-name' is non-nil and contains \"html\".
In that case `indent-region-function' is set buffer-locally to
`indent-region-excluding-pre-with-html'; otherwise it is set
buffer-locally to nil."
  (let* ((mode-label (format-mode-line mode-name))
         (html-like-p
          (cond
           ((string-match "HTML" mode-label) t)
           ;; The schema variable is consulted only once the mode label
           ;; says nXML, exactly as a short-circuiting `and' would.
           ((string-match "nXML" mode-label)
            (and rng-current-schema-file-name
                 (string-match "html" rng-current-schema-file-name)
                 t)))))
    (setq-local indent-region-function
                (when html-like-p
                  'indent-region-excluding-pre-with-html))))

メインの処理の流れはindent-regionのデフォルトと同様ですが、preタグそのものを含む行、preタグの内側の行、外側の行のそれぞれについて、インデントするかどうかを判定します。

(defun indent-region-excluding-pre-with-html (start end)
  "Indent the lines between START and END, leaving pre content untouched.

Written to serve as a buffer-local `indent-region-function'.  Each
line is handled as follows:
  - an empty line is skipped;
  - a line whose first pre tag is an opening tag is indented;
  - a line whose first pre tag is a closing tag is left alone;
  - any other line is indented unless the mode-specific predicate
    reports that point is inside a pre element.

The predicate is `predicate-inside-of-pre-with-sgml' when the formatted
`mode-name' contains \"HTML\", and `predicate-inside-of-pre-with-nxml'
otherwise.

As in the default `indent-region' loop, a line is processed only when
it begins before END, so a region that ends at the beginning of a line
does not touch that line."
  (save-excursion
    ;; Track END with a marker because indentation changes buffer positions.
    (setq end (copy-marker end))
    (goto-char start)
    (let ((inside-pre-p
           (if (string-match "HTML" (format-mode-line mode-name))
               'predicate-inside-of-pre-with-sgml
             'predicate-inside-of-pre-with-nxml)))
      ;; Strictly-less-than: the line starting exactly at END is outside
      ;; the region.  This test also terminates at end of buffer, since
      ;; END can never lie beyond it.
      (while (< (point) end)
        (unless (and (bolp) (eolp))
          (let ((line (buffer-substring-no-properties (line-beginning-position)
                                                      (line-end-position))))
            (if (string-match
                 "\\(</pre>\\|<pre\\(?:[\s\t\n]\\|>\\)\\)"
                 line)
                ;; The line carries a pre tag; indent it only when the
                ;; first such tag is not the closing tag.
                (unless (string-prefix-p "</" (match-string 1 line))
                  (indent-according-to-mode))
              ;; No pre tag on the line: indent unless inside pre.
              (unless (funcall inside-pre-p)
                (indent-according-to-mode)))))
        (forward-line 1)))
    (move-marker end nil)))

現在のポイントがpreタグの内側か外側かの判定は、HTML(SGML)モードとnXMLモードで処理を分けました。SGMLモードでは、ノードツリーを使わずに検索でノード間を移動するため、うまくいかない場合があるかもしれません。

(defun predicate-inside-of-pre-with-sgml ()
  "Return t if point appears to be inside a pre element, else nil.

Tag navigation uses `sgml-skip-tag-backward', i.e. a textual search
rather than a parsed tree.  Starting from point, the search walks
backward tag by tag until it reaches a block-level tag, then reports
whether that tag is pre.

Returns nil when point is looking at a block-level start tag, when
nothing but newlines precedes the tag that was reached, when no
recognizable tag name is found, or when a block-level end tag lies
between the tag reached and the last position visited before it.
Point is preserved."
  (save-excursion
    (let* ((last-point (point))
           (re-tag-name "<\\([a-z0-9]+\\)\\(?:[\s\t\n]\\|>\\|/\\)")
           (re-block-tags (regexp-opt (split-string "address article aside blockquote div dl fieldset figure footer form h1 h2 h3 h4 h5 h6 header hr main nav ol p pre section table ul li dd body head html")))
           (re-block-tag-names (concat "\\b" re-block-tags "\\b"))
           (re-block-end-tags (concat "</" re-block-tags ">"))
           tag)
      (catch 'return
        ;; Point is on a block-level start tag itself.
        (when (and (looking-at re-tag-name)
                   (string-match re-block-tag-names
                                 (match-string-no-properties 1)))
          (throw 'return nil))
        (sgml-skip-tag-backward 1)
        ;; Step over "<!" constructs, stopping once point no longer moves.
        (while (and (looking-at "<!")
                    (not (= last-point (point))))
          (setq last-point (point))
          (sgml-skip-tag-backward 1))
        (when (looking-back "\\`\n*" (point-min)) (throw 'return nil))
        (when (looking-at re-tag-name)
          (setq tag (match-string-no-properties 1)))
        ;; Bail out before TAG is handed to `string-match' below; a nil
        ;; TAG there would signal an error instead of answering "no".
        (unless tag (throw 'return nil))
        (when (string-match re-block-end-tags
                            (buffer-substring-no-properties (point)
                                                            last-point))
          (throw 'return nil))
        ;; Keep walking backward until a block-level tag is reached or
        ;; point stops moving.  TAG keeps its previous value whenever the
        ;; position reached does not look like a start tag.
        (while (not (or (= last-point (point))
                        (string-match re-block-tag-names tag)))
          (setq last-point (point))
          (sgml-skip-tag-backward 1)
          (when (looking-at re-tag-name)
            (setq tag (match-string-no-properties 1))))
        (numberp (string-match "\\<pre\\>" tag))))))

(defun predicate-inside-of-pre-with-nxml ()
  "Return t if point appears to be inside a pre element, else nil.

Tag navigation uses `nxml-backward-up-element'.  Starting from point,
the search moves up to enclosing elements until it reaches a
block-level tag, then reports whether that tag is pre.

Returns nil when point is looking at a block-level start tag, when
`nxml-backward-up-element' signals an error, or when no recognizable
tag name is found at the position reached.  Point is preserved."
  (save-excursion
    (let ((last-point (point))
          tag
          (re-tag-name "<\\([a-z0-9]+\\)\\(?:[\s\t\n]\\|>\\|/\\)")
          (re-block-tags (concat "\\b" (regexp-opt (split-string "address article aside blockquote div dl fieldset figure footer form h1 h2 h3 h4 h5 h6 header hr main nav ol p pre section table ul body head html")) "\\b")))
      (catch 'return
        ;; Point is on a block-level start tag itself.
        (when (and (looking-at re-tag-name)
                   (string-match re-block-tags
                                 (match-string-no-properties 1)))
          (throw 'return nil))
        (condition-case nil
            (nxml-backward-up-element)
          (error (throw 'return nil)))
        (when (looking-at re-tag-name)
          (setq tag (match-string-no-properties 1)))
        ;; Bail out before TAG is handed to `string-match' below; a nil
        ;; TAG there would signal an error instead of answering "no".
        (unless tag (throw 'return nil))
        ;; Keep moving up until a block-level tag is reached or point
        ;; stops moving.  TAG keeps its previous value whenever the
        ;; position reached does not look like a start tag.
        (while (not (or (= last-point (point))
                        (string-match re-block-tags tag)))
          (setq last-point (point))
          ;; Same protection as the first upward move above.
          (condition-case nil
              (nxml-backward-up-element)
            (error (throw 'return nil)))
          (when (looking-at re-tag-name)
            (setq tag (match-string-no-properties 1))))
        (numberp (string-match "\\<pre\\>" tag))))))

SGMLモードとnXMLモードでは(indent-according-to-mode)の挙動が異なるため、PREタグの位置によってインデンテーションが異なります。SGMLモードのインデンテーションの方が好みではありますが、今後考えるべきかもしれません。gist