2012年5月30日水曜日

[Racket]字句解析

Racketのparser-toolsを使って字句解析をしてみます。
#lang racket
(require parser-tools/lex
  parser-tools/lex-sre)

(define-lex-abbrev newline
  (or (: #\return #\newline)
      (: #\newline)
      (: #\return)))

(define-lex-abbrev whitespace
  (+
   (or (: #\tab)
       (: #\space))))

(define-lex-abbrev section
  (: #\[
     (* (~ #\]))
     #\]))

(define-lex-abbrev =
  #\=)

(define-lex-abbrev id
  (+ (~ #\space #\tab #\newline #\return #\[ #\])))

(define l
  (lexer
   ;; skip
   [newline (l input-port)]
   ;; skip
   [whitespace (l input-port)]
   [section `(section . ,lexeme)]
   [= `(equal . "=")]
   [(eof) `(eof . null)]
   [id `(id . ,lexeme)]))


(define p (open-input-string "[section]\n key = value"))

(display (l p))
;=> (section . [section])
(display (l p))
;=> (id . key)
(display (l p))
;=> (equal . =)
(display (l p))
;=> (id . value)
(display (l p))
;=> (eof . null)

2012年5月28日月曜日

[Racket] HTMLのパース

RacketでHTMLをパースするにはhtmlモジュールとxmlモジュールを使います。
#lang racket

(require
 (prefix-in url: net/url)
 (prefix-in h: html)
 (prefix-in x: xml))

(define (get-attribute name elem)
  (let loop ((rest (h:html-element-attributes elem)))
    (cond
     [(null? rest) #f]
     [(eq? name (x:attribute-name (car rest)))
      (x:attribute-value (car rest))]
     [else (loop (cdr rest))])))

(define (get-element-by-id id elem)
  (match elem
    ([struct h:html-full (attributes content)]
     (let ((val (get-attribute 'id elem)))
       (if (and val (string=? val id))
    elem
    (let loop ((rest content))
      (if (null? rest)
   #f
   (or (get-element-by-id id (car rest))
       (loop (cdr rest))))))))
    ([struct h:html-element (attributes)]
     (let ((val (get-attribute 'id elem)))
       (if (and val (string=? val id))
    val
    #f)))
    (else #f)))

(define (get-html url-string)
  (h:read-html
   (url:get-pure-port
    (url:string->url url-string))))

(let ((obj (get-html "http://docs.racket-lang.org/html/index.html")))
  (let ((body (get-element-by-id "doc-racket-lang-org" obj)))
    (match body
      ([struct h:html-full (attributes content)]
       (displayln attributes)
       (displayln content)))))