#lang racket
(require parser-tools/lex
parser-tools/lex-sre)
(define-lex-abbrev newline
(or (: #\return #\newline)
(: #\newline)
(: #\return)))
(define-lex-abbrev whitespace
(+
(or (: #\tab)
(: #\space))))
(define-lex-abbrev section
(: #\[
(* (~ #\]))
#\]))
(define-lex-abbrev =
#\=)
(define-lex-abbrev id
(+ (~ #\space #\tab #\newline #\return #\[ #\])))
(define l
(lexer
;; skip
[newline (l input-port)]
;; skip
[whitespace (l input-port)]
[section `(section . ,lexeme)]
[= `(equal . "=")]
[(eof) `(eof . null)]
[id `(id . ,lexeme)]))
(define p (open-input-string "[section]\n key = value"))
(display (l p))
;=> (section . [section])
(display (l p))
;=> (id . key)
(display (l p))
;=> (equal . =)
(display (l p))
;=> (id . value)
(display (l p))
;=> (eof . null)
2012年5月30日水曜日
[Racket]字句解析
Racketのparser-toolsを使って字句解析をしてみます。
2012年5月28日月曜日
[Racket] HTMLのパース
RacketでHTMLをパースするにはhtmlモジュールとxmlモジュールを使います。
#lang racket
(require
(prefix-in url: net/url)
(prefix-in h: html)
(prefix-in x: xml))
(define (get-attribute name elem)
(let loop ((rest (h:html-element-attributes elem)))
(cond
[(null? rest) #f]
[(eq? name (x:attribute-name (car rest)))
(x:attribute-value (car rest))]
[else (loop (cdr rest))])))
(define (get-element-by-id id elem)
(match elem
([struct h:html-full (attributes content)]
(let ((val (get-attribute 'id elem)))
(if (and val (string=? val id))
elem
(let loop ((rest content))
(if (null? rest)
#f
(or (get-element-by-id id (car rest))
(loop (cdr rest))))))))
([struct h:html-element (attributes)]
(let ((val (get-attribute 'id elem)))
(if (and val (string=? val id))
val
#f)))
(else #f)))
(define (get-html url-string)
(h:read-html
(url:get-pure-port
(url:string->url url-string))))
(let ((obj (get-html "http://docs.racket-lang.org/html/index.html")))
(let ((body (get-element-by-id "doc-racket-lang-org" obj)))
(match body
([struct h:html-full (attributes content)]
(displayln attributes)
(displayln content)))))
登録:
コメント (Atom)