#lang racket
(require
(prefix-in url: net/url)
(prefix-in h: html)
(prefix-in x: xml))
;; get-attribute : name elem -> attribute value, or #f
;; Scans the attribute list of the HTML element `elem` from front to back
;; and returns the value of the first attribute whose name is `eq?` to
;; `name`.  Returns #f when no attribute has that name.
(define (get-attribute name elem)
  (for/first ([attr (in-list (h:html-element-attributes elem))]
              #:when (eq? name (x:attribute-name attr)))
    (x:attribute-value attr)))
;; get-element-by-id : id elem -> element, or #f
;; Depth-first, document-order search for the first HTML element whose
;; `id` attribute is `string=?` to `id`.
;;   id   - the id string to look for
;;   elem - the node to start from; anything that is not an
;;          h:html-element (for example text content) yields #f
;; Returns the matching element struct itself, or #f when nothing matches.
;; Leaf elements (h:html-element without content) return the element just
;; like container elements do, so callers always get an element or #f.
(define (get-element-by-id id elem)
  ;; #t when `node` carries an id attribute equal to `id`.
  (define (id-matches? node)
    (let ([val (get-attribute 'id node)])
      (and val (string=? val id))))
  (match elem
    ;; Container element: check the element itself first, then search its
    ;; children in order, stopping at the first hit.
    [(struct h:html-full (_ content))
     (if (id-matches? elem)
         elem
         (for/or ([child (in-list content)])
           (get-element-by-id id child)))]
    ;; Element without content: only the element itself can match.
    [(struct h:html-element (_))
     (and (id-matches? elem) elem)]
    ;; Not an element: nothing to inspect.
    [_ #f]))
;; get-html : url-string -> parsed HTML
;; Fetches the resource at `url-string` with a GET request and parses the
;; response body with h:read-html, returning whatever h:read-html returns.
;; url:call/input-url closes the input port once parsing is done, so the
;; connection is not left open.
(define (get-html url-string)
  (url:call/input-url (url:string->url url-string)
                      url:get-pure-port
                      h:read-html))
;; Demo script: download the Racket `html` library documentation page,
;; find the element whose id is "doc-racket-lang-org", and print its
;; attribute list followed by its content list.
;; The match has no fallback clause, so a lookup result that is not an
;; h:html-full (including #f for "not found") raises a match error.
(let* ([page (get-html "http://docs.racket-lang.org/html/index.html")]
       [target (get-element-by-id "doc-racket-lang-org" page)])
  (match target
    [(struct h:html-full (attrs children))
     (displayln attrs)
     (displayln children)]))
2012年5月28日月曜日
[Racket] HTMLのパース
RacketでHTMLをパースするにはhtmlモジュールとxmlモジュールを使います。
登録:
コメントの投稿 (Atom)
0 件のコメント:
コメントを投稿