#lang racket
;; Fetch an HTML page, parse it with the `html` library, and look up an
;; element by its `id` attribute.
(require (prefix-in url: net/url)
         (prefix-in h: html)
         (prefix-in x: xml))

;; get-attribute : symbol html-element -> attribute value or #f
;; Returns the value of the first attribute of `elem` whose name is `eq?`
;; to `name`, or #f when `elem` carries no such attribute.
(define (get-attribute name elem)
  (for/first ([attr (in-list (h:html-element-attributes elem))]
              #:when (eq? name (x:attribute-name attr)))
    (x:attribute-value attr)))

;; get-element-by-id : string any -> html-element or #f
;; Depth-first search of the parsed tree rooted at `elem` for the first
;; element whose `id` attribute equals `id`.  Returns the element itself,
;; or #f when nothing matches.  Anything that is not an html-element
;; (for example text content) never matches.
(define (get-element-by-id id elem)
  ;; True when `e` has a string-valued `id` attribute equal to `id`.
  ;; The `string?` guard keeps `string=?` from raising on a non-string value.
  (define (id-matches? e)
    (let ([val (get-attribute 'id e)])
      (and (string? val) (string=? val id))))
  (match elem
    ;; html-full is a subtype of html-element, so it must be tried first.
    [(struct h:html-full (_ content))
     (if (id-matches? elem)
         elem
         ;; `for/or` stops at the first child subtree that yields a hit
         ;; and produces #f for an empty content list.
         (for/or ([child (in-list content)])
           (get-element-by-id id child)))]
    ;; Elements without content: return the element itself on a match
    ;; (previously the id string was returned here by mistake).
    [(struct h:html-element (_))
     (and (id-matches? elem) elem)]
    [_ #f]))

;; get-html : string -> html
;; Opens `url-string`, parses the response body with `read-html`, and
;; returns the parsed document.  `call/input-url` closes the port once
;; `read-html` returns, so the connection is not leaked.
(define (get-html url-string)
  (url:call/input-url (url:string->url url-string)
                      url:get-pure-port
                      h:read-html))

;; Demo: print the attributes (and the content, when the element has any)
;; of the element with id "doc-racket-lang-org" on the html library's
;; documentation page.
(let* ([target-id "doc-racket-lang-org"]
       [doc (get-html "http://docs.racket-lang.org/html/index.html")]
       [body (get-element-by-id target-id doc)])
  (match body
    [(struct h:html-full (attributes content))
     (displayln attributes)
     (displayln content)]
    [(struct h:html-element (attributes))
     (displayln attributes)]
    ;; Report a missing element explicitly instead of failing with
    ;; "match: no matching clause".
    [_ (error 'get-element-by-id "no element with id ~s" target-id)]))
2012年5月28日月曜日
[Racket] HTMLのパース
RacketでHTMLをパースするにはhtmlモジュールとxmlモジュールを使います。
登録:
コメントの投稿 (Atom)
0 件のコメント:
コメントを投稿