[syslinux:lwip] pxe, http: support readdir (ls) over http

syslinux-bot for H. Peter Anvin hpa at zytor.com
Mon May 2 23:42:04 PDT 2011


Commit-ID:  40068cfcbf723c9363194bf20a80b80a53c249fd
Gitweb:     http://syslinux.zytor.com/commit/40068cfcbf723c9363194bf20a80b80a53c249fd
Author:     H. Peter Anvin <hpa at zytor.com>
AuthorDate: Mon, 2 May 2011 23:36:41 -0700
Committer:  H. Peter Anvin <hpa at zytor.com>
CommitDate: Mon, 2 May 2011 23:39:43 -0700

pxe, http: support readdir (ls) over http

Use a heuristic http index parser (which is assumed to work with most
webserver-generated indices) to support ls over http.

Signed-off-by: H. Peter Anvin <hpa at zytor.com>


---
 NEWS                       |    3 +-
 com32/lib/Makefile         |    1 +
 core/fs/pxe/http.c         |    8 +-
 core/fs/pxe/http_readdir.c |  471 ++++++++++++++++++++++++++++++++++++++++++++
 core/fs/pxe/pxe.c          |   18 ++-
 core/fs/pxe/pxe.h          |    4 +
 6 files changed, 501 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index 3474c80..4fa140f 100644
--- a/NEWS
+++ b/NEWS
@@ -5,7 +5,8 @@ to all derivatives.
 Changes in 4.10:
 	* PXELINUX: An entirely new network implementation based on
 	  the lwIP embedded TCP/IP stack.  As a result, plain PXELINUX
-	  can now support HTTP and FTP without gPXE/iPXE.
+	  can now support HTTP and FTP without gPXE/iPXE.  ls/readdir
+	  functionality is supported over http with an indexing webserver.
 	* Rename the "ipappend" option to "sysappend" ("ipappend" is
 	  still accepted as an alias) and make it available for all
 	  derivatives.  Add additional strings derived from the system
diff --git a/com32/lib/Makefile b/com32/lib/Makefile
index 20002ac..7f1c499 100644
--- a/com32/lib/Makefile
+++ b/com32/lib/Makefile
@@ -133,6 +133,7 @@ LIBCOREOBJS = 	\
 	memcpy.o mempcpy.o memset.o memcmp.o memmove.o			\
 	strlen.o stpcpy.o strcpy.o strcmp.o strlcpy.o strlcat.o		\
 	strchr.o strncmp.o strncpy.o					\
+	ctypes.o strtoul.o strntoumax.o					\
 	\
 	asprintf.o snprintf.o sprintf.o vsnprintf.o			\
 	\
diff --git a/core/fs/pxe/http.c b/core/fs/pxe/http.c
index 3f2bb15..1fd87aa 100644
--- a/core/fs/pxe/http.c
+++ b/core/fs/pxe/http.c
@@ -145,6 +145,12 @@ void http_bake_cookies(void)
     http_do_bake_cookies(cookie_buf);
 }
 
+static const struct pxe_conn_ops http_conn_ops = {	/* Ops that http_open() installs on the socket */
+    .fill_buffer	= tcp_fill_buffer,
+    .close		= tcp_close_file,
+    .readdir		= http_readdir,	/* Directory listing, see http_readdir.c */
+};
+
 void http_open(struct url_info *url, int flags, struct inode *inode,
 	       const char **redir)
 {
@@ -179,7 +185,7 @@ void http_open(struct url_info *url, int flags, struct inode *inode,
 	return;			/* http is broken... */
 
     /* This is a straightforward TCP connection after headers */
-    socket->ops = &tcp_conn_ops;
+    socket->ops = &http_conn_ops;
 
     /* Reset all of the variables */
     inode->size = content_length = -1;
diff --git a/core/fs/pxe/http_readdir.c b/core/fs/pxe/http_readdir.c
new file mode 100644
index 0000000..b6e480e
--- /dev/null
+++ b/core/fs/pxe/http_readdir.c
@@ -0,0 +1,471 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2011 Intel Corporation; author: H. Peter Anvin
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ *   Boston MA 02110-1301, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <inttypes.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <dprintf.h>
+#include "pxe.h"
+
+enum http_readdir_state {	/* Values are used as indices into statemachine[] */
+    st_start,			/*  0 Initial state */
+    st_open,			/*  1 "<" */
+    st_a,			/*  2 "<a" */
+    st_attribute,		/*  3 "<a " */
+    st_h,			/*  4 "<a h" */
+    st_hr,			/*  5 "<a hr" */
+    st_hre,			/*  6 "<a hre" */
+    st_href,			/*  7 "<a href" */
+    st_hrefeq,			/*  8 "<a href=" (unquoted value follows) */
+    st_hrefqu,			/*  9 "<a href=\"" (inside the quoted value) */
+    st_badtag,			/* 10 Inside a tag that is not <a> */
+    st_badtagqu,		/* 11 Quoted string inside such a tag */
+    st_badattr,			/* 12 Inside an <a> attribute that is not href */
+    st_badattrqu,		/* 13 Quoted string inside such an attribute */
+};
+
+struct machine {		/* One transition-table row: next state per input class */
+    char xchar;			/* State-specific character, matched against the lowercased input */
+    uint8_t st_xchar;		/* Next state when the input matches xchar */
+    uint8_t st_left;		/* < */
+    uint8_t st_right;		/* > */
+    uint8_t st_space;		/* white */
+    uint8_t st_other;		/* anything else */
+};
+
+static const struct machine statemachine[] = {	/* One row per enum http_readdir_state, in enum order */
+    /* xchar	st_xchar	st_left		st_right	st_space	st_other */
+    { 0,	0,		st_open,	st_start,	st_start,	st_start },	/* st_start */
+    { 'a',	st_a,		st_badtag,	st_start,	st_open,	st_badtag },	/* st_open */
+    { 0,	0,		st_open,	st_open,	st_attribute,	st_badtag },	/* st_a; NOTE(review): '>' leads to st_open here but st_start in other rows - confirm */
+    { 'h',	st_h,		st_open,	st_start,	st_attribute,	st_badattr },	/* st_attribute */
+    { 'r',	st_hr,		st_open,	st_start,	st_attribute,	st_badattr },	/* st_h */
+    { 'e',	st_hre,		st_open,	st_start,	st_attribute,	st_badattr },	/* st_hr */
+    { 'f',	st_href,	st_open,	st_start,	st_attribute,	st_badattr },	/* st_hre */
+    { '=',	st_hrefeq,	st_open,	st_start,	st_attribute,	st_badattr },	/* st_href */
+    { '\"',	st_hrefqu,	st_open,	st_start,	st_attribute,	st_hrefeq },	/* st_hrefeq */
+    { '\"',	st_attribute,	st_hrefqu,	st_hrefqu,	st_hrefqu,	st_hrefqu },	/* st_hrefqu: only '"' leaves */
+    { '\"',	st_badtagqu,	st_open,	st_start,	st_badtag,	st_badtag },	/* st_badtag */
+    { '\"',	st_badtag,	st_badtagqu,	st_badtagqu,	st_badtagqu,	st_badtagqu },	/* st_badtagqu: only '"' leaves */
+    { '\"',	st_badattrqu,	st_open,	st_start,	st_attribute,	st_badattr },	/* st_badattr */
+    { '\"',	st_attribute,	st_badattrqu,	st_badattrqu,	st_badattrqu,	st_badattrqu },	/* st_badattrqu: only '"' leaves */
+};
+
+struct html_entity {		/* One named HTML character entity */
+    uint16_t ucs;		/* Code point the entity decodes to */
+    const char entity[9];	/* Entity name, without the '&' and ';' */
+};
+
+static const struct html_entity entities[] = {	/* Searched linearly by emit(); the ucs == 0 entry ends the table */
+    {   34, "quot" },
+    {   38, "amp" },
+    {   60, "lt" },
+    {   62, "gt" },
+#ifdef HTTP_ALL_ENTITIES
+    {  160, "nbsp" },
+    {  161, "iexcl" },
+    {  162, "cent" },
+    {  163, "pound" },
+    {  164, "curren" },
+    {  165, "yen" },
+    {  166, "brvbar" },
+    {  167, "sect" },
+    {  168, "uml" },
+    {  169, "copy" },
+    {  170, "ordf" },
+    {  171, "laquo" },
+    {  172, "not" },
+    {  173, "shy" },
+    {  174, "reg" },
+    {  175, "macr" },
+    {  176, "deg" },
+    {  177, "plusmn" },
+    {  178, "sup2" },
+    {  179, "sup3" },
+    {  180, "acute" },
+    {  181, "micro" },
+    {  182, "para" },
+    {  183, "middot" },
+    {  184, "cedil" },
+    {  185, "sup1" },
+    {  186, "ordm" },
+    {  187, "raquo" },
+    {  188, "frac14" },
+    {  189, "frac12" },
+    {  190, "frac34" },
+    {  191, "iquest" },
+    {  192, "Agrave" },
+    {  193, "Aacute" },
+    {  194, "Acirc" },
+    {  195, "Atilde" },
+    {  196, "Auml" },
+    {  197, "Aring" },
+    {  198, "AElig" },
+    {  199, "Ccedil" },
+    {  200, "Egrave" },
+    {  201, "Eacute" },
+    {  202, "Ecirc" },
+    {  203, "Euml" },
+    {  204, "Igrave" },
+    {  205, "Iacute" },
+    {  206, "Icirc" },
+    {  207, "Iuml" },
+    {  208, "ETH" },
+    {  209, "Ntilde" },
+    {  210, "Ograve" },
+    {  211, "Oacute" },
+    {  212, "Ocirc" },
+    {  213, "Otilde" },
+    {  214, "Ouml" },
+    {  215, "times" },
+    {  216, "Oslash" },
+    {  217, "Ugrave" },
+    {  218, "Uacute" },
+    {  219, "Ucirc" },
+    {  220, "Uuml" },
+    {  221, "Yacute" },
+    {  222, "THORN" },
+    {  223, "szlig" },
+    {  224, "agrave" },
+    {  225, "aacute" },
+    {  226, "acirc" },
+    {  227, "atilde" },
+    {  228, "auml" },
+    {  229, "aring" },
+    {  230, "aelig" },
+    {  231, "ccedil" },
+    {  232, "egrave" },
+    {  233, "eacute" },
+    {  234, "ecirc" },
+    {  235, "euml" },
+    {  236, "igrave" },
+    {  237, "iacute" },
+    {  238, "icirc" },
+    {  239, "iuml" },
+    {  240, "eth" },
+    {  241, "ntilde" },
+    {  242, "ograve" },
+    {  243, "oacute" },
+    {  244, "ocirc" },
+    {  245, "otilde" },
+    {  246, "ouml" },
+    {  247, "divide" },
+    {  248, "oslash" },
+    {  249, "ugrave" },
+    {  250, "uacute" },
+    {  251, "ucirc" },
+    {  252, "uuml" },
+    {  253, "yacute" },
+    {  254, "thorn" },
+    {  255, "yuml" },
+    {  338, "OElig" },
+    {  339, "oelig" },
+    {  352, "Scaron" },
+    {  353, "scaron" },
+    {  376, "Yuml" },
+    {  402, "fnof" },
+    {  710, "circ" },
+    {  732, "tilde" },
+    {  913, "Alpha" },
+    {  914, "Beta" },
+    {  915, "Gamma" },
+    {  916, "Delta" },
+    {  917, "Epsilon" },
+    {  918, "Zeta" },
+    {  919, "Eta" },
+    {  920, "Theta" },
+    {  921, "Iota" },
+    {  922, "Kappa" },
+    {  923, "Lambda" },
+    {  924, "Mu" },
+    {  925, "Nu" },
+    {  926, "Xi" },
+    {  927, "Omicron" },
+    {  928, "Pi" },
+    {  929, "Rho" },
+    {  931, "Sigma" },
+    {  932, "Tau" },
+    {  933, "Upsilon" },
+    {  934, "Phi" },
+    {  935, "Chi" },
+    {  936, "Psi" },
+    {  937, "Omega" },
+    {  945, "alpha" },
+    {  946, "beta" },
+    {  947, "gamma" },
+    {  948, "delta" },
+    {  949, "epsilon" },
+    {  950, "zeta" },
+    {  951, "eta" },
+    {  952, "theta" },
+    {  953, "iota" },
+    {  954, "kappa" },
+    {  955, "lambda" },
+    {  956, "mu" },
+    {  957, "nu" },
+    {  958, "xi" },
+    {  959, "omicron" },
+    {  960, "pi" },
+    {  961, "rho" },
+    {  962, "sigmaf" },
+    {  963, "sigma" },
+    {  964, "tau" },
+    {  965, "upsilon" },
+    {  966, "phi" },
+    {  967, "chi" },
+    {  968, "psi" },
+    {  969, "omega" },
+    {  977, "thetasym" },
+    {  978, "upsih" },
+    {  982, "piv" },
+    { 8194, "ensp" },
+    { 8195, "emsp" },
+    { 8201, "thinsp" },
+    { 8204, "zwnj" },
+    { 8205, "zwj" },
+    { 8206, "lrm" },
+    { 8207, "rlm" },
+    { 8211, "ndash" },
+    { 8212, "mdash" },
+    { 8216, "lsquo" },
+    { 8217, "rsquo" },
+    { 8218, "sbquo" },
+    { 8220, "ldquo" },
+    { 8221, "rdquo" },
+    { 8222, "bdquo" },
+    { 8224, "dagger" },
+    { 8225, "Dagger" },
+    { 8226, "bull" },
+    { 8230, "hellip" },
+    { 8240, "permil" },
+    { 8242, "prime" },
+    { 8243, "Prime" },
+    { 8249, "lsaquo" },
+    { 8250, "rsaquo" },
+    { 8254, "oline" },
+    { 8260, "frasl" },
+    { 8364, "euro" },
+    { 8465, "image" },
+    { 8472, "weierp" },
+    { 8476, "real" },
+    { 8482, "trade" },
+    { 8501, "alefsym" },
+    { 8592, "larr" },
+    { 8593, "uarr" },
+    { 8594, "rarr" },
+    { 8595, "darr" },
+    { 8596, "harr" },
+    { 8629, "crarr" },
+    { 8656, "lArr" },
+    { 8657, "uArr" },
+    { 8658, "rArr" },
+    { 8659, "dArr" },
+    { 8660, "hArr" },
+    { 8704, "forall" },
+    { 8706, "part" },
+    { 8707, "exist" },
+    { 8709, "empty" },
+    { 8711, "nabla" },
+    { 8712, "isin" },
+    { 8713, "notin" },
+    { 8715, "ni" },
+    { 8719, "prod" },
+    { 8721, "sum" },
+    { 8722, "minus" },
+    { 8727, "lowast" },
+    { 8730, "radic" },
+    { 8733, "prop" },
+    { 8734, "infin" },
+    { 8736, "ang" },
+    { 8743, "and" },
+    { 8744, "or" },
+    { 8745, "cap" },
+    { 8746, "cup" },
+    { 8747, "int" },
+    { 8756, "there4" },
+    { 8764, "sim" },
+    { 8773, "cong" },
+    { 8776, "asymp" },
+    { 8800, "ne" },
+    { 8801, "equiv" },
+    { 8804, "le" },
+    { 8805, "ge" },
+    { 8834, "sub" },
+    { 8835, "sup" },
+    { 8836, "nsub" },
+    { 8838, "sube" },
+    { 8839, "supe" },
+    { 8853, "oplus" },
+    { 8855, "otimes" },
+    { 8869, "perp" },
+    { 8901, "sdot" },
+    { 8968, "lceil" },
+    { 8969, "rceil" },
+    { 8970, "lfloor" },
+    { 8971, "rfloor" },
+    { 9001, "lang" },
+    { 9002, "rang" },
+    { 9674, "loz" },
+    { 9824, "spades" },
+    { 9827, "clubs" },
+    { 9829, "hearts" },
+    { 9830, "diams" },
+#endif /* HTTP_ALL_ENTITIES */
+    { 0, "" }	/* Terminator */
+};
+
+struct entity_state {		/* Entity-decoding state carried between emit() calls */
+    char entity_buf[16];	/* Characters collected between '&' and ';' */
+    char *ep;			/* Next free slot in entity_buf; NULL when not inside an entity */
+};
+
+static char *emit(char *p, int c, struct entity_state *st)
+{   /* Append c at p, decoding "&...;" entities to UTF-8 (up to 4 bytes); returns the new p */
+    const struct html_entity *ent;
+    unsigned int ucs;
+
+    if (!st->ep) {
+	if (c == '&') {
+	    /* Entity open */
+	    st->ep = st->entity_buf;
+	} else {
+	    *p++ = c;
+	}
+    } else {
+	if (c == ';') {
+	    *st->ep = '\0';	/* Terminate the collected entity text, not the output */
+	    st->ep = NULL;	/* Entity closed */
+	    if (st->entity_buf[0] == '#') {
+		if ((st->entity_buf[1] | 0x20) == 'x') {
+		    ucs = strtoul(st->entity_buf + 2, NULL, 16);
+		} else {
+		    ucs = strtoul(st->entity_buf + 1, NULL, 10);
+		}
+	    } else {
+		for (ent = entities; ent->ucs; ent++) {
+		    if (!strcmp(st->entity_buf, ent->entity))
+			break;
+		}
+		ucs = ent->ucs;	/* 0 (the terminator) when the name is unknown */
+	    }
+	    if (ucs < 32 || ucs > 0x10ffff)	/* U+10FFFF itself is a valid code point */
+		return p;	/* Bogus */
+	    if (ucs >= 0x10000) {
+		*p++ = 0xf0 + (ucs >> 18);
+		*p++ = 0x80 + ((ucs >> 12) & 0x3f);
+		*p++ = 0x80 + ((ucs >> 6) & 0x3f);
+		*p++ = 0x80 + (ucs & 0x3f);
+	    } else if (ucs >= 0x800) {
+		*p++ = 0xe0 + (ucs >> 12);
+		*p++ = 0x80 + ((ucs >> 6) & 0x3f);
+		*p++ = 0x80 + (ucs & 0x3f);
+	    } else if (ucs >= 0x80) {
+		*p++ = 0xc0 + (ucs >> 6);
+		*p++ = 0x80 + (ucs & 0x3f);
+	    } else {
+		*p++ = ucs;
+	    }
+	} else if (st->ep < st->entity_buf + sizeof st->entity_buf - 1) {
+	    *st->ep++ = c;	/* Collect; one slot stays free for the NUL, excess is dropped */
+	}
+    }
+    return p;
+}
+
+static const char *http_get_filename(struct inode *inode, char *buf)
+{   /* Scan input for the next <a href=...> value, store it in buf; NULL when pxe_getc() gives -1 */
+    int c, lc;
+    char *p;
+    const struct machine *sm;
+    struct entity_state es;
+    enum http_readdir_state state = st_start;
+    enum http_readdir_state pstate = st_start;	/* Last state if it was an href-value state, else st_start */
+
+    memset(&es, 0, sizeof es);
+
+    p = buf;
+    for (;;) {
+	c = pxe_getc(inode);
+	if (c == -1)
+	    return NULL;	/* http_readdir() treats this as end of directory */
+
+	lc = tolower(c);	/* Only the xchar comparison is case-insensitive */
+
+	sm = &statemachine[state];
+
+	if (lc == sm->xchar)
+	    state = sm->st_xchar;
+	else if (c == '<')
+	    state = sm->st_left;
+	else if (c == '>')
+	    state = sm->st_right;
+	else if (isspace(c))
+	    state = sm->st_space;
+	else
+	    state = sm->st_other;
+
+	if (state == st_hrefeq || state == st_hrefqu) {
+	    if (state != pstate)
+		p = buf;	/* Just entered the value (or its opening quote): restart the name */
+	    else if (p < buf + FILENAME_MAX)
+		p = emit(p, c, &es);	/* Characters past the limit are dropped */
+	    pstate = state;
+	} else {
+	    if (pstate != st_start)
+		pstate = st_start;
+	    if (p != buf && state == st_start) {
+		*p = '\0';	/* Back in st_start with a non-empty name: return it */
+		return buf;
+	    }
+	}
+    }
+}
+
+int http_readdir(struct inode *inode, struct dirent *dirent)
+{   /* Fill *dirent from the next acceptable link name; returns 0, or -1 when no names remain */
+    char buf[FILENAME_MAX + 6];	/* Slack: http_get_filename() can write a few bytes past FILENAME_MAX */
+    const char *fn, *sp;
+
+    for (;;) {
+	fn = http_get_filename(inode, buf);
+
+	if (!fn)
+	    return -1;		/* End of directory */
+
+	/* Ignore entries with http special characters */
+	if (strchr(fn, '#'))
+	    continue;
+	if (strchr(fn, '?'))
+	    continue;
+
+	/* A slash if present has to be the last character, and not the first */
+	sp = strchr(fn, '/');
+	if (sp) {
+	    if (sp == fn || sp[1])
+		continue;
+	} else {
+	    sp = strchr(fn, '\0');	/* No slash: sp marks the end of the name */
+	}
+
+	if (sp > fn + NAME_MAX)	/* Skip names longer than NAME_MAX */
+	    continue;
+
+	dirent->d_ino = 0;	/* Not applicable */
+	dirent->d_off = 0;	/* Not applicable */
+	dirent->d_reclen = offsetof(struct dirent, d_name) + (sp-fn) + 1;
+	dirent->d_type = *sp == '/' ? DT_DIR : DT_REG;	/* Trailing slash marks a directory */
+	memcpy(dirent->d_name, fn, sp-fn);	/* The trailing slash itself is not copied */
+	dirent->d_name[sp-fn] = '\0';
+	return 0;
+    }
+}
diff --git a/core/fs/pxe/pxe.c b/core/fs/pxe/pxe.c
index f54e595..acefac9 100644
--- a/core/fs/pxe/pxe.c
+++ b/core/fs/pxe/pxe.c
@@ -367,10 +367,12 @@ static void __pxe_searchdir(const char *filename, int flags, struct file *file)
 #endif
     }
 
-    if (inode->size)
+    if (inode->size) {
 	file->inode = inode;
-    else
+	file->inode->mode = (flags & O_DIRECTORY) ? DT_DIR : DT_REG;
+    } else {
         free_socket(inode);
+    }
 
     return;
 }
@@ -1091,6 +1093,17 @@ cant_free:
     return;
 }
 
+static int pxe_readdir(struct file *file, struct dirent *dirent)
+{   /* Forward readdir to the connection's ops table; -1 if it has no readdir hook */
+    struct inode *inode = file->inode;
+    struct pxe_pvt_inode *socket = PVT(inode);
+
+    if (socket->ops->readdir)
+	return socket->ops->readdir(inode, dirent);
+    else
+	return -1;		/* No such operation */
+}
+
 const struct fs_ops pxe_fs_ops = {
     .fs_name       = "pxe",
     .fs_flags      = FS_NODEV,
@@ -1102,4 +1115,5 @@ const struct fs_ops pxe_fs_ops = {
     .close_file    = pxe_close_file,
     .mangle_name   = pxe_mangle_name,
     .load_config   = pxe_load_config,
+    .readdir	   = pxe_readdir,
 };
diff --git a/core/fs/pxe/pxe.h b/core/fs/pxe/pxe.h
index 6c84d8b..4c7e8e6 100644
--- a/core/fs/pxe/pxe.h
+++ b/core/fs/pxe/pxe.h
@@ -111,6 +111,7 @@ struct netbuf;
 struct pxe_conn_ops {
     void (*fill_buffer)(struct inode *inode);
     void (*close)(struct inode *inode);
+    int (*readdir)(struct inode *inode, struct dirent *dirent);
 };    
 
 struct pxe_pvt_inode {
@@ -222,6 +223,9 @@ void http_open(struct url_info *url, int flags, struct inode *inode,
 	       const char **redir);
 void http_bake_cookies(void);
 
+/* http_readdir.c */
+int http_readdir(struct inode *inode, struct dirent *dirent);
+
 /* ftp.c */
 void ftp_open(struct url_info *url, int flags, struct inode *inode,
 	      const char **redir);



More information about the Syslinux-commits mailing list