123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252 |
- /*
- * UPnP XML helper routines
- * Copyright (c) 2000-2003 Intel Corporation
- * Copyright (c) 2006-2007 Sony Corporation
- * Copyright (c) 2008-2009 Atheros Communications
- * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
- *
- * See wps_upnp.c for more details on licensing and code history.
- */
- #include "includes.h"
- #include "common.h"
- #include "base64.h"
- #include "http.h"
- #include "upnp_xml.h"
- /*
- * XML parsing and formatting
- *
- * XML is a markup language based on unicode; usually (and in our case,
- * always!) based on utf-8. utf-8 uses a variable number of bytes per
- * character. utf-8 has the advantage that all non-ASCII unicode characters are
- * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
- * characters are single ascii bytes, thus we can use typical text processing.
- *
- * (One other interesting thing about utf-8 is that it is possible to look at
- * any random byte and determine whether it is the first byte of a character
- * or a continuation byte).
- *
- * The base syntax of XML uses a few ASCII punctuation characters; any
- * characters that would appear in the payload data are rewritten using
- * sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket
- * (<). Five such escapes total (more can be defined but that does not apply
- * to our case). Thus we can safely parse for angle brackets etc.
- *
- * XML describes tree structures of tagged data, with each element beginning
- * with an opening tag <label> and ending with a closing tag </label> with
- * matching label. (There is also a self-closing tag <label/> which is supposed
- * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
- * to see it for our purpose).
- *
- * Actually the opening tags are a little more complicated because they can
- * contain "attributes" after the label (delimited by ascii space or tab chars)
- * of the form attribute_label="value" or attribute_label='value'; as it turns
- * out we do not have to read any of these attributes, just ignore them.
- *
- * Labels are any sequence of chars other than space, tab, right angle bracket
- * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
- * As it turns out, we can ignore the namespaces, in fact we can ignore the
- * entire tree hierarchy, because the plain labels we are looking for will be
- * unique (not in general, but for this application). We do however have to be
- * careful to skip over the namespaces.
- *
- * In generating XML we have to be more careful, but that is easy because
- * everything we do is pretty canned. The only real care to take is to escape
- * any special chars in our payload.
- */
/**
 * xml_next_tag - Advance to next tag
 * @in: Input (NUL-terminated string)
 * @out: OUT: start of tag just after '<'
 * @out_tagname: OUT: start of name of tag, skipping namespace
 * @end: OUT: one after tag (just past the closing '>')
 * Returns: 0 on success, 1 on failure
 *
 * A tag has form:
 *     <left angle bracket><...><right angle bracket>
 * Within the angle brackets, there is an optional leading forward slash (which
 * makes the tag an ending tag), then an optional leading label (followed by
 * colon) and then the tag name itself.
 *
 * Note that angle brackets present in the original data must have been encoded
 * as &lt; and &gt; so they will not trouble us.
 *
 * On failure the OUT parameters are only partially written: if no '<' is
 * found, none of them is touched; if a '<' is found but no matching '>',
 * @out and @out_tagname have been set but @end has not.
 */
static int xml_next_tag(const char *in, const char **out,
			const char **out_tagname, const char **end)
{
	/* Skip payload text up to the next opening angle bracket */
	while (*in && *in != '<')
		in++;
	if (*in != '<')
		return 1;
	*out = ++in;

	/* An ending tag keeps its '/' in *out but not in *out_tagname */
	if (*in == '/')
		in++;
	*out_tagname = in; /* maybe */

	/*
	 * Skip a possible namespace prefix. The <ctype.h> classifiers require
	 * an argument representable as unsigned char (or EOF); plain char may
	 * be signed, so bytes with the high bit set (any non-ASCII UTF-8 byte)
	 * have to be converted first to avoid undefined behavior.
	 */
	while (isalnum((unsigned char) *in) || *in == '-')
		in++;
	if (*in == ':')
		*out_tagname = ++in; /* what we skipped was a namespace */

	/* Ignore everything else (attributes etc.) up to the closing '>' */
	while (*in && *in != '>')
		in++;
	if (*in != '>')
		return 1;
	*end = ++in;
	return 0;
}
- /* xml_data_encode -- format data for xml file, escaping special characters.
- *
- * Note that we assume we are using utf8 both as input and as output!
- * In utf8, characters may be classed as follows:
- * 0xxxxxxx(2) -- 1 byte ascii char
- * 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
- * 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
- * 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
- * 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
- * 10xxxxxx(2) -- extension byte (6 payload bits per byte)
- * Some values implied by the above are however illegal because they
- * do not represent unicode chars or are not the shortest encoding.
- * Actually, we can almost entirely ignore the above and just do
- * text processing same as for ascii text.
- *
- * XML is written with arbitrary unicode characters, except that five
- * characters have special meaning and so must be escaped where they
- * appear in payload data... which we do here.
- */
- void xml_data_encode(struct wpabuf *buf, const char *data, int len)
- {
- int i;
- for (i = 0; i < len; i++) {
- u8 c = ((u8 *) data)[i];
- if (c == '<') {
- wpabuf_put_str(buf, "<");
- continue;
- }
- if (c == '>') {
- wpabuf_put_str(buf, ">");
- continue;
- }
- if (c == '&') {
- wpabuf_put_str(buf, "&");
- continue;
- }
- if (c == '\'') {
- wpabuf_put_str(buf, "'");
- continue;
- }
- if (c == '"') {
- wpabuf_put_str(buf, """);
- continue;
- }
- /*
- * We could try to represent control characters using the
- * sequence: &#x; where x is replaced by a hex numeral, but not
- * clear why we would do this.
- */
- wpabuf_put_u8(buf, c);
- }
- }
- /* xml_add_tagged_data -- format tagged data as a new xml line.
- *
- * tag must not have any special chars.
- * data may have special chars, which are escaped.
- */
- void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
- {
- wpabuf_printf(buf, "<%s>", tag);
- xml_data_encode(buf, data, os_strlen(data));
- wpabuf_printf(buf, "</%s>\n", tag);
- }
/* A POST body looks something like (per upnp spec):
 * <?xml version="1.0"?>
 * <s:Envelope
 *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
 *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
 *   <s:Body>
 *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
 *       <argumentName>in arg value</argumentName>
 *       other in args and their values go here, if any
 *     </u:actionName>
 *   </s:Body>
 * </s:Envelope>
 *
 * where :
 *      s: might be some other namespace name followed by colon
 *      u: might be some other namespace name followed by colon
 *      actionName will be replaced according to action requested
 *      schema following actionName will be WFA scheme instead
 *      argumentName will be actual argument name
 *      (in arg value) will be actual argument value
 */

/**
 * xml_get_first_item - Get the payload of the first element with a given name
 * @doc: XML document to search (NUL-terminated string)
 * @item: Element name to look for, without namespace prefix
 * Returns: Newly allocated, NUL-terminated copy of the text between the
 * matching opening tag and the next '<' (or the end of @doc), or %NULL if no
 * matching opening tag is found or the allocation fails. The buffer comes
 * from os_zalloc(); the caller is responsible for releasing it.
 *
 * The name comparison uses os_strncasecmp() and ignores any namespace prefix
 * on the tag. The returned text is a raw copy of the document bytes; XML
 * escape sequences in it are not decoded here.
 */
char * xml_get_first_item(const char *doc, const char *item)
{
	size_t match_len = os_strlen(item);
	const char *tag, *tagname, *end;
	char *value;

	/*
	 * This is crude: ignore any possible tag name conflicts and go right
	 * to the first tag of this name. This should be ok for the limited
	 * domain of UPnP messages.
	 */
	for (;;) {
		if (xml_next_tag(doc, &tag, &tagname, &end))
			return NULL;
		doc = end;
		/*
		 * Accept only an opening tag whose name is exactly @item,
		 * i.e., the name must be followed by '>' or by something that
		 * is not a printable character (whitespace before attributes).
		 * The cast is needed because isgraph() has undefined behavior
		 * for negative arguments and plain char may be signed.
		 */
		if (!os_strncasecmp(tagname, item, match_len) &&
		    *tag != '/' &&
		    (tagname[match_len] == '>' ||
		     !isgraph((unsigned char) tagname[match_len]))) {
			break;
		}
	}

	/* doc now points just past the opening tag; payload ends at next '<' */
	end = doc;
	while (*end && *end != '<')
		end++;

	/* One extra zeroed byte provides the NUL termination */
	value = os_zalloc(1 + (end - doc));
	if (value == NULL)
		return NULL;
	os_memcpy(value, doc, end - doc);
	return value;
}
- struct wpabuf * xml_get_base64_item(const char *data, const char *name,
- enum http_reply_code *ret)
- {
- char *msg;
- struct wpabuf *buf;
- unsigned char *decoded;
- size_t len;
- msg = xml_get_first_item(data, name);
- if (msg == NULL) {
- *ret = UPNP_ARG_VALUE_INVALID;
- return NULL;
- }
- decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len);
- os_free(msg);
- if (decoded == NULL) {
- *ret = UPNP_OUT_OF_MEMORY;
- return NULL;
- }
- buf = wpabuf_alloc_ext_data(decoded, len);
- if (buf == NULL) {
- os_free(decoded);
- *ret = UPNP_OUT_OF_MEMORY;
- return NULL;
- }
- return buf;
- }
|