upnp_xml.c 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. /*
  2. * UPnP XML helper routines
  3. * Copyright (c) 2000-2003 Intel Corporation
  4. * Copyright (c) 2006-2007 Sony Corporation
  5. * Copyright (c) 2008-2009 Atheros Communications
  6. * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
  7. *
  8. * See wps_upnp.c for more details on licensing and code history.
  9. */
  10. #include "includes.h"
  11. #include "common.h"
  12. #include "base64.h"
  13. #include "http.h"
  14. #include "upnp_xml.h"
  15. /*
  16. * XML parsing and formatting
  17. *
  18. * XML is a markup language based on unicode; usually (and in our case,
  19. * always!) based on utf-8. utf-8 uses a variable number of bytes per
  20. * character. utf-8 has the advantage that all non-ASCII unicode characters are
  21. * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
  22. * characters are single ascii bytes, thus we can use typical text processing.
  23. *
 * (One other interesting thing about utf-8 is that it is possible to look at
 * any random byte and determine whether it is the first byte of a character
 * or a continuation byte).
  27. *
 * The base syntax of XML uses a few ASCII punctuation characters; any such
 * characters that would appear in the payload data are rewritten using
 * escape sequences, e.g., &amp; for ampersand (&) and &lt; for left angle
 * bracket (<).
  31. * Five such escapes total (more can be defined but that does not apply to our
  32. * case). Thus we can safely parse for angle brackets etc.
  33. *
  34. * XML describes tree structures of tagged data, with each element beginning
  35. * with an opening tag <label> and ending with a closing tag </label> with
  36. * matching label. (There is also a self-closing tag <label/> which is supposed
  37. * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
  38. * to see it for our purpose).
  39. *
  40. * Actually the opening tags are a little more complicated because they can
  41. * contain "attributes" after the label (delimited by ascii space or tab chars)
  42. * of the form attribute_label="value" or attribute_label='value'; as it turns
  43. * out we do not have to read any of these attributes, just ignore them.
  44. *
  45. * Labels are any sequence of chars other than space, tab, right angle bracket
  46. * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
  47. * As it turns out, we can ignore the namespaces, in fact we can ignore the
  48. * entire tree hierarchy, because the plain labels we are looking for will be
  49. * unique (not in general, but for this application). We do however have to be
  50. * careful to skip over the namespaces.
  51. *
  52. * In generating XML we have to be more careful, but that is easy because
  53. * everything we do is pretty canned. The only real care to take is to escape
  54. * any special chars in our payload.
  55. */
  56. /**
  57. * xml_next_tag - Advance to next tag
  58. * @in: Input
  59. * @out: OUT: start of tag just after '<'
  60. * @out_tagname: OUT: start of name of tag, skipping namespace
  61. * @end: OUT: one after tag
  62. * Returns: 0 on success, 1 on failure
  63. *
  64. * A tag has form:
  65. * <left angle bracket><...><right angle bracket>
  66. * Within the angle brackets, there is an optional leading forward slash (which
  67. * makes the tag an ending tag), then an optional leading label (followed by
  68. * colon) and then the tag name itself.
  69. *
  70. * Note that angle brackets present in the original data must have been encoded
  71. * as &lt; and &gt; so they will not trouble us.
  72. */
  73. static int xml_next_tag(const char *in, const char **out,
  74. const char **out_tagname, const char **end)
  75. {
  76. while (*in && *in != '<')
  77. in++;
  78. if (*in != '<')
  79. return 1;
  80. *out = ++in;
  81. if (*in == '/')
  82. in++;
  83. *out_tagname = in; /* maybe */
  84. while (isalnum(*in) || *in == '-')
  85. in++;
  86. if (*in == ':')
  87. *out_tagname = ++in;
  88. while (*in && *in != '>')
  89. in++;
  90. if (*in != '>')
  91. return 1;
  92. *end = ++in;
  93. return 0;
  94. }
  95. /* xml_data_encode -- format data for xml file, escaping special characters.
  96. *
  97. * Note that we assume we are using utf8 both as input and as output!
  98. * In utf8, characters may be classed as follows:
  99. * 0xxxxxxx(2) -- 1 byte ascii char
  100. * 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
  101. * 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
  102. * 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
  103. * 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
  104. * 10xxxxxx(2) -- extension byte (6 payload bits per byte)
  105. * Some values implied by the above are however illegal because they
  106. * do not represent unicode chars or are not the shortest encoding.
  107. * Actually, we can almost entirely ignore the above and just do
  108. * text processing same as for ascii text.
  109. *
  110. * XML is written with arbitrary unicode characters, except that five
  111. * characters have special meaning and so must be escaped where they
  112. * appear in payload data... which we do here.
  113. */
  114. void xml_data_encode(struct wpabuf *buf, const char *data, int len)
  115. {
  116. int i;
  117. for (i = 0; i < len; i++) {
  118. u8 c = ((u8 *) data)[i];
  119. if (c == '<') {
  120. wpabuf_put_str(buf, "&lt;");
  121. continue;
  122. }
  123. if (c == '>') {
  124. wpabuf_put_str(buf, "&gt;");
  125. continue;
  126. }
  127. if (c == '&') {
  128. wpabuf_put_str(buf, "&amp;");
  129. continue;
  130. }
  131. if (c == '\'') {
  132. wpabuf_put_str(buf, "&apos;");
  133. continue;
  134. }
  135. if (c == '"') {
  136. wpabuf_put_str(buf, "&quot;");
  137. continue;
  138. }
  139. /*
  140. * We could try to represent control characters using the
  141. * sequence: &#x; where x is replaced by a hex numeral, but not
  142. * clear why we would do this.
  143. */
  144. wpabuf_put_u8(buf, c);
  145. }
  146. }
  147. /* xml_add_tagged_data -- format tagged data as a new xml line.
  148. *
  149. * tag must not have any special chars.
  150. * data may have special chars, which are escaped.
  151. */
  152. void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
  153. {
  154. wpabuf_printf(buf, "<%s>", tag);
  155. xml_data_encode(buf, data, os_strlen(data));
  156. wpabuf_printf(buf, "</%s>\n", tag);
  157. }
  158. /* A POST body looks something like (per upnp spec):
  159. * <?xml version="1.0"?>
  160. * <s:Envelope
  161. * xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
  162. * s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
  163. * <s:Body>
  164. * <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
  165. * <argumentName>in arg value</argumentName>
  166. * other in args and their values go here, if any
  167. * </u:actionName>
  168. * </s:Body>
  169. * </s:Envelope>
  170. *
  171. * where :
  172. * s: might be some other namespace name followed by colon
  173. * u: might be some other namespace name followed by colon
  174. * actionName will be replaced according to action requested
  175. * schema following actionName will be WFA scheme instead
  176. * argumentName will be actual argument name
  177. * (in arg value) will be actual argument value
  178. */
  179. char * xml_get_first_item(const char *doc, const char *item)
  180. {
  181. const char *match = item;
  182. int match_len = os_strlen(item);
  183. const char *tag, *tagname, *end;
  184. char *value;
  185. /*
  186. * This is crude: ignore any possible tag name conflicts and go right
  187. * to the first tag of this name. This should be ok for the limited
  188. * domain of UPnP messages.
  189. */
  190. for (;;) {
  191. if (xml_next_tag(doc, &tag, &tagname, &end))
  192. return NULL;
  193. doc = end;
  194. if (!os_strncasecmp(tagname, match, match_len) &&
  195. *tag != '/' &&
  196. (tagname[match_len] == '>' ||
  197. !isgraph(tagname[match_len]))) {
  198. break;
  199. }
  200. }
  201. end = doc;
  202. while (*end && *end != '<')
  203. end++;
  204. value = os_zalloc(1 + (end - doc));
  205. if (value == NULL)
  206. return NULL;
  207. os_memcpy(value, doc, end - doc);
  208. return value;
  209. }
  210. struct wpabuf * xml_get_base64_item(const char *data, const char *name,
  211. enum http_reply_code *ret)
  212. {
  213. char *msg;
  214. struct wpabuf *buf;
  215. unsigned char *decoded;
  216. size_t len;
  217. msg = xml_get_first_item(data, name);
  218. if (msg == NULL) {
  219. *ret = UPNP_ARG_VALUE_INVALID;
  220. return NULL;
  221. }
  222. decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len);
  223. os_free(msg);
  224. if (decoded == NULL) {
  225. *ret = UPNP_OUT_OF_MEMORY;
  226. return NULL;
  227. }
  228. buf = wpabuf_alloc_ext_data(decoded, len);
  229. if (buf == NULL) {
  230. os_free(decoded);
  231. *ret = UPNP_OUT_OF_MEMORY;
  232. return NULL;
  233. }
  234. return buf;
  235. }