Wirepas SDK
utf8_p.h
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2017 Intel Corporation
4 **
5 ** Permission is hereby granted, free of charge, to any person obtaining a copy
6 ** of this software and associated documentation files (the "Software"), to deal
7 ** in the Software without restriction, including without limitation the rights
8 ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 ** copies of the Software, and to permit persons to whom the Software is
10 ** furnished to do so, subject to the following conditions:
11 **
12 ** The above copyright notice and this permission notice shall be included in
13 ** all copies or substantial portions of the Software.
14 **
15 ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 ** THE SOFTWARE.
22 **
23 ****************************************************************************/
24 
25 #ifndef CBOR_UTF8_H
26 #define CBOR_UTF8_H
27 
28 #include "compilersupport_p.h"
29 
30 #include <stdint.h>
31 
/*
 * Decodes a single UTF-8 encoded code point from the byte range [*buffer, end).
 *
 * buffer: in/out cursor. On entry it points at the first byte to decode. It is
 *         advanced past every byte this function consumes, on failure as well
 *         as on success.
 * end:    one past the last readable byte. No byte at or beyond this address
 *         is ever read.
 *
 * Returns the decoded code point, or ~0U when the input is empty, truncated,
 * or not well-formed UTF-8 (invalid lead byte, invalid continuation byte,
 * overlong encoding, surrogate code point, or a value above U+10FFFF).
 */
static inline uint32_t get_utf8(const uint8_t **buffer, const uint8_t *end)
{
    int charsNeeded;
    uint32_t uc, min_uc;
    uint8_t b;

    /* bytes available, counted BEFORE the lead byte is consumed */
    ptrdiff_t n = end - *buffer;
    if (n <= 0)
        return ~0U;

    uc = *(*buffer)++;
    if (uc < 0x80) {
        /* single-byte UTF-8 */
        return uc;
    }

    /*
     * multi-byte UTF-8, decode it
     *
     * 0x80..0xBF are continuation bytes and cannot start a sequence;
     * 0xC0 and 0xC1 could only start an overlong two-byte encoding.
     */
    if (uc <= 0xC1)
        return ~0U;
    if (uc < 0xE0) {
        /* two-byte UTF-8 */
        charsNeeded = 2;
        min_uc = 0x80;
        uc &= 0x1f;
    } else if (uc < 0xF0) {
        /* three-byte UTF-8 */
        charsNeeded = 3;
        min_uc = 0x800;
        uc &= 0x0f;
    } else if (uc < 0xF5) {
        /* four-byte UTF-8 */
        charsNeeded = 4;
        min_uc = 0x10000;
        uc &= 0x07;
    } else {
        return ~0U;
    }

    /*
     * n includes the lead byte already consumed above, so the whole sequence
     * fits only when n >= charsNeeded. Comparing against charsNeeded - 1 would
     * let the last continuation byte be read from *end.
     */
    if (n < charsNeeded)
        return ~0U;

    /* first continuation character */
    b = *(*buffer)++;
    if ((b & 0xc0) != 0x80)
        return ~0U;
    uc <<= 6;
    uc |= b & 0x3f;

    if (charsNeeded > 2) {
        /* second continuation character */
        b = *(*buffer)++;
        if ((b & 0xc0) != 0x80)
            return ~0U;
        uc <<= 6;
        uc |= b & 0x3f;

        if (charsNeeded > 3) {
            /* third continuation character */
            b = *(*buffer)++;
            if ((b & 0xc0) != 0x80)
                return ~0U;
            uc <<= 6;
            uc |= b & 0x3f;
        }
    }

    /*
     * overlong sequence? surrogate pair? out of range?
     * (uc - 0xd800U < 2048U is an unsigned range test for 0xD800..0xDFFF)
     */
    if (uc < min_uc || uc - 0xd800U < 2048U || uc > 0x10ffff)
        return ~0U;

    return uc;
}
103 
104 #endif /* CBOR_UTF8_H */
compilersupport_p.h
get_utf8
static uint32_t get_utf8(const uint8_t **buffer, const uint8_t *end)
Definition: utf8_p.h:32
unlikely
#define unlikely(x)
Definition: compilersupport_p.h:189