skalibs

Mirror/fork of https://skarnet.org/software/skalibs/
git clone https://ccx.te2000.cz/git/skalibs
Log | Files | Refs | README | LICENSE

memmem.c (6445B)


      1 /* MIT license. See below. */
      2 
      3 #include <skalibs/sysdeps.h>
      4 
      5 #ifndef SKALIBS_HASMEMMEM
      6 
      7 /*
      8    If the underlying platform does not provide memmem(), then the
      9    following implementation is used. It comes from the musl libc,
     10    which is MIT-licensed:
     11  
     12 ----------------------------------------------------------------------
     13 Copyright © 2005-2017 Rich Felker, Szabolcs Nagy, Timo Teräs, Alexander Monakov
     14 
     15 Permission is hereby granted, free of charge, to any person obtaining
     16 a copy of this software and associated documentation files (the
     17 "Software"), to deal in the Software without restriction, including
     18 without limitation the rights to use, copy, modify, merge, publish,
     19 distribute, sublicense, and/or sell copies of the Software, and to
     20 permit persons to whom the Software is furnished to do so, subject to
     21 the following conditions:
     22 
     23 The above copyright notice and this permission notice shall be
     24 included in all copies or substantial portions of the Software.
     25 
     26 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     27 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     28 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     29 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
     30 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     31 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     32 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     33 ----------------------------------------------------------------------
     34 
     35 */
     36 
     37 #include <string.h>
     38 #include <stdint.h>
     39 #include <skalibs/posixplz.h>
     40 
     41 static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
     42 {
     43         uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
     44         for (h+=2, k-=2; k; k--, hw = hw<<8 | *h++)
     45                 if (hw == nw) return (char *)h-2;
     46         return hw == nw ? (char *)h-2 : 0;
     47 }
     48 
     49 static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
     50 {
     51         uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
     52         uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
     53         for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8)
     54                 if (hw == nw) return (char *)h-3;
     55         return hw == nw ? (char *)h-3 : 0;
     56 }
     57 
     58 static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
     59 {
     60         uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
     61         uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
     62         for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++)
     63                 if (hw == nw) return (char *)h-4;
     64         return hw == nw ? (char *)h-4 : 0;
     65 }
     66 
     67 #define MAX(a,b) ((a)>(b)?(a):(b))
     68 #define MIN(a,b) ((a)<(b)?(a):(b))
     69 
     70 #define BITOP(a,b,op) \
     71  ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
     72 
     73 static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l)
     74 {
     75         size_t i, ip, jp, k, p, ms, p0, mem, mem0;
     76         size_t byteset[32 / sizeof(size_t)] = { 0 };
     77         size_t shift[256];
     78 
     79         /* Computing length of needle and fill shift table */
     80         for (i=0; i<l; i++)
     81                 BITOP(byteset, n[i], |=), shift[n[i]] = i+1;
     82 
     83         /* Compute maximal suffix */
     84         ip = -1; jp = 0; k = p = 1;
     85         while (jp+k<l) {
     86                 if (n[ip+k] == n[jp+k]) {
     87                         if (k == p) {
     88                                 jp += p;
     89                                 k = 1;
     90                         } else k++;
     91                 } else if (n[ip+k] > n[jp+k]) {
     92                         jp += k;
     93                         k = 1;
     94                         p = jp - ip;
     95                 } else {
     96                         ip = jp++;
     97                         k = p = 1;
     98                 }
     99         }
    100         ms = ip;
    101         p0 = p;
    102 
    103         /* And with the opposite comparison */
    104         ip = -1; jp = 0; k = p = 1;
    105         while (jp+k<l) {
    106                 if (n[ip+k] == n[jp+k]) {
    107                         if (k == p) {
    108                                 jp += p;
    109                                 k = 1;
    110                         } else k++;
    111                 } else if (n[ip+k] < n[jp+k]) {
    112                         jp += k;
    113                         k = 1;
    114                         p = jp - ip;
    115                 } else {
    116                         ip = jp++;
    117                         k = p = 1;
    118                 }
    119         }
    120         if (ip+1 > ms+1) ms = ip;
    121         else p = p0;
    122 
    123         /* Periodic needle? */
    124         if (memcmp(n, n+p, ms+1)) {
    125                 mem0 = 0;
    126                 p = MAX(ms, l-ms-1) + 1;
    127         } else mem0 = l-p;
    128         mem = 0;
    129 
    130         /* Search loop */
    131         for (;;) {
    132                 /* If remainder of haystack is shorter than needle, done */
    133                 if (z-h < l) return 0;
    134 
    135                 /* Check last byte first; advance by shift on mismatch */
    136                 if (BITOP(byteset, h[l-1], &)) {
    137                         k = l-shift[h[l-1]];
    138                         if (k) {
    139                                 if (mem0 && mem && k < p) k = l-p;
    140                                 h += k;
    141                                 mem = 0;
    142                                 continue;
    143                         }
    144                 } else {
    145                         h += l;
    146                         mem = 0;
    147                         continue;
    148                 }
    149 
    150                 /* Compare right half */
    151                 for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
    152                 if (k < l) {
    153                         h += k-ms;
    154                         mem = 0;
    155                         continue;
    156                 }
    157                 /* Compare left half */
    158                 for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
    159                 if (k <= mem) return (char *)h;
    160                 h += p;
    161                 mem = mem0;
    162         }
    163 }
    164 
    165 void *memmem(const void *h0, size_t k, const void *n0, size_t l)
    166 {
    167         const unsigned char *h = h0, *n = n0;
    168 
    169         /* Return immediately on empty needle */
    170         if (!l) return (void *)h;
    171 
    172         /* Return immediately when needle is longer than haystack */
    173         if (k<l) return 0;
    174 
    175         /* Use faster algorithms for short needles */
    176         h = memchr(h0, *n, k);
    177         if (!h || l==1) return (void *)h;
    178         k -= h - (const unsigned char *)h0;
    179         if (k<l) return 0;
    180         if (l==2) return twobyte_memmem(h, k, n);
    181         if (l==3) return threebyte_memmem(h, k, n);
    182         if (l==4) return fourbyte_memmem(h, k, n);
    183 
    184         return twoway_memmem(h, h+k, n, l);
    185 }
    186 
    187 #endif