From e95491b3a2261aecdc5576a7e507b4f4ace88cbc Mon Sep 17 00:00:00 2001
From: Yikun Jiang <yikunkero@gmail.com>
Date: Mon, 20 Jul 2020 17:20:15 +0800
Subject: [PATCH] Add aarch64 support
Signed-off-by: Liu Zixian <liuzixian4@huawei.com>
---
CMakeLists.txt | 108 +-
cmake/config.h.in | 9 +
cmake/platform.cmake | 13 +-
cmake/ragel.cmake | 20 +
src/crc32.c | 43 +
src/fdr/fdr.c | 136 ++-
src/hs_valid_platform.c | 9 +-
src/nfa/limex_exceptional.h | 22 +-
src/nfa/limex_internal.h | 2 +-
src/nfa/limex_native.c | 10 +-
src/nfa/shufti.c | 18 +-
src/nfa/truffle.c | 10 +-
src/parser/control_verbs.cpp | 340 +++++++
src/rose/counting_miracle.h | 2 +-
src/util/arch.h | 11 +
src/util/cpuid_flags.c | 6 +
src/util/cpuid_flags.h | 2 +
src/util/cpuid_inline.h | 17 +-
src/util/intrinsics.h | 12 +
src/util/popcount.h | 6 +-
src/util/simd_arm.h | 1069 ++++++++++++++++++++
src/util/simd_types.h | 17 +
src/util/simd_utils.h | 13 +
src/util/simd_x86.h | 10 +
tools/hscollider/CMakeLists.txt | 9 +-
tools/hscollider/ColliderCorporaParser.cpp | 474 +++++++++
unit/internal/simd_utils.cpp | 2 +-
util/CMakeLists.txt | 8 +-
util/ExpressionParser.cpp | 397 ++++++++
29 files changed, 2717 insertions(+), 78 deletions(-)
create mode 100644 src/parser/control_verbs.cpp
create mode 100644 src/util/simd_arm.h
create mode 100644 src/util/simd_utils.h
create mode 100644 tools/hscollider/ColliderCorporaParser.cpp
create mode 100644 util/ExpressionParser.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index bd6d2de..8dbcb72 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,6 +74,7 @@ include (${CMAKE_MODULE_PATH}/boost.cmake)
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
find_package(PythonInterp)
find_program(RAGEL ragel)
+find_program(COPY cp)
if(PYTHONINTERP_FOUND)
set(PYTHON ${PYTHON_EXECUTABLE})
@@ -189,24 +190,30 @@ else()
# cpuid info and then chooses the best microarch it can (and replaces
# the flag), so use that for tune.
- # arg1 might exist if using ccache
- string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
- set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
- execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
- OUTPUT_VARIABLE _GCC_OUTPUT)
- string(FIND "${_GCC_OUTPUT}" "march" POS)
- string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
- string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1"
- GNUCC_ARCH "${_GCC_OUTPUT}")
-
- # test the parsed flag
- set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
- execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
- OUTPUT_QUIET ERROR_QUIET
- INPUT_FILE /dev/null
- RESULT_VARIABLE GNUCC_TUNE_TEST)
- if (NOT GNUCC_TUNE_TEST EQUAL 0)
- message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid")
+ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
+ # arg1 might exist if using ccache
+ string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
+ set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
+ execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
+ OUTPUT_VARIABLE _GCC_OUTPUT)
+ string(FIND "${_GCC_OUTPUT}" "march" POS)
+ string(SUBSTRING "${_GCC_OUTPUT}" ${POS} -1 _GCC_OUTPUT)
+ string(REGEX REPLACE "march=[ \t]*([^ \n]*)[ \n].*" "\\1"
+ GNUCC_ARCH "${_GCC_OUTPUT}")
+
+ # test the parsed flag
+ set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
+ execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
+ OUTPUT_QUIET ERROR_QUIET
+ INPUT_FILE /dev/null
+ RESULT_VARIABLE GNUCC_TUNE_TEST)
+ if (NOT GNUCC_TUNE_TEST EQUAL 0)
+ message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid")
+ endif()
+ endif()
+
+ if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=armv8-a -mtune=armv8-a)
endif()
set(TUNE_FLAG ${GNUCC_ARCH})
else ()
@@ -239,6 +246,13 @@ else()
set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")
+ if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fsigned-char")
+ set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fsigned-char")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc")
+ endif()
+
if (NOT RELEASE_BUILD)
# -Werror is most useful during development, don't potentially break
# release builds
@@ -252,11 +266,19 @@ else()
endif()
if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
- set(ARCH_C_FLAGS "-march=native -mtune=${TUNE_FLAG}")
+ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
+ set(ARCH_C_FLAGS "-march=native -mtune=${TUNE_FLAG}")
+ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ set(ARCH_C_FLAGS "-march=armv8-a -mtune=${TUNE_FLAG}")
+ endif ()
endif()
if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
- set(ARCH_CXX_FLAGS "-march=native -mtune=${TUNE_FLAG}")
+ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
+ set(ARCH_CXX_FLAGS "-march=native -mtune=${TUNE_FLAG}")
+ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ set(ARCH_CXX_FLAGS "-march=armv8-a -mtune=${TUNE_FLAG}")
+ endif()
endif()
if(CMAKE_COMPILER_IS_GNUCC)
@@ -289,10 +311,18 @@ else()
endif()
CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
-CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H)
-CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
-CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
-CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)
+
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
+ CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H)
+ CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
+ CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
+ CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)
+endif()
+
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ CHECK_INCLUDE_FILES(arm_neon.h HAVE_C_ARM_NEON_H)
+ CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_CXX_ARM_NEON_H)
+endif()
CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC)
@@ -325,6 +355,9 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
(CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja")))
message (STATUS "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher")
set (FAT_RUNTIME_REQUISITES FALSE)
+ elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ message(STATUS "AARCH64 platform don't support fat runtime")
+ set (FAT_RUNTIME_REQUISITES FALSE)
else()
include (${CMAKE_MODULE_PATH}/attrib.cmake)
if (NOT HAS_C_ATTR_IFUNC)
@@ -337,7 +370,9 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitectures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
endif ()
-include (${CMAKE_MODULE_PATH}/arch.cmake)
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
+ include (${CMAKE_MODULE_PATH}/arch.cmake)
+endif()
# testing a builtin takes a little more work
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
@@ -415,12 +450,6 @@ if (CXX_IGNORED_ATTR)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-ignored-attributes")
endif()
-# gcc 9 complains about redundant move for returned variable
-CHECK_CXX_COMPILER_FLAG("-Wredundant-move" CXX_REDUNDANT_MOVE)
-if (CXX_REDUNDANT_MOVE)
- set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-redundant-move")
-endif()
-
# note this for later
# g++ doesn't have this flag but clang does
CHECK_CXX_COMPILER_FLAG("-Wweak-vtables" CXX_WEAK_VTABLES)
@@ -477,6 +506,14 @@ else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
+# Test case for neon function.
+option(UNIT_SIMD "Simd funtion test case, default is OFF" OFF)
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ if (UNIT_SIMD)
+ add_subdirectory(unit-simd)
+ endif()
+endif()
+
add_subdirectory(util)
add_subdirectory(doc/dev-reference)
@@ -573,7 +610,14 @@ set_source_files_properties(
PROPERTIES
COMPILE_FLAGS "${RAGEL_C_FLAGS}")
-ragelmaker(src/parser/control_verbs.rl)
+
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
+ ragelmaker(src/parser/control_verbs.rl)
+endif()
+
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ ragelcopyer(src/parser/control_verbs.rl)
+endif()
SET(hs_HEADERS
src/hs.h
diff --git a/cmake/config.h.in b/cmake/config.h.in
index 5454643..336cf19 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -15,6 +15,9 @@
/* "Define if building for EM64T" */
#cmakedefine ARCH_X86_64
+/* "Define if building for aarch64" */
+#cmakedefine ARCH_AARCH64
+
/* internal build, switch on dump support. */
#cmakedefine DUMP_SUPPORT
@@ -48,6 +51,12 @@
/* C compiler has intrin.h */
#cmakedefine HAVE_C_INTRIN_H
+/* C++ compiler has arm_neon.h */
+#cmakedefine HAVE_CXX_ARM_NEON_H
+
+/* C compiler has arm_neon.h */
+#cmakedefine HAVE_C_ARM_NEON_H
+
/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
diff --git a/cmake/platform.cmake b/cmake/platform.cmake
index 593c544..213dcc5 100644
--- a/cmake/platform.cmake
+++ b/cmake/platform.cmake
@@ -1,9 +1,14 @@
# determine the target arch
# really only interested in the preprocessor here
-CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_64_BIT)
+CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_X86_64)
-CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_32_BIT)
+CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32)
-set(ARCH_X86_64 ${ARCH_64_BIT})
-set(ARCH_IA32 ${ARCH_32_BIT})
+CHECK_C_SOURCE_COMPILES("#if !(defined(__aarch64__))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)
+
+if (ARCH_X86_64 OR ARCH_AARCH64)
+ set(ARCH_64_BIT 1)
+elseif (ARCH_IA32)
+ set(ARCH_32_BIT 1)
+endif()
\ No newline at end of file
diff --git a/cmake/ragel.cmake b/cmake/ragel.cmake
index d3f0b92..3356cb9 100644
--- a/cmake/ragel.cmake
+++ b/cmake/ragel.cmake
@@ -14,3 +14,23 @@ function(ragelmaker src_rl)
set_source_files_properties(${rl_out} PROPERTIES GENERATED TRUE)
endfunction(ragelmaker)
+ # On the aarch64 platform, char is unsigned by default, so -fsigned-char is added
+ # to the compile options to force char to be signed, consistent with x86.
+ # However, when ragel generates C++ code, the char variables it emits are still
+ # treated as unsigned, so their values overflow in the generated code and cause
+ # errors.
+ # This function copies the previously corrected generated code to the given path.
+
+ function(ragelcopyer src_rl)
+ get_filename_component(src_dir ${src_rl} PATH) # old cmake needs PATH
+ get_filename_component(src_file ${src_rl} NAME_WE)
+ set(rl_out ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}/${src_file}.cpp)
+ add_custom_command(
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}/${src_file}.cpp
+ COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}
+ COMMAND ${COPY} -f ${CMAKE_CURRENT_SOURCE_DIR}/${src_dir}/${src_file}.cpp ${rl_out} 2>/dev/null ||:
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src_dir}/${src_file}.cpp
+ )
+ add_custom_target(ragel_${src_file} DEPENDS ${rl_out})
+ set_source_files_properties(${rl_out} PROPERTIES GENERATED TRUE)
+ endfunction(ragelcopyer)
\ No newline at end of file
diff --git a/src/crc32.c b/src/crc32.c
index 1dae47b..4609c5d 100644
--- a/src/crc32.c
+++ b/src/crc32.c
@@ -32,6 +32,47 @@
#include "util/arch.h"
#include "util/intrinsics.h"
+#if defined(HAVE_NEON)
+
+#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
+#define CRC_WORD 8
+#define CRC_TYPE u64a
+static really_inline
+u32 crc32c_neon(u32 running_crc, const unsigned char * p_buf, const size_t length)
+{
+ u32 crc=running_crc;
+
+ // Process byte-by-byte until p_buf is aligned
+ const unsigned char * aligned_buf = ROUNDUP_PTR(p_buf, CRC_WORD);
+ size_t init_bytes = aligned_buf - p_buf;
+ size_t running_length = ((length - init_bytes) / CRC_WORD) * CRC_WORD;
+ size_t end_bytes = length - init_bytes - running_length;
+
+ while(p_buf < aligned_buf){
+ CRC32CB(crc, *p_buf);
+ p_buf++;
+ }
+
+ //Main aligned loop, processes a word at a time.
+ for(size_t li = 0; li < running_length / CRC_WORD; li++){
+ CRC_TYPE block = *(const CRC_TYPE *)p_buf;
+ CRC32CX(crc,block);
+ p_buf += CRC_WORD;
+ }
+
+ // Remaining bytes
+ for(size_t li = 0; li < end_bytes; li++){
+ CRC32CB(crc,*p_buf);
+ p_buf++;
+ }
+ return crc;
+}
+#endif
+
+
#if !defined(HAVE_SSE42)
/***
@@ -636,6 +677,8 @@ u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf,
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
#if defined(HAVE_SSE42)
u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen);
+#elif defined(HAVE_NEON)
+ u32 crc = crc32c_neon(inCrc32, (const unsigned char *)buf, bufLen);
#else
u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen);
#endif
diff --git a/src/fdr/fdr.c b/src/fdr/fdr.c
index d33756d..718f169 100644
--- a/src/fdr/fdr.c
+++ b/src/fdr/fdr.c
@@ -127,6 +127,13 @@ u64a andn(const u32 a, const u8 *b) {
u64a r;
#if defined(HAVE_BMI) && !defined(NO_ASM)
__asm__ ("andn\t%2,%1,%k0" : "=r"(r) : "r"(a), "m"(*(const u32 *)b));
+#elif defined(HAVE_NEON)
+ __asm__ __volatile__("ldr w0, %w2 \n\t"
+ "bic %w0,w0,%w1 \n\t"
+ : "=r"(r)
+ : "r"(a), "m"(*(const u32 *)b)
+ : "w0"
+ );
#else
r = unaligned_load_u32(b) & ~a;
#endif
@@ -159,7 +166,104 @@ void get_conf_stride_1(const u8 *itPtr, UNUSED const u8 *start_ptr,
UNUSED const u8 *end_ptr, u32 domain_mask_flipped,
const u64a *ft, u64a *conf0, u64a *conf8, m128 *s) {
/* +1: the zones ensure that we can read the byte at z->end */
- assert(itPtr >= start_ptr && itPtr + ITER_BYTES <= end_ptr);
+ assert(itPtr >= start_ptr && itPtr <= end_ptr);
+#if defined(HAVE_NEON)
+ domain_mask_flipped = ~domain_mask_flipped;
+
+ u32 reach0, reach1, reach2, reach3;
+ u64a ptr = unaligned_load_u64a(itPtr);
+
+ reach0 = ptr & domain_mask_flipped;
+ reach1 = ptr >> 8 & domain_mask_flipped;
+ reach2 = ptr >> 16 & domain_mask_flipped;
+ reach3 = ptr >> 24 & domain_mask_flipped;
+
+ m128 st0 = load_m128_from_u64a(ft + reach0);
+ m128 st1 = load_m128_from_u64a(ft + reach1);
+ m128 st2 = load_m128_from_u64a(ft + reach2);
+ m128 st3 = load_m128_from_u64a(ft + reach3);
+
+ u32 reach4, reach5, reach6, reach7;
+ ptr = unaligned_load_u64a(itPtr + 4);
+ reach4 = ptr & domain_mask_flipped;
+ reach5 = ptr >> 8 & domain_mask_flipped;
+ reach6 = ptr >> 16 & domain_mask_flipped;
+ reach7 = ptr >> 24 & domain_mask_flipped;
+
+ m128 st4 = load_m128_from_u64a(ft + reach4);
+ m128 st5 = load_m128_from_u64a(ft + reach5);
+ m128 st6 = load_m128_from_u64a(ft + reach6);
+ m128 st7 = load_m128_from_u64a(ft + reach7);
+
+ m128 zero = zeroes128();
+
+ st1.vect_s8 = vextq_s8(zero.vect_s8, st1.vect_s8, 15);
+ st2.vect_s8 = vextq_s8(zero.vect_s8, st2.vect_s8, 14);
+ st3.vect_s8 = vextq_s8(zero.vect_s8, st3.vect_s8, 13);
+ st4.vect_s8 = vextq_s8(zero.vect_s8, st4.vect_s8, 12);
+ st5.vect_s8 = vextq_s8(zero.vect_s8, st5.vect_s8, 11);
+ st6.vect_s8 = vextq_s8(zero.vect_s8, st6.vect_s8, 10);
+ st7.vect_s8 = vextq_s8(zero.vect_s8, st7.vect_s8, 9);
+
+ st0 = or128(st0, st1);
+ st2 = or128(st2, st3);
+ st4 = or128(st4, st5);
+ st6 = or128(st6, st7);
+ st0 = or128(st0, st2);
+ st4 = or128(st4, st6);
+ st0 = or128(st0, st4);
+ *s = or128(*s, st0);
+
+ *conf0 = movq(*s);
+ *s = rshiftbyte_m128(*s, 8);
+ *conf0 = ~(*conf0);
+
+ u32 reach8, reach9, reach10, reach11;
+ ptr = unaligned_load_u64a(itPtr + 8);
+ reach8 = ptr & domain_mask_flipped;
+ reach9 = ptr >> 8 & domain_mask_flipped;
+ reach10 = ptr >> 16 & domain_mask_flipped;
+ reach11 = ptr >> 24 & domain_mask_flipped;
+
+ m128 st8 = load_m128_from_u64a(ft + reach8);
+ m128 st9 = load_m128_from_u64a(ft + reach9);
+ m128 st10 = load_m128_from_u64a(ft + reach10);
+ m128 st11 = load_m128_from_u64a(ft + reach11);
+
+ u32 reach12, reach13, reach14, reach15;
+ ptr = unaligned_load_u64a(itPtr + 12);
+ reach12 = ptr & domain_mask_flipped;
+ reach13 = ptr >> 8 & domain_mask_flipped;
+ reach14 = ptr >> 16 & domain_mask_flipped;
+ reach15 = ptr >> 24 & domain_mask_flipped;
+
+ m128 st12 = load_m128_from_u64a(ft + reach12);
+ m128 st13 = load_m128_from_u64a(ft + reach13);
+ m128 st14 = load_m128_from_u64a(ft + reach14);
+ m128 st15 = load_m128_from_u64a(ft + reach15);
+
+ st9.vect_s8 = vextq_s8(zero.vect_s8, st9.vect_s8, 15);
+ st10.vect_s8 = vextq_s8(zero.vect_s8, st10.vect_s8, 14);
+ st11.vect_s8 = vextq_s8(zero.vect_s8, st11.vect_s8, 13);
+ st12.vect_s8 = vextq_s8(zero.vect_s8, st12.vect_s8, 12);
+ st13.vect_s8 = vextq_s8(zero.vect_s8, st13.vect_s8, 11);
+ st14.vect_s8 = vextq_s8(zero.vect_s8, st14.vect_s8, 10);
+ st15.vect_s8 = vextq_s8(zero.vect_s8, st15.vect_s8, 9);
+
+ st8 = or128(st8, st9);
+ st10 = or128(st10, st11);
+ st12 = or128(st12, st13);
+ st14 = or128(st14, st15);
+ st8 = or128(st8, st10);
+ st12 = or128(st12, st14);
+ st8 = or128(st8, st12);
+ *s = or128(*s, st8);
+
+ *conf8 = movq(*s);
+ *s = rshiftbyte_m128(*s, 8);
+ *conf8 = ~(*conf8);
+
+#else
u64a reach0 = andn(domain_mask_flipped, itPtr);
u64a reach1 = andn(domain_mask_flipped, itPtr + 1);
u64a reach2 = andn(domain_mask_flipped, itPtr + 2);
@@ -241,6 +345,8 @@ void get_conf_stride_1(const u8 *itPtr, UNUSED const u8 *start_ptr,
*conf8 = movq(*s);
*s = rshiftbyte_m128(*s, 8);
*conf8 ^= ~0ULL;
+
+#endif
}
static really_inline
@@ -349,12 +455,12 @@ void do_confirm_fdr(u64a *conf, u8 offset, hwlmcb_rv_t *control,
u32 bitRem = bit % bucket;
u32 idx = bitRem;
u32 cf = confBase[idx];
- if (!cf) {
+ if (unlikely(!cf)) {
continue;
}
const struct FDRConfirm *fdrc = (const struct FDRConfirm *)
((const u8 *)confBase + cf);
- if (!(fdrc->groups & *control)) {
+ if (unlikely(!(fdrc->groups & *control))) {
continue;
}
u64a confVal = unaligned_load_u64a(confLoc + byte - sizeof(u64a) + 1);
@@ -603,7 +709,7 @@ void createEndZone(const u8 *buf, const u8 *begin, const u8 *end,
assert(z_len > 0);
size_t iter_bytes_second = 0;
size_t z_len_first = z_len;
- if (z_len > ITER_BYTES) {
+ if (unlikely(z_len > ITER_BYTES)) {
z_len_first = z_len - ITER_BYTES;
iter_bytes_second = ITER_BYTES;
}
@@ -637,7 +743,7 @@ void createEndZone(const u8 *buf, const u8 *begin, const u8 *end,
/* copy the last 16 bytes, may overlap with the previous 8 byte write */
storeu128(z_end_first - sizeof(m128), loadu128(end_first - sizeof(m128)));
- if (iter_bytes_second) {
+ if (unlikely(iter_bytes_second)) {
storeu128(z_end - sizeof(m128), loadu128(end - sizeof(m128)));
}
@@ -658,7 +764,7 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
const u8 *ptr = buf + start;
size_t remaining = len - start;
- if (remaining <= ITER_BYTES) {
+ if (unlikely(remaining <= ITER_BYTES)) {
/* enough bytes to make only one zone */
createShortZone(buf, hend, ptr, buf + len, &zoneArr[0]);
return 1;
@@ -691,13 +797,25 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
#define INVALID_MATCH_ID (~0U)
+/* Use an inline-asm prefetch on aarch64,
+ * because gcc 4.8.5 does not support __builtin_prefetch there.
+ */
+#if defined(HAVE_NEON)
+#define PREFETCH __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(itPtr + 256)))
+#define P2ALIGN __asm__ __volatile__(".p2align 6")
+#else
+#define PREFETCH __builtin_prefetch(itPtr + ITER_BYTES)
+#define P2ALIGN
+#endif
+
#define FDR_MAIN_LOOP(zz, s, get_conf_fn) \
do { \
+ P2ALIGN; \
const u8 *tryFloodDetect = zz->floodPtr; \
const u8 *start_ptr = zz->start; \
- const u8 *end_ptr = zz->end; \
+ const u8 *end_ptr = zz->end - ITER_BYTES; \
\
- for (const u8 *itPtr = start_ptr; itPtr + ITER_BYTES <= end_ptr; \
+ for (const u8 *itPtr = start_ptr; itPtr <= end_ptr; \
itPtr += ITER_BYTES) { \
if (unlikely(itPtr > tryFloodDetect)) { \
tryFloodDetect = floodDetect(fdr, a, &itPtr, tryFloodDetect,\
@@ -707,7 +825,7 @@ size_t prepareZones(const u8 *buf, size_t len, const u8 *hend,
return HWLM_TERMINATED; \
} \
} \
- __builtin_prefetch(itPtr + ITER_BYTES); \
+ PREFETCH; \
u64a conf0; \
u64a conf8; \
get_conf_fn(itPtr, start_ptr, end_ptr, domain_mask_flipped, \
diff --git a/src/hs_valid_platform.c b/src/hs_valid_platform.c
index 59ad3f3..035d3ff 100644
--- a/src/hs_valid_platform.c
+++ b/src/hs_valid_platform.c
@@ -33,9 +33,16 @@
HS_PUBLIC_API
hs_error_t HS_CDECL hs_valid_platform(void) {
/* Hyperscan requires SSSE3, anything else is a bonus */
+#if defined(__x86_64__)
if (check_ssse3()) {
return HS_SUCCESS;
- } else {
+ }
+#else
+ if (check_neon()) {
+ return HS_SUCCESS;
+ }
+#endif
+ else {
return HS_ARCH_ERROR;
}
}
diff --git a/src/nfa/limex_exceptional.h b/src/nfa/limex_exceptional.h
index 6c7335f..8304215 100644
--- a/src/nfa/limex_exceptional.h
+++ b/src/nfa/limex_exceptional.h
@@ -131,7 +131,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + info->ctrlIndex;
char *repeat_state = ctx->repeat_state + info->stateOffset;
- if (e->trigger == LIMEX_TRIGGER_POS) {
+ if (unlikely(e->trigger == LIMEX_TRIGGER_POS)) {
char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState);
processPosTrigger(repeat, repeat_ctrl, repeat_state, offset,
cyclic_on);
@@ -140,7 +140,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
assert(e->trigger == LIMEX_TRIGGER_TUG);
enum TriggerResult rv =
processTugTrigger(repeat, repeat_ctrl, repeat_state, offset);
- if (rv == TRIGGER_FAIL) {
+ if (likely(rv == TRIGGER_FAIL)) {
*cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES;
DEBUG_PRINTF("tug found no valid matches in repeat state\n");
return 1; // continue
@@ -150,7 +150,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
assert(e->hasSquash == LIMEX_SQUASH_TUG);
*succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash));
return 1; // continue
- } else if (rv == TRIGGER_SUCCESS_CACHE) {
+ } else if (unlikely(rv == TRIGGER_SUCCESS_CACHE)) {
new_cache->br = 1;
} else {
assert(rv == TRIGGER_SUCCESS);
@@ -160,7 +160,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
}
// Some exceptions fire accepts.
- if (e->reports != MO_INVALID_IDX) {
+ if (unlikely(e->reports != MO_INVALID_IDX)) {
if (flags & CALLBACK_OUTPUT) {
const ReportID *reports =
(const ReportID *)((const char *)limex + e->reports);
@@ -171,7 +171,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
return 0; // halt
}
if (*cacheable == CACHE_RESULT) {
- if (!new_cache->reports || new_cache->reports == reports) {
+ if (likely(!new_cache->reports || new_cache->reports == reports)) {
new_cache->reports = reports;
} else {
*cacheable = DO_NOT_CACHE_RESULT;
@@ -194,8 +194,8 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG,
// Some exceptions squash states behind them. Note that we squash states in
// 'succ', not local_succ.
- if (e->hasSquash == LIMEX_SQUASH_CYCLIC
- || e->hasSquash == LIMEX_SQUASH_REPORT) {
+ if (unlikely(e->hasSquash == LIMEX_SQUASH_CYCLIC
+ || e->hasSquash == LIMEX_SQUASH_REPORT)) {
*succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash));
if (*cacheable == CACHE_RESULT) {
*cacheable = DO_NOT_CACHE_RESULT;
@@ -331,12 +331,12 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
u32 idx = local_index + base_index[t];
const EXCEPTION_T *e = &exceptions[idx];
- if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
+ if (unlikely(!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ,
#ifndef BIG_MODEL
&local_succ,
#endif
limex, offset, ctx, &new_cache, &cacheable,
- in_rev, flags)) {
+ in_rev, flags))) {
return PE_RV_HALT;
}
} while (word);
@@ -349,7 +349,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
*succ = OR_STATE(*succ, ctx->local_succ);
#endif
- if (cacheable == CACHE_RESULT) {
+ if (likely(cacheable == CACHE_RESULT)) {
ctx->cached_estate = estate;
#ifndef BIG_MODEL
ctx->cached_esucc = local_succ;
@@ -359,7 +359,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ,
ctx->cached_reports = new_cache.reports;
ctx->cached_br = new_cache.br;
} else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
- if (ctx->cached_br) {
+ if (unlikely(ctx->cached_br)) {
ctx->cached_estate = ZERO_STATE;
}
}
diff --git a/src/nfa/limex_internal.h b/src/nfa/limex_internal.h
index 23b1bd9..0e27c79 100644
--- a/src/nfa/limex_internal.h
+++ b/src/nfa/limex_internal.h
@@ -119,7 +119,7 @@ struct NFAException##size { \
u32 repeatOffset; /**< offset to NFARepeatInfo, or MO_INVALID_IDX */ \
u8 hasSquash; /**< from enum LimExSquash */ \
u8 trigger; /**< from enum LimExTrigger */ \
-}; \
+}__attribute__ ((aligned (16))); \
\
struct LimExNFA##size { \
u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \
diff --git a/src/nfa/limex_native.c b/src/nfa/limex_native.c
index f6f5809..8998830 100644
--- a/src/nfa/limex_native.c
+++ b/src/nfa/limex_native.c
@@ -77,7 +77,7 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
struct NFAContext32 *ctx, char in_rev, char flags) {
assert(estate != 0); // guaranteed by calling macro
- if (estate == ctx->cached_estate) {
+ if (unlikely(estate == ctx->cached_estate)) {
DEBUG_PRINTF("using cached succ from previous state\n");
*succ |= ctx->cached_esucc;
if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) {
@@ -103,21 +103,21 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ,
u32 bit = findAndClearLSB_32(&estate);
u32 idx = rank_in_mask32(limex->exceptionMask, bit);
const struct NFAException32 *e = &exceptions[idx];
- if (!runException32(e, s, succ, &local_succ, limex, offset, ctx,
- &new_cache, &cacheable, in_rev, flags)) {
+ if (unlikely(!runException32(e, s, succ, &local_succ, limex, offset, ctx,
+ &new_cache, &cacheable, in_rev, flags))) {
return PE_RV_HALT;
}
} while (estate != 0);
*succ |= local_succ;
- if (cacheable == CACHE_RESULT) {
+ if (unlikely(cacheable == CACHE_RESULT)) {
ctx->cached_estate = orig_estate;
ctx->cached_esucc = local_succ;
ctx->cached_reports = new_cache.reports;
ctx->cached_br = new_cache.br;
} else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) {
- if (ctx->cached_br) {
+ if (unlikely(ctx->cached_br)) {
ctx->cached_estate = 0U;
}
}
diff --git a/src/nfa/shufti.c b/src/nfa/shufti.c
index 09ffc0c..2cb74f0 100644
--- a/src/nfa/shufti.c
+++ b/src/nfa/shufti.c
@@ -153,13 +153,13 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
assert(buf < buf_end);
// Slow path for small cases.
- if (buf_end - buf < 16) {
+ if (unlikely(buf_end - buf < 16)) {
return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi,
buf, buf_end);
}
const m128 zeroes = zeroes128();
- const m128 low4bits = _mm_set1_epi8(0xf);
+ const m128 low4bits = set16x8(0xf);
const u8 *rv;
size_t min = (size_t)buf % 16;
@@ -179,6 +179,11 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *last_block = buf_end - 16;
while (buf < last_block) {
m128 lchars = load128(buf);
+
+#if defined(HAVE_NEON)
+ __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(buf + 256)));
+#endif
+
rv = fwdBlock(mask_lo, mask_hi, lchars, buf, low4bits, zeroes);
if (rv) {
return rv;
@@ -246,7 +251,7 @@ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
}
const m128 zeroes = zeroes128();
- const m128 low4bits = _mm_set1_epi8(0xf);
+ const m128 low4bits = set16x8(0xf);
const u8 *rv;
assert(buf_end - buf >= 16);
@@ -320,7 +325,7 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
m128 mask2_lo, m128 mask2_hi,
const u8 *buf, const u8 *buf_end) {
const m128 ones = ones128();
- const m128 low4bits = _mm_set1_epi8(0xf);
+ const m128 low4bits = set16x8(0xf);
const u8 *rv;
size_t min = (size_t)buf % 16;
@@ -340,6 +345,11 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi,
const u8 *last_block = buf_end - 16;
while (buf < last_block) {
m128 lchars = load128(buf);
+
+#if defined(HAVE_NEON)
+ __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(buf + 256)));
+#endif
+
rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi,
lchars, buf, low4bits, ones);
if (rv) {
diff --git a/src/nfa/truffle.c b/src/nfa/truffle.c
index be6b312..c05d778 100644
--- a/src/nfa/truffle.c
+++ b/src/nfa/truffle.c
@@ -41,7 +41,7 @@
static really_inline
const u8 *lastMatch(const u8 *buf, u32 z) {
- if (unlikely(z != 0xffff)) {
+ if (z != 0xffff) {
u32 pos = clz32(~z & 0xffff);
assert(pos >= 16 && pos < 32);
return buf + (31 - pos);
@@ -52,7 +52,7 @@ const u8 *lastMatch(const u8 *buf, u32 z) {
static really_inline
const u8 *firstMatch(const u8 *buf, u32 z) {
- if (unlikely(z != 0xffff)) {
+ if (likely(z != 0xffff)) {
u32 pos = ctz32(~z & 0xffff);
assert(pos < 16);
return buf + pos;
@@ -64,8 +64,8 @@ const u8 *firstMatch(const u8 *buf, u32 z) {
static really_inline
u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) {
- m128 highconst = _mm_set1_epi8(0x80);
- m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201);
+ m128 highconst = set16x8(0x80);
+ m128 shuf_mask_hi = set2x64(0x8040201008040201);
// and now do the real work
m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v);
@@ -124,7 +124,7 @@ const u8 *truffleExec(m128 shuf_mask_lo_highclear,
assert(buf < buf_end);
const u8 *rv;
- if (buf_end - buf < 16) {
+ if (unlikely(buf_end - buf < 16)) {
return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf,
buf_end);
}
diff --git a/src/parser/control_verbs.cpp b/src/parser/control_verbs.cpp
new file mode 100644
index 0000000..482004d
--- /dev/null
+++ b/src/parser/control_verbs.cpp
@@ -0,0 +1,340 @@
+
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * \brief Parser for control verbs that can occur at the beginning of a pattern.
+ */
+
+#include "parser/control_verbs.h"
+
+#include "parser/Parser.h"
+#include "parser/parse_error.h"
+
+#include <cstring>
+#include <sstream>
+
+using namespace std;
+
+namespace ue2 {
+
+const char *read_control_verbs(const char *ptr, const char *end, size_t start,
+ ParseMode &mode) {
+ const char *p = ptr;
+ const char *pe = end;
+ const char *eof = pe;
+ const char *ts, *te;
+ int cs;
+ UNUSED int act;
+
+ static const char _ControlVerbs_actions[] = {
+ 0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1, 9};
+
+ static const unsigned char _ControlVerbs_key_offsets[] = {
+ 0, 7, 8, 10, 12, 14, 16, 18, 20, 21, 23, 25, 27,
+ 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 55,
+ 57, 59, 61, 63, 66, 68, 70, 72, 74, 76, 79, 82, 84,
+ 86, 88, 90, 92, 94, 96, 98, 100, 102, 105, 107, 109, 111,
+ 113, 115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137,
+ 139, 141, 143, 146, 148, 149, 151, 155, 157, 159, 160, 161};
+
+ static const char _ControlVerbs_trans_keys[] = {
+ 41, 65, 66, 67, 76, 78, 85, 41, 41, 78, 41, 89, 41, 67, 41, 82, 41,
+ 76, 41, 70, 41, 41, 83, 41, 82, 41, 95, 41, 65, 85, 41, 78, 41, 89,
+ 41, 67, 41, 78, 41, 73, 41, 67, 41, 79, 41, 68, 41, 69, 41, 82, 41,
+ 76, 41, 70, 73, 41, 77, 41, 73, 41, 84, 41, 95, 41, 77, 82, 41, 65,
+ 41, 84, 41, 67, 41, 72, 41, 61, 41, 48, 57, 41, 48, 57, 41, 69, 41,
+ 67, 41, 85, 41, 82, 41, 83, 41, 73, 41, 79, 41, 78, 41, 79, 41, 95,
+ 41, 65, 83, 41, 85, 41, 84, 41, 79, 41, 95, 41, 80, 41, 79, 41, 83,
+ 41, 83, 41, 69, 41, 83, 41, 83, 41, 84, 41, 65, 41, 82, 41, 84, 41,
+ 95, 41, 79, 41, 80, 41, 84, 41, 67, 84, 41, 80, 41, 41, 70, 41, 49,
+ 51, 56, 41, 54, 41, 50, 41, 40, 42, 0};
+
+ static const char _ControlVerbs_single_lengths[] = {
+ 7, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 4, 2, 2, 1, 1, 1};
+
+ static const char _ControlVerbs_range_lengths[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ static const short _ControlVerbs_index_offsets[] = {
+ 0, 8, 10, 13, 16, 19, 22, 25, 28, 30, 33, 36, 39,
+ 43, 46, 49, 52, 55, 58, 61, 64, 67, 70, 73, 76, 80,
+ 83, 86, 89, 92, 96, 99, 102, 105, 108, 111, 114, 117, 120,
+ 123, 126, 129, 132, 135, 138, 141, 144, 147, 151, 154, 157, 160,
+ 163, 166, 169, 172, 175, 178, 181, 184, 187, 190, 193, 196, 199,
+ 202, 205, 208, 212, 215, 217, 220, 225, 228, 231, 233, 235};
+
+ static const char _ControlVerbs_indicies[] = {
+ 0, 2, 3, 4, 5, 6, 7, 1, 8, 1, 8, 9, 1, 8, 10, 1, 11,
+ 12, 1, 8, 13, 1, 8, 14, 1, 8, 15, 1, 11, 1, 8, 16, 1, 8,
+ 17, 1, 8, 18, 1, 8, 19, 20, 1, 8, 21, 1, 8, 22, 1, 8, 12,
+ 1, 8, 23, 1, 8, 24, 1, 8, 25, 1, 8, 26, 1, 8, 27, 1, 8,
+ 15, 1, 8, 28, 1, 11, 14, 1, 8, 15, 29, 1, 8, 30, 1, 8, 31,
+ 1, 8, 32, 1, 8, 33, 1, 8, 34, 35, 1, 8, 36, 1, 8, 37, 1,
+ 8, 38, 1, 8, 39, 1, 8, 40, 1, 8, 41, 1, 11, 41, 1, 8, 42,
+ 1, 8, 43, 1, 8, 44, 1, 8, 45, 1, 8, 46, 1, 8, 47, 1, 8,
+ 48, 1, 8, 39, 1, 8, 49, 1, 8, 50, 1, 8, 51, 52, 1, 8, 53,
+ 1, 8, 54, 1, 8, 55, 1, 8, 56, 1, 8, 57, 1, 8, 58, 1, 8,
+ 59, 1, 8, 60, 1, 8, 61, 1, 8, 62, 1, 8, 15, 1, 8, 63, 1,
+ 8, 64, 1, 8, 65, 1, 8, 66, 1, 8, 67, 1, 8, 68, 1, 8, 69,
+ 1, 8, 15, 1, 8, 70, 71, 1, 8, 72, 1, 73, 1, 8, 74, 1, 75,
+ 76, 77, 78, 1, 8, 15, 1, 8, 15, 1, 75, 1, 80, 79, 82, 81, 0};
+
+ static const char _ControlVerbs_trans_targs[] = {
+ 75, 1, 2, 9, 22, 24, 45, 67, 75, 3, 4, 75, 5, 6, 7, 8, 10,
+ 11, 12, 13, 16, 14, 15, 17, 18, 19, 20, 21, 23, 25, 26, 27, 28, 29,
+ 30, 37, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46, 47,
+ 48, 59, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64,
+ 65, 66, 68, 70, 69, 75, 71, 75, 72, 73, 74, 75, 76, 75, 0};
+
+ static const char _ControlVerbs_trans_actions[] = {
+ 19, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 7, 0, 0, 0, 15, 5, 17, 0};
+
+ static const char _ControlVerbs_to_state_actions[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0};
+
+ static const char _ControlVerbs_from_state_actions[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0};
+
+ static const short _ControlVerbs_eof_trans[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 82};
+
+ static const int ControlVerbs_start = 75;
+ static const int ControlVerbs_first_final = 75;
+ static const int ControlVerbs_error = -1;
+
+ static const int ControlVerbs_en_main = 75;
+
+ {
+ cs = ControlVerbs_start;
+ ts = 0;
+ te = 0;
+ act = 0;
+ }
+
+ try {
+
+ {
+ int _klen;
+ unsigned int _trans;
+ const char *_acts;
+ unsigned int _nacts;
+ const char *_keys;
+
+ if (p == pe)
+ goto _test_eof;
+ _resume:
+ _acts =
+ _ControlVerbs_actions + _ControlVerbs_from_state_actions[cs];
+ _nacts = (unsigned int)*_acts++;
+ while (_nacts-- > 0) {
+ switch (*_acts++) {
+ case 1: {
+ ts = p;
+ } break;
+ }
+ }
+
+ _keys = _ControlVerbs_trans_keys + _ControlVerbs_key_offsets[cs];
+ _trans = _ControlVerbs_index_offsets[cs];
+
+ _klen = _ControlVerbs_single_lengths[cs];
+ if (_klen > 0) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + _klen - 1;
+ while (1) {
+ if (_upper < _lower)
+ break;
+
+ _mid = _lower + ((_upper - _lower) >> 1);
+ if ((*p) < *_mid)
+ _upper = _mid - 1;
+ else if ((*p) > *_mid)
+ _lower = _mid + 1;
+ else {
+ _trans += (unsigned int)(_mid - _keys);
+ goto _match;
+ }
+ }
+ _keys += _klen;
+ _trans += _klen;
+ }
+
+ _klen = _ControlVerbs_range_lengths[cs];
+ if (_klen > 0) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + (_klen << 1) - 2;
+ while (1) {
+ if (_upper < _lower)
+ break;
+
+ _mid = _lower + (((_upper - _lower) >> 1) & ~1);
+ if ((*p) < _mid[0])
+ _upper = _mid - 2;
+ else if ((*p) > _mid[1])
+ _lower = _mid + 2;
+ else {
+ _trans += (unsigned int)((_mid - _keys) >> 1);
+ goto _match;
+ }
+ }
+ _trans += _klen;
+ }
+
+ _match:
+ _trans = _ControlVerbs_indicies[_trans];
+ _eof_trans:
+ cs = _ControlVerbs_trans_targs[_trans];
+
+ if (_ControlVerbs_trans_actions[_trans] == 0)
+ goto _again;
+
+ _acts = _ControlVerbs_actions + _ControlVerbs_trans_actions[_trans];
+ _nacts = (unsigned int)*_acts++;
+ while (_nacts-- > 0) {
+ switch (*_acts++) {
+ case 2: {
+ te = p + 1;
+ } break;
+ case 3: {
+ te = p + 1;
+ { mode.utf8 = true; }
+ } break;
+ case 4: {
+ te = p + 1;
+ { mode.ucp = true; }
+ } break;
+ case 5: {
+ te = p + 1;
+ {
+ ostringstream str;
+ str << "Unsupported control verb "
+ << string(ts, te - ts);
+ throw LocatedParseError(str.str());
+ }
+ } break;
+ case 6: {
+ te = p + 1;
+ {
+ ostringstream str;
+ str << "Unknown control verb " << string(ts, te - ts);
+ throw LocatedParseError(str.str());
+ }
+ } break;
+ case 7: {
+ te = p + 1;
+ {
+ p--;
+ {
+ p++;
+ goto _out;
+ }
+ }
+ } break;
+ case 8: {
+ te = p;
+ p--;
+ {
+ p--;
+ {
+ p++;
+ goto _out;
+ }
+ }
+ } break;
+ case 9: {
+ { p = ((te)) - 1; }
+ {
+ p--;
+ {
+ p++;
+ goto _out;
+ }
+ }
+ } break;
+ }
+ }
+
+ _again:
+ _acts = _ControlVerbs_actions + _ControlVerbs_to_state_actions[cs];
+ _nacts = (unsigned int)*_acts++;
+ while (_nacts-- > 0) {
+ switch (*_acts++) {
+ case 0: {
+ ts = 0;
+ } break;
+ }
+ }
+
+ if (++p != pe)
+ goto _resume;
+ _test_eof : {}
+ if (p == eof) {
+ if (_ControlVerbs_eof_trans[cs] > 0) {
+ _trans = _ControlVerbs_eof_trans[cs] - 1;
+ goto _eof_trans;
+ }
+ }
+
+ _out : {}
+ }
+
+ } catch (LocatedParseError &error) {
+ if (ts >= ptr && ts <= pe) {
+ error.locate(ts - ptr + start);
+ } else {
+ error.locate(0);
+ }
+ throw;
+ }
+
+ return p;
+}
+
+} // namespace ue2
diff --git a/src/rose/counting_miracle.h b/src/rose/counting_miracle.h
index 976208b..4456679 100644
--- a/src/rose/counting_miracle.h
+++ b/src/rose/counting_miracle.h
@@ -94,7 +94,7 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
u32 count = *count_inout;
const m128 zeroes = zeroes128();
- const m128 low4bits = _mm_set1_epi8(0xf);
+ const m128 low4bits = set16x8(0xf);
for (; d + 16 <= d_end; d_end -= 16) {
m128 data = loadu128(d_end - 16);
diff --git a/src/util/arch.h b/src/util/arch.h
index 985fec6..fe4a910 100644
--- a/src/util/arch.h
+++ b/src/util/arch.h
@@ -61,6 +61,10 @@
#define HAVE_AVX512VBMI
#endif
+#if defined(__aarch64__)
+#define HAVE_NEON
+#endif
+
/*
* ICC and MSVC don't break out POPCNT or BMI/2 as separate pre-def macros
*/
@@ -87,4 +91,11 @@
#define NO_ASM
#endif
+/*
+ * AARCH64 uses a different form of inline asm
+ */
+#if defined(__aarch64__)
+#define NO_ASM
+#endif
+
#endif // UTIL_ARCH_H_
diff --git a/src/util/cpuid_flags.c b/src/util/cpuid_flags.c
index c00ce58..96286ee 100644
--- a/src/util/cpuid_flags.c
+++ b/src/util/cpuid_flags.c
@@ -40,6 +40,7 @@
u64a cpuid_flags(void) {
u64a cap = 0;
+#if defined(__x86_64__)
if (check_avx2()) {
DEBUG_PRINTF("AVX2 enabled\n");
cap |= HS_CPU_FEATURES_AVX2;
@@ -67,6 +68,7 @@ u64a cpuid_flags(void) {
#if (!defined(FAT_RUNTIME) && !defined(HAVE_AVX512VBMI)) || \
(defined(FAT_RUNTIME) && !defined(BUILD_AVX512VBMI))
cap &= ~HS_CPU_FEATURES_AVX512VBMI;
+#endif
#endif
return cap;
@@ -78,6 +80,7 @@ struct family_id {
u32 tune;
};
+#if defined(__x86_64__)
/* from table 35-1 of the Intel 64 and IA32 Arch. Software Developer's Manual
* and "Intel Architecture and Processor Identification With CPUID Model and
* Family Numbers" */
@@ -121,6 +124,7 @@ static const struct family_id known_microarch[] = {
{ 0x6, 0x6C, HS_TUNE_FAMILY_ICX }, /* Icelake Xeon */
};
+#endif
#ifdef DUMP_SUPPORT
static UNUSED
@@ -144,6 +148,7 @@ const char *dumpTune(u32 tune) {
#endif
u32 cpuid_tune(void) {
+#if defined(__x86_64__)
unsigned int eax, ebx, ecx, edx;
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
@@ -171,6 +176,7 @@ u32 cpuid_tune(void) {
DEBUG_PRINTF("found tune flag %s\n", dumpTune(tune) );
return tune;
}
+#endif
return HS_TUNE_FAMILY_GENERIC;
}
diff --git a/src/util/cpuid_flags.h b/src/util/cpuid_flags.h
index 527c6d5..3125bd1 100644
--- a/src/util/cpuid_flags.h
+++ b/src/util/cpuid_flags.h
@@ -32,7 +32,9 @@
#include "ue2common.h"
#if !defined(_WIN32) && !defined(CPUID_H_)
+#if defined(__x86_64__)
#include <cpuid.h>
+#endif
/* system header doesn't have a header guard */
#define CPUID_H_
#endif
diff --git a/src/util/cpuid_inline.h b/src/util/cpuid_inline.h
index b7b4245..b228c1d 100644
--- a/src/util/cpuid_inline.h
+++ b/src/util/cpuid_inline.h
@@ -32,17 +32,20 @@
#include "ue2common.h"
#include "cpuid_flags.h"
+#if defined(__x86_64__) || defined(_M_X64)
#if !defined(_WIN32) && !defined(CPUID_H_)
#include <cpuid.h>
/* system header doesn't have a header guard */
#define CPUID_H_
#endif
+#endif
#ifdef __cplusplus
extern "C"
{
#endif
+#if defined(__x86_64__) || defined(_M_X64)
static inline
void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
@@ -57,6 +60,7 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
*edx = a[3];
#endif
}
+#endif
// ECX
#define CPUID_SSE3 (1 << 0)
@@ -93,11 +97,12 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
#define CPUID_XCR0_AVX512 \
(CPUID_XCR0_OPMASK | CPUID_XCR0_ZMM_Hi256 | CPUID_XCR0_Hi16_ZMM)
+#if defined(__x86_64__)
static inline
u64a xgetbv(u32 op) {
#if defined(_WIN32) || defined(__INTEL_COMPILER)
return _xgetbv(op);
-#else
+#elif defined(__x86_64__)
u32 a, d;
__asm__ volatile (
"xgetbv\n"
@@ -252,6 +257,16 @@ int check_popcnt(void) {
cpuid(1, 0, &eax, &ebx, &ecx, &edx);
return !!(ecx & CPUID_POPCNT);
}
+#endif //__x86_64__
+
+static inline
+int check_neon(void) {
+#if defined(__aarch64__)
+ return 1;
+#else
+ return 0;
+#endif
+}
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/util/intrinsics.h b/src/util/intrinsics.h
index edc4f6e..ece3b1a 100644
--- a/src/util/intrinsics.h
+++ b/src/util/intrinsics.h
@@ -55,10 +55,22 @@
# endif
#endif
+#ifdef __cplusplus
+# if defined(HAVE_CXX_ARM_NEON_H)
+# define USE_ARM_NEON_H
+# endif
+#else // C
+# if defined(HAVE_C_ARM_NEON_H)
+# define USE_ARM_NEON_H
+# endif
+#endif
+
#if defined(USE_X86INTRIN_H)
#include <x86intrin.h>
#elif defined(USE_INTRIN_H)
#include <intrin.h>
+#elif defined(USE_ARM_NEON_H)
+#include <arm_neon.h>
#else
#error no intrinsics file
#endif
diff --git a/src/util/popcount.h b/src/util/popcount.h
index eb08f6b..7d794d1 100644
--- a/src/util/popcount.h
+++ b/src/util/popcount.h
@@ -41,6 +41,8 @@ u32 popcount32(u32 x) {
#if defined(HAVE_POPCOUNT_INSTR)
// Single-instruction builtin.
return _mm_popcnt_u32(x);
+#elif defined(HAVE_NEON)
+ return (u32)vaddlv_u8(vcnt_u8(vcreate_u8((u64a)x)));
#else
// Fast branch-free version from bit-twiddling hacks as older Intel
// processors do not have a POPCNT instruction.
@@ -63,7 +65,9 @@ u32 popcount64(u64a x) {
x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f;
return (x * 0x0101010101010101) >> 56;
-# endif
+#endif
+#elif defined(HAVE_NEON)
+ return (u32)vaddlv_u8(vcnt_u8(vcreate_u8((u64a)x)));
#else
// Synthesise from two 32-bit cases.
return popcount32(x >> 32) + popcount32(x);
diff --git a/src/util/simd_arm.h b/src/util/simd_arm.h
new file mode 100644
index 0000000..cce119f
--- /dev/null
+++ b/src/util/simd_arm.h
@@ -0,0 +1,1069 @@
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ * 2020.01 - Use the neon instruction to implement the function of 128-bit operation.
+ * Huawei Technologies Co., Ltd.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief SIMD types and primitive operations.
+ */
+
+#ifndef SIMD_ARM
+#define SIMD_ARM
+
+#include "config.h"
+#include "simd_types.h"
+#include "ue2common.h"
+#include "unaligned.h"
+#include "util/arch.h"
+#include "util/intrinsics.h"
+
+#include <string.h> // for memcpy
+
+// Define a common assume_aligned using an appropriate compiler built-in, if
+// it's available. Note that we need to handle C or C++ compilation.
+#ifdef __cplusplus
+#ifdef HAVE_CXX_BUILTIN_ASSUME_ALIGNED
+#define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
+#endif
+#else
+#ifdef HAVE_CC_BUILTIN_ASSUME_ALIGNED
+#define assume_aligned(x, y) __builtin_assume_aligned((x), (y))
+#endif
+#endif
+
+// Fallback to identity case.
+#ifndef assume_aligned
+#define assume_aligned(x, y) (x)
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern const char vbs_mask_data[];
+#ifdef __cplusplus
+}
+#endif
+
+/*
+** extend 4.8.5 neon inline assembly functions
+*/
+__extension__ static __inline uint64x2_t __attribute__((__always_inline__))
+vmvnq_u64(uint64x2_t a) {
+ uint64x2_t result;
+ __asm__("mvn %0.16b,%1.16b" : "=w"(result) : "w"(a) : /* No clobbers */);
+ return result;
+}
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+
+static really_inline m128 ones128(void) {
+ m128 result;
+ result.vect_s32 = vdupq_n_s32(0xFFFFFFFF);
+ return result;
+}
+
+static really_inline m128 zeroes128(void) {
+ m128 result;
+ result.vect_s32 = vdupq_n_s32(0x0);
+ return result;
+}
+
+/** \brief Return 1 if a and b are different otherwise 0 */
+static really_inline int diff128(m128 a, m128 b) {
+    return !!vaddlvq_u16(veorq_u16(a.vect_u16, b.vect_u16));
+}
+
+static really_inline int isnonzero128(m128 a) {
+ return !!diff128(a, zeroes128());
+}
+
+/**
+ * "Rich" version of diff128(). Takes two vectors a and b and returns a 4-bit
+ * mask indicating which 32-bit words contain differences.
+ */
+static really_inline u32 diffrich128(m128 a, m128 b) {
+ m128 tmp;
+ tmp.vect_u32 = vmvnq_u32(vceqq_u32(a.vect_u32, b.vect_u32));
+ return ((vgetq_lane_u32(tmp.vect_u32, 3) & 0x8) |
+ (vgetq_lane_u32(tmp.vect_u32, 2) & 0x4) |
+ (vgetq_lane_u32(tmp.vect_u32, 1) & 0x2) |
+ (vgetq_lane_u32(tmp.vect_u32, 0) & 0x1));
+}
+
+/**
+ * "Rich" version of diff128(), 64-bit variant. Takes two vectors a and b and
+ * returns a 4-bit mask indicating which 64-bit words contain differences.
+ */
+static really_inline u32 diffrich64_128(m128 a, m128 b) {
+ m128 tmp;
+ tmp.vect_u64 = vmvnq_u64(vceqq_u64(a.vect_u64, b.vect_u64));
+ return (u32)((vgetq_lane_u64(tmp.vect_u64, 1) & 0x4) |
+ (vgetq_lane_u64(tmp.vect_u64, 0) & 0x1));
+}
+
+static really_really_inline m128 lshift64_m128(m128 a, unsigned b) {
+ assert(b <= 63);
+ m128 result;
+ result.vect_s64 = vshlq_n_s64(a.vect_s64, b);
+ return result;
+}
+
+static really_really_inline m128 rshift64_m128(m128 a, int imm8) {
+ assert(imm8 >= 0 && imm8 <= 63);
+ if (unlikely(imm8 == 0)) {
+ return a;
+ }
+ m128 result;
+ result.vect_u64 = vshrq_n_u64(a.vect_u64, imm8);
+ return result;
+}
+
+static really_really_inline m128 eq128(m128 a, m128 b) {
+ m128 result;
+ result.vect_u8 = vceqq_s8(a.vect_s8, b.vect_s8);
+ return result;
+}
+
+static really_really_inline u32 movemask128(m128 a) {
+ m128 result;
+ result.vect_u8 = vshrq_n_u8(a.vect_u8, 7);
+ result.vect_u16 = vsraq_n_u16(result.vect_u16, result.vect_u16, 7);
+ result.vect_u32 = vsraq_n_u32(result.vect_u32, result.vect_u32, 14);
+ result.vect_u64 = vsraq_n_u64(result.vect_u64, result.vect_u64, 28);
+ return (u32)(vgetq_lane_u8(result.vect_u8, 0) |
+ ((u32)vgetq_lane_u8(result.vect_u8, 8) << 8));
+}
+
+static really_really_inline m128 rshiftbyte_m128(m128 a, int imm8) {
+ assert(imm8 >= 0 && imm8 <= 15);
+ m128 result;
+ result.vect_s8 = vextq_s8(a.vect_s8, vdupq_n_s8(0), imm8);
+ return result;
+}
+
+static really_really_inline m128 lshiftbyte_m128(m128 a, int imm8) {
+ assert(imm8 >= 0 && imm8 <= 15);
+ m128 result;
+ if (unlikely(imm8 == 0)) {
+ return a;
+ }
+ result.vect_s8 = vextq_s8(vdupq_n_s8(0), a.vect_s8, (16 - imm8));
+ return result;
+}
+
+static really_inline m128 set16x8(u8 c) {
+ m128 result;
+ result.vect_s8 = vdupq_n_s8(c);
+ return result;
+}
+
+static really_inline m128 set4x32(u32 c) {
+ m128 result;
+ result.vect_s32 = vdupq_n_s32(c);
+ return result;
+}
+
+static really_inline m128 set2x64(u64a c) {
+ m128 result;
+ result.vect_u64 = vdupq_n_u64(c);
+ return result;
+}
+
+static really_inline u32 movd(const m128 in) {
+ u32 result;
+ result = vgetq_lane_u32(in.vect_u32, 0);
+ return result;
+}
+
+static really_inline u64a movq(const m128 in) {
+ return vgetq_lane_u64(in.vect_u64, 0);
+}
+
+/* another form of movq */
+static really_inline m128 load_m128_from_u64a(const u64a *p) {
+ m128 result;
+ __asm__ __volatile__("ldr %d0, %1 \n\t"
+ : "=w"(result)
+ : "Utv"(*p)
+ : /* No clobbers */
+ );
+ return result;
+}
+
+/*The x86 platform does not perform the lower 2 bit operation.
+If the value of imm exceeds 2 bit, a compilation error occurs.*/
+static really_inline u32 extract32from128(m128 a, int imm) {
+ return vgetq_lane_s32(a.vect_s32, imm & 0x0003);
+}
+
+/*The x86 platform does not perform the lower 1 bit operation.
+If the value of imm exceeds 1 bit, a compilation error occurs.*/
+static really_inline u64a extract64from128(m128 a, int imm) {
+ return vgetq_lane_s64(a.vect_s64, imm & 0x0001);
+}
+
+#define extractlow64from256(a) movq(a.lo)
+#define extractlow32from256(a) movd(a.lo)
+
+/*The x86 platform does not perform the lower 2 bit operation.
+If the value of imm exceeds 2 bit, a compilation error occurs.*/
+static really_inline u32 extract32from256(m256 a, int imm) {
+ return vgetq_lane_s32((imm >> 2) ? a.hi.vect_s32 : a.lo.vect_s32,
+ imm & 0x0003);
+}
+
+/*The x86 platform does not perform the lower 1 bit operation.
+If the value of imm exceeds 1 bit, a compilation error occurs.*/
+static really_inline u64a extract64from256(m256 a, int imm) {
+ return vgetq_lane_s64((imm >> 1) ? a.hi.vect_s64 : a.lo.vect_s64,
+ imm & 0x0001);
+}
+
+static really_inline m128 and128(m128 a, m128 b) {
+ m128 result;
+ result.vect_s32 = vandq_s32(a.vect_s32, b.vect_s32);
+ return result;
+}
+
+static really_inline m128 not128(m128 a) {
+ m128 result;
+ result.vect_s32 = vmvnq_s32(a.vect_s32);
+ return result;
+}
+
+static really_inline m128 xor128(m128 a, m128 b) {
+ m128 result;
+ result.vect_s32 = veorq_s32(a.vect_s32, b.vect_s32);
+ return result;
+}
+
+static really_inline m128 or128(m128 a, m128 b) {
+ m128 result;
+ result.vect_s32 = vorrq_s32(a.vect_s32, b.vect_s32);
+ return result;
+}
+
+static really_inline m128 andnot128(m128 a, m128 b) {
+ m128 result;
+ result.vect_s32 = vbicq_s32(b.vect_s32, a.vect_s32);
+ return result;
+}
+
+// aligned load
+static really_inline m128 load128(const void *ptr) {
+ assert(ISALIGNED_N(ptr, alignof(m128)));
+ ptr = assume_aligned(ptr, 16);
+ m128 result;
+ result.vect_s32 = vld1q_s32((const int32_t *)ptr);
+ return result;
+}
+
+// aligned store
+static really_inline void store128(void *ptr, m128 a) {
+ assert(ISALIGNED_N(ptr, alignof(m128)));
+ ptr = assume_aligned(ptr, 16);
+ *(m128 *)ptr = a;
+}
+
+// unaligned load
+static really_inline m128 loadu128(const void *ptr) {
+ m128 result;
+ result.vect_s32 = vld1q_s32((const int32_t *)ptr);
+ return result;
+}
+
+// unaligned store
+static really_inline void storeu128(void *ptr, m128 a) {
+ vst1q_s32((int32_t *)ptr, a.vect_s32);
+}
+
+// packed unaligned store of first N bytes
+static really_inline void storebytes128(void *ptr, m128 a, unsigned int n) {
+ assert(n <= sizeof(a));
+ memcpy(ptr, &a, n);
+}
+
+// packed unaligned load of first N bytes, pad with zero
+static really_inline m128 loadbytes128(const void *ptr, unsigned int n) {
+ m128 a = zeroes128();
+ assert(n <= sizeof(a));
+ memcpy(&a, ptr, n);
+ return a;
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern const u8 simd_onebit_masks[];
+#ifdef __cplusplus
+}
+#endif
+
+static really_inline m128 mask1bit128(unsigned int n) {
+ assert(n < sizeof(m128) * 8);
+ u32 mask_idx = ((n % 8) * 64) + 95;
+ mask_idx -= n / 8;
+ return loadu128(&simd_onebit_masks[mask_idx]);
+}
+
+// switches on bit N in the given vector.
+static really_inline void setbit128(m128 *ptr, unsigned int n) {
+ *ptr = or128(mask1bit128(n), *ptr);
+}
+
+// switches off bit N in the given vector.
+static really_inline void clearbit128(m128 *ptr, unsigned int n) {
+ *ptr = andnot128(mask1bit128(n), *ptr);
+}
+
+// tests bit N in the given vector.
+static really_inline char testbit128(m128 val, unsigned int n) {
+ const m128 mask = mask1bit128(n);
+ return isnonzero128(and128(mask, val));
+}
+
+// offset must be an immediate
+/*The x86 platform does not perform the lower 8 bit operation.
+If the value of imm exceeds 8 bit, a compilation error occurs.*/
+static really_inline m128 palignr(m128 a, m128 b, int count) {
+ m128 result;
+ count = count & 0xff;
+ if (likely(count < 16)) {
+ result.vect_s8 = vextq_s8(b.vect_s8, a.vect_s8, count);
+ } else if (count < 32) {
+ result.vect_s8 = vextq_s8(a.vect_s8, vdupq_n_s8(0x0), count - 16);
+ } else {
+ result.vect_s32 = vdupq_n_s32(0);
+ }
+ return result;
+}
+
+static really_inline m128 pshufb_m128(m128 a, m128 b) {
+ m128 result;
+ __asm__ __volatile__("movi v3.16b, 0x8f \n\t"
+ "and v3.16b, v3.16b, %2.16b \n\t"
+ "tbl %0.16b, {%1.16b}, v3.16b \n\t"
+ : "=w"(result)
+ : "w"(a), "w"(b)
+ : "v3");
+ return result;
+}
+
+static really_inline m256 pshufb_m256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = pshufb_m128(a.lo, b.lo);
+ rv.hi = pshufb_m128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m128 variable_byte_shift_m128(m128 in, s32 amount) {
+ assert(amount >= -16 && amount <= 16);
+ m128 shift_mask = loadu128(vbs_mask_data + 16 - amount);
+ return pshufb_m128(in, shift_mask);
+}
+
+static really_inline m128 max_u8_m128(m128 a, m128 b) {
+ m128 result;
+ result.vect_u8 = vmaxq_u8(a.vect_u8, b.vect_u8);
+ return result;
+}
+
+static really_inline m128 min_u8_m128(m128 a, m128 b) {
+ m128 result;
+ result.vect_u8 = vminq_u8(a.vect_u8, b.vect_u8);
+ return result;
+}
+
+static really_inline m128 sadd_u8_m128(m128 a, m128 b) {
+ m128 result;
+ result.vect_u8 = vqaddq_u8(a.vect_u8, b.vect_u8);
+ return result;
+}
+
+static really_inline m128 sub_u8_m128(m128 a, m128 b) {
+ m128 result;
+ result.vect_u8 = vsubq_u8(a.vect_u8, b.vect_u8);
+ return result;
+}
+
+static really_inline m128 set64x2(int64_t hi, int64_t lo) {
+ m128 result;
+ result.vect_s64 = vsetq_lane_s64(hi, vdupq_n_s64(lo), 1);
+ return result;
+}
+
+static really_inline m128 set32x4(int i3, int i2, int i1, int i0) {
+ m128 result;
+ result.vect_s32 = vsetq_lane_s32(
+ i3, vsetq_lane_s32(i2, vsetq_lane_s32(i1, vdupq_n_s32(i0), 1), 2), 3);
+ return result;
+}
+
+/****
+ **** 256-bit Primitives
+ ****/
+
+static really_really_inline m256 lshift64_m256(m256 a, int b) {
+ m256 rv = a;
+ rv.lo = lshift64_m128(rv.lo, b);
+ rv.hi = lshift64_m128(rv.hi, b);
+ return rv;
+}
+
+static really_inline m256 rshift64_m256(m256 a, int b) {
+ m256 rv = a;
+ rv.lo = rshift64_m128(rv.lo, b);
+ rv.hi = rshift64_m128(rv.hi, b);
+ return rv;
+}
+static really_inline m256 set32x8(u32 in) {
+ m256 rv;
+ rv.lo = set16x8((u8)in);
+ rv.hi = rv.lo;
+ return rv;
+}
+
+static really_inline m256 eq256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = eq128(a.lo, b.lo);
+ rv.hi = eq128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline u32 movemask256(m256 a) {
+ u32 lo_mask = movemask128(a.lo);
+ u32 hi_mask = movemask128(a.hi);
+ return lo_mask | (hi_mask << 16);
+}
+
+static really_inline m256 set2x128(m128 a) {
+ m256 rv = {a, a};
+ return rv;
+}
+
+static really_inline m256 zeroes256(void) {
+ m256 rv = {zeroes128(), zeroes128()};
+ return rv;
+}
+
+static really_inline m256 ones256(void) {
+ m256 rv = {ones128(), ones128()};
+ return rv;
+}
+
+static really_inline m256 and256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = and128(a.lo, b.lo);
+ rv.hi = and128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m256 or256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = or128(a.lo, b.lo);
+ rv.hi = or128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m256 xor256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = xor128(a.lo, b.lo);
+ rv.hi = xor128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m256 not256(m256 a) {
+ m256 rv;
+ rv.lo = not128(a.lo);
+ rv.hi = not128(a.hi);
+ return rv;
+}
+
+static really_inline m256 andnot256(m256 a, m256 b) {
+ m256 rv;
+ rv.lo = andnot128(a.lo, b.lo);
+ rv.hi = andnot128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline int diff256(m256 a, m256 b) {
+ return diff128(a.lo, b.lo) || diff128(a.hi, b.hi);
+}
+
+static really_inline int isnonzero256(m256 a) {
+ return isnonzero128(or128(a.lo, a.hi));
+}
+
+/**
+ * "Rich" version of diff256(). Takes two vectors a and b and returns an 8-bit
+ * mask indicating which 32-bit words contain differences.
+ */
+static really_inline u32 diffrich256(m256 a, m256 b) {
+ uint32x4_t x = vceqq_s32(a.lo.vect_s32, b.lo.vect_s32);
+ uint32x4_t y = vceqq_s32(a.hi.vect_s32, b.hi.vect_s32);
+ uint8x8_t lo = vqmovn_u16(vcombine_u16(vqmovn_u32(x), vqmovn_u32(y)));
+
+ static const int8_t __attribute__((aligned(16)))
+ xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0};
+ uint8x8_t mask_and = vdup_n_u8(0x80);
+ int8x8_t mask_shift = vld1_s8(xr);
+
+ lo = vand_u8(lo, mask_and);
+ lo = vshl_u8(lo, mask_shift);
+
+ lo = vpadd_u8(lo, lo);
+ lo = vpadd_u8(lo, lo);
+ lo = vpadd_u8(lo, lo);
+
+ return ~(lo[0] & 0xFF) & 0xff;
+}
+
+/**
+ * "Rich" version of diff256(), 64-bit variant. Takes two vectors a and b and
+ * returns an 8-bit mask indicating which 64-bit words contain differences.
+ */
+static really_inline u32 diffrich64_256(m256 a, m256 b) {
+ u32 d = diffrich256(a, b);
+ return (d | (d >> 1)) & 0x55555555;
+}
+
+// aligned load
+static really_inline m256 load256(const void *ptr) {
+ assert(ISALIGNED_N(ptr, alignof(m256)));
+ m256 rv = {load128(ptr), load128((const char *)ptr + 16)};
+ return rv;
+}
+
+// aligned load of 128-bit value to low and high part of 256-bit value
+static really_inline m256 load2x128(const void *ptr) {
+ assert(ISALIGNED_N(ptr, alignof(m128)));
+ m256 rv;
+ rv.hi = rv.lo = load128(ptr);
+ return rv;
+}
+
+static really_inline m256 loadu2x128(const void *ptr) {
+ return set2x128(loadu128(ptr));
+}
+
+// aligned store
+static really_inline void store256(void *ptr, m256 a) {
+ assert(ISALIGNED_N(ptr, alignof(m256)));
+ ptr = assume_aligned(ptr, 16);
+ *(m256 *)ptr = a;
+}
+
+// unaligned load
+static really_inline m256 loadu256(const void *ptr) {
+ m256 rv = {loadu128(ptr), loadu128((const char *)ptr + 16)};
+ return rv;
+}
+
+// unaligned store
+static really_inline void storeu256(void *ptr, m256 a) {
+ storeu128(ptr, a.lo);
+ storeu128((char *)ptr + 16, a.hi);
+}
+
+// packed unaligned store of first N bytes
+static really_inline void storebytes256(void *ptr, m256 a, unsigned int n) {
+ assert(n <= sizeof(a));
+ memcpy(ptr, &a, n);
+}
+
+// packed unaligned load of first N bytes, pad with zero
+static really_inline m256 loadbytes256(const void *ptr, unsigned int n) {
+ m256 a = zeroes256();
+ assert(n <= sizeof(a));
+ memcpy(&a, ptr, n);
+ return a;
+}
+
+static really_inline m256 mask1bit256(unsigned int n) {
+ assert(n < sizeof(m256) * 8);
+ u32 mask_idx = ((n % 8) * 64) + 95;
+ mask_idx -= n / 8;
+ return loadu256(&simd_onebit_masks[mask_idx]);
+}
+
+static really_inline m256 set64x4(u64a hi_1, u64a hi_0, u64a lo_1, u64a lo_0) {
+ m256 rv;
+ rv.hi = set64x2(hi_1, hi_0);
+ rv.lo = set64x2(lo_1, lo_0);
+ return rv;
+}
+
+// switches on bit N in the given vector.
+static really_inline void setbit256(m256 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo;
+ } else {
+ sub = &ptr->hi;
+ n -= 128;
+ }
+ setbit128(sub, n);
+}
+
+// switches off bit N in the given vector.
+static really_inline void clearbit256(m256 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo;
+ } else {
+ sub = &ptr->hi;
+ n -= 128;
+ }
+ clearbit128(sub, n);
+}
+
+// tests bit N in the given vector.
+static really_inline char testbit256(m256 val, unsigned int n) {
+ assert(n < sizeof(val) * 8);
+ m128 sub;
+ if (n < 128) {
+ sub = val.lo;
+ } else {
+ sub = val.hi;
+ n -= 128;
+ }
+ return testbit128(sub, n);
+}
+
+static really_really_inline m128 movdq_hi(m256 x) { return x.hi; }
+
+static really_really_inline m128 movdq_lo(m256 x) { return x.lo; }
+
+static really_inline m256 combine2x128(m128 hi, m128 lo) {
+ m256 rv = {lo, hi};
+ return rv;
+}
+
+/****
+ **** 384-bit Primitives
+ ****/
+
+static really_inline m384 and384(m384 a, m384 b) {
+ m384 rv;
+ rv.lo = and128(a.lo, b.lo);
+ rv.mid = and128(a.mid, b.mid);
+ rv.hi = and128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m384 or384(m384 a, m384 b) {
+ m384 rv;
+ rv.lo = or128(a.lo, b.lo);
+ rv.mid = or128(a.mid, b.mid);
+ rv.hi = or128(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m384 xor384(m384 a, m384 b) {
+ m384 rv;
+ rv.lo = xor128(a.lo, b.lo);
+ rv.mid = xor128(a.mid, b.mid);
+ rv.hi = xor128(a.hi, b.hi);
+ return rv;
+}
+static really_inline m384 not384(m384 a) {
+ m384 rv;
+ rv.lo = not128(a.lo);
+ rv.mid = not128(a.mid);
+ rv.hi = not128(a.hi);
+ return rv;
+}
+static really_inline m384 andnot384(m384 a, m384 b) {
+ m384 rv;
+ rv.lo = andnot128(a.lo, b.lo);
+ rv.mid = andnot128(a.mid, b.mid);
+ rv.hi = andnot128(a.hi, b.hi);
+ return rv;
+}
+
+static really_really_inline m384 lshift64_m384(m384 a, unsigned b) {
+ m384 rv;
+ rv.lo = lshift64_m128(a.lo, b);
+ rv.mid = lshift64_m128(a.mid, b);
+ rv.hi = lshift64_m128(a.hi, b);
+ return rv;
+}
+
+static really_inline m384 zeroes384(void) {
+ m384 rv = {zeroes128(), zeroes128(), zeroes128()};
+ return rv;
+}
+
+static really_inline m384 ones384(void) {
+ m384 rv = {ones128(), ones128(), ones128()};
+ return rv;
+}
+
+static really_inline int diff384(m384 a, m384 b) {
+ return diff128(a.lo, b.lo) || diff128(a.mid, b.mid) || diff128(a.hi, b.hi);
+}
+
+static really_inline int isnonzero384(m384 a) {
+ return isnonzero128(or128(or128(a.lo, a.mid), a.hi));
+}
+
+/**
+ * "Rich" version of diff384(). Takes two vectors a and b and returns a 12-bit
+ * mask indicating which 32-bit words contain differences.
+ */
+static really_inline u32 diffrich384(m384 a, m384 b) {
+ m128 z = zeroes128();
+ uint32x4_t x = vceqq_s32(a.lo.vect_s32, b.lo.vect_s32);
+ uint32x4_t y = vceqq_s32(a.mid.vect_s32, b.mid.vect_s32);
+ uint32x4_t w = vceqq_s32(a.hi.vect_s32, b.hi.vect_s32);
+
+ uint16x8_t q = vcombine_u16(vqmovn_u32(x), vqmovn_u32(y));
+ uint16x8_t p = vcombine_u16(vqmovn_u32(w), vqmovn_u32(z.vect_u32));
+
+ uint8x16_t input = vcombine_u8(vqmovn_u16(q), vqmovn_u16(p));
+
+ static const int8_t __attribute__((aligned(16)))
+ xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0};
+ uint8x8_t mask_and = vdup_n_u8(0x80);
+ int8x8_t mask_shift = vld1_s8(xr);
+
+ uint8x8_t lo = vget_low_u8(input);
+ uint8x8_t hi = vget_high_u8(input);
+
+ lo = vand_u8(lo, mask_and);
+ lo = vshl_u8(lo, mask_shift);
+
+ hi = vand_u8(hi, mask_and);
+ hi = vshl_u8(hi, mask_shift);
+
+ lo = vpadd_u8(lo, lo);
+ lo = vpadd_u8(lo, lo);
+ lo = vpadd_u8(lo, lo);
+
+ hi = vpadd_u8(hi, hi);
+ hi = vpadd_u8(hi, hi);
+ hi = vpadd_u8(hi, hi);
+
+ return ~((hi[0] << 8) | (lo[0] & 0xFF)) & 0xfff;
+}
+
+/**
+ * "Rich" version of diff384(), 64-bit variant. Takes two vectors a and b and
+ * returns a 12-bit mask indicating which 64-bit words contain differences.
+ */
+static really_inline u32 diffrich64_384(m384 a, m384 b) {
+ u32 d = diffrich384(a, b);
+ return (d | (d >> 1)) & 0x55555555;
+}
+
+// aligned load
+static really_inline m384 load384(const void *ptr) {
+ assert(ISALIGNED_16(ptr));
+ m384 rv = {load128(ptr), load128((const char *)ptr + 16),
+ load128((const char *)ptr + 32)};
+ return rv;
+}
+
+// aligned store
+static really_inline void store384(void *ptr, m384 a) {
+ assert(ISALIGNED_16(ptr));
+ ptr = assume_aligned(ptr, 16);
+ *(m384 *)ptr = a;
+}
+
+// unaligned load
+static really_inline m384 loadu384(const void *ptr) {
+ m384 rv = {loadu128(ptr), loadu128((const char *)ptr + 16),
+ loadu128((const char *)ptr + 32)};
+ return rv;
+}
+
+// packed unaligned store of first N bytes
+static really_inline void storebytes384(void *ptr, m384 a, unsigned int n) {
+ assert(n <= sizeof(a));
+ memcpy(ptr, &a, n);
+}
+
+// packed unaligned load of first N bytes, pad with zero
+static really_inline m384 loadbytes384(const void *ptr, unsigned int n) {
+ m384 a = zeroes384();
+ assert(n <= sizeof(a));
+ memcpy(&a, ptr, n);
+ return a;
+}
+
+// switches on bit N in the given vector.
+static really_inline void setbit384(m384 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo;
+ } else if (n < 256) {
+ sub = &ptr->mid;
+ } else {
+ sub = &ptr->hi;
+ }
+ setbit128(sub, n % 128);
+}
+
+// switches off bit N in the given vector.
+static really_inline void clearbit384(m384 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo;
+ } else if (n < 256) {
+ sub = &ptr->mid;
+ } else {
+ sub = &ptr->hi;
+ }
+ clearbit128(sub, n % 128);
+}
+
+// tests bit N in the given vector.
+static really_inline char testbit384(m384 val, unsigned int n) {
+ assert(n < sizeof(val) * 8);
+ m128 sub;
+ if (n < 128) {
+ sub = val.lo;
+ } else if (n < 256) {
+ sub = val.mid;
+ } else {
+ sub = val.hi;
+ }
+ return testbit128(sub, n % 128);
+}
+
+/****
+ **** 512-bit Primitives
+ ****/
+
+static really_inline m512 zeroes512(void) {
+ m512 rv = {zeroes256(), zeroes256()};
+ return rv;
+}
+
+static really_inline m512 ones512(void) {
+ m512 rv = {ones256(), ones256()};
+ return rv;
+}
+
+static really_inline m512 and512(m512 a, m512 b) {
+ m512 rv;
+ rv.lo = and256(a.lo, b.lo);
+ rv.hi = and256(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m512 or512(m512 a, m512 b) {
+ m512 rv;
+ rv.lo = or256(a.lo, b.lo);
+ rv.hi = or256(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m512 xor512(m512 a, m512 b) {
+ m512 rv;
+ rv.lo = xor256(a.lo, b.lo);
+ rv.hi = xor256(a.hi, b.hi);
+ return rv;
+}
+
+static really_inline m512 not512(m512 a) {
+ m512 rv;
+ rv.lo = not256(a.lo);
+ rv.hi = not256(a.hi);
+ return rv;
+}
+
+static really_inline m512 andnot512(m512 a, m512 b) {
+ m512 rv;
+ rv.lo = andnot256(a.lo, b.lo);
+ rv.hi = andnot256(a.hi, b.hi);
+ return rv;
+}
+
+static really_really_inline m512 lshift64_m512(m512 a, unsigned b) {
+ m512 rv;
+ rv.lo = lshift64_m256(a.lo, b);
+ rv.hi = lshift64_m256(a.hi, b);
+ return rv;
+}
+
+static really_inline int diff512(m512 a, m512 b) {
+ return diff256(a.lo, b.lo) || diff256(a.hi, b.hi);
+}
+
+static really_inline int isnonzero512(m512 a) {
+ m128 x = or128(a.lo.lo, a.lo.hi);
+ m128 y = or128(a.hi.lo, a.hi.hi);
+ return isnonzero128(or128(x, y));
+}
+
+/**
+ * "Rich" version of diff512(). Takes two vectors a and b and returns a 16-bit
+ * mask indicating which 32-bit words contain differences.
+ */
+static really_inline u32 diffrich512(m512 a, m512 b) {
+ uint32x4_t x = vceqq_s32(a.lo.lo.vect_s32, b.lo.lo.vect_s32);
+ uint32x4_t y = vceqq_s32(a.lo.hi.vect_s32, b.lo.hi.vect_s32);
+ uint32x4_t z = vceqq_s32(a.hi.lo.vect_s32, b.hi.lo.vect_s32);
+ uint32x4_t w = vceqq_s32(a.hi.hi.vect_s32, b.hi.hi.vect_s32);
+ uint16x8_t p = vcombine_u16(vqmovn_u32(x), vqmovn_u32(y));
+ uint16x8_t q = vcombine_u16(vqmovn_u32(z), vqmovn_u32(w));
+
+ uint8x16_t input = vcombine_u8(vqmovn_u16(p), vqmovn_u16(q));
+
+ static const int8_t __attribute__((aligned(16)))
+ xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0};
+ uint8x8_t mask_and = vdup_n_u8(0x80);
+ int8x8_t mask_shift = vld1_s8(xr);
+
+ uint8x8_t lo = vget_low_u8(input);
+ uint8x8_t hi = vget_high_u8(input);
+
+ lo = vand_u8(lo, mask_and);
+ lo = vshl_u8(lo, mask_shift);
+
+ hi = vand_u8(hi, mask_and);
+ hi = vshl_u8(hi, mask_shift);
+
+ lo = vpadd_u8(lo, lo);
+ lo = vpadd_u8(lo, lo);
+ lo = vpadd_u8(lo, lo);
+
+ hi = vpadd_u8(hi, hi);
+ hi = vpadd_u8(hi, hi);
+ hi = vpadd_u8(hi, hi);
+
+ return ~((hi[0] << 8) | (lo[0] & 0xFF)) & 0xffff;
+}
+
+/**
+ * "Rich" version of diffrich(), 64-bit variant. Takes two vectors a and b and
+ * returns a 16-bit mask indicating which 64-bit words contain differences.
+ */
+static really_inline u32 diffrich64_512(m512 a, m512 b) {
+ u32 d = diffrich512(a, b);
+ return (d | (d >> 1)) & 0x55555555;
+}
+
+// aligned load
+static really_inline m512 load512(const void *ptr) {
+ assert(ISALIGNED_N(ptr, alignof(m256)));
+ m512 rv = {load256(ptr), load256((const char *)ptr + 32)};
+ return rv;
+}
+
+// aligned store
+static really_inline void store512(void *ptr, m512 a) {
+ assert(ISALIGNED_N(ptr, alignof(m512)));
+ ptr = assume_aligned(ptr, 16);
+ *(m512 *)ptr = a;
+}
+
+// unaligned load
+static really_inline m512 loadu512(const void *ptr) {
+ m512 rv = {loadu256(ptr), loadu256((const char *)ptr + 32)};
+ return rv;
+}
+
+// packed unaligned store of first N bytes
+static really_inline void storebytes512(void *ptr, m512 a, unsigned int n) {
+ assert(n <= sizeof(a));
+ memcpy(ptr, &a, n);
+}
+
+// packed unaligned load of first N bytes, pad with zero
+static really_inline m512 loadbytes512(const void *ptr, unsigned int n) {
+ m512 a = zeroes512();
+ assert(n <= sizeof(a));
+ memcpy(&a, ptr, n);
+ return a;
+}
+
+static really_inline m512 mask1bit512(unsigned int n) {
+ assert(n < sizeof(m512) * 8);
+ u32 mask_idx = ((n % 8) * 64) + 95;
+ mask_idx -= n / 8;
+ return loadu512(&simd_onebit_masks[mask_idx]);
+}
+
+// switches on bit N in the given vector.
+static really_inline void setbit512(m512 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo.lo;
+ } else if (n < 256) {
+ sub = &ptr->lo.hi;
+ } else if (n < 384) {
+ sub = &ptr->hi.lo;
+ } else {
+ sub = &ptr->hi.hi;
+ }
+ setbit128(sub, n % 128);
+}
+
+// switches off bit N in the given vector.
+static really_inline void clearbit512(m512 *ptr, unsigned int n) {
+ assert(n < sizeof(*ptr) * 8);
+ m128 *sub;
+ if (n < 128) {
+ sub = &ptr->lo.lo;
+ } else if (n < 256) {
+ sub = &ptr->lo.hi;
+ } else if (n < 384) {
+ sub = &ptr->hi.lo;
+ } else {
+ sub = &ptr->hi.hi;
+ }
+ clearbit128(sub, n % 128);
+}
+
+// tests bit N in the given vector.
+static really_inline char testbit512(m512 val, unsigned int n) {
+ assert(n < sizeof(val) * 8);
+ m128 sub;
+ if (n < 128) {
+ sub = val.lo.lo;
+ } else if (n < 256) {
+ sub = val.lo.hi;
+ } else if (n < 384) {
+ sub = val.hi.lo;
+ } else {
+ sub = val.hi.hi;
+ }
+ return testbit128(sub, n % 128);
+}
+#pragma GCC diagnostic pop
+
+#endif
diff --git a/src/util/simd_types.h b/src/util/simd_types.h
index 962cad6..b3f96ea 100644
--- a/src/util/simd_types.h
+++ b/src/util/simd_types.h
@@ -35,6 +35,23 @@
#include "ue2common.h"
#if defined(HAVE_SSE2)
+typedef __m128i m128;
+#elif defined(HAVE_NEON)
+#include "arm_neon.h"
+
+typedef union {
+ int8x16_t vect_s8;
+ int16x8_t vect_s16;
+ int32x4_t vect_s32;
+ int64x2_t vect_s64;
+ uint8x16_t vect_u8;
+ uint16x8_t vect_u16;
+ uint32x4_t vect_u32;
+ uint64x2_t vect_u64;
+} __m128i;
+typedef float32x4_t __m128;
+typedef float64x2_t __m128d;
+
typedef __m128i m128;
#else
typedef struct ALIGN_DIRECTIVE {u64a hi; u64a lo;} m128;
diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h
new file mode 100644
index 0000000..9588d97
--- /dev/null
+++ b/src/util/simd_utils.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+
+#ifndef SIMD_UTILS
+#define SIMD_UTILS
+
+#if defined(__x86_64__)
+#include "simd_x86.h"
+#elif defined(__aarch64__)
+#include "simd_arm.h"
+#endif
+
+#endif
diff --git a/src/util/simd_x86.h b/src/util/simd_x86.h
index 5fa727e..5daaa74 100644
--- a/src/util/simd_x86.h
+++ b/src/util/simd_x86.h
@@ -1417,4 +1417,14 @@ char testbit512(m512 val, unsigned int n) {
#endif
}
+static really_inline m128 set2x64(u64a c)
+{
+    return _mm_set1_epi64x(c);
+}
+
+static really_inline m128 set32x4(int i3, int i2, int i1, int i0)
+{
+ return _mm_set_epi32(i3, i2, i1, i0);
+}
+
#endif
diff --git a/tools/hscollider/CMakeLists.txt b/tools/hscollider/CMakeLists.txt
index a4d71b2..0c41ab9 100644
--- a/tools/hscollider/CMakeLists.txt
+++ b/tools/hscollider/CMakeLists.txt
@@ -21,7 +21,14 @@ set_source_files_properties(
PROPERTIES
COMPILE_FLAGS "${RAGEL_C_FLAGS} -I${CMAKE_CURRENT_SOURCE_DIR}")
-ragelmaker(ColliderCorporaParser.rl)
+
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
+ ragelmaker(ColliderCorporaParser.rl)
+endif()
+
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ ragelcopyer(ColliderCorporaParser.rl)
+endif()
if (BUILD_CHIMERA)
add_definitions(-DHS_HYBRID)
diff --git a/tools/hscollider/ColliderCorporaParser.cpp b/tools/hscollider/ColliderCorporaParser.cpp
new file mode 100644
index 0000000..5391473
--- /dev/null
+++ b/tools/hscollider/ColliderCorporaParser.cpp
@@ -0,0 +1,474 @@
+
+
+/*
+ * Copyright (c) 2015-2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "ColliderCorporaParser.h"
+#include "Corpora.h"
+
+#include "ue2common.h"
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+using namespace std;
+
+namespace /* anonymous */ {
+
+// Take a string like '\xFF' and convert it to the character it represents
+char unhex(const char *start, UNUSED const char *end) {
+ assert(start + 4 == end);
+ assert(start[0] == '\\');
+ assert(start[1] == 'x');
+ assert(isxdigit(start[2]));
+ assert(isxdigit(start[3])); // second hex digit; was a duplicated start[2] check
+
+ char temp[3] = {start[2], start[3], 0};
+
+ return strtol(temp, nullptr, 16);
+}
+
+static const char _FileCorporaParser_actions[] = {
+ 0, 1, 0, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1, 9, 1, 10,
+ 1, 11, 1, 12, 1, 13, 1, 14, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1,
+ 20, 1, 21, 1, 22, 1, 23, 1, 24, 2, 0, 2, 2, 3, 0, 3, 1, 0, 2};
+
+static const char _FileCorporaParser_key_offsets[] = {
+ 0, 0, 2, 6, 7, 13, 19, 25, 31, 34, 34, 35, 52, 54, 71, 72, 75, 79};
+
+static const char _FileCorporaParser_trans_keys[] = {
+ 48, 57, 58, 61, 48, 57, 34, 48, 57, 65, 70, 97, 102, 48,
+ 57, 65, 70, 97, 102, 48, 57, 65, 70, 97, 102, 48, 57, 65,
+ 70, 97, 102, 32, 48, 57, 92, 48, 97, 110, 114, 116, 118, 120,
+ 49, 57, 65, 90, 98, 100, 101, 102, 103, 122, 34, 92, 48, 97,
+ 110, 114, 116, 118, 120, 49, 57, 65, 90, 98, 100, 101, 102, 103,
+ 122, 58, 32, 48, 57, 32, 44, 48, 57, 32, 44, 0};
+
+static const char _FileCorporaParser_single_lengths[] = {
+ 0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 1, 7, 2, 7, 1, 1, 2, 2};
+
+static const char _FileCorporaParser_range_lengths[] = {
+ 0, 1, 1, 0, 3, 3, 3, 3, 1, 0, 0, 5, 0, 5, 0, 1, 1, 0};
+
+static const char _FileCorporaParser_index_offsets[] = {
+ 0, 0, 2, 6, 8, 12, 16, 20, 24, 27, 28, 30, 43, 46, 59, 61, 64, 68};
+
+static const char _FileCorporaParser_indicies[] = {
+ 0, 1, 3, 4, 2, 1, 5, 1, 7, 7, 7, 6, 8, 8, 8, 6, 10, 10,
+ 10, 9, 11, 11, 11, 9, 12, 13, 1, 1, 15, 14, 18, 18, 18, 18, 18, 18,
+ 19, 16, 16, 16, 18, 16, 17, 21, 22, 20, 25, 25, 25, 25, 25, 25, 26, 23,
+ 23, 23, 25, 23, 24, 27, 1, 28, 29, 1, 31, 32, 13, 30, 31, 32, 30, 0};
+
+static const char _FileCorporaParser_trans_targs[] = {
+ 2, 0, 2, 9, 3, 9, 10, 5, 10, 12, 7, 12, 8, 16, 10, 11, 10,
+ 10, 10, 4, 12, 12, 13, 12, 12, 12, 6, 14, 8, 16, 15, 17, 15};
+
+static const char _FileCorporaParser_trans_actions[] = {
+ 53, 0, 47, 5, 0, 7, 25, 0, 15, 39, 0, 27, 0, 1, 21, 13, 23,
+ 19, 17, 0, 33, 35, 13, 37, 31, 29, 0, 41, 3, 50, 45, 0, 43};
+
+static const char _FileCorporaParser_to_state_actions[] = {
+ 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 9, 0, 9, 9, 0, 0};
+
+static const char _FileCorporaParser_from_state_actions[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 11, 0, 11, 11, 0, 0};
+
+static const char _FileCorporaParser_eof_trans[] = {
+ 0, 0, 0, 0, 7, 7, 10, 10, 0, 0, 0, 17, 0, 24, 0, 0, 31, 31};
+
+static const int FileCorporaParser_start = 1;
+static const int FileCorporaParser_first_final = 9;
+static const int FileCorporaParser_error = 0;
+
+static const int FileCorporaParser_en_corpus_old = 10;
+static const int FileCorporaParser_en_corpus_new = 12;
+static const int FileCorporaParser_en_colon_sep = 14;
+static const int FileCorporaParser_en_match_list = 15;
+static const int FileCorporaParser_en_main = 1;
+
+} // namespace
+
+bool parseCorpus(const string &line, Corpus &c, unsigned int &id) {
+ const char *p = line.c_str();
+ const char *pe = p + line.size();
+ const char *eof = pe;
+ const char *ts;
+ const char *te;
+ int cs;
+ UNUSED int act;
+
+ // For storing integers as they're scanned
+ unsigned int num = 0;
+
+ string &sout = c.data;
+
+ {
+ cs = FileCorporaParser_start;
+ ts = 0;
+ te = 0;
+ act = 0;
+ }
+
+ {
+ int _klen;
+ unsigned int _trans;
+ const char *_acts;
+ unsigned int _nacts;
+ const char *_keys;
+
+ if (p == pe)
+ goto _test_eof;
+ if (cs == 0)
+ goto _out;
+ _resume:
+ _acts = _FileCorporaParser_actions +
+ _FileCorporaParser_from_state_actions[cs];
+ _nacts = (unsigned int)*_acts++;
+ while (_nacts-- > 0) {
+ switch (*_acts++) {
+ case 7:
+
+ {
+ ts = p;
+ } break;
+ }
+ }
+
+ _keys =
+ _FileCorporaParser_trans_keys + _FileCorporaParser_key_offsets[cs];
+ _trans = _FileCorporaParser_index_offsets[cs];
+
+ _klen = _FileCorporaParser_single_lengths[cs];
+ if (_klen > 0) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + _klen - 1;
+ while (1) {
+ if (_upper < _lower)
+ break;
+
+ _mid = _lower + ((_upper - _lower) >> 1);
+ if ((*p) < *_mid)
+ _upper = _mid - 1;
+ else if ((*p) > *_mid)
+ _lower = _mid + 1;
+ else {
+ _trans += (unsigned int)(_mid - _keys);
+ goto _match;
+ }
+ }
+ _keys += _klen;
+ _trans += _klen;
+ }
+
+ _klen = _FileCorporaParser_range_lengths[cs];
+ if (_klen > 0) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + (_klen << 1) - 2;
+ while (1) {
+ if (_upper < _lower)
+ break;
+
+ _mid = _lower + (((_upper - _lower) >> 1) & ~1);
+ if ((*p) < _mid[0])
+ _upper = _mid - 2;
+ else if ((*p) > _mid[1])
+ _lower = _mid + 2;
+ else {
+ _trans += (unsigned int)((_mid - _keys) >> 1);
+ goto _match;
+ }
+ }
+ _trans += _klen;
+ }
+
+ _match:
+ _trans = _FileCorporaParser_indicies[_trans];
+ _eof_trans:
+ cs = _FileCorporaParser_trans_targs[_trans];
+
+ if (_FileCorporaParser_trans_actions[_trans] == 0)
+ goto _again;
+
+ _acts = _FileCorporaParser_actions +
+ _FileCorporaParser_trans_actions[_trans];
+ _nacts = (unsigned int)*_acts++;
+ while (_nacts-- > 0) {
+ switch (*_acts++) {
+ case 0:
+
+ {
+ num = (num * 10) + ((*p) - '0');
+ } break;
+ case 1:
+
+ {
+ num = 0;
+ } break;
+ case 2:
+
+ {
+ id = num;
+ } break;
+ case 3:
+
+ {
+ num = 0;
+ } break;
+ case 4:
+
+ {
+ {
+ cs = 10;
+ goto _again;
+ }
+ } break;
+ case 5:
+
+ {
+ c.hasMatches = true;
+ {
+ cs = 12;
+ goto _again;
+ }
+ } break;
+ case 8:
+
+ {
+ te = p + 1;
+ } break;
+ case 9:
+
+ {
+ te = p + 1;
+ { sout.push_back(unhex(ts, te)); }
+ } break;
+ case 10:
+
+ {
+ te = p + 1;
+ {
+ switch (*(ts + 1)) {
+ case '0':
+ sout.push_back('\x00');
+ break;
+ case 'a':
+ sout.push_back('\x07');
+ break;
+ case 'e':
+ sout.push_back('\x1b');
+ break;
+ case 'f':
+ sout.push_back('\x0c');
+ break;
+ case 'n':
+ sout.push_back('\x0a');
+ break;
+ case 'v':
+ sout.push_back('\x0b');
+ break;
+ case 'r':
+ sout.push_back('\x0d');
+ break;
+ case 't':
+ sout.push_back('\x09');
+ break;
+ default: {
+ p++;
+ goto _out;
+ }
+ }
+ }
+ } break;
+ case 11:
+
+ {
+ te = p + 1;
+ { sout.push_back(*(ts + 1)); }
+ } break;
+ case 12:
+
+ {
+ te = p + 1;
+ { sout.push_back(*ts); }
+ } break;
+ case 13:
+
+ {
+ te = p;
+ p--;
+ { sout.push_back(*ts); }
+ } break;
+ case 14:
+
+ {
+ { p = ((te)) - 1; }
+ { sout.push_back(*ts); }
+ } break;
+ case 15:
+
+ {
+ te = p + 1;
+ { sout.push_back(unhex(ts, te)); }
+ } break;
+ case 16:
+
+ {
+ te = p + 1;
+ {
+ switch (*(ts + 1)) {
+ case '0':
+ sout.push_back('\x00');
+ break;
+ case 'a':
+ sout.push_back('\x07');
+ break;
+ case 'e':
+ sout.push_back('\x1b');
+ break;
+ case 'f':
+ sout.push_back('\x0c');
+ break;
+ case 'n':
+ sout.push_back('\x0a');
+ break;
+ case 'v':
+ sout.push_back('\x0b');
+ break;
+ case 'r':
+ sout.push_back('\x0d');
+ break;
+ case 't':
+ sout.push_back('\x09');
+ break;
+ default: {
+ p++;
+ goto _out;
+ }
+ }
+ }
+ } break;
+ case 17:
+
+ {
+ te = p + 1;
+ { sout.push_back(*(ts + 1)); }
+ } break;
+ case 18:
+
+ {
+ te = p + 1;
+ { sout.push_back(*ts); }
+ } break;
+ case 19:
+
+ {
+ te = p + 1;
+ {
+ {
+ cs = 14;
+ goto _again;
+ }
+ }
+ } break;
+ case 20:
+
+ {
+ te = p;
+ p--;
+ { sout.push_back(*ts); }
+ } break;
+ case 21:
+
+ {
+ { p = ((te)) - 1; }
+ { sout.push_back(*ts); }
+ } break;
+ case 22:
+
+ {
+ te = p + 1;
+ {
+ {
+ cs = 15;
+ goto _again;
+ }
+ }
+ } break;
+ case 23:
+
+ {
+ te = p + 1;
+ { c.matches.insert(num); }
+ } break;
+ case 24:
+
+ {
+ te = p;
+ p--;
+ { c.matches.insert(num); }
+ } break;
+ }
+ }
+
+ _again:
+ _acts = _FileCorporaParser_actions +
+ _FileCorporaParser_to_state_actions[cs];
+ _nacts = (unsigned int)*_acts++;
+ while (_nacts-- > 0) {
+ switch (*_acts++) {
+ case 6:
+
+ {
+ ts = 0;
+ } break;
+ }
+ }
+
+ if (cs == 0)
+ goto _out;
+ if (++p != pe)
+ goto _resume;
+ _test_eof : {}
+ if (p == eof) {
+ if (_FileCorporaParser_eof_trans[cs] > 0) {
+ _trans = _FileCorporaParser_eof_trans[cs] - 1;
+ goto _eof_trans;
+ }
+ }
+
+ _out : {}
+ }
+
+ return (cs != FileCorporaParser_error) && (p == pe);
+}
diff --git a/unit/internal/simd_utils.cpp b/unit/internal/simd_utils.cpp
index 623c2c9..22945d6 100644
--- a/unit/internal/simd_utils.cpp
+++ b/unit/internal/simd_utils.cpp
@@ -663,7 +663,7 @@ TEST(SimdUtilsTest, movq) {
ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd)));
ASSERT_EQ(0, memcmp(cmp, &r, sizeof(r)));
- simd = _mm_set_epi64x(~0LL, 0x123456789abcdef);
+ simd = set64x2(~0LL, 0x123456789abcdef);
r = movq(simd);
ASSERT_EQ(r, 0x123456789abcdef);
}
diff --git a/util/CMakeLists.txt b/util/CMakeLists.txt
index ea942ef..d7bef50 100644
--- a/util/CMakeLists.txt
+++ b/util/CMakeLists.txt
@@ -11,7 +11,13 @@ set_source_files_properties(
PROPERTIES
COMPILE_FLAGS "${RAGEL_C_FLAGS}")
-ragelmaker(ExpressionParser.rl)
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
+ ragelmaker(ExpressionParser.rl)
+endif()
+
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
+ ragelcopyer(ExpressionParser.rl)
+endif()
set(expressionutil_SRCS
expressions.cpp
diff --git a/util/ExpressionParser.cpp b/util/ExpressionParser.cpp
new file mode 100644
index 0000000..687fc39
--- /dev/null
+++ b/util/ExpressionParser.cpp
@@ -0,0 +1,397 @@
+
+
+/*
+ * Copyright (c) 2015-2018, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "ExpressionParser.h"
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+
+#include "hs_compile.h"
+#include "ue2common.h"
+
+using std::string;
+
+namespace { // anon
+
+enum ParamKey {
+ PARAM_NONE,
+ PARAM_MIN_OFFSET,
+ PARAM_MAX_OFFSET,
+ PARAM_MIN_LENGTH,
+ PARAM_EDIT_DISTANCE,
+ PARAM_HAMM_DISTANCE
+};
+
+static const char _ExpressionParser_actions[] = {0, 1, 0, 1, 1, 1, 2, 1, 3,
+ 1, 4, 1, 5, 1, 6, 1, 7, 1,
+ 9, 1, 10, 2, 8, 0
+
+};
+
+static const char _ExpressionParser_key_offsets[] = {
+ 0, 0, 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+ 20, 21, 23, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
+ 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 82};
+
+static const char _ExpressionParser_trans_keys[] = {
+ 32, 101, 104, 109, 32, 101, 104, 109, 100, 105, 116, 95, 100, 105,
+ 115, 116, 97, 110, 99, 101, 61, 48, 57, 32, 44, 125, 48, 57,
+ 32, 44, 125, 97, 109, 109, 105, 110, 103, 95, 100, 105, 115, 116,
+ 97, 110, 99, 101, 97, 105, 120, 95, 111, 102, 102, 115, 101, 116,
+ 110, 95, 108, 111, 101, 110, 103, 116, 104, 102, 102, 115, 101, 116,
+ 56, 67, 72, 76, 105, 109, 115, 123, 79, 81, 86, 87, 0};
+
+static const char _ExpressionParser_single_lengths[] = {
+ 0, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 3, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 0};
+
+static const char _ExpressionParser_range_lengths[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0};
+
+static const unsigned char _ExpressionParser_index_offsets[] = {
+ 0, 0, 5, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
+ 34, 36, 38, 43, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
+ 69, 71, 73, 75, 77, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98,
+ 100, 103, 105, 107, 109, 111, 113, 115, 117, 119, 121, 123, 134};
+
+static const char _ExpressionParser_trans_targs[] = {
+ 2, 3, 19, 34, 0, 2, 3, 19, 34, 0, 4, 0, 5, 0, 6, 0, 7,
+ 0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 0,
+ 16, 0, 17, 0, 18, 1, 57, 17, 0, 18, 1, 57, 0, 20, 0, 21, 0,
+ 22, 0, 23, 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30,
+ 0, 31, 0, 32, 0, 33, 0, 15, 0, 35, 43, 0, 36, 0, 37, 0, 38,
+ 0, 39, 0, 40, 0, 41, 0, 42, 0, 15, 0, 44, 0, 45, 0, 46, 51,
+ 0, 47, 0, 48, 0, 49, 0, 50, 0, 15, 0, 52, 0, 53, 0, 54, 0,
+ 55, 0, 15, 0, 56, 56, 56, 56, 56, 56, 56, 1, 56, 56, 0, 0, 0};
+
+static const char _ExpressionParser_trans_actions[] = {
+ 17, 17, 17, 17, 19, 0, 0, 0, 0, 19, 0, 19, 0, 19, 0, 19, 0,
+ 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 13, 19,
+ 0, 19, 21, 19, 0, 5, 5, 1, 19, 0, 5, 5, 19, 0, 19, 0, 19,
+ 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0, 19, 0,
+ 19, 0, 19, 0, 19, 0, 19, 15, 19, 0, 0, 19, 0, 19, 0, 19, 0,
+ 19, 0, 19, 0, 19, 0, 19, 0, 19, 9, 19, 0, 19, 0, 19, 0, 0,
+ 19, 0, 19, 0, 19, 0, 19, 0, 19, 11, 19, 0, 19, 0, 19, 0, 19,
+ 0, 19, 7, 19, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 19, 19, 0};
+
+static const char _ExpressionParser_eof_actions[] = {
+ 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,
+ 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0};
+
+static const int ExpressionParser_start = 56;
+static const int ExpressionParser_first_final = 56;
+static const int ExpressionParser_error = 0;
+
+static const int ExpressionParser_en_main = 56;
+
+} // namespace
+
+static void initExt(hs_expr_ext *ext) {
+ memset(ext, 0, sizeof(*ext));
+ ext->max_offset = MAX_OFFSET;
+}
+
+bool HS_CDECL readExpression(const std::string &input, std::string &expr,
+ unsigned int *flags, hs_expr_ext *ext,
+ bool *must_be_ordered) {
+ assert(flags);
+ assert(ext);
+
+ // Init flags and ext params.
+ *flags = 0;
+ initExt(ext);
+ if (must_be_ordered) {
+ *must_be_ordered = false;
+ }
+
+ // Extract expr, which is easier to do in straight C++ than with Ragel.
+ if (input.empty() || input[0] != '/') {
+ return false;
+ }
+ size_t end = input.find_last_of('/');
+ if (end == string::npos || end == 0) {
+ return false;
+ }
+ expr = input.substr(1, end - 1);
+
+ // Use a Ragel scanner to handle flags and params.
+ const char *p = input.c_str() + end + 1;
+ const char *pe = input.c_str() + input.size();
+ UNUSED const char *eof = pe;
+ UNUSED const char *ts = p, *te = p;
+ int cs;
+ UNUSED int act;
+
+ assert(p);
+ assert(pe);
+
+ // For storing integers as they're scanned.
+ u64a num = 0;
+ enum ParamKey key = PARAM_NONE;
+
+ { cs = ExpressionParser_start; }
+
+ {
+ int _klen;
+ unsigned int _trans;
+ const char *_acts;
+ unsigned int _nacts;
+ const char *_keys;
+
+ if (p == pe)
+ goto _test_eof;
+ if (cs == 0)
+ goto _out;
+ _resume:
+ _keys =
+ _ExpressionParser_trans_keys + _ExpressionParser_key_offsets[cs];
+ _trans = _ExpressionParser_index_offsets[cs];
+
+ _klen = _ExpressionParser_single_lengths[cs];
+ if (_klen > 0) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + _klen - 1;
+ while (1) {
+ if (_upper < _lower)
+ break;
+
+ _mid = _lower + ((_upper - _lower) >> 1);
+ if ((*p) < *_mid)
+ _upper = _mid - 1;
+ else if ((*p) > *_mid)
+ _lower = _mid + 1;
+ else {
+ _trans += (unsigned int)(_mid - _keys);
+ goto _match;
+ }
+ }
+ _keys += _klen;
+ _trans += _klen;
+ }
+
+ _klen = _ExpressionParser_range_lengths[cs];
+ if (_klen > 0) {
+ const char *_lower = _keys;
+ const char *_mid;
+ const char *_upper = _keys + (_klen << 1) - 2;
+ while (1) {
+ if (_upper < _lower)
+ break;
+
+ _mid = _lower + (((_upper - _lower) >> 1) & ~1);
+ if ((*p) < _mid[0])
+ _upper = _mid - 2;
+ else if ((*p) > _mid[1])
+ _lower = _mid + 2;
+ else {
+ _trans += (unsigned int)((_mid - _keys) >> 1);
+ goto _match;
+ }
+ }
+ _trans += _klen;
+ }
+
+ _match:
+ cs = _ExpressionParser_trans_targs[_trans];
+
+ if (_ExpressionParser_trans_actions[_trans] == 0)
+ goto _again;
+
+ _acts =
+ _ExpressionParser_actions + _ExpressionParser_trans_actions[_trans];
+ _nacts = (unsigned int)*_acts++;
+ while (_nacts-- > 0) {
+ switch (*_acts++) {
+ case 0:
+
+ {
+ num = (num * 10) + ((*p) - '0');
+ } break;
+ case 1:
+
+ {
+ switch ((*p)) {
+ case 'i':
+ *flags |= HS_FLAG_CASELESS;
+ break;
+ case 's':
+ *flags |= HS_FLAG_DOTALL;
+ break;
+ case 'm':
+ *flags |= HS_FLAG_MULTILINE;
+ break;
+ case 'H':
+ *flags |= HS_FLAG_SINGLEMATCH;
+ break;
+ case 'O':
+ if (must_be_ordered) {
+ *must_be_ordered = true;
+ }
+ break;
+ case 'V':
+ *flags |= HS_FLAG_ALLOWEMPTY;
+ break;
+ case 'W':
+ *flags |= HS_FLAG_UCP;
+ break;
+ case '8':
+ *flags |= HS_FLAG_UTF8;
+ break;
+ case 'P':
+ *flags |= HS_FLAG_PREFILTER;
+ break;
+ case 'L':
+ *flags |= HS_FLAG_SOM_LEFTMOST;
+ break;
+ case 'C':
+ *flags |= HS_FLAG_COMBINATION;
+ break;
+ case 'Q':
+ *flags |= HS_FLAG_QUIET;
+ break;
+ default: {
+ p++;
+ goto _out;
+ }
+ }
+ } break;
+ case 2:
+
+ {
+ switch (key) {
+ case PARAM_MIN_OFFSET:
+ ext->flags |= HS_EXT_FLAG_MIN_OFFSET;
+ ext->min_offset = num;
+ break;
+ case PARAM_MAX_OFFSET:
+ ext->flags |= HS_EXT_FLAG_MAX_OFFSET;
+ ext->max_offset = num;
+ break;
+ case PARAM_MIN_LENGTH:
+ ext->flags |= HS_EXT_FLAG_MIN_LENGTH;
+ ext->min_length = num;
+ break;
+ case PARAM_EDIT_DISTANCE:
+ ext->flags |= HS_EXT_FLAG_EDIT_DISTANCE;
+ ext->edit_distance = num;
+ break;
+ case PARAM_HAMM_DISTANCE:
+ ext->flags |= HS_EXT_FLAG_HAMMING_DISTANCE;
+ ext->hamming_distance = num;
+ break;
+ case PARAM_NONE:
+ default:
+ // No key specified, syntax invalid.
+ return false;
+ }
+ } break;
+ case 3:
+
+ {
+ key = PARAM_MIN_OFFSET;
+ } break;
+ case 4:
+
+ {
+ key = PARAM_MAX_OFFSET;
+ } break;
+ case 5:
+
+ {
+ key = PARAM_MIN_LENGTH;
+ } break;
+ case 6:
+
+ {
+ key = PARAM_EDIT_DISTANCE;
+ } break;
+ case 7:
+
+ {
+ key = PARAM_HAMM_DISTANCE;
+ } break;
+ case 8:
+
+ {
+ num = 0;
+ } break;
+ case 9:
+
+ {
+ key = PARAM_NONE;
+ } break;
+ case 10:
+
+ {
+ return false;
+ } break;
+ }
+ }
+
+ _again:
+ if (cs == 0)
+ goto _out;
+ if (++p != pe)
+ goto _resume;
+ _test_eof : {}
+ if (p == eof) {
+ const char *__acts =
+ _ExpressionParser_actions + _ExpressionParser_eof_actions[cs];
+ unsigned int __nacts = (unsigned int)*__acts++;
+ while (__nacts-- > 0) {
+ switch (*__acts++) {
+ case 10:
+
+ {
+ return false;
+ } break;
+ }
+ }
+ }
+
+ _out : {}
+ }
+
+ DEBUG_PRINTF("expr='%s', flags=%u\n", expr.c_str(), *flags);
+
+ return (cs != ExpressionParser_error) && (p == pe);
+}
--
2.39.0
1
https://gitee.com/src-openeuler/hyperscan.git
git@gitee.com:src-openeuler/hyperscan.git
src-openeuler
hyperscan
hyperscan
master

搜索帮助

53164aa7 5694891 3bd8fe86 5694891