1 Star 0 Fork 0

unitwork / iwechen-thrift

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
String.cpp 4.29 KB
一键复制 编辑 原始数据 按行查看 历史
#include "String.h"
#include "trace_worker.h"
#include <string>
#include<vector>
#include <typeinfo>
/**
 * Builds a String around `str`.
 *
 * m_string is initialised from `str`, and the result of IsUTF8() over its
 * bytes is stored in m_isUtf8 so later calls can consult it.
 */
String::String(std::string &str)
    : m_string(str)
{
    const char *bytes = m_string.c_str();
    const long byteCount = m_string.size();
    m_isUtf8 = IsUTF8(bytes, byteCount);
}
/**
 * Reports whether byte `p` has its most significant bit (0x80) set.
 *
 * Both bytes of a double-byte Chinese character have the high bit set, so a
 * byte with that bit set is treated as part of a non-ASCII character.
 *
 * @param p  the byte to test
 * @return   true when bit 0x80 is set, false otherwise
 */
bool String::IsZhCh(char p)
{
    return (p & 0x80) != 0;
}
/**
 * Checks whether a byte buffer is structurally well-formed UTF-8.
 *
 * Each lead byte must be followed by the right number of continuation bytes
 * (10xxxxxx). Sequences of 1 to 4 bytes are accepted; the previous version
 * wrongly rejected 4-byte sequences (lead bytes 0xF0-0xF7).
 *
 * A multi-byte sequence cut off by the end of the buffer is tolerated: the
 * scan stops there and the buffer still counts as UTF-8 (unchanged from the
 * earlier behaviour). Only the byte structure is checked; overlong encodings
 * and surrogate code points are not rejected.
 *
 * @param pBuffer  start of the bytes to inspect (not modified)
 * @param size     number of bytes available at pBuffer
 * @return         false as soon as a malformed sequence is found; true
 *                 otherwise, including for a null buffer or size <= 0
 */
bool String::IsUTF8(const void* pBuffer, long size)
{
    if (!pBuffer || size <= 0)
    {
        return true;
    }

    const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
    const unsigned char* const stop = cur + size;

    while (cur < stop)
    {
        const unsigned char lead = *cur;
        int seqLen = 0;

        if (lead < 0x80)        // 0xxxxxxx: ASCII
        {
            seqLen = 1;
        }
        else if (lead < 0xC0)   // 10xxxxxx: continuation byte cannot start a character
        {
            return false;
        }
        else if (lead < 0xE0)   // 110xxxxx: 2-byte sequence
        {
            seqLen = 2;
        }
        else if (lead < 0xF0)   // 1110xxxx: 3-byte sequence
        {
            seqLen = 3;
        }
        else if (lead < 0xF8)   // 11110xxx: 4-byte sequence
        {
            seqLen = 4;
        }
        else                    // 11111xxx: never valid in UTF-8
        {
            return false;
        }

        // Sequence truncated by the end of the buffer: stop without failing.
        if (stop - cur < seqLen)
        {
            break;
        }

        for (int k = 1; k < seqLen; ++k)
        {
            if ((cur[k] & 0xC0) != 0x80)
            {
                return false;
            }
        }
        cur += seqLen;
    }
    return true;
}
/**
 * Returns the characters in the half-open range [start, end), where positions
 * count whole characters rather than bytes.
 *
 * The string is first split into characters: an ASCII byte is one character.
 * For a byte with the high bit set, the width depends on the encoding flag:
 *   - m_isUtf8 set: the width (2, 3 or 4 bytes) is read from the lead byte,
 *     so 2-byte and 4-byte characters are no longer mis-split as 3 bytes;
 *   - otherwise: a fixed 2 bytes per character.
 *
 * @param start  index of the first character to include
 * @param end    index one past the last character; a negative value or a
 *               value beyond the character count means "to the end"
 * @return       the selected characters, or "" when the string is empty or
 *               start is negative or greater than end
 */
std::string String::Sub(int start, int end)
{   trace_worker();
    trace_printf("start, end %d %d", start, end);
    if (m_string.empty())
    {
        return "";
    }
    const int len = static_cast<int>(m_string.length());

    // Nominal width of a non-ASCII character; in UTF-8 mode the real width is
    // refined per character from its lead byte below.
    int zhChLen = m_isUtf8 ? 3 : 2;
    trace_printf("zhChLen %d", zhChLen);

    // Split the string into one entry per character.
    std::vector<std::string> dump;
    trace_printf("m_string.c_str() %s", m_string.c_str());
    int i = 0;
    while (i < len)
    {
        int charLen = 1;
        if (IsZhCh(m_string.at(i)))
        {
            charLen = zhChLen;
            if (m_isUtf8)
            {
                const unsigned char lead = static_cast<unsigned char>(m_string.at(i));
                if (lead >= 0xF0)
                {
                    charLen = 4;
                }
                else if (lead >= 0xE0)
                {
                    charLen = 3;
                }
                else if (lead >= 0xC0)
                {
                    charLen = 2;
                }
                else
                {
                    // Stray continuation byte: keep it as its own entry so the
                    // following characters stay aligned.
                    charLen = 1;
                }
            }
        }
        // substr clamps the length at the end of the string.
        dump.push_back(m_string.substr(i, charLen));
        i += charLen;
    }
    trace_printf("NULL");

    const int charCount = static_cast<int>(dump.size());
    if (end < 0 || end > charCount)
    {
        end = charCount;
    }
    if (start < 0 || start > end)
    {   trace_printf("NULL");
        printf("start is wrong");
        return "";
    }
    // charCount is an int, so it matches the %d conversion.
    trace_printf("start, end, dump.size() %d %d %d", start, end, charCount);

    std::string tmp;
    for (int pos = start; pos < end; ++pos)
    {
        tmp += dump[pos];
    }
    trace_printf("tmp.c_str() %s", tmp.c_str());
    return tmp;
}
/**
 * Splits the string into words separated by the space character ' ' and
 * returns the list.
 *
 * Runs of spaces produce no empty words. The result is stored in m_words;
 * when m_words is already non-empty it is returned as-is without re-scanning.
 *
 * @return reference to the member word list
 */
std::vector<std::string> &String::GetWords()
{   trace_worker();
    if (!m_words.empty())
    {
        return m_words;
    }

    trace_printf("m_string.c_str() %s", m_string.c_str());

    // Jump to the start of each word, then to the space that ends it.
    std::string::size_type wordBegin = m_string.find_first_not_of(' ');
    while (wordBegin != std::string::npos)
    {
        const std::string::size_type wordStop = m_string.find(' ', wordBegin);
        if (wordStop == std::string::npos)
        {
            // Last word runs to the end of the string.
            m_words.push_back(m_string.substr(wordBegin));
            break;
        }
        m_words.push_back(m_string.substr(wordBegin, wordStop - wordBegin));
        wordBegin = m_string.find_first_not_of(' ', wordStop);
    }

    for (std::vector<std::string>::size_type idx = 0; idx < m_words.size(); ++idx)
    {
        trace_printf("m_words[i].c_str() |%s|", m_words[idx].c_str());
    }
    return m_words;
}
/**
 * Removes every carriage return ('\r') and line feed ('\n') from the string
 * and clears m_words, since any stored word list no longer matches the text.
 *
 * The previous version copied through a malloc'ed buffer of size()+1 bytes,
 * also copied the terminator, and then wrote one more '\0' after it. For a
 * string with no CR/LF that final write landed one byte past the allocation.
 * Building the result in a std::string removes the manual buffer, the
 * unchecked malloc, and the truncation at embedded NUL bytes.
 */
void String::CleanRN()
{
    std::string cleaned;
    cleaned.reserve(m_string.size());
    for (std::string::size_type i = 0; i < m_string.size(); ++i)
    {
        const char ch = m_string[i];
        if (ch != '\r' && ch != '\n')
        {
            cleaned += ch;
        }
    }
    m_string = cleaned;
    m_words.clear();
}
C++
1
https://gitee.com/unitwork/iwechen-thrift.git
git@gitee.com:unitwork/iwechen-thrift.git
unitwork
iwechen-thrift
iwechen-thrift
master

搜索帮助