Note
Access to this page requires authorization. You can try signing in or changing directories.
Access to this page requires authorization. You can try changing directories.
One tough issue, especially require the better performance and memory usage.
However, I done it :)
1. It builds a table strlen(s1)*strlen(s2)
2. Compare the s1 and s2 and fill the equaled bit in the table
3. Find out the longest connected lines in that table
4. Re-build the string with the line's offset
Not sure if it is the best algorithm to do that :)
However it only spend 47ms to compare and find out the duplicate string ("pro") of "programming" and "professional" in 10000 times.
"abbazyabcyzabcegz" and ""xabcyzabcdegabbazyabh" for 10,000 times
Milliseconds: 109
Cool?
//////
StrNode* GetDupStr(__notnull LPCWSTR lpszP1, __notnull LPCWSTR lpszP2)
{
// 0. valid parameters, if SAL does not work
if (lpszP1==NULL || lpszP2==NULL || wcslen(lpszP1)==0 || wcslen(lpszP2)==0)
return NULL;
//
// 1. build the table
//
// calculate the size
int nLen1 = (int)wcslen(lpszP1);
int nLen2 = (int)wcslen(lpszP2);
int nMapLen = sizeof(bool)*nLen1*nLen2;
// allocate the memory @allocate abMap
bool* abMap;
abMap = (bool*)malloc(nMapLen);
//ZeroMemory
memset(abMap, 0x0, nMapLen);
//
// 2. compare and fill the table
//
for(int i=0; i<nLen1; i++)
//rows
{
WCHAR cRow = lpszP1[i];
//check if there is duplicated char in previous chars, if so, memcpy directly
for(int k=0; k<i; k++)
{
if (lpszP1[k]==cRow)
{
memcpy(&abMap[i*nLen2], &abMap[k*nLen2], nLen2);
DEBUG("Dup: %c\n", cRow);
}
}
//loop cols
for(int j=0; j<nLen2; j++)
//cols
{
//check equality
if (cRow==lpszP2[j])
{
//MarkEqual(abMap, CalPos(nLen2, i, j));
MarkBit(abMap, CalcBitPos(nLen2, i, j));
}
}
}
//
// 3. print out the table
//
#ifdef _DUPSTR_DEBUG_
PrintTable(abMap, lpszP1, lpszP2);
DEBUG("\n", NULL);
#endif
//
// 4. search table, find out the connected points
//
// get minimun strlen of two strings
int nMinLen = nLen1 < nLen2 ? nLen1 : nLen2;
// build maxNode var @allocate pMaxNode
BNode *pMaxNode = NULL;
//
// loop the cols and rows from longest to shortest, and compare the maxlength with current nLen
// jump out if maxLen > nLen
//
//loop for cols
for(int i=0; i<nLen2; i++)
{
//current len
int nLen = nMinLen < nLen2-i ? nMinLen : nLen2-i;
if ( pMaxNode==NULL || pMaxNode->len<nLen)
{
//get pNodes
BNode *pNode = CheckPoints(&abMap[CalcBitPos(nLen2, 0, i)], nLen, nLen2+1);
//rebuild
RebuildMaxNodes(&pMaxNode, &pNode);
}
}
//then loop for rows
for(int i=1; i<nLen1; i++)
{
int nLen = nMinLen < nLen1-i ? nMinLen : nLen1-i;
if ( pMaxNode==NULL || pMaxNode->len<nLen)
{
BNode *pNode = CheckPoints(&abMap[CalcBitPos(nLen2, i, 0)], nLen, nLen2+1);
/* @refactor RebuildMaxNodes
if (pNode!=NULL)
{
if (pMaxNode==NULL)
{
DEBUG("leave null\n", NULL);
pMaxNode = pNode;
}
else if (pMaxNode->len < pNode->len)
{
DEBUG("got new length: %d\n", pNode->len);
//free previous pMaxNode
ReleaseNode(pMaxNode);
pMaxNode = pNode;
}
else if (pMaxNode->len == pNode->len)
{
DEBUG("get duplicated\n", NULL);
pMaxNode->next = pNode;
}
else
{
ReleaseNode(pNode);
}
}
*/
RebuildMaxNodes(&pMaxNode, &pNode);
}
}
//
// 5. print out pMaxNodes
//
StrNode* pstrNode = NULL;
//
BNode* pn = pMaxNode;
//foreach pMaxNode
while(pn!=NULL)
{
//calculate the string offset
int nStrOffset = (int)((pn->pbStart - abMap) % nLen2);
//allocate new string @allocate lpszOutput
LPWSTR lpszOutput = new WCHAR[pn->len+1];
memset(lpszOutput, 0x0, (pn->len+1)*sizeof(WCHAR));
memcpy(lpszOutput, lpszP2+nStrOffset, pn->len * sizeof(WCHAR));
DEBUG("%x => ", pn->pbStart);
DEBUG("%d => ", pn->len);
DEBUG("%s\n", lpszOutput);
//DEBUG("%x => %d => %s\n", pn->pbStart, pn->len, lpszOutput);
//attach string to return structure
if (pstrNode==NULL)
{
//build the node
pstrNode = BuildStrNode(lpszP2+nStrOffset, pn->len, lpszOutput);
HEAPLEAK(pstrNode->magic, 0xCACF0169);
}
else
{
//build the node
/* @refactor BuildStrNode
StrNode* psn = new StrNode;
memset(psn, 0x0, sizeof(StrNode));
psn->lpSrc = lpszP2+nStrOffset;
psn->len = pn->len;
psn->str = lpszOutput;
psn->next = NULL;
pstrNode->next = psn;
*/
StrNode* psn = pstrNode;
while(psn->next!=NULL)
{
psn = psn->next;
}
psn->next = BuildStrNode(lpszP2+nStrOffset, pn->len, lpszOutput);
HEAPLEAK(psn->next->magic, 0xCACF0189);
}
pn = pn->next;
}
//@release pMaxNode
ReleaseBNode(pMaxNode);
//@release abMap
delete abMap;
return pstrNode;
}
Attached full code files.