Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
nghttp2
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Libraries
nghttp2
Commits
14f5c463
Commit
14f5c463
authored
May 19, 2012
by
Tatsuhiro Tsujikawa
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added missing examples/HtmlParser.{cc,h}
parent
a1085610
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
247 additions
and
0 deletions
+247
-0
examples/HtmlParser.cc
examples/HtmlParser.cc
+166
-0
examples/HtmlParser.h
examples/HtmlParser.h
+81
-0
No files found.
examples/HtmlParser.cc
0 → 100644
View file @
14f5c463
/*
* Spdylay - SPDY Library
*
* Copyright (c) 2012 Tatsuhiro Tsujikawa
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "HtmlParser.h"
#include "util.h"
#include "uri.h"
namespace
spdylay
{
// Creates the state shared with the SAX callbacks: a copy of |base_uri|
// is kept in the member of the same name, and |links| starts out empty.
ParserData::ParserData(const std::string& base_uri)
  : base_uri(base_uri)
{}
// Stores |base_uri| both for the parser itself and for the ParserData
// handed to the SAX callbacks. The libxml2 context is left null here; it
// is created by the first parse_chunk() call.
HtmlParser::HtmlParser(const std::string& base_uri)
  : base_uri_(base_uri),
    parser_ctx_(0),
    parser_data_(base_uri)
{}
// Hands the push parser context back to libxml2. parser_ctx_ is still
// null at this point if parse_chunk() was never called.
HtmlParser::~HtmlParser()
{
  htmlFreeParserCtxt(parser_ctx_);
}
namespace {
// Returns the value of the attribute called |name| in |attrs|, or 0 when
// no such attribute is present.
//
// |attrs| is the attribute array a libxml2 SAX startElement callback
// receives: alternating name/value entries terminated by a null name.
// Names are matched against |name| with util::strieq.
const char* get_attr(const xmlChar **attrs, const char *name)
{
  // libxml2 passes a null array (not an empty one) for elements that
  // carry no attributes at all, so it must not be dereferenced.
  if(!attrs) {
    return 0;
  }
  for(; *attrs; attrs += 2) {
    const char *attr_name = reinterpret_cast<const char*>(attrs[0]);
    if(util::strieq(attr_name, name)) {
      return reinterpret_cast<const char*>(attrs[1]);
    }
  }
  return 0;
}
} // namespace
namespace
{
void
start_element_func
(
void
*
user_data
,
const
xmlChar
*
name
,
const
xmlChar
**
attrs
)
{
ParserData
*
parser_data
=
reinterpret_cast
<
ParserData
*>
(
user_data
);
if
(
util
::
strieq
(
reinterpret_cast
<
const
char
*>
(
name
),
"link"
))
{
const
char
*
rel_attr
=
get_attr
(
attrs
,
"rel"
);
const
char
*
href_attr
=
get_attr
(
attrs
,
"href"
);
if
((
util
::
strieq
(
rel_attr
,
"shortcut icon"
)
||
util
::
strieq
(
rel_attr
,
"stylesheet"
))
&&
href_attr
)
{
std
::
string
uri
=
uri
::
joinUri
(
parser_data
->
base_uri
,
href_attr
);
parser_data
->
links
.
push_back
(
uri
);
}
}
else
if
(
util
::
strieq
(
reinterpret_cast
<
const
char
*>
(
name
),
"img"
))
{
const
char
*
src_attr
=
get_attr
(
attrs
,
"src"
);
if
(
src_attr
)
{
std
::
string
uri
=
uri
::
joinUri
(
parser_data
->
base_uri
,
src_attr
);
parser_data
->
links
.
push_back
(
uri
);
}
}
}
}
// namespace
namespace {
// SAX handler table passed to the push parser. Only the startElement
// slot is populated; every other slot is left null. The initializers are
// positional, so their order must follow the field order of
// xmlSAXHandler.
xmlSAXHandler saxHandler = {
  0,                   // internalSubsetSAXFunc
  0,                   // isStandaloneSAXFunc
  0,                   // hasInternalSubsetSAXFunc
  0,                   // hasExternalSubsetSAXFunc
  0,                   // resolveEntitySAXFunc
  0,                   // getEntitySAXFunc
  0,                   // entityDeclSAXFunc
  0,                   // notationDeclSAXFunc
  0,                   // attributeDeclSAXFunc
  0,                   // elementDeclSAXFunc
  0,                   // unparsedEntityDeclSAXFunc
  0,                   // setDocumentLocatorSAXFunc
  0,                   // startDocumentSAXFunc
  0,                   // endDocumentSAXFunc
  &start_element_func, // startElementSAXFunc
  0,                   // endElementSAXFunc
  0,                   // referenceSAXFunc
  0,                   // charactersSAXFunc
  0,                   // ignorableWhitespaceSAXFunc
  0,                   // processingInstructionSAXFunc
  0,                   // commentSAXFunc
  0,                   // warningSAXFunc
  0,                   // errorSAXFunc
  0,                   // fatalErrorSAXFunc
  0,                   // getParameterEntitySAXFunc
  0,                   // cdataBlockSAXFunc
  0,                   // externalSubsetSAXFunc
  0,                   // unsigned int initialized
  0,                   // void * _private
  0,                   // startElementNsSAX2Func
  0,                   // endElementNsSAX2Func
  0                    // xmlStructuredErrorFunc
};
} // namespace
// Feeds |size| bytes at |chunk| to the HTML push parser; a nonzero |fin|
// marks the end of the document. Returns 0 on success and -1 on failure.
//
// The first call creates the libxml2 push parser context and passes the
// chunk to htmlCreatePushParserCtxt; in that case htmlParseChunk is only
// invoked (with no data) when |fin| is set. Later calls forward the
// chunk to htmlParseChunk.
//
// NOTE(review): |size| is a size_t here but is forwarded to libxml2,
// which presumably takes an int -- confirm callers never pass chunks
// larger than INT_MAX.
int HtmlParser::parse_chunk(const char *chunk, size_t size, int fin)
{
  if(parser_ctx_) {
    return parse_chunk_internal(chunk, size, fin);
  }
  parser_ctx_ = htmlCreatePushParserCtxt(&saxHandler,
                                         &parser_data_,
                                         chunk,
                                         size,
                                         base_uri_.c_str(),
                                         XML_CHAR_ENCODING_NONE);
  if(!parser_ctx_) {
    return -1;
  }
  if(!fin) {
    return 0;
  }
  return parse_chunk_internal(0, 0, fin);
}
// Passes the chunk to htmlParseChunk on the existing parser context and
// collapses its result to 0 (htmlParseChunk returned 0) or -1 (anything
// else).
int HtmlParser::parse_chunk_internal(const char *chunk, size_t size,
                                     int fin)
{
  const int status = htmlParseChunk(parser_ctx_, chunk, size, fin);
  return status == 0 ? 0 : -1;
}
// Gives read-only access to the links collected so far. The reference
// points at the parser's own list, so it reflects later parse_chunk()
// and clear_links() calls.
const std::vector<std::string>& HtmlParser::get_links() const
{
  return parser_data_.links;
}
// Discards every link collected so far; the parser context itself is
// left untouched.
void HtmlParser::clear_links()
{
  parser_data_.links.clear();
}
}
// namespace spdylay
examples/HtmlParser.h
0 → 100644
View file @
14f5c463
/*
* Spdylay - SPDY Library
*
* Copyright (c) 2012 Tatsuhiro Tsujikawa
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef HTML_PARSER_H
#define HTML_PARSER_H
#include <config.h>
#include <vector>
#include <string>
#ifdef HAVE_LIBXML2
#include <libxml/HTMLparser.h>
namespace
spdylay
{
// State shared between HtmlParser and its SAX callbacks.
struct ParserData {
  // URI that extracted references are joined with.
  std::string base_uri;
  // URIs collected from the document, in the order they were encountered.
  std::vector<std::string> links;
  // Stores a copy of |base_uri|.
  ParserData(const std::string& base_uri);
};
// Incremental HTML parser built on the libxml2 push parser. It collects
// the URIs of resources referenced by the document (see get_links()).
//
// NOTE(review): the class holds a raw htmlParserCtxtPtr that the
// destructor releases, yet the compiler-generated copy operations are
// still available; copying an instance that has already started parsing
// would release the same context twice. Confirm no caller copies it.
class HtmlParser {
public:
  // |base_uri| is kept for the parser and for resolving collected links.
  HtmlParser(const std::string& base_uri);
  ~HtmlParser();
  // Feeds |size| bytes at |chunk| to the parser; a nonzero |fin| marks
  // the end of the document. Returns 0 on success, -1 on failure.
  int parse_chunk(const char *chunk, size_t size, int fin);
  // Links collected so far.
  const std::vector<std::string>& get_links() const;
  // Empties the collected link list.
  void clear_links();
private:
  int parse_chunk_internal(const char *chunk, size_t size, int fin);

  // Declaration order matters: it is the member initialization order.
  std::string base_uri_;
  htmlParserCtxtPtr parser_ctx_;
  ParserData parser_data_;
};
}
// namespace spdylay
#else // !HAVE_LIBXML2
namespace
spdylay
{
// Stand-in used when libxml2 is not available (HAVE_LIBXML2 undefined).
// It offers the same public interface as the real parser but never
// inspects its input, so the link list is always empty.
class HtmlParser {
public:
  // |base_uri| is accepted for interface compatibility and ignored.
  HtmlParser(const std::string& base_uri) {}
  ~HtmlParser() {}
  // Ignores the chunk and always reports success (0).
  int parse_chunk(const char *chunk, size_t size, int fin) { return 0; }
  // Always an empty list: nothing ever adds to links_.
  const std::vector<std::string>& get_links() const { return links_; }
  // Nothing to clear.
  void clear_links() {}
private:
  std::vector<std::string> links_;
};
}
// namespace spdylay
#endif // !HAVE_LIBXML2
#endif // HTML_PARSER_H
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment