1-
21//
32// wiki-registry.c
43//
@@ -96,31 +95,37 @@ wiki_registry_parse(const char *html) {
9695
9796 GumboNode * body = gumbo_get_element_by_id ("wiki-body" , output -> root );
9897 if (body ) {
99- // grab all category `<h2 />`s
100- list_t * h2s = gumbo_get_elements_by_tag_name ("h2" , body );
101- list_node_t * heading_node ;
102- list_iterator_t * heading_iterator = list_iterator_new (h2s , LIST_HEAD );
103- while ((heading_node = list_iterator_next (heading_iterator ))) {
104- GumboNode * heading = (GumboNode * ) heading_node -> val ;
105- char * category = gumbo_text_content (heading );
106- // die if we failed to parse a category, as it's
107- // almost certainly a malloc error
108- if (!category ) break ;
109- trim (case_lower (category ));
110- GumboVector * siblings = & heading -> parent -> v .element .children ;
111- size_t pos = heading -> index_within_parent ;
112-
113- // skip elements until the UL
114- // TODO: don't hardcode position here
115- // 2:
116- // 1 - whitespace
117- // 2 - actual node
118- GumboNode * ul = siblings -> data [pos + 2 ];
119- if (GUMBO_TAG_UL != ul -> v .element .tag ) {
120- free (category );
98+ GumboNode * markdown_body = ((GumboNode * )((GumboVector )body -> v .element .children ).data [1 ]);
99+ GumboVector children = (GumboVector )markdown_body -> v .element .children ;
100+
101+ size_t count = children .length - 1 ;
102+
103+ for (size_t index = 0 ; index < count ; index ++ ) {
104+ GumboNode * heading = (GumboNode * )children .data [index ];
105+ GumboNode * ul = NULL ;
106+
107+ if (heading -> v .element .tag != GUMBO_TAG_DIV ) {
108+ continue ;
109+ }
110+
111+ GumboAttribute * node_id = gumbo_get_attribute (& heading -> v .element .attributes , "class" );
112+ if (node_id == NULL || strncmp (node_id -> value , "markdown-heading" , 16 ) != 0 ) {
121113 continue ;
122114 }
123115
116+ for (; index < count ; index ++ ) {
117+ ul = (GumboNode * )children .data [index ];
118+
119+ if (ul -> v .element .tag == GUMBO_TAG_UL ) {
120+ break ;
121+ }
122+ }
123+
124+ list_t * h2 = gumbo_get_elements_by_tag_name ("h2" , heading );
125+ char * category = gumbo_text_content (h2 -> head -> val );
126+ if (!category ) break ;
127+ trim (case_lower (category ));
128+
124129 list_t * lis = gumbo_get_elements_by_tag_name ("li" , ul );
125130 list_iterator_t * li_iterator = list_iterator_new (lis , LIST_HEAD );
126131 list_node_t * li_node ;
@@ -138,8 +143,6 @@ wiki_registry_parse(const char *html) {
138143 list_destroy (lis );
139144 free (category );
140145 }
141- list_iterator_destroy (heading_iterator );
142- list_destroy (h2s );
143146 }
144147
145148 gumbo_destroy_output (& kGumboDefaultOptions , output );
0 commit comments