- So we just need to skip past the "NUMBER; URL=" garbage
- to get to the URL. */
- {
- char *name = find_attr (tag, "name", NULL);
- char *http_equiv = find_attr (tag, "http-equiv", NULL);
- if (http_equiv && !strcasecmp (http_equiv, "refresh"))
- {
- struct urlpos *entry;
-
- int id;
- char *p, *refresh = find_attr (tag, "content", &id);
- int timeout = 0;
-
- for (p = refresh; ISDIGIT (*p); p++)
- timeout = 10 * timeout + *p - '0';
- if (*p++ != ';')
- return;
-
- while (ISSPACE (*p))
- ++p;
- if (!(TOUPPER (*p) == 'U'
- && TOUPPER (*(p + 1)) == 'R'
- && TOUPPER (*(p + 2)) == 'L'
- && *(p + 3) == '='))
- return;
- p += 4;
- while (ISSPACE (*p))
- ++p;
-
- entry = handle_link (closure, p, tag, id);
- if (entry)
- {
- entry->link_refresh_p = 1;
- entry->refresh_timeout = timeout;
- }
- }
- else if (name && !strcasecmp (name, "robots"))
- {
- /* Handle stuff like:
- <meta name="robots" content="index,nofollow"> */
- char *content = find_attr (tag, "content", NULL);
- if (!content)
- return;
- if (!strcasecmp (content, "none"))
- closure->nofollow = 1;
- else
- {
- while (*content)
- {
- /* Find the next occurrence of ',' or the end of
- the string. */
- char *end = strchr (content, ',');
- if (end)
- ++end;
- else
- end = content + strlen (content);
- if (!strncasecmp (content, "nofollow", end - content))
- closure->nofollow = 1;
- content = end;
- }
- }
- }
- }
- break;
- default:
- /* Category is TC_SPEC, but tag name is unhandled. This
- must not be. */
- abort ();
+ /* All <link href="..."> link references are external,
+ except for <link rel="stylesheet" href="...">. */
+ if (href)
+ {
+ char *rel = find_attr (tag, "rel", NULL);
+ int inlinep = (rel && 0 == strcasecmp (rel, "stylesheet"));
+ append_one_url (href, inlinep, tag, attrind, ctx);
+ }
+}
+
+/* Some pages use a META tag to specify that the page be refreshed by
+ a new page after a given number of seconds. The general format for
+ this is:
+
+ <meta http-equiv=Refresh content="NUMBER; URL=index2.html">
+
+ So we read NUMBER as the refresh timeout and skip past the "; URL="
+ prefix to get to the URL. */
+
+static void
+tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
+{
+ char *name = find_attr (tag, "name", NULL);
+ char *http_equiv = find_attr (tag, "http-equiv", NULL);
+
+ if (http_equiv && 0 == strcasecmp (http_equiv, "refresh"))
+ {
+ struct urlpos *entry;
+
+ int attrind;
+ char *p, *refresh = find_attr (tag, "content", &attrind);
+ int timeout = 0;
+
+ for (p = refresh; ISDIGIT (*p); p++)
+ timeout = 10 * timeout + *p - '0';
+ if (*p++ != ';')
+ return;
+
+ while (ISSPACE (*p))
+ ++p;
+ if (!( TOUPPER (*p) == 'U'
+ && TOUPPER (*(p + 1)) == 'R'
+ && TOUPPER (*(p + 2)) == 'L'
+ && *(p + 3) == '='))
+ return;
+ p += 4;
+ while (ISSPACE (*p))
+ ++p;
+
+ entry = append_one_url (p, 0, tag, attrind, ctx);
+ if (entry)
+ {
+ entry->link_refresh_p = 1;
+ entry->refresh_timeout = timeout;
+ }
+ }
+ else if (name && 0 == strcasecmp (name, "robots"))
+ {
+ /* Handle stuff like:
+ <meta name="robots" content="index,nofollow"> */
+ char *content = find_attr (tag, "content", NULL);
+ if (!content)
+ return;
+ if (!strcasecmp (content, "none"))
+ ctx->nofollow = 1;
+ else
+ {
+ while (*content)
+ {
+ /* Find the next occurrence of ',' or the end of
+ the string. */
+ char *end = strchr (content, ',');
+ if (end)
+ ++end;
+ else
+ end = content + strlen (content);
+ if (!strncasecmp (content, "nofollow", end - content))
+ ctx->nofollow = 1;
+ content = end;
+ }