import datetime
import warnings
from dataclasses import dataclass
from functools import wraps
from typing import Optional, Union

from django.contrib.sites.shortcuts import get_current_site
from django.core.paginator import EmptyPage, PageNotAnInteger
from django.http import Http404
from django.template.response import TemplateResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.deprecation import RemovedInDjango50Warning
from django.utils.http import http_date


@dataclass
class SitemapIndexItem:
    """
    A single entry of a sitemap index.

    `location` is the absolute URL of one sitemap page; `last_mod` is the
    value passed in by the `index` view (the sitemap's latest lastmod, a date
    or a datetime) or None when it isn't known.
    """

    location: str
    last_mod: Optional[Union[datetime.date, datetime.datetime]] = None

    # RemovedInDjango50Warning
    def __str__(self):
        """Return `location`, emitting a deprecation warning."""
        msg = (
            "Calling `__str__` on SitemapIndexItem is deprecated, use the `location` "
            "attribute instead."
        )
        warnings.warn(msg, RemovedInDjango50Warning, stacklevel=2)
        return self.location


def x_robots_tag(func):
    """
    Decorate a view so that its response carries an
    `X-Robots-Tag: noindex, noodp, noarchive` header.
    """

    @wraps(func)
    def inner(request, *args, **kwargs):
        response = func(request, *args, **kwargs)
        response.headers["X-Robots-Tag"] = "noindex, noodp, noarchive"
        return response

    return inner


def _get_latest_lastmod(current_lastmod, new_lastmod):
    """
    Returns the latest `lastmod` where `lastmod` can be either a date or a
    datetime.

    `new_lastmod` is normalized before comparison: a plain date becomes a
    datetime at midnight, and a naive datetime is made aware in UTC.
    `current_lastmod` is used as-is and may be None, in which case the
    normalized `new_lastmod` is returned.
    """
    if not isinstance(new_lastmod, datetime.datetime):
        new_lastmod = datetime.datetime.combine(new_lastmod, datetime.time.min)
    if timezone.is_naive(new_lastmod):
        new_lastmod = timezone.make_aware(new_lastmod, datetime.timezone.utc)
    return new_lastmod if current_lastmod is None else max(current_lastmod, new_lastmod)


@x_robots_tag
def index(
    request,
    sitemaps,
    template_name="sitemap_index.xml",
    content_type="application/xml",
    sitemap_url_name="django.contrib.sitemaps.views.sitemap",
):
    """
    Render a sitemap index listing every page of every section's sitemap.

    `sitemaps` maps a section label to a sitemap object, or to a callable
    returning one. `sitemap_url_name` is the URL pattern name reversed (with a
    `section` keyword argument) to build each section's sitemap URL.

    Returns a TemplateResponse whose context holds the list of
    SitemapIndexItem objects under the "sitemaps" key. A `Last-Modified`
    header is set only when every section reports a latest lastmod.
    """
    req_protocol = request.scheme
    req_site = get_current_site(request)

    sites = []  # all sections' sitemap URLs
    all_indexes_lastmod = True
    latest_lastmod = None
    for section, site in sitemaps.items():
        # For each section label, add links of all pages of its sitemap
        # (usually generated by the `sitemap` view).
        if callable(site):
            site = site()
        protocol = req_protocol if site.protocol is None else site.protocol
        sitemap_url = reverse(sitemap_url_name, kwargs={"section": section})
        absolute_url = "%s://%s%s" % (protocol, req_site.domain, sitemap_url)
        site_lastmod = site.get_latest_lastmod()
        if all_indexes_lastmod:
            if site_lastmod is not None:
                latest_lastmod = _get_latest_lastmod(latest_lastmod, site_lastmod)
            else:
                # One section without a lastmod disables the header entirely.
                all_indexes_lastmod = False
        sites.append(SitemapIndexItem(absolute_url, site_lastmod))
        # Add links to all pages of the sitemap.
        for page in range(2, site.paginator.num_pages + 1):
            sites.append(
                SitemapIndexItem("%s?p=%s" % (absolute_url, page), site_lastmod)
            )
    # If lastmod is defined for all sites, set header so as
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
    if all_indexes_lastmod and latest_lastmod:
        headers = {"Last-Modified": http_date(latest_lastmod.timestamp())}
    else:
        headers = None
    return TemplateResponse(
        request,
        template_name,
        {"sitemaps": sites},
        content_type=content_type,
        headers=headers,
    )


@x_robots_tag
def sitemap(
    request,
    sitemaps,
    section=None,
    template_name="sitemap.xml",
    content_type="application/xml",
):
    """
    Render the URLs of one sitemap section, or of all sections when `section`
    is None.

    `sitemaps` maps a section label to a sitemap object, or to a callable
    returning one. The requested page is read from the `p` query parameter
    and defaults to 1.

    Returns a TemplateResponse whose context holds the collected URLs under
    the "urlset" key. A `Last-Modified` header is set only when every sitemap
    involved exposes a non-None `latest_lastmod` attribute.

    Raises Http404 when `section` is not a key of `sitemaps`, or when a
    sitemap raises EmptyPage or PageNotAnInteger for the requested page.
    """
    req_protocol = request.scheme
    req_site = get_current_site(request)

    if section is not None:
        if section not in sitemaps:
            raise Http404("No sitemap available for section: %r" % section)
        maps = [sitemaps[section]]
    else:
        maps = sitemaps.values()
    page = request.GET.get("p", 1)

    lastmod = None
    all_sites_lastmod = True
    urls = []
    for site in maps:
        try:
            if callable(site):
                site = site()
            urls.extend(site.get_urls(page=page, site=req_site, protocol=req_protocol))
            if all_sites_lastmod:
                # `latest_lastmod` is read after get_urls() has run for this page.
                site_lastmod = getattr(site, "latest_lastmod", None)
                if site_lastmod is not None:
                    lastmod = _get_latest_lastmod(lastmod, site_lastmod)
                else:
                    all_sites_lastmod = False
        except EmptyPage:
            raise Http404("Page %s empty" % page)
        except PageNotAnInteger:
            raise Http404("No page '%s'" % page)
    # If lastmod is defined for all sites, set header so as
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
    if all_sites_lastmod and lastmod:
        headers = {"Last-Modified": http_date(lastmod.timestamp())}
    else:
        headers = None
    return TemplateResponse(
        request,
        template_name,
        {"urlset": urls},
        content_type=content_type,
        headers=headers,
    )