tests: new test case to check for stale links in the source code and docs
This commit is contained in:
@@ -286,6 +286,150 @@ class NS3UnusedSourcesTestCase(unittest.TestCase):
|
||||
|
||||
self.assertListEqual([], list(unused_sources))
|
||||
|
||||
def test_04_CheckForDeadLinksInSources(self):
|
||||
"""!
|
||||
Test if all urls in source files are alive
|
||||
@return None
|
||||
"""
|
||||
|
||||
# Skip this test if Django is not available
|
||||
try:
|
||||
import django
|
||||
except ImportError:
|
||||
self.skipTest("Django URL validators are not available")
|
||||
|
||||
# Skip this test if requests library is not available
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
self.skipTest("Requests library is not available")
|
||||
|
||||
regex = re.compile(r'((http|https)://[^\ \n\)\"\'\}\>\<\]\;\`\\]*)')
|
||||
skipped_files = []
|
||||
|
||||
whitelisted_urls = {"https://gitlab.com/your-user-name/ns-3-dev",
|
||||
"https://www.nsnam.org/release/ns-allinone-3.31.rc1.tar.bz2",
|
||||
"https://www.nsnam.org/release/ns-allinone-3.X.rcX.tar.bz2",
|
||||
"https://www.nsnam.org/releases/ns-3-x",
|
||||
"https://www.nsnam.org/releases/ns-allinone-3.(x-1",
|
||||
"https://www.nsnam.org/releases/ns-allinone-3.x.tar.bz2",
|
||||
# split due to command-line formatting
|
||||
"https://cmake.org/cmake/help/latest/manual/cmake-",
|
||||
"http://www.ieeeghn.org/wiki/index.php/First-Hand:Digital_Television:_The_",
|
||||
# Dia placeholder xmlns address
|
||||
"http://www.lysator.liu.se/~alla/dia/",
|
||||
# Fails due to bad regex
|
||||
"http://www.ieeeghn.org/wiki/index.php/First-Hand:Digital_Television:_The_Digital_Terrestrial_Television_Broadcasting_(DTTB",
|
||||
"http://en.wikipedia.org/wiki/Namespace_(computer_science",
|
||||
"http://en.wikipedia.org/wiki/Bonobo_(component_model",
|
||||
"http://msdn.microsoft.com/en-us/library/aa365247(v=vs.85",
|
||||
# historical links
|
||||
"http://www.research.att.com/info/kpv/",
|
||||
"http://www.research.att.com/~gsf/",
|
||||
}
|
||||
|
||||
# Scan for all URLs in all files we can parse
|
||||
files_and_urls = set()
|
||||
unique_urls = set()
|
||||
for topdir in ["bindings", "doc", "examples", "src", "utils"]:
|
||||
for root, dirs, files in os.walk(topdir):
|
||||
# do not parse files in build directories
|
||||
if "build" in root or "_static" in root or "source-temp" in root or 'html' in root:
|
||||
continue
|
||||
for file in files:
|
||||
# skip svg files
|
||||
if file.endswith(".svg"):
|
||||
continue
|
||||
filepath = os.path.join(root, file)
|
||||
|
||||
try:
|
||||
with open(filepath, "r") as f:
|
||||
matches = regex.findall(f.read())
|
||||
|
||||
# Get first group for each match (containing the URL)
|
||||
# and strip final punctuation or commas in matched links
|
||||
# commonly found in the docs
|
||||
urls = list(map(lambda x: x[0][:-1] if x[0][-1] in ".," else x[0], matches))
|
||||
except UnicodeDecodeError:
|
||||
skipped_files.append(filepath)
|
||||
continue
|
||||
|
||||
# Search for new unique URLs and add keep track of their associated source file
|
||||
for url in set(urls)-unique_urls-whitelisted_urls:
|
||||
unique_urls.add(url)
|
||||
files_and_urls.add((filepath, url))
|
||||
|
||||
# Instantiate the Django URL validator
|
||||
from django.core.validators import URLValidator
|
||||
from django.core.exceptions import ValidationError
|
||||
validate_url = URLValidator()
|
||||
|
||||
# User agent string to make ACM and Elsevier let us check if links to papers are working
|
||||
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
|
||||
|
||||
def test_file_url(args):
|
||||
filepath, url = args
|
||||
dead_link_msg = None
|
||||
|
||||
# Skip invalid URLs
|
||||
try:
|
||||
validate_url(url)
|
||||
except ValidationError:
|
||||
dead_link_msg = "%s: URL %s, invalid URL" % (filepath, url)
|
||||
|
||||
# Check if valid URLs are alive
|
||||
if dead_link_msg is None:
|
||||
try:
|
||||
tries = 3
|
||||
while tries > 0:
|
||||
# Not verifying the certificate (verify=False) is potentially dangerous
|
||||
# HEAD checks are not as reliable as GET ones,
|
||||
# in some cases they may return bogus error codes and reasons
|
||||
response = requests.get(url, verify=False, headers=headers)
|
||||
|
||||
# In case of success and redirection
|
||||
if response.status_code in [200, 301]:
|
||||
dead_link_msg = None
|
||||
break
|
||||
|
||||
# People use the wrong code, but the reason
|
||||
# can still be correct
|
||||
if response.status_code in [302, 308, 500, 503]:
|
||||
if response.reason.lower() in ['found',
|
||||
'moved temporarily',
|
||||
'permanent redirect',
|
||||
'ok',
|
||||
'service temporarily unavailable'
|
||||
]:
|
||||
dead_link_msg = None
|
||||
break
|
||||
# In case it didn't pass in any of the previous tests,
|
||||
# set dead_link_msg with the most recent error and try again
|
||||
dead_link_msg = "%s: URL %s: returned code %d" % (filepath, url, response.status_code)
|
||||
tries -= 1
|
||||
except requests.exceptions.InvalidURL:
|
||||
dead_link_msg = "%s: URL %s: invalid URL" % (filepath, url)
|
||||
except requests.exceptions.SSLError:
|
||||
dead_link_msg = "%s: URL %s: SSL error" % (filepath, url)
|
||||
except requests.exceptions.TooManyRedirects:
|
||||
dead_link_msg = "%s: URL %s: too many redirects" % (filepath, url)
|
||||
except Exception as e:
|
||||
try:
|
||||
error_msg = e.args[0].reason.__str__()
|
||||
except AttributeError:
|
||||
error_msg = e.args[0]
|
||||
dead_link_msg = "%s: URL %s: failed with exception: %s" % (filepath, url, error_msg)
|
||||
return dead_link_msg
|
||||
|
||||
# Dispatch threads to test multiple URLs concurrently
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
with ThreadPoolExecutor(max_workers=20) as executor:
|
||||
dead_links = list(executor.map(test_file_url, list(files_and_urls)))
|
||||
|
||||
# Filter out None entries
|
||||
dead_links = list(sorted(filter(lambda x: x is not None, dead_links)))
|
||||
self.assertEqual(len(dead_links), 0, msg="\n".join(["Dead links found:", *dead_links]))
|
||||
|
||||
|
||||
class NS3CommonSettingsTestCase(unittest.TestCase):
|
||||
"""!
|
||||
|
||||
Reference in New Issue
Block a user