Use this file to discover all available pages before exploring further.
The /sources endpoint lets you discover what news sources News API indexes
and verify whether specific domains are covered. For large domain lists, you can
check coverage in bulk and identify uncovered sources programmatically.
Search for sources by name, language, country, or domain. At least one
filter parameter is required. source_name performs a partial match —
it returns any source whose name contains the search term. This example
finds sources with “sport” in their name:
When you have thousands of domains to check, use the Python SDK’s async client
to batch requests concurrently and identify which sources are not covered by
News API.

Set the NEWSCATCHER_API_KEY environment variable before running the script:
export NEWSCATCHER_API_KEY="YOUR_API_KEY"
Show async coverage check script
"""Check which source domains News API covers and export the uncovered ones.

Reads URLs from ``INPUT_FILE`` (first column of each CSV row), sends them to
the ``/sources`` endpoint in concurrent batches, and writes every URL that the
API did not return to ``OUTPUT_FILE``.
"""
import asyncio
import csv
import logging
import os
from typing import Dict, Iterable, List, Optional

INPUT_FILE = "source_urls.csv"
OUTPUT_FILE = "uncovered_sources.csv"
BATCH_SIZE = 1000  # URLs sent per request
MAX_CONCURRENT = 5  # requests allowed in flight at the same time
API_KEY_ENV_VAR = "NEWSCATCHER_API_KEY"

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)

# Shared SDK client. Created on first use by _get_client() so that importing
# this module needs neither the API key nor a network-capable environment.
client: Optional[object] = None


def _get_client() -> object:
    """Return the shared async SDK client, creating it on first call.

    Raises:
        RuntimeError: if the API key environment variable is missing or empty.
    """
    global client
    if client is None:
        # Imported here so the pure helpers below stay usable without the SDK.
        from newscatcher import AsyncNewscatcherApi

        api_key = os.environ.get(API_KEY_ENV_VAR)
        if not api_key:
            raise RuntimeError(
                f"Set the {API_KEY_ENV_VAR} environment variable "
                "before running the script."
            )
        client = AsyncNewscatcherApi(api_key=api_key)
    return client


def normalize_domain(url: str) -> str:
    """Reduce a URL or domain to a bare lowercase host for comparison.

    Strips surrounding whitespace, the scheme, any path, and a leading
    ``www.`` so that ``https://www.Example.com/news`` and ``example.com``
    compare equal.
    """
    host = url.strip().lower()
    host = host.split("://", 1)[-1]  # drop the scheme, if any
    host = host.split("/", 1)[0]  # drop the path, if any
    if host.startswith("www."):
        host = host[len("www."):]
    return host


def _mark_batch(batch: List[str], covered_domains: Iterable[str]) -> Dict[str, bool]:
    """Map every URL in *batch* to whether its host is in *covered_domains*."""
    covered = {normalize_domain(domain) for domain in covered_domains if domain}
    covered.discard("")  # never let an empty host count as a match
    return {url: normalize_domain(url) in covered for url in batch}


def summarize_coverage(
    source_urls: List[str],
    coverage: Dict[str, bool],
) -> Dict[str, int]:
    """Count covered, uncovered and unchecked URLs.

    Args:
        source_urls: the URLs that were submitted (duplicates are counted once).
        coverage: result of :func:`check_coverage`.

    Returns:
        A dict with the keys ``total`` (unique input URLs), ``covered``,
        ``uncovered`` and ``unchecked``. A URL is unchecked when it is absent
        from *coverage*, which happens when its batch failed.
    """
    unique_urls = set(source_urls)
    covered = sum(1 for url in unique_urls if coverage.get(url) is True)
    uncovered = sum(1 for url in unique_urls if coverage.get(url) is False)
    return {
        "total": len(unique_urls),
        "covered": covered,
        "uncovered": uncovered,
        "unchecked": len(unique_urls) - covered - uncovered,
    }


async def check_batch(
    semaphore: asyncio.Semaphore,
    batch: List[str],
) -> Optional[object]:
    """Query the sources endpoint for one batch of URLs.

    Args:
        semaphore: limits how many batches run at the same time.
        batch: URLs to send in a single request.

    Returns:
        The SDK response, or ``None`` when the request raised an exception
        (the error is logged so the remaining batches can still run).
    """
    api_client = _get_client()
    async with semaphore:
        try:
            return await api_client.sources.post(
                source_url=batch,
                include_additional_info=True,
            )
        except Exception as exc:  # best effort: one bad batch must not stop the run
            logging.error("Batch failed: %s", exc)
            return None


async def check_coverage(source_urls: List[str]) -> Dict[str, bool]:
    """Check every URL in *source_urls* against the API.

    Duplicates are sent only once. URLs are matched to the domains in the
    response after normalization (see :func:`normalize_domain`).

    Returns:
        A dict keyed by the input URLs: ``True`` when the API returned a
        matching source, ``False`` when it did not. URLs that belong to a
        failed batch are left out entirely, because their status is unknown.
    """
    coverage: Dict[str, bool] = {}
    unique_urls = list(dict.fromkeys(source_urls))  # de-duplicate, keep order
    if not unique_urls:
        return coverage

    _get_client()  # fail fast on a missing API key instead of once per batch

    semaphore = asyncio.Semaphore(MAX_CONCURRENT)
    batches = [
        unique_urls[start:start + BATCH_SIZE]
        for start in range(0, len(unique_urls), BATCH_SIZE)
    ]
    results = await asyncio.gather(
        *(check_batch(semaphore, batch) for batch in batches),
        return_exceptions=True,
    )

    for number, (batch, result) in enumerate(zip(batches, results), start=1):
        # BaseException also covers CancelledError, which gather() can hand
        # back as a result and which is not an Exception subclass.
        if result is None or isinstance(result, BaseException):
            logging.error("Skipping failed batch %d", number)
            continue
        domains = [source.domain_url for source in result.sources or []]
        coverage.update(_mark_batch(batch, domains))

    return coverage


def read_sources(file_path: str) -> List[str]:
    """Read source URLs from the first column of a CSV file.

    Empty rows and rows whose first cell is blank are skipped, and
    surrounding whitespace is removed from each URL.
    """
    # newline="" is what the csv module expects; utf-8-sig drops a BOM that
    # would otherwise become part of the first URL.
    with open(file_path, newline="", encoding="utf-8-sig") as f:
        first_cells = (row[0].strip() for row in csv.reader(f) if row)
        sources = [url for url in first_cells if url]
    logging.info("Loaded %d sources from %s", len(sources), file_path)
    return sources


def write_uncovered(uncovered: List[str], file_path: str) -> None:
    """Write the uncovered URLs to a one-column CSV file with a header row."""
    with open(file_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Uncovered Source URL"])
        writer.writerows([url] for url in uncovered)
    logging.info("Wrote %d uncovered sources to %s", len(uncovered), file_path)


def main() -> None:
    """Run the coverage check from INPUT_FILE to OUTPUT_FILE and log a summary."""
    source_urls = read_sources(INPUT_FILE)
    coverage = asyncio.run(check_coverage(source_urls))
    uncovered = [url for url, covered in coverage.items() if not covered]
    write_uncovered(uncovered, OUTPUT_FILE)

    summary = summarize_coverage(source_urls, coverage)
    logging.info(
        "Total: %d | Covered: %d | Uncovered: %d | Unchecked (failed batches): %d",
        summary["total"],
        summary["covered"],
        summary["uncovered"],
        summary["unchecked"],
    )


if __name__ == "__main__":
    main()
The script does the following:
- Reads domain URLs from source_urls.csv (one URL per row, first column).
- Splits them into batches (BATCH_SIZE) and sends requests concurrently up
  to the MAX_CONCURRENT limit.
- Marks each domain as covered or uncovered based on the API response.
- Writes uncovered domains to uncovered_sources.csv.
You can send uncovered_sources.csv to support — News API can manually add
sources that are missing from the index.