Following up on my previous post, I tried to take into account all of the helpful answers from Kate, Chris, J_H, Booboo and mudskipper.
Changes made
In short, I attempted to apply all of the following:
- API key handling
- Better URL construction
- Input cleaning improvements
- Category detection rewrite
- More functions
- Article formatting cleanup
- Added error handling
- Replaced hard-coded strings with constants
- PEP8 & Style corrections
- Added a `__main__` entry point
- Simplified logic flow
API key changes
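I recently noticed that NewsAPI expects API keys to be kept private, so the script now reads the key from the `NEWS_API_KEY` environment variable instead of embedding it in the code. You can get your own key from the NewsAPI website. I am in no way promoting them.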
Code
Here is the updated Python code:
"""
This script fetches the most recent news
headline from the NewsAPI service based
on a natural-language query given by the
user (e.g., “latest technology news”,
“what’s happening in sports today”, etc.).
"""
import html
import os
import re

import json5
import requests
# Constants / configuration
API_URL = "https://newsapi.org/v2/top-headlines"
CATEGORIES = (
    "business", "entertainment", "general", "health",
    "science", "sports", "technology",
)
STOP_WORDS = [
    "news", "new", "for", "concerning", "can", "you", "give", "me",
    "the", "most", "recent", "in", "category", "currently", "current",
    "now", "what", "whats", "is", "about", "today"
]

# Compiled once at import time instead of on every clean_query() call.
# The optional trailing group also swallows a possessive/contraction suffix
# ("what's", "today's", with a straight or curly apostrophe) so that no
# stray "'s" is left behind in the query.
_STOP_WORDS_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(w) for w in STOP_WORDS) + r")(?:['\u2019]s)?\b",
    flags=re.IGNORECASE,
)


def clean_query(text: str) -> dict:
    """
    Turn a natural-language request into NewsAPI query parameters.

    The text is lower-cased, stop words are removed and the leftover
    whitespace is collapsed. The result is then classified:

    - if a known category is mentioned, return {"category": <name>}
      (when several are mentioned, the first one in CATEGORIES order wins);
    - if nothing is left, return {"category": "general"};
    - otherwise return {"q": <remaining text>} as a free-text search.
    """
    text = _STOP_WORDS_RE.sub("", text.lower())
    # Removing words leaves runs of spaces behind; collapse them so the
    # search term sent to the API is clean.
    text = " ".join(text.split())

    # detect categories
    for cat in CATEGORIES:
        if re.search(rf"\b{re.escape(cat)}\b", text, flags=re.IGNORECASE):
            return {"category": cat}

    # if nothing remains, default to general
    if not text:
        return {"category": "general"}

    # else treat as search query
    return {"q": text}
def fetch_top_headline(params: dict, api_key: str, timeout: float = 10.0) -> dict:
    """
    Fetch the top headline from NewsAPI for the given query parameters.

    params: query parameters to send; the caller's dict is not mutated.
    api_key: NewsAPI key, sent in the X-Api-Key request header.
    timeout: seconds to wait for the server before giving up. Without a
        timeout, requests would block indefinitely on an unresponsive host.

    Returns the decoded JSON response body.
    Raises requests.HTTPError on a 4xx/5xx response, and other
    requests.RequestException subclasses on network failures or timeouts.
    """
    # Build a new dict so the caller's params stay untouched.
    # NOTE(review): "sortBy" is documented for the /v2/everything endpoint;
    # confirm whether it has any effect on top-headlines.
    query = {**params, "sortBy": "publishedAt", "pageSize": 1}

    # Send the API key in a header rather than in the URL's query string.
    headers = {"X-Api-Key": api_key}

    response = requests.get(API_URL, params=query, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()
def format_article(article: dict) -> str:
    """
    Build a human-readable summary string from a single article dict.

    Reads the "source" -> "name", "author", "title" and "description"
    fields. Any of them may be missing or None; they are then treated as
    empty. Returns a two-part message: an introduction naming the source
    and author (when known), followed by the cleaned title and description.
    """
    # Use "or" fallbacks instead of .get() defaults: a key that is present
    # with a None value would bypass the default and crash on the next
    # attribute access.
    source_name = (article.get("source") or {}).get("name") or ""
    author = article.get("author")
    title = article.get("title") or ""
    desc = article.get("description") or ""

    # clean up title: drop the source name and separators, then trim the
    # whitespace they leave behind
    title = title.replace(source_name, "").replace("\n", " ")
    title = title.replace(" - ", " ").replace("LISTEN | ", "").strip()

    # clean up description (if HTML-like): keep only what follows the
    # first <p> tag
    if "<p>" in desc:
        desc = desc.split("<p>", 1)[1]
    # decode HTML entities such as &quot; and &amp;
    desc = html.unescape(desc)

    # author formatting: skip the author when it merely repeats the source
    author_part = ""
    if author and author.lower() != source_name.lower():
        # split on commas and trim each name so "A, B" becomes "A and B"
        names = [name.strip() for name in author.split(",") if name.strip()]
        if names:
            author_part = " by " + " and ".join(names)

    # source formatting
    source_part = f" on {source_name}" if source_name else ""

    return (
        f"Here is the recently published news I found{source_part}{author_part}:\n"
        f"The title is {title}...\n{desc}"
    )
def news_concerning(text: str) -> str:
    """
    Answer a natural-language news request with one formatted headline.

    Reads the API key from the NEWS_API_KEY environment variable, converts
    the user text into query parameters, fetches the top headline and
    returns it as a readable string. When no article is returned, or the
    article cannot be formatted, a short apology message is returned
    instead.

    Raises RuntimeError when NEWS_API_KEY is not set. Errors raised while
    fetching the headline are not caught here and propagate to the caller.
    """
    api_key = os.getenv("NEWS_API_KEY")
    if not api_key:
        raise RuntimeError("API key not found. Please set NEWS_API_KEY in your environment.")

    params = clean_query(text)
    data = fetch_top_headline(params, api_key)

    # "or []" also covers a response whose "articles" value is null.
    articles = data.get("articles") or []
    if not articles:
        # Nothing found: echo back whichever term was actually queried.
        query_term = params.get("q") or params.get("category")
        return f"I couldn’t find anything about {query_term}. Could you try a different query?"

    try:
        return format_article(articles[0])
    except (AttributeError, KeyError, TypeError):
        # A malformed article payload (unexpected types or shapes) should
        # not crash the caller; other exceptions are left to propagate so
        # real bugs stay visible.
        return "Sorry, I couldn’t format the article properly."
if __name__ == "__main__":
    # Script entry point: ask for a request, print the matching headline.
    user_input = input("What news would you like to hear about? ")
    try:
        print(news_concerning(user_input))
    except (RuntimeError, requests.RequestException) as err:
        # Expected failures (missing API key, HTTP or network errors) end
        # the script with a one-line message on stderr and a non-zero exit
        # status instead of a traceback.
        raise SystemExit(f"Error: {err}")
To run the script, enter the following:
NEWS_API_KEY="your_api_key_here" python3 NewsAPI.py
Critique request
Please tell me anything that comes to mind.