Skip to content

News Finder Class

NewsFinder

API Object to scrape news articles from Google News and scrape the article text from the news website. Allows filtering of Google News results.

Note: Due to issues with scraping, Daily Mail and News18 are blocked by default. You can unblock them by calling the remove_blocked_source method but this is not advised.

ATTRIBUTE DESCRIPTION
blocked_sources

A list of sources to block, by default includes Daily Mail and News18

TYPE: list

Source code in twitternewsbot/newsfinder.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
class NewsFinder():

    """
    API Object to scrape news articles from Google News and scrape the article text from the news website. 
    Allows filtering of Google News results.

    Note: Due to issues with scraping, Daily Mail and News18 are blocked by default. You can unblock them by calling the remove_blocked_source method but this is not advised.

    Attributes
    ----------
    blocked_sources : list
        A list of sources to block, by default includes Daily Mail and News18
    """

    #####################################
    # Initialization
    #####################################

    def __init__(self, blocked_sources: list|None = None):
        """Initialize the NewsFinder class

        Parameters
        ----------
        blocked_sources : list, optional
            A list of sources to block, by default includes Daily Mail and News18        

        Raises
        ------
        TypeError
            If blocked_sources is not a list
        TypeError
            If any element in blocked_sources is not a string    
        """

        if blocked_sources is None:
            self.blocked_sources = ["Daily Mail", "News18"]
        else:

            # Check if blocked_sources is valid
            if not isinstance(blocked_sources, list):
                raise TypeError("blocked_sources must be a list")

            # Check if all elements in blocked_sources are strings
            if not all(isinstance(source, str) for source in blocked_sources):
                raise TypeError("blocked_sources must be a list of strings")

            self.blocked_sources = blocked_sources + ["Daily Mail", "News18"]

    #####################################
    # Private Methods
    #####################################

    def __scrape_articles(self, url: str, number_of_articles: int = 5) -> list | None:
        """Private: Scrape the news articles from Google News for a given topic

        Parameters
        ----------
        url : str
            The google news url to scrape
        number_of_articles : int, optional 
            The number of articles to scrape, by default 5

        Returns
        -------
        all_articles : list
            A list of dictionaries containing the title, source, time and link of each article
        """

        # Initialize an HTML Session
        session = HTMLSession()

        # Get the page
        r = session.get(url=url)

        # Get all the articles
        try:
            articles = r.html.find('article')
        except:
            return None

        all_articles = []

        # Iterate over each article
        for article in articles:

            # Break if we have enough articles
            if len(all_articles) == number_of_articles:
                break

            # Get the title
            title = article.find('h3', first=True).text

            # Get the source
            source = article.find('img', first=True).attrs.get('alt')

            # Disallow certain sources
            if source in self.blocked_sources:
                continue

            # Get the link
            link = article.find('a', first=True).absolute_links.pop()

            # Print the details
            newsarticle = {
                'title': title,
                'source': source,
                'link': link
            }
            all_articles.append(newsarticle)

        return all_articles

    def __scrape_news_article(self, url: str) -> dict | None:
        """Private: Scrape the news article from the given url

        Parameters
        ----------
        url : str
            The google news url of the news article

        Returns
        -------
        article : dict
            A dictionary containing the title and article body of the news article
        """


        # Final url
        try:
            url = requests.get(url, timeout=5).url
        except Exception as error:
            warnings.warn(f"Error processing url: {url}. Continuing without it...")
            return None

        # Initialize HTML Session
        session = HTMLSession()

        # Get the page
        r = session.get(url=url)

        # Get the title
        try:
            title  = r.html.find('h1', first=True).text
        except:
            title = ""

        # Get all article fragments (each fragment is a paragraph)
        try:
            article_fragments = r.html.find('p')
        except:
            warnings.warn(f"Article with url: {url} cannot be scraped. Continuing without it...")
            return None


        # Join all the paragraphs to form the article
        body = '\n'.join([fragment.text for fragment in article_fragments])

        return {'title': title, 'article': body}

    def __build_list_of_articles(self, articles_list: list) -> list:
        """Private: Build a list of articles from the given list of dictionaries

        Parameters
        ----------
        articles_list : list(dict)
            A list of dictionaries containing the title, source, date and link of the articles

        Returns
        -------
        articles_full_text : list
            A list of dictionaries containing the title and article body of the news articles
        """

        # Iterate through articles, and scrape each one
        for article in articles_list:
            article_text = self.__build_article_from_dict(article)
            article.update(article_text)
        return articles_list

    def __build_article_from_dict(self, article_dict: dict) -> dict | None:
        """Private: Build the article from the given dictionary

        Parameters
        ----------
        article_dict : dict
            A dictionary containing the title, source, date and link of the article

        Returns
        -------
        article : dict
            A dictionary containing the title and article body of the news article
        """

        return self.__scrape_news_article(article_dict['link'])

    def __build_url(self, topic_url: str, source_url: str, period_url: str) -> str:
        """Private: Build the url for the given topic, source and period

        Parameters
        ----------
        topic_url : str
            The topic to search for
        source_url : str
            The url fragment of the source to search for
        period_url : str
            The url fragment of the period to search for

        Returns 
        -------
        url : str
            The url to scrape
        """

        return f'https://news.google.com/search?q={topic_url}{source_url}{period_url}&hl=en-IN&gl=IN&ceid=IN:en'


    ###############################
    # Public Methods - API Methods
    ###############################

    def add_blocked_source(self, sources: list) -> int:
        """Add a list of sources to block

        Parameters
        ----------
        sources : list
            A list of sources to block

        Returns
        -------
        int
            The number of blocked sources

        Raises
        ------
        TypeError
            If sources is not a list

        Examples
        --------
        >>> from twitternewsbot.newsfinder import NewsFinder
        >>> nf = NewsFinder() 
        >>> nf.add_blocked_source(["BBC"]) # Add BBC to the list of blocked sources
        3
        """

        # Check if the sources is valid
        if not isinstance(sources, list):
            raise TypeError("Sources must be a list")

        # Check if the source is already blocked
        for source in sources:
            if source in self.blocked_sources:
                sources.remove(source)

        self.blocked_sources = self.blocked_sources + sources
        return len(self.blocked_sources)

    def remove_blocked_source(self, sources: list) -> int:
        """Remove a list of sources to block

        Parameters
        ----------
        sources : list
            A list of sources to block

        Returns
        -------
        int
            The number of blocked sources

        Raises
        ------
        TypeError
            If sources is not a list

        Examples
        --------
        >>> from twitternewsbot.newsfinder import NewsFinder
        >>> nf = NewsFinder()
        >>> nf.remove_blocked_source(["Daily Mail"]) # Remove Daily Mail from the list of blocked sources
        1
        """

        # Check if the sources is valid
        if not isinstance(sources, list):
            raise TypeError("Sources must be a list")

        for source in sources:
            self.blocked_sources.remove(source)
        return len(self.blocked_sources)

    def get_blocked_sources(self) -> list:
        """Get the list of blocked sources

        Returns
        -------
        list
            The list of blocked sources

        Examples
        --------
        >>> from twitternewsbot.newsfinder import NewsFinder
        >>> nf = NewsFinder()
        >>> nf.get_blocked_sources() # Remember, Daily Mail and News18 are blocked by default (due to issues with scraping)
        ['Daily Mail', 'News18']
        """

        return self.blocked_sources

    @property
    def blocked_sources(self) -> list:
        """Get the list of blocked sources

        Returns
        -------
        list
            The list of blocked sources

        Examples
        --------
        >>> from twitternewsbot.newsfinder import NewsFinder
        >>> nf = NewsFinder()
        >>> nf.blocked_sources # Remember, Daily Mail and News18 are blocked by default (due to issues with scraping)
        ['Daily Mail', 'News18']
        """

        return self.__blocked_sources

    @blocked_sources.setter
    def blocked_sources(self, sources: list) -> None:
        """Set the list of blocked sources

        Parameters
        ----------
        sources : list
            A list of sources to block

        Raises
        ------
        TypeError
            If sources is not a list

        Examples
        --------
        >>> from twitternewsbot.newsfinder import NewsFinder
        >>> nf = NewsFinder()
        >>> nf.blocked_sources = ["BBC"] # Set the list of blocked sources to BBC
        """

        # Check if the sources is valid
        if not isinstance(sources, list):
            raise TypeError("sources must be a list")

        self.__blocked_sources = sources

    def update_blocked_sources(self, sources: list) -> int:
        """Update the list of blocked sources by completely replacing existing blocked sources

        Parameters
        ----------
        sources : list
            A list of sources to block

        Returns
        -------
        int
            The number of blocked sources

        Raises
        ------
        TypeError
            If sources is not a list

        Examples
        --------
        >>> from twitternewsbot.newsfinder import NewsFinder
        >>> nf = NewsFinder()
        >>> nf.update_blocked_sources(["BBC"]) # Update the list of blocked sources to BBC
        1
        """
        # Check if the sources is valid
        if not isinstance(sources, list):
            raise TypeError("Sources must be a list")

        self.blocked_sources = sources
        return len(self.blocked_sources)

    def get_news_articles(self, topic: str|None = None, number_of_articles: int|None = None, source: str|None = None, period: str = "Any time", article_text: bool = False) -> list:
        """Get the news articles for a given topic or for a given source filtered by date

        Parameters
        ----------
        topic : str, optional
            The topic to search for, by default None
        number_of_articles : int, optional
            The number of articles to scrape, by default None which gives all the possible articles
        source : str, optional
            The domain for the website of the source to search for, by default None. For example, "dailymail.co.uk" or "bbc.com"
        period : list, optional
            The period to search for, by default "Any time". Period must be one of ["Any time", "Past hour", "Past 24 hours", "Past week", "Past year"]
        article_text : bool, optional
            Whether to scrape the article text or not, by default False

        Returns
        -------
        list
            A list of dictionaries containing the title, source, link and article body of the news articles (only if article_text is True)

        Raises
        ------
        ValueError
            If the period is not one of ["Any time", "Past hour", "Past 24 hours", "Past week", "Past year"].
        ValueError
            If the number_of_articles is not a positive integer
        TypeError
            If the topic is not a string
        TypeError
            If the source is not a string
        ValueError
            If the source is not a valid domain name
        TypeError
            If the article_text is not a boolean
        ValueError
            If the topic and source are both None

        Examples
        --------

        Retrieving all articles for a given topic in the last 24 hours without scraping.

        >>> from twitternewsbot.newsfinder import NewsFinder
        >>> nf = NewsFinder()
        >>> nf.get_news_articles(topic="Donald Trump", period="Past 24 hours")

        Retrieving all articles for a given topic in the last 24 hours and scraping the article text.

        >>> from twitternewsbot.newsfinder import NewsFinder
        >>> nf = NewsFinder()
        >>> nf.get_news_articles(topic="Donald Trump", period="Past 24 hours", article_text=True)

        Retrieving 5 articles from a given source in the last week without scraping.

        >>> from twitternewsbot.newsfinder import NewsFinder
        >>> nf = NewsFinder()
        >>> nf.get_news_articles(source="bbc.com", period="Past week", number_of_articles=5)
        """

        # Check if the period is valid
        if period not in ["Any time", "Past hour", "Past 24 hours", "Past week", "Past year"]:
            raise ValueError("period must be one of ['Any time', 'Past hour', 'Past 24 hours', 'Past week', 'Past year']")

        # Check if the number of articles is valid
        if number_of_articles is not None and number_of_articles <= 0:
            raise ValueError("number_of_articles must be a positive integer")

        # Check if the topic is valid
        if topic is not None and not isinstance(topic, str):
            raise TypeError("topic must be a string")

        # Check if the source is valid
        if source is not None and not isinstance(source, str):
            raise TypeError("source must be a string")

        if source is not None and not domain(source):
            raise ValueError("source must be a valid domain name")

        # Check if the article_text is valid
        if not isinstance(article_text, bool):
            raise TypeError("article_text must be a boolean")

        # Check if the topic and source are both None
        if topic is None and source is None:
            raise ValueError("Either or both topic and source must be provided")

        ################### Build url ######################

        # If topic is provided
        if topic is not None:
            topic_url = topic + " "
        else:
            topic_url = ""

        # If source is provided
        if source is not None:
            source_url = " site:" + source
        else:
            source_url = ""

        # If period is provided

        period_mappings = {"Any time": "",
                           "Past hour": " when:1h",
                           "Past 24 hours": " when:1d",
                           "Past week": " when:7d",
                           "Past year": " when:1y"}

        period_url = period_mappings[period]

        url = self.__build_url(topic_url, source_url, period_url)


        ################### Scrape ######################

        articles = self.__scrape_articles(url, number_of_articles)

        # Check and report if no articles found
        if articles is None:
            sys.stdout.write("No articles found. Try different parameters")
            return []


        ################### Build Articles ######################
        if article_text:
            articles = self.__build_list_of_articles(articles)

        return articles

blocked_sources property writable

blocked_sources: list

Get the list of blocked sources

RETURNS DESCRIPTION
list

The list of blocked sources

Examples:

>>> from twitternewsbot.newsfinder import NewsFinder
>>> nf = NewsFinder()
>>> nf.blocked_sources # Remember, Daily Mail and News18 are blocked by default (due to issues with scraping)
['Daily Mail', 'News18']

add_blocked_source

add_blocked_source(sources)

Add a list of sources to block

PARAMETER DESCRIPTION
sources

A list of sources to block

TYPE: list

RETURNS DESCRIPTION
int

The number of blocked sources

RAISES DESCRIPTION
TypeError

If sources is not a list

Examples:

>>> from twitternewsbot.newsfinder import NewsFinder
>>> nf = NewsFinder() 
>>> nf.add_blocked_source(["BBC"]) # Add BBC to the list of blocked sources
3
Source code in twitternewsbot/newsfinder.py
def add_blocked_source(self, sources: list) -> int:
    """Add a list of sources to block

    Parameters
    ----------
    sources : list
        A list of sources to block

    Returns
    -------
    int
        The number of blocked sources

    Raises
    ------
    TypeError
        If sources is not a list

    Examples
    --------
    >>> from twitternewsbot.newsfinder import NewsFinder
    >>> nf = NewsFinder() 
    >>> nf.add_blocked_source(["BBC"]) # Add BBC to the list of blocked sources
    3
    """

    # Check if the sources is valid
    if not isinstance(sources, list):
        raise TypeError("Sources must be a list")

    # Check if the source is already blocked
    for source in sources:
        if source in self.blocked_sources:
            sources.remove(source)

    self.blocked_sources = self.blocked_sources + sources
    return len(self.blocked_sources)

remove_blocked_source

remove_blocked_source(sources)

Remove a list of sources from the blocked list

PARAMETER DESCRIPTION
sources

A list of sources to block

TYPE: list

RETURNS DESCRIPTION
int

The number of blocked sources

RAISES DESCRIPTION
TypeError

If sources is not a list

Examples:

>>> from twitternewsbot.newsfinder import NewsFinder
>>> nf = NewsFinder()
>>> nf.remove_blocked_source(["Daily Mail"]) # Remove Daily Mail from the list of blocked sources
1
Source code in twitternewsbot/newsfinder.py
def remove_blocked_source(self, sources: list) -> int:
    """Remove a list of sources to block

    Parameters
    ----------
    sources : list
        A list of sources to block

    Returns
    -------
    int
        The number of blocked sources

    Raises
    ------
    TypeError
        If sources is not a list

    Examples
    --------
    >>> from twitternewsbot.newsfinder import NewsFinder
    >>> nf = NewsFinder()
    >>> nf.remove_blocked_source(["Daily Mail"]) # Remove Daily Mail from the list of blocked sources
    1
    """

    # Check if the sources is valid
    if not isinstance(sources, list):
        raise TypeError("Sources must be a list")

    for source in sources:
        self.blocked_sources.remove(source)
    return len(self.blocked_sources)

get_blocked_sources

get_blocked_sources()

Get the list of blocked sources

RETURNS DESCRIPTION
list

The list of blocked sources

Examples:

>>> from twitternewsbot.newsfinder import NewsFinder
>>> nf = NewsFinder()
>>> nf.get_blocked_sources() # Remember, Daily Mail and News18 are blocked by default (due to issues with scraping)
['Daily Mail', 'News18']
Source code in twitternewsbot/newsfinder.py
def get_blocked_sources(self) -> list:
    """Get the list of blocked sources

    Returns
    -------
    list
        The list of blocked sources

    Examples
    --------
    >>> from twitternewsbot.newsfinder import NewsFinder
    >>> nf = NewsFinder()
    >>> nf.get_blocked_sources() # Remember, Daily Mail and News18 are blocked by default (due to issues with scraping)
    ['Daily Mail', 'News18']
    """

    return self.blocked_sources

update_blocked_sources

update_blocked_sources(sources)

Update the list of blocked sources by completely replacing existing blocked sources

PARAMETER DESCRIPTION
sources

A list of sources to block

TYPE: list

RETURNS DESCRIPTION
int

The number of blocked sources

RAISES DESCRIPTION
TypeError

If sources is not a list

Examples:

>>> from twitternewsbot.newsfinder import NewsFinder
>>> nf = NewsFinder()
>>> nf.update_blocked_sources(["BBC"]) # Update the list of blocked sources to BBC
1
Source code in twitternewsbot/newsfinder.py
def update_blocked_sources(self, sources: list) -> int:
    """Update the list of blocked sources by completely replacing existing blocked sources

    Parameters
    ----------
    sources : list
        A list of sources to block

    Returns
    -------
    int
        The number of blocked sources

    Raises
    ------
    TypeError
        If sources is not a list

    Examples
    --------
    >>> from twitternewsbot.newsfinder import NewsFinder
    >>> nf = NewsFinder()
    >>> nf.update_blocked_sources(["BBC"]) # Update the list of blocked sources to BBC
    1
    """
    # Check if the sources is valid
    if not isinstance(sources, list):
        raise TypeError("Sources must be a list")

    self.blocked_sources = sources
    return len(self.blocked_sources)

get_news_articles

get_news_articles(topic=None, number_of_articles=None, source=None, period='Any time', article_text=False)

Get the news articles for a given topic or for a given source filtered by date

PARAMETER DESCRIPTION
topic

The topic to search for, by default None

TYPE: str DEFAULT: None

number_of_articles

The number of articles to scrape, by default None which gives all the possible articles

TYPE: int DEFAULT: None

source

The domain for the website of the source to search for, by default None. For example, "dailymail.co.uk" or "bbc.com"

TYPE: str DEFAULT: None

period

The period to search for, by default "Any time". Period must be one of ["Any time", "Past hour", "Past 24 hours", "Past week", "Past year"]

TYPE: str DEFAULT: 'Any time'

article_text

Whether to scrape the article text or not, by default False

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list

A list of dictionaries containing the title, source, link and article body of the news articles (only if article_text is True)

RAISES DESCRIPTION
ValueError

If the period is not one of ["Any time", "Past hour", "Past 24 hours", "Past week", "Past year"].

ValueError

If the number_of_articles is not a positive integer

TypeError

If the topic is not a string

TypeError

If the source is not a string

ValueError

If the source is not a valid domain name

TypeError

If the article_text is not a boolean

ValueError

If the topic and source are both None

Examples:

Retrieving all articles for a given topic in the last 24 hours without scraping.

>>> from twitternewsbot.newsfinder import NewsFinder
>>> nf = NewsFinder()
>>> nf.get_news_articles(topic="Donald Trump", period="Past 24 hours")

Retrieving all articles for a given topic in the last 24 hours and scraping the article text.

>>> from twitternewsbot.newsfinder import NewsFinder
>>> nf = NewsFinder()
>>> nf.get_news_articles(topic="Donald Trump", period="Past 24 hours", article_text=True)

Retrieving 5 articles from a given source in the last week without scraping.

>>> from twitternewsbot.newsfinder import NewsFinder
>>> nf = NewsFinder()
>>> nf.get_news_articles(source="bbc.com", period="Past week", number_of_articles=5)
Source code in twitternewsbot/newsfinder.py
def get_news_articles(self, topic: str|None = None, number_of_articles: int|None = None, source: str|None = None, period: str = "Any time", article_text: bool = False) -> list:
    """Get the news articles for a given topic or for a given source filtered by date

    Parameters
    ----------
    topic : str, optional
        The topic to search for, by default None
    number_of_articles : int, optional
        The number of articles to scrape, by default None which gives all the possible articles
    source : str, optional
        The domain for the website of the source to search for, by default None. For example, "dailymail.co.uk" or "bbc.com"
    period : str, optional
        The period to search for, by default "Any time". Period must be one of ["Any time", "Past hour", "Past 24 hours", "Past week", "Past year"]
    article_text : bool, optional
        Whether to scrape the article text or not, by default False

    Returns
    -------
    list
        A list of dictionaries containing the title, source, link and article body of the news articles (only if article_text is True)

    Raises
    ------
    ValueError
        If the period is not one of ["Any time", "Past hour", "Past 24 hours", "Past week", "Past year"].
    ValueError
        If the number_of_articles is not a positive integer
    TypeError
        If the topic is not a string
    TypeError
        If the source is not a string
    ValueError
        If the source is not a valid domain name
    TypeError
        If the article_text is not a boolean
    ValueError
        If the topic and source are both None

    Examples
    --------

    Retrieving all articles for a given topic in the last 24 hours without scraping.

    >>> from twitternewsbot.newsfinder import NewsFinder
    >>> nf = NewsFinder()
    >>> nf.get_news_articles(topic="Donald Trump", period="Past 24 hours")

    Retrieving all articles for a given topic in the last 24 hours and scraping the article text.

    >>> from twitternewsbot.newsfinder import NewsFinder
    >>> nf = NewsFinder()
    >>> nf.get_news_articles(topic="Donald Trump", period="Past 24 hours", article_text=True)

    Retrieving 5 articles from a given source in the last week without scraping.

    >>> from twitternewsbot.newsfinder import NewsFinder
    >>> nf = NewsFinder()
    >>> nf.get_news_articles(source="bbc.com", period="Past week", number_of_articles=5)
    """

    # Check if the period is valid
    if period not in ["Any time", "Past hour", "Past 24 hours", "Past week", "Past year"]:
        raise ValueError("period must be one of ['Any time', 'Past hour', 'Past 24 hours', 'Past week', 'Past year']")

    # Check if the number of articles is valid
    if number_of_articles is not None and number_of_articles <= 0:
        raise ValueError("number_of_articles must be a positive integer")

    # Check if the topic is valid
    if topic is not None and not isinstance(topic, str):
        raise TypeError("topic must be a string")

    # Check if the source is valid
    if source is not None and not isinstance(source, str):
        raise TypeError("source must be a string")

    if source is not None and not domain(source):
        raise ValueError("source must be a valid domain name")

    # Check if the article_text is valid
    if not isinstance(article_text, bool):
        raise TypeError("article_text must be a boolean")

    # Check if the topic and source are both None
    if topic is None and source is None:
        raise ValueError("Either or both topic and source must be provided")

    ################### Build url ######################

    # If topic is provided
    if topic is not None:
        topic_url = topic + " "
    else:
        topic_url = ""

    # If source is provided
    if source is not None:
        source_url = " site:" + source
    else:
        source_url = ""

    # If period is provided

    period_mappings = {"Any time": "",
                       "Past hour": " when:1h",
                       "Past 24 hours": " when:1d",
                       "Past week": " when:7d",
                       "Past year": " when:1y"}

    period_url = period_mappings[period]

    url = self.__build_url(topic_url, source_url, period_url)


    ################### Scrape ######################

    articles = self.__scrape_articles(url, number_of_articles)

    # Check and report if no articles found
    if articles is None:
        sys.stdout.write("No articles found. Try different parameters")
        return []


    ################### Build Articles ######################
    if article_text:
        articles = self.__build_list_of_articles(articles)

    return articles