import sys
import pyodbc
import requests
from datetime import datetime
from bs4 import BeautifulSoup,Tag

WIKIPEDIA_PAGE_URL = 'https://en.wikipedia.org/wiki/List_of_language_self-study_programs'

# Change the Database connection information before runing the code
DBSERVER = '192.168.2.16'
DATABASE_NAME = 'demo'
UID = 'user'
PWD= 'password'

#
#************************The Third Level*****************************
#

# Create an SQL Server database connection.
def create_connection(server, database, user, password):
    '''
    Take server instance name, database name, username and password as inputs
    Return an SQL Server database connection
    '''
    connection = None
    try:
        connection = pyodbc.connect(Driver="{ODBC Driver 17 for SQL Server}",
                                Server=server,
                                Database=database,
                                UID=user,
                                PWD=password)
    except:
        # We should log the specific type of error occurred.
        print('Failed to connect to the database server: {}'.format(sys.exc_info()[1]))
    return connection

def save_tuples(connection, tuples):
    '''
    Takes database connection and a list of tuples containing the HTML table data as input
    Save the data in the tuples into a database table
    '''

    cursor = connection.cursor()
    cursor.fast_executemany = True 
    try:

        cursor.executemany(''' 
            INSERT INTO [dbo].[stg_language_self_study_programs] 
                    ([self_study_program],[number_of_languages_taught],[interface_languages],
                    [media],[business_model],[date_extracted]) 
                VALUES(?,?,?,?,?,?)''',html_table_data[1:]) 
        connection.commit() 
    except:
        # We should log the specific type of error occurred.
        print('Failed to save data to the database table: {}'.format(sys.exc_info()[1]))
        raise
    finally:
        cursor.close() 

def close_connection(connection):
    '''Take input connection and close the database connection'''
    try:
        connection.close()
    except pyodbc.ProgrammingError as ex:
        # We should log the specific type of error occurred.
        print('Failed to close the database connection: {}'.format(ex))
    except:
        # We should log the specific type of error occurred.
        print('Failed to close the database connection: {}'.format(sys.exc_info()[1]))
        raise

#
#************************The Second Level*****************************
#

def save_html_table_data(html_table_data):
    """
    Takes a list of Python tuples. Save the list to a database table.
    """
    connection = create_connection(DBSERVER,DATABASE_NAME,UID,PWD)
    if connection != None:
        try:
            save_tuples(connection, html_table_data)
        except:
            # We should log the specific type of error occurred.
            print('Failed to save the data: {}'.format(sys.exc_info()[1]))
        finally:
            # Close the connection
            close_connection(connection)

def download_table_data(self_study_programs_table_body):
    """
    Takes an HTML table body tag. 
    Returns a list of Python tuples that contain all the HTML table data.
    """
    # Create an empty list. The elements in this list are also lists.
    # Therefore, this is a two-dimensional list 
    html_table_data = list()

    for table_row in self_study_programs_table_body.children:
        # We only look at the content in a tag
        if isinstance(table_row, Tag):
            # Create a list that will be added to the two-dimensional list
            data_row = list()
            for data_cell in table_row.children:
                # We only look at the content in a tag
                if isinstance(data_cell, Tag):
                    try:
                        # Extract the text in the data cell
	                    data_row.append(data_cell.get_text().strip())
                    except:
                        # We should log the specific type of error occurred.
                        print('Failed to extract content from the data cell{}.\
                        Error: {}'.format(data_cell, sys.exc_info()[1]))
                        # Continue scraping but show "Error" for further analysis.
                        data_row.append('Error')
            # Add audit data
            data_row.append(datetime.now())
            # Add the tuple to the list
            html_table_data.append(tuple(data_row))

    return html_table_data

def find_table_body(beautiful_soup_object, tag_attributes):
    """
    Take a Beautiful Soup instance, tag name and attributes as inputs
    Returns a table body.
    """
    self_study_programs_table_body = None
    self_study_programs_table = beautiful_soup_object.find('table',{'class':'wikitable sortable'})
    try:
        self_study_programs_table_body = self_study_programs_table.tbody
    except AttributeError as ex:
        # We should log the specific type of error occurred.
        print('The table bodt tag was not found: {}'.format(ex))
    else:
        if self_study_programs_table_body == None:
            # We should log the message.
            print('The table bodt tag was not found.')

    return self_study_programs_table_body


def parse_web_content(web_content):
    """
    Take web content as input. 
    Return an instance of the BeautifulSoup class
    """

    # Create an instance of the BeautifulSoup class to parse the web content
    soup = BeautifulSoup(web_content, 'html.parser')    
    return soup

def read_web_page_content(web_page_url):
    """
    Takes a web page url and read content of the web page as input.
    Returns the status code and the web content
    """
    status_code, content = None, None
    try:
        response = requests.get(web_page_url)
    except requests.exceptions.ConnectionError as ex:
        # We should log the specific type of error happened.
        print('A Connection error occurred: {}'.format(ex))
    except KeyboardInterrupt as ex:
        raise SystemExit('Keyboard Interrupt')
    except:
        # We should log the specific type of error happened.
        print('Failed to read the page: {}'.format(sys.exc_info()[1]))
    else:
        status_code, content = response.status_code, response.content
    return status_code, content

# The variable __name__ contains a string whose value depends on
# how the code is being used. A module's __name__ is set equal to 
# __main__ when read from standard input, a script, or from an 
# interactive prompt. 
if __name__ == '__main__':  # Use this condition to determine the execution conext.
    # 1. Read the web page content.
    status_code, content = read_web_page_content(WIKIPEDIA_PAGE_URL)
    # 2, Use an instance of the Beautiful Soup class to parse the web content.
    if status_code == 200:
        soup = parse_web_content(content)
        # 3, Search for the table body element in parsed content based on the tag attributes
        self_study_programs_table_body = find_table_body(soup, {'class':'wikitable sortable'})
        if self_study_programs_table_body != None:
            # 4, Loop through the table body element to download data into a list of Python tuples.
            html_table_data = download_table_data(self_study_programs_table_body)
            # 5, Store the HTML table data into a SQL Server Database.
            save_html_table_data(html_table_data)