import requests
from datetime import datetime
from bs4 import BeautifulSoup
from multiprocess import Pool, Value
import math
import time
from drugtools.env_setup import postgres_conn, ENV

############ GLOBALS
# Shared unsigned-int ('I') value handed to every download worker as `reset_time`;
# it is the pause applied after a non-200 response.
RESET_TIME = Value('I',int(ENV["TRIAL_DOWNLOAD_RESET_TIME"]))
# Shared unsigned-int ('I') delay counter handed to every download worker as
# `delay_time`; workers sleep log10(counter) before each request and bump it on a 503.
DELAY_TIME = Value("I",int(ENV["TRIAL_DOWNLOAD_DELAY_TIME"]))
# Number of trials reserved per invocation of run().
TRIAL_RESERVATION_LIMIT=int(ENV["TRIAL_RESERVATION_LIMIT"])
    
############ Functions
def get_highest_version_number(response):
    """
    Return the highest posted version number found in the version table.

    The table is taken from the first <fieldset> of the parsed page. Its last
    row is not always a version entry, so the rows are walked from last to
    first and the first cell whose leading header is "VersionNumber" wins;
    that cell is the last version entry of the table in document order.

    Returns the version as an int. If no such cell exists the function falls
    off the end and returns None.
    """
    page = BeautifulSoup(response.text, features="lxml")
    # rows of the version table
    version_rows = page.find_all("fieldset")[0].table.tbody.find_all("tr")

    for row in version_rows[::-1]:
        for cell in row.find_all("td"):
            # only <td headers="VersionNumber">xx</td> cells carry a version
            if "headers" not in cell.attrs:
                continue
            # attribute values are lists, hence the [0]
            if cell.attrs["headers"][0] == "VersionNumber":
                return int(cell.text)

def make_request(nct_id,version1,version2):
    """
    Fetch the side-by-side comparison page for two snapshots of a trial.

    The response object is returned as-is; the status code is not inspected here.
    """
    url_template = "https://clinicaltrials.gov/ct2/history/{}?A={}&B={}&C=Side-by-Side"
    # NOTE(review): no timeout is passed, so a stalled connection can block the
    # calling worker indefinitely -- confirm whether that is acceptable.
    return requests.get(url_template.format(nct_id,version1,version2))

def upload_response(db_cursor, nct_id, version_a, version_b, response):
    """
    Insert one fetched comparison page, with its version pair, into http.responses.

    The response's `date` header is parsed and stored in ISO 8601 form next to
    the response URL, status code and body text. Nothing is returned.
    """
    insert_sql = """
        INSERT INTO http.responses
        (nct_id,version_a,version_b,url,response_code,response_date, html) 
        VALUES  (%s,%s,%s,%s,%s,%s,%s)
        ;
        """

    # the header is expected in the form "Tue, 15 Nov 1994 08:12:31 GMT"
    received_at = datetime.strptime(response.headers['date'], "%a, %d %b %Y %H:%M:%S %Z")

    row = (
        nct_id,
        version_a,
        version_b,
        response.url,
        response.status_code,
        received_at.isoformat(),
        response.text,
    )
    db_cursor.execute(insert_sql, row)

def download_and_handle_errors(cursor, nct_id, version_a, version_b, delay_time, reset_time):
    """
    Request one comparison page, store the response, and back off on errors.

    Parameters:
        cursor: open database cursor used for every insert.
        nct_id: identifier of the trial being downloaded.
        version_a, version_b: the two snapshot versions to compare.
        delay_time: shared counter exposing an integer `.value`. Each call
            sleeps log10(counter) seconds before requesting, and a 503
            response increments the counter.
        reset_time: pause in seconds applied after any non-200 response.
            Either a shared Value-like object (its `.value` is used) or a
            plain number.

    Every response is written to http.responses. Non-200 responses also flag
    the trial as 'Incomplete' and trigger the reset pause.

    Returns the response object, whatever its status code.
    """
    # Throttle before requesting. The counter is clamped to 1 so log10 stays
    # defined (log10(0) raises) and the sleep is never negative.
    time.sleep(math.log10(max(1, delay_time.value)))

    r = make_request(nct_id, version_a, version_b)

    # every response, good or bad, is recorded
    upload_response(cursor, nct_id, version_a, version_b, r)
    if r.status_code == 200:
        return r

    # anything else means this trial was not fully downloaded
    write_incomplete(cursor, nct_id)

    if r.status_code == 503:
        # tell all other processes to slow down the request speed
        # NOTE(review): `+=` on a shared Value is a read-modify-write and is not
        # atomic across processes; wrap in delay_time.get_lock() if exact
        # counts matter.
        delay_time.value += 1
        print("Recieved 503 on {}, increasing delay count to {}".format(nct_id, delay_time.value))
    # TODO: decide whether other unexpected status codes should raise.

    # Pause before the caller continues. The shared Value must be unwrapped;
    # passing the wrapper itself to time.sleep raises TypeError.
    time.sleep(getattr(reset_time, "value", reset_time))
    return r

def write_incomplete(cursor, nct_id):
    """
    Record an 'Incomplete' download status row for the given trial.
    """
    status_sql = """
    INSERT INTO HTTP.DOWNLOAD_STATUS (NCT_ID,STATUS) VALUES
        (%s, 'Incomplete'::HTTP.HISTORY_DOWNLOAD_STATUS);
    """
    params = [nct_id]
    cursor.execute(status_sql, params)

def download_trial_records(nct_id, delay_time, reset_time):
    """
    Manage the download of all records associated with a given trial.
    It uses a single connection and cursor for downloading the entire trial.

    The benefit of distributing the work at the trial level is that errors related
    to a trial can be handled at that level.

    This doesn't reserve a trial for download, but it does release the reservation
    by writing a 'Downloaded' status once every comparison page came back with
    HTTP 200. If any request failed, the trial keeps the 'Incomplete' status
    written at the time of the failure and is not marked 'Downloaded'.

    Parameters:
        nct_id: identifier of the trial to download.
        delay_time, reset_time: shared throttling values, passed through to
            download_and_handle_errors.

    Returns None.
    """
    #for testing
    print(nct_id)

    # A new connection is created every time the function is called so that this
    # function can be run using a multiprocessing pool
    with postgres_conn() as db_conn:
        with db_conn.cursor() as cursor:

            #upload the first two versions
            r = download_and_handle_errors(cursor, nct_id, 1, 2, delay_time, reset_time)
            if r.status_code != 200:
                # The failed response was stored and the trial flagged
                # 'Incomplete' by download_and_handle_errors; an error page
                # has no version table to parse, so stop here.
                return None

            #extract last version
            v = get_highest_version_number(r)
            if v is None:
                # No version cell was found, so the remaining pages cannot be
                # enumerated; flag the trial instead of failing on `None % 2`.
                write_incomplete(cursor, nct_id)
                return None

            all_ok = True

            #download and upload the remaining (odd, even) version pairs
            for version_a, version_b in step_generator(v):
                resp = download_and_handle_errors(cursor, nct_id, version_a, version_b, delay_time, reset_time)
                all_ok = all_ok and resp.status_code == 200

            if v % 2 == 1:
                #an odd number of versions leaves the last one unpaired;
                #cover it with the 1 vs (end) comparison.
                resp = download_and_handle_errors(cursor, nct_id, 1, v, delay_time, reset_time)
                all_ok = all_ok and resp.status_code == 200

            if not all_ok:
                # at least one page failed and the trial is already 'Incomplete'
                return None

            #now mark the trial as having been downloaded (this includes trials
            #with exactly two versions, which need no further requests)
            cursor.execute(
                """
                INSERT INTO http.download_status (nct_id,status)
                VALUES (%s, 'Downloaded'::http.history_download_status)
                """
                , [nct_id]
            )


def step_generator(max_version):
    """
    Yield the consecutive (odd, even) version pairs to request, starting at (3, 4).

    For an even max_version the pattern is

    (3,4), (5,6), (7,8),...,(max_version-1,max_version)

    For an odd max_version the last pair is (max_version-2, max_version-1);
    the final, unpaired version is left to the caller.
    Nothing is yielded when max_version is below 4.
    """
    # range() excludes its stop value, so +1 is required for an even
    # max_version to produce its final pair.
    for upper in range(4, max_version + 1, 2):
        yield (upper - 1, upper)


def reserve_trials(db_connection, limit=10):
    """
    Reserve up to `limit` trials for processing and return their NCT ids.

    Rows are picked from HTTP.TRIALS_TO_DOWNLOAD and a 'Reserved' status row is
    inserted for each one; the ids returned by the insert are handed back as a
    plain list.
    """
    reserve_sql = """
    WITH OF_INTEREST AS
        (SELECT NCT_ID
            FROM  HTTP.TRIALS_TO_DOWNLOAD
            LIMIT %s
        )
    INSERT INTO HTTP.DOWNLOAD_STATUS (NCT_ID,STATUS)
        SELECT OF_INTEREST.NCT_ID, 'Reserved'::HTTP.HISTORY_DOWNLOAD_STATUS AS STATUS
        FROM OF_INTEREST 
    RETURNING NCT_ID;
    """

    with db_connection.cursor() as cur:
        cur.execute(reserve_sql, [limit])
        # each returned row is a one-column record; keep only the id
        return [row[0] for row in cur.fetchall()]


def reserve_and_download_versions(limit, processes=12):
    """
    Reserve up to `limit` trials, then download their histories in parallel.

    Parameters:
        limit: maximum number of trials to reserve for this batch.
        processes: number of worker processes in the download pool
            (defaults to 12, the previously hard-coded size).

    The reservation connection is closed before any worker starts; each worker
    opens its own connection inside download_trial_records.
    """
    #db connection
    with postgres_conn() as con:

        #get list of nct_ids
        nctids = reserve_trials(con, limit)
        print("reserving_trials: ", nctids)

    # nothing was reserved, so there is no point in spinning up a worker pool
    if not nctids:
        return

    #closure that binds the shared throttling values, so the pool only has to
    #pass the trial id.
    # NOTE(review): a locally defined function is not picklable by the stdlib
    # multiprocessing module; this presumably relies on `multiprocess` -- confirm.
    def downloader(nct):
        download_trial_records(nct, DELAY_TIME, RESET_TIME)

    #start analyzing them
    with Pool(processes=processes) as process_pool:
        process_pool.map(downloader, nctids)


def run():
    """
    Reserve and download one batch of trials, sized by TRIAL_RESERVATION_LIMIT.
    """
    batch_size = TRIAL_RESERVATION_LIMIT
    reserve_and_download_versions(batch_size)

# Script entry point: when the module is executed directly, run one
# reserve-and-download batch via run().
if __name__ == "__main__":
    """
    Main!
    """
    run()