Getting data from a web page faster

Last edited on

Overview

Fetching data from a web page through Squish is in most cases done piece by piece, which can make retrieving larger amounts of data (for example the complete contents of a table) relatively slow.

The following script demonstrates different ways to retrieve data and reports how long each of them takes to execute for an example web page.

Example Script

Please note: get_table_data4() and get_table_data5() rely on helper functions defined in the file squish_getTableData.js, which must be placed in <SQUISH_DIR>/lib/extensions/web.

import os
import sys
import json
import time

def main():
    """Load the example page and run every retrieval strategy several times.

    Each strategy is executed ``repetitions`` times in a row against the
    table named "thetable"; every run logs its own elapsed time.
    """
    loadUrl("https://download.froglogic.com/support/table_rows_50_columns2.html")

    repetitions = 3

    # Strategies run in this order, each one repeated before the next starts.
    strategies = (
        get_table_data1,
        get_table_data2,
        get_table_data3,
        get_table_data4,
        get_table_data5,
    )
    for strategy in strategies:
        for _ in range(repetitions):
            strategy("thetable")

def get_table_data1(table):
    """Time reading every cell's text through the table object's cellAt().

    table -- object name handed to waitForObject() to look up the table.

    The lookup itself is not timed. The elapsed time in seconds is written
    to the test log; nothing is returned.
    """
    table_obj = waitForObject(table)

    started = time.time()

    row_count = table_obj.rowCount
    column_count = table_obj.columnCount
    for row in range(row_count):
        for column in range(column_count):
            # The value is discarded; only the cost of fetching it matters.
            table_obj.cellAt(row, column).innerText

    elapsed = time.time() - started
    test.log("get_table_data1(): %.2f\n\n" % elapsed)

def get_table_data2(table):
    """Time reading every cell's text with one evalJS() call per cell.

    table -- object name handed to waitForObject() to look up the table.

    Cells are addressed in JavaScript as rows[r].cells[c] on the element
    found by the table's id. The object lookup and id read are not timed.
    The elapsed time in seconds is written to the test log; nothing is
    returned.
    """
    table_obj = waitForObject(table)
    element_id = table_obj.id

    cell_js = 'document.getElementById("%s").rows[%s].cells[%s].innerText'

    started = time.time()

    row_count = table_obj.rowCount
    column_count = table_obj.columnCount
    for row in range(row_count):
        for column in range(column_count):
            # The result is discarded; only the round trip is being measured.
            evalJS(cell_js % (element_id, row, column))

    elapsed = time.time() - started
    test.log("get_table_data2(): %.2f\n\n" % elapsed)

def get_table_data3(table):
    """Time reading every cell's text with one evalJS() call per cell.

    table -- object name handed to waitForObject() to look up the table.

    Cells are addressed in JavaScript as rows[r].children[c] on the element
    found by the table's id. The object lookup and id read are not timed.
    The elapsed time in seconds is written to the test log; nothing is
    returned.
    """
    table_obj = waitForObject(table)
    element_id = table_obj.id

    cell_js = 'document.getElementById("%s").rows[%s].children[%s].innerText'

    started = time.time()

    row_count = table_obj.rowCount
    column_count = table_obj.columnCount
    for row in range(row_count):
        for column in range(column_count):
            # The result is discarded; only the round trip is being measured.
            evalJS(cell_js % (element_id, row, column))

    elapsed = time.time() - started
    test.log("get_table_data3(): %.2f\n\n" % elapsed)

def get_table_data4(table):
    """Time reading every cell from one JavaScript object fetched up front.

    table -- object name handed to waitForObject() to look up the table.

    Requires the JavaScript helper squish_getTableDataAsObject (from
    squish_getTableData.js) to be available in the page; if it is not, the
    test is failed with a hint and the function returns without measuring.

    The helper's result is retrieved once via retrieveJSObject(); the row
    and column counts and each cell (keyed "row/column") are then read from
    it with property(). The elapsed time in seconds is written to the test
    log; nothing is returned.
    """
    if evalJS('typeof squish_getTableDataAsObject == "function"') == "false":
        # Fall back to a placeholder so that an unset SQUISH_PREFIX cannot
        # replace this diagnostic with a KeyError.
        squish_dir = os.environ.get("SQUISH_PREFIX", "<SQUISH_DIR>")
        test.fail("Function squish_getTableDataAsObject does not exist; maybe squish_getTableData.js has not been installed in " + squish_dir + "/lib/extensions/web?")
        return

    table_obj = waitForObject(table)
    element_id = table_obj.id

    start_time = time.time()

    data = retrieveJSObject('squish_getTableDataAsObject(document.getElementById("%s"))' % element_id)
    rows = data.property("rowCount")
    columns = data.property("columnCount")
    for i in range(rows):
        for j in range(columns):
            # The value is discarded; only the cost of fetching it matters.
            data.property("%s/%s" % (i, j))

    elapsed = time.time() - start_time
    test.log("get_table_data4(): %.2f\n\n" % elapsed)

def get_table_data5(table):
    """Time reading every cell from a JSON string fetched in a single call.

    table -- object name handed to waitForObject() to look up the table.

    Requires the JavaScript helper squish_getTableDataObjectAsJSON (from
    squish_getTableData.js) to be available in the page; if it is not, the
    test is failed with a hint and the function returns without measuring.

    The helper's result is fetched with one evalJS() call and parsed with
    json.loads(); the row and column counts and each cell (keyed
    "row/column") are then read from the parsed data. The elapsed time in
    seconds is written to the test log; nothing is returned.
    """
    if evalJS('typeof squish_getTableDataObjectAsJSON == "function"') == "false":
        # Fall back to a placeholder so that an unset SQUISH_PREFIX cannot
        # replace this diagnostic with a KeyError.
        squish_dir = os.environ.get("SQUISH_PREFIX", "<SQUISH_DIR>")
        test.fail("Function squish_getTableDataObjectAsJSON does not exist; maybe squish_getTableData.js has not been installed in " + squish_dir + "/lib/extensions/web?")
        return

    table_obj = waitForObject(table)
    element_id = table_obj.id

    start_time = time.time()

    json_str = evalJS('squish_getTableDataObjectAsJSON(document.getElementById("%s"))' % element_id)
    data = json.loads(json_str)
    rows = data["rowCount"]
    columns = data["columnCount"]
    for i in range(rows):
        for j in range(columns):
            # The value is discarded; the lookup mirrors the other variants.
            data["%s/%s" % (i, j)]

    elapsed = time.time() - start_time
    test.log("get_table_data5(): %.2f\n\n" % elapsed)
test.py

Results

Example results (consider relative differences, not absolute values!):

get_table_data1(): 7.30

get_table_data1(): 7.37

get_table_data1(): 7.41

get_table_data2(): 3.60

get_table_data2(): 3.60

get_table_data2(): 3.77

get_table_data3(): 3.75

get_table_data3(): 3.65

get_table_data3(): 3.63

get_table_data4(): 3.85

get_table_data4(): 3.74

get_table_data4(): 3.61

get_table_data5(): 0.03

get_table_data5(): 0.03

get_table_data5(): 0.05