Spaces · Runtime error
Commit 730ca01 · John Yang committed
Parent(s): 69177fb

Code clean up
Browse files
- .gitignore +1 -0
- app.py +9 -10
- predict_help.py +17 -13

.gitignore
CHANGED

@@ -1,3 +1,4 @@
 *.pyc
+*.ipynb
 
 .DS_Store
app.py
CHANGED

@@ -119,7 +119,7 @@ def run_episode(goal, env, verbose=True):
     search_results_cache = {}
     visited_asins, clicked_options = set(), set()
     sub_page_type, page_type, page_num = None, None, None
-    search_terms, prod_title, asin
+    search_terms, prod_title, asin = None, None, None
     options = {}
 
     for i in range(100):
@@ -228,7 +228,6 @@ def run_episode(goal, env, verbose=True):
                 print(f"Parsing search results took {end-begin} seconds")
 
             search_results_cache[search_terms] = data
-            num_prods = len(data)
             for d in data:
                 title_to_asin_map[d['Title']] = d['asin']
         elif page_type == Page.ITEM_PAGE or page_type == Page.SUB_PAGE:
@@ -268,7 +267,7 @@ def run_episode(goal, env, verbose=True):
         # Dict of Info -> Valid Action State (Info)
         begin = time.time()
         prod_arg = product_map if page_type == Page.ITEM_PAGE else data
-        info = convert_dict_to_actions(page_type, prod_arg, asin, page_num
+        info = convert_dict_to_actions(page_type, prod_arg, asin, page_num)
         end = time.time()
         if verbose:
             print("Extracting available actions took", end-begin, "seconds")
@@ -294,19 +293,19 @@ def run_episode(goal, env, verbose=True):
     return_value['Selected Options'] = ', '.join(list(clicked_options))
     return return_value
 
-gr.Interface(fn=run_episode
+gr.Interface(fn=run_episode,
     inputs=[
         gr.inputs.Textbox(lines=7, label="Input Text"),
         gr.inputs.Radio(['Amazon', 'eBay'], type="value", default="Amazon", label='Environment')
-    ]
-    outputs="text"
+    ],
+    outputs="text",
     examples=[
         ["I want to find a gold floor lamp with a glass shade and a nickel finish that i can use for my living room, and price lower than 270.00 dollars", "Amazon"],
         ["I need some cute heart-shaped glittery cupcake picks as a gift to bring to a baby shower", "Amazon"],
         ["I'm trying to find white bluetooth speakers that are not only water resistant but also come with stereo sound", "eBay"],
         ["find me the soy free 3.5 ounce 4-pack of dang thai rice chips, and make sure they are the aged cheddar flavor. i also need the ones in the resealable bags", "eBay"]
-    ]
-    title="WebShop"
-    article="<p style='padding-top:15px;text-align:center;'>To learn more about this project, check out the <a href='https://webshop-pnlp.github.io/' target='_blank'>project page</a>!</p>"
-    description="<p style='text-align:center;'>Sim-to-real transfer of agent trained on WebShop to search a desired product on Amazon from any natural language query!</p>"
+    ],
+    title="WebShop",
+    article="<p style='padding-top:15px;text-align:center;'>To learn more about this project, check out the <a href='https://webshop-pnlp.github.io/' target='_blank'>project page</a>!</p>",
+    description="<p style='text-align:center;'>Sim-to-real transfer of agent trained on WebShop to search a desired product on Amazon from any natural language query!</p>",
 ).launch(inline=False)
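
Note: several of the `-` lines above appear to have been left mid-edit in parent 69177fb, which is consistent with the Space's "Runtime error" status. A bare `search_terms, prod_title, asin` is a tuple expression over unbound names (a NameError on the first call), and the unclosed `convert_dict_to_actions(` call is a SyntaxError. A minimal sketch of the failure mode and the fix:

    # Before (as in parent 69177fb): evaluating unbound names fails at call time
    search_terms, prod_title, asin    # NameError: name 'search_terms' is not defined

    # After this commit: episode state is bound explicitly before the loop
    search_terms, prod_title, asin = None, None, None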
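
For reference, here is the repaired Gradio launcher assembled from the `+` lines above, trimmed to a self-contained sketch: `run_episode` is stubbed out (the real agent loop is defined earlier in app.py), and `gr.inputs.Textbox` / `gr.inputs.Radio` belong to the legacy pre-3.x Gradio input API this Space targets.

    import gradio as gr

    def run_episode(goal, env, verbose=True):
        # Stub standing in for the agent loop defined in app.py.
        return f"Would search {env} for: {goal!r}"

    gr.Interface(
        fn=run_episode,
        inputs=[
            gr.inputs.Textbox(lines=7, label="Input Text"),
            gr.inputs.Radio(['Amazon', 'eBay'], type="value", default="Amazon", label='Environment'),
        ],
        outputs="text",
        title="WebShop",
    ).launch(inline=False)

The commit itself only restores the commas between these keyword arguments; without them (`fn=run_episode` followed directly by `inputs=[...]` inside the parentheses) the file does not parse.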
predict_help.py
CHANGED

@@ -22,12 +22,6 @@ NUM_PROD_LIMIT = 10
 WEBSHOP_URL = "http://3.83.245.205:3000"
 WEBSHOP_SESSION = "abc"
 
-API = '85956985fae328bfe5a759a2984448d2'
-def get_url(url):
-    payload = {'api_key': API, 'url': url , 'country_code': 'us'}
-    proxy_url = 'http://api.scraperapi.com/?' + urlencode(payload)
-    return proxy_url
-
 def parse_results_ebay(query, page_num=None, verbose=True):
     query_string = '+'.join(query.split())
     page_num = 1 if page_num is None else page_num
@@ -64,6 +58,7 @@ def parse_results_ebay(query, page_num=None, verbose=True):
         print(f"Scraped {len(results)} products")
     return results
 
+
 def parse_item_page_ebay(asin, verbose=True):
     product_dict = {}
     product_dict["asin"] = asin
@@ -188,6 +183,7 @@ def parse_results_ws(query, page_num=None, verbose=True):
         print(f"Scraped {len(results)} products")
     return results
 
+
 def parse_item_page_ws(asin, query, page_num, options, verbose=True):
     product_dict = {}
     product_dict["asin"] = asin
@@ -199,7 +195,7 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
         f'{asin}/{query_string}/{page_num}/{options_string}'
     )
     if verbose:
-        print("Item Page URL: "
+        print(f"Item Page URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
 
@@ -240,6 +236,8 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
         f'{WEBSHOP_URL}/item_sub_page/{WEBSHOP_SESSION}/'
         f'{asin}/{query_string}/{page_num}/Description/{options_string}'
     )
+    if verbose:
+        print(f"Item Description URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
     product_dict["Description"] = soup.find(name="p", attrs={'class': 'product-info'}).text.strip()
@@ -249,6 +247,8 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
         f'{WEBSHOP_URL}/item_sub_page/{WEBSHOP_SESSION}/'
         f'{asin}/{query_string}/{page_num}/Features/{options_string}'
     )
+    if verbose:
+        print(f"Item Features URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
     bullets = soup.find(name="ul").findAll(name="li")
@@ -256,6 +256,7 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
 
     return product_dict
 
+
 # Query -> Search Result ASINs
 def parse_results_amz(query, page_num=None, verbose=True):
     url = 'https://www.amazon.com/s?k=' + query.replace(" ", "+")
@@ -289,6 +290,7 @@ def parse_results_amz(query, page_num=None, verbose=True):
         print("Scraped", len(results), "products")
     return results
 
+
 # Scrape information of each product
 def parse_item_page_amz(asin, verbose=True):
     product_dict = {}
@@ -385,7 +387,9 @@ def parse_item_page_amz(asin, verbose=True):
     product_dict["options"], product_dict["option_to_image"] = options, options_to_image
     return product_dict
 
+
 # Get text observation from html
+# TODO[john-b-yang]: Similar to web_agent_site/envs/...text_env.py func def, merge?
 def convert_html_to_text(html, simple=False, clicked_options=None, visited_asins=None):
     def tag_visible(element):
         ignore = {'style', 'script', 'head', 'title', 'meta', '[document]'}
@@ -419,18 +423,18 @@ def convert_html_to_text(html, simple=False, clicked_options=None, visited_asins
         observation += processed_t + '\n'
     return observation
 
-
-
+
+# Get action from dict of values retrieved from html
+def convert_dict_to_actions(page_type, products=None, asin=None, page_num=None) -> dict:
     info = {"valid": []}
     if page_type == Page.RESULTS:
         info["valid"] = ['click[back to search]']
-        if products is None or page_num is None
+        if products is None or page_num is None:
             print(page_num)
-            print(num_prods)
             print(products)
-            raise Exception('Provide `products`, `
+            raise Exception('Provide `products`, `page_num` to get `results` valid actions')
         # Decide whether to add `next >` as clickable based on # of search results
-        if
+        if len(products) > 10:
             info["valid"].append('click[next >]')
         # Add `< prev` as clickable if not first page of search results
         if page_num > 1:
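
Beyond completing half-written statements, the first hunk deletes a hard-coded ScraperAPI key together with its `get_url` proxy helper; the scrapers below call `requests.get` on the target URLs directly. Were proxying ever reinstated, the conventional pattern is to read the key from the environment rather than committing it. A hypothetical sketch, not part of this commit (the `SCRAPER_API_KEY` variable name is an assumption):

    import os
    from urllib.parse import urlencode

    def get_url(url):
        # Hypothetical replacement for the deleted helper: the key comes from
        # the environment instead of a literal checked into the repository.
        payload = {
            'api_key': os.environ['SCRAPER_API_KEY'],  # assumed env var name
            'url': url,
            'country_code': 'us',
        }
        return 'http://api.scraperapi.com/?' + urlencode(payload)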
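
The last hunk restores `convert_dict_to_actions`, whose signature line appears to have been lost in the parent commit, leaving its body dangling after `convert_html_to_text`. Assembled from the `+` lines, the results-page branch now reads as below; the two trailing lines (the `click[< prev]` append and the return) are inferred continuations, since the hunk ends at `if page_num > 1:`, and `Page` is the enum used throughout predict_help.py.

    # Get action from dict of values retrieved from html
    def convert_dict_to_actions(page_type, products=None, asin=None, page_num=None) -> dict:
        info = {"valid": []}
        if page_type == Page.RESULTS:
            info["valid"] = ['click[back to search]']
            if products is None or page_num is None:
                print(page_num)
                print(products)
                raise Exception('Provide `products`, `page_num` to get `results` valid actions')
            # Decide whether to add `next >` as clickable based on # of search results
            if len(products) > 10:
                info["valid"].append('click[next >]')
            # Add `< prev` as clickable if not first page of search results
            if page_num > 1:
                info["valid"].append('click[< prev]')  # inferred continuation
        return info                                    # inferred continuation

Two details worth noting: the threshold in `len(products) > 10` matches the `NUM_PROD_LIMIT = 10` visible in the first hunk's header, and the deleted `print(num_prods)` referenced a name that is not among this function's parameters or locals (its assignment, `num_prods = len(data)`, was removed from app.py in this same commit), so dropping it avoids a NameError on the error path.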