{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "6947fe33-dd74-4235-90d7-af68aae454ce",
"metadata": {},
"outputs": [],
"source": [
"%load_ext lab_black"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1a572123-f84e-4c61-933d-3f1cede2af8e",
"metadata": {},
"outputs": [],
"source": [
"from bs4 import BeautifulSoup\n",
"import gzip\n",
"import json\n",
"import re\n",
"import datetime\n",
"import pathlib\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7e71aa7f-4d8f-446c-9f58-41ec77ca039e",
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ef0bb3ba-081d-4f16-a4e1-34fd636f9951",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option(\"display.max_rows\", None)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9c4dd76b-c2e5-454d-98d2-0c5d1f826fb7",
"metadata": {},
"outputs": [],
"source": [
"TIME_STRING = \"%Y%m%dT%H%M%SZ\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a837900c-6a6d-4f13-8c6c-25c14cfaaf06",
"metadata": {},
"outputs": [],
"source": [
"DIRECTORY = \"/home/agude/Projects/blog_projects/zillow_redfin/data/\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e5f714d3-ca36-4823-b7af-eac742e84185",
"metadata": {},
"outputs": [],
"source": [
"def get_date_from_file(\n",
"    file_name: str, time_string: str = TIME_STRING\n",
") -> datetime.datetime:\n",
"    \"\"\"Parse the timestamp embedded in a data file's name.\n",
"\n",
"    The timestamp is the text after the last underscore in ``file_name`` and\n",
"    before the first dot that follows it.\n",
"\n",
"    Args:\n",
"        file_name: File name or path ending in ``_<timestamp>.<extension>``.\n",
"        time_string: ``strptime`` format of the timestamp.\n",
"\n",
"    Returns:\n",
"        The parsed timestamp as a ``datetime.datetime``.\n",
"\n",
"    Raises:\n",
"        ValueError: If the extracted text does not match ``time_string``.\n",
"    \"\"\"\n",
"    timestamp = file_name.split(\"_\")[-1].split(\".\")[0]\n",
"    return datetime.datetime.strptime(timestamp, time_string)"
]
},
{
"cell_type": "markdown",
"id": "b0543217-3baf-4536-923b-9400e9379d68",
"metadata": {
"tags": []
},
"source": [
"# Zillow"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9e2dd087-95b1-4edb-baed-0ad065b057b5",
"metadata": {},
"outputs": [],
"source": [
"def get_zillow_price(file_name: str):\n",
"    \"\"\"Extract the Zestimate from a gzip-compressed Zillow page snapshot.\n",
"\n",
"    The page embeds a JSON document in the ``script#hdpApolloPreloadedData``\n",
"    element. Its ``apiCache`` field is itself a JSON-encoded string that maps\n",
"    cache keys to payloads; the first payload whose ``property`` carries\n",
"    ``adTargets`` supplies the ``zestimate`` value.\n",
"\n",
"    Args:\n",
"        file_name: Path to the gzip-compressed HTML file.\n",
"\n",
"    Returns:\n",
"        The ``zestimate`` value exactly as stored in the page, or ``None`` when\n",
"        no cache entry carries ``adTargets``.\n",
"    \"\"\"\n",
"    with gzip.open(file_name) as fp:\n",
"        soup = BeautifulSoup(fp, \"html.parser\")\n",
"\n",
"    item = soup.select_one(\"script#hdpApolloPreloadedData\").text\n",
"    api_cache_string = json.loads(item)[\"apiCache\"]\n",
"    zillow_data_json = json.loads(api_cache_string)\n",
"\n",
"    for entry in zillow_data_json.values():\n",
"        # Tolerate cache entries without a \"property\" payload instead of\n",
"        # raising KeyError on them.\n",
"        targets = (entry.get(\"property\") or {}).get(\"adTargets\")\n",
"        if targets is not None:\n",
"            return targets.get(\"zestimate\")\n",
"\n",
"    # Nothing matched: return None explicitly rather than hitting an unbound\n",
"    # local variable.\n",
"    return None"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0753acaa-4316-46e5-b1fb-4c7117f0875e",
"metadata": {},
"outputs": [],
"source": [
"# Column-oriented records: one timestamp and one price per Zillow snapshot.\n",
"price_data = {\"date\": [], \"amount\": []}\n",
"\n",
"for path in pathlib.Path(DIRECTORY).iterdir():\n",
"    path_str = str(path)\n",
"\n",
"    # Ignore anything that is not a regular file with \"zillow_1381\" in its path.\n",
"    if not path.is_file() or \"zillow_1381\" not in path_str:\n",
"        continue\n",
"\n",
"    price_data[\"date\"].append(get_date_from_file(path_str))\n",
"    price_data[\"amount\"].append(get_zillow_price(path_str))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8de78aee-1b7e-4494-8729-cc818ee9fbe1",
"metadata": {},
"outputs": [],
"source": [
"# Build the Zillow frame in one chain: order by snapshot time, renumber the\n",
"# rows, make the price numeric and tag every row with its website.\n",
"zillow_df = (\n",
"    pd.DataFrame(price_data)\n",
"    .sort_values(\"date\")\n",
"    .reset_index(drop=True)\n",
"    .assign(\n",
"        amount=lambda frame: frame[\"amount\"].astype(\"float\"),\n",
"        website=\"zillow\",\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7e704146-8089-49f6-b15d-8919c9cd0404",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" amount | \n",
" website | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2022-04-11 01:24:32 | \n",
" 1027400.0 | \n",
" zillow | \n",
"
\n",
" \n",
" 1 | \n",
" 2022-04-11 07:03:45 | \n",
" 1035800.0 | \n",
" zillow | \n",
"
\n",
" \n",
" 2 | \n",
" 2022-04-12 07:04:05 | \n",
" 1035800.0 | \n",
" zillow | \n",
"
\n",
" \n",
" 3 | \n",
" 2022-04-13 07:00:49 | \n",
" 1035800.0 | \n",
" zillow | \n",
"
\n",
" \n",
" 4 | \n",
" 2022-04-14 07:02:32 | \n",
" 1035800.0 | \n",
" zillow | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date amount website\n",
"0 2022-04-11 01:24:32 1027400.0 zillow\n",
"1 2022-04-11 07:03:45 1035800.0 zillow\n",
"2 2022-04-12 07:04:05 1035800.0 zillow\n",
"3 2022-04-13 07:00:49 1035800.0 zillow\n",
"4 2022-04-14 07:02:32 1035800.0 zillow"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"zillow_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4721bc01-859d-43f9-8bda-e88da2f118f4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEDCAYAAAA7jc+ZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVbklEQVR4nO3df5BdZ33f8fcnsuWkQAaDBLjYQvxw6jjIBrMxXeIh63pKbZqpYZQUHCa/gGqaxqX6I53AZAZ3wiR2aWaqpECN6lFUt8WU1Ji65YfjKuzYU6+p1xljbJcfxpBaKlRChhA3FGPx7R/37OhqfXf37uru7t1n36+ZO/ee53nOOc85OvvZo+ecezZVhSSpXT+y3h2QJK0ug16SGmfQS1LjDHpJapxBL0mNM+glqXFjG/RJDiQ5muShIdv//SSPJHk4yUdWu3+StFFkXO+jT/J64Eng5qp65RJtzwc+Bvytqvp2khdU1dG16KckjbuxPaOvqruAJ/rLkrw8yWeS3J/k7iQXdFX/APhgVX27m9eQl6TO2Ab9AvYD/7iqXgP8JvChrvwngJ9I8t+T3JvkynXroSSNmTPWuwPDSvJs4HXAHyeZKz6rez8DOB+YAs4F7kqyq6q+s8bdlKSxs2GCnt7/Pr5TVa8aUHcY+FxV/QD4WpIv0wv++9awf5I0ljbM0E1VfZdeiP8CQHou7qo/Qe9sniTb6A3lPLYO3ZSksTO2QZ/kFmAG+BtJDid5B/A24B1JPg88DFzdNb8DOJ7kEeCzwD+tquPr0W9JGjdje3ulJGk0xvaMXpI0GmN5MXbbtm21c+fO9e6GJG0Y999//7eqavugurEM+p07dzI7O7ve3ZCkDSPJny9U59CNJDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL52GmRm4/vre+6BpaRyM5X300kYwMwNXXAFPPQVbt8K+fbB378npQ4dgcnK9eyl5Ri+t2PR0L9RPnOi933rrqdPT0+vdQ6nHoJdWaGqqd+a+ZUvvfffuU6enpta7h1KPQzfSCk1O9oZnpqd7oT45Cbt2nTotjYOxfEzxxMRE+awbSRpekvuramJQnUM3ktS4JYM+yYEkR5M8tED91UkeTPJAktkkl/XVnejKH0hy+yg7LkkazjBj9AeBDwA3L1B/CLi9qirJRcDHgAu6uu8t8Me8JUlrZMkz+qq6C3hikfon6+RA/7OA8Rv0l6RNbCRj9EnenOSLwCeBt/dV/Wg3nHNvkjctsYw9XdvZY8eOjaJbkiRGFPRVdVtVXQC8CXhfX9VLuqvAvwjsS/LyRZaxv6omqmpi+/aBfw1LkrQCI73rphvmeVmSbd30ke79MWAaePUo1ydJWtppB32SVyRJ9/kS4CzgeJKzk5zVlW8DfgZ45HTXJ0laniXvuklyCzAFbEtyGLgOOBOgqm4EdgO/nOQHwPeAt3R34Pwk8OEkP6T3C+WGqjLoJWmN+c1YSWqA34yVpE3MoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaN1TQJzmQ5GiShxaovzrJg0keSDKb5LK+ul9J8pXu9Suj6rgkaTjDntEfBK5cpP4QcHFVvQp4O3ATQJLnAdcBrwUuBa5LcvZKOytJWr6hgr6q7gKeWKT+yaqqbvJZwNznvwPcWVVPVNW3gTtZ/BeGJGnERjZGn+TNSb4IfJLeWT3Ai4HH+5od7soGzb+nG/aZPXbs2Ki6JUmb3siCvqpuq6oLgDcB71vB/PuraqKqJrZv3z6qbknSpjfyu266YZ6XJdkGHAHO66s+tyuTJK2RkQR9klckSff5EuAs4DhwB/CGJGd3F2Hf0JVJktbIGcM0SnILMAVsS3KY3p00ZwJU1Y3AbuCXk/wA+B7wlu7i7BNJ3gfc1y3qd6pqwY
u60mY1MwPT0zA1BZOTz5yWTkdO3iwzPiYmJmp2dna9uyGtiZkZuOIKeOop2LoV9u2DvXtPTh86ZNhraUnur6qJQXV+M1ZaZ9PTvVA/caL3fuutp05PT693D7XRGfTSOpua6p25b9nSe9+9+9Tpqan17qE2uqHG6CWtnsnJ3vBM/5j8rl2O0Wt0HKOXNggv2Goxi43Re0YvbQBesNXpcIxe2gC8YKvTYdBLG4AXbHU6HLqRNgAv2Op0eDFWkhrgF6YkaRMz6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYtGfRJDiQ5muShBerfluTBJF9Ick+Si/vqvt6VP5DEp5RJ0joY5oz+IHDlIvVfA362qnYB7wP2z6u/vKpetdBT1SRJq2vJ59FX1V1Jdi5Sf0/f5L3AuSPolyRpREY9Rv8O4NN90wX8SZL7k+xZbMYke5LMJpk9duzYiLslSZvXyP7CVJLL6QX9ZX3Fl1XVkSQvAO5M8sWqumvQ/FW1n27YZ2JiYvz+GookbVAjOaNPchFwE3B1VR2fK6+qI937UeA24NJRrE+SNLzTDvokO4CPA79UVV/uK39WkufMfQbeAAy8c0eStHqWHLpJcgswBWxLchi4DjgToKpuBN4LPB/4UBKAp7s7bF4I3NaVnQF8pKo+swrbIElaxDB33VyzRP07gXcOKH8MuPiZc0iS1pLfjJWkxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLm8zMDFx/fe990LTas+QfB09yAPg54GhVvXJA/duA3wIC/CXw61X1+a7uSuAPgC3ATVV1wwj7LmmZZmbgiivgqadg61bYtw/27j05fegQTE6udy81asOc0R8Erlyk/mvAz1bVLuB9wH6AJFuADwJXARcC1yS58LR6K21A43QGPT3dC/UTJ3rvt9566vT09Nr3SatvyTP6qroryc5F6u/pm7wXOLf7fCnwaFU9BpDko8DVwCMr7q20wYzbGfTUVG+9c+vfvRvuvvvk9NTU2vVFa2fJoF+mdwCf7j6/GHi8r+4w8NqFZkyyB9gDsGPHjhF3S1ofw5xBr2XQT072frlMT/dCfXISdu06dVrtGVnQJ7mcXtBftpL5q2o/3bDPxMREjapf0noaxzPoyclTA33+tNozkqBPchFwE3BVVR3vio8A5/U1O7crkzYNz6A1Dk476JPsAD4O/FJVfbmv6j7g/CQvpRfwbwV+8XTXJ200nkFrvQ1ze+UtwBSwLclh4DrgTICquhF4L/B84ENJAJ6uqomqejrJtcAd9G6vPFBVD6/KVkiSFpSq8RsOn5iYqNnZ2fXuhiRtGEnur6qJQXV+M1aSGmfQS1LjDHpJapxBL0nLMMwjLUbVZlRG/c1YSWrWMI+0gNG0GeUtuAa9JA1p2IfCjaLNKIPeoRtJq2I1hzRG0WYl88w90mLLlpOPtOifnpoaXZuRqqqxe73mNa8pSRvXPfdU/diPVW3Z0nu/555nln34w+vXZqXLndu23/u9hadH2WY5gNlaIFMdupE0cvOHOEY5pDGKNqcznDLMIy1G1WZUHLqRNHLzhyZGOaQxijZjMZyyhjyjlzRyg57aCcM9yXOt2qx0uRuRz7qRtGwzM88MwEFlWjuLPevGM3pJyzL/XvJB94X7R8bHi0EvaVmGudC61n8iUYsz6CUty/w/jzh3kXJQmcaDQS9pWYa90Krx4cVYqSHzL4h6gXTz8GKstAkM88Atw35z8gtTUiOGfeCWNh+DXmrEqL7ZOczDvrSxOHQjNWLQRdLlfrPTe+TbtGTQJzkA/BxwtKpeOaD+AuCPgEuA366q3++r+zrwl8AJ4OmFLhRIGo3TfVCW98i3aZgz+oPAB4CbF6h/AngX8KYF6i+vqm8tu2
eS1pz3yLdpyaCvqruS7Fyk/ihwNMnfHWXHJK0975Fv02qP0RfwJ0kK+HBV7V+oYZI9wB6AHTt2rHK3JC1kmGena2NZ7btuLquqS4CrgN9I8vqFGlbV/qqaqKqJ7du3r3K3JGnzWNWgr6oj3ftR4Dbg0tVcnyTpmVYt6JM8K8lz5j4DbwAeWq31SZIGG+b2yluAKWBbksPAdcCZAFV1Y5IXAbPAjwM/TLIXuBDYBtyWZG49H6mqz6zCNkiSFjHMXTfXLFH/TeDcAVXfBS5eYb8kSSPiIxAkqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktS4JYM+yYEkR5M8tED9BUlmknw/yW/Oq7syyZeSPJrk3aPqtCRpeMOc0R8Erlyk/gngXcDv9xcm2QJ8ELgKuBC4JsmFK+umJGmllgz6qrqLXpgvVH+0qu4DfjCv6lLg0ap6rKqeAj4KXH06nZUkLd9qjtG/GHi8b/pwVzZQkj1JZpPMHjt2bBW7JUmby9hcjK2q/VU1UVUT27dvX+/uSFIzVjPojwDn9U2f25VJktbQagb9fcD5SV6aZCvwVuD2VVyfJGmAM5ZqkOQWYArYluQwcB1wJkBV3ZjkRcAs8OPAD5PsBS6squ8muRa4A9gCHKiqh1dlKyRJC1oy6KvqmiXqv0lvWGZQ3aeAT62sa5KkURibi7GSpNVh0EtS4wx6SWqcQS9JjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDPoxNDMD11/fex80PWwbSYIh/mas1tbMDFxxBTz1FGzdCvv2wd69J6cPHeq1W6rN5OT6bYOk8bLkGX2SA0mOJnlogfok+cMkjyZ5MMklfXUnkjzQvW4fZcdbNT3dC+wTJ3rvt9566vT09HBtJGnOMEM3B4ErF6m/Cji/e+0B/nVf3feq6lXd6++tuJcNWWrIZWqqd1a+ZUvvfffuU6enpoZrI0lzlhy6qaq7kuxcpMnVwM1VVcC9SZ6b5Jyq+saoOtmKYYZlJid779PTvcCenIRdu06dhuHaSBKMZoz+xcDjfdOHu7JvAD+aZBZ4Grihqj4xgvVtWMMMuUxOnnzNmT89qGxQG0mC1b8Y+5KqOpLkZcCfJvlCVX11UMMke+gN/bBjx45V7tbqmJl55ll1f9nckMvcGfzu3XD33SenHXKRtBpGEfRHgPP6ps/tyqiquffHkkwDrwYGBn1V7Qf2A0xMTNQI+rWm5g/LDLo75tAhh1wkrb1RBP3twLVJPgq8FviLqvpGkrOBv6qq7yfZBvwM8P4RrG9dLHW2Pn9YZu7Ol/ll73mPQy6S1taSQZ/kFmAK2JbkMHAdcCZAVd0IfAp4I/Ao8FfAr3Wz/iTw4SQ/pHd3zw1V9cioN2AtDHO2vm/fqcMyc8Mwg8okaS0Nc9fNNUvUF/AbA8rvAXatvGvjY5iz9ePHnzksA4PLJGkt+c3YIcy/iLrQ2fowd8dI0lprLujnj6UvNbY+bBvP1iVtVE0F/aieEzO/zdwXmTxbl7QRNRX0wz4DZrlt5r7IJEkbUVNBP+wXkpbbxrtlJG1kTQX9KJ8T4/i7pFakd3fkeJmYmKjZ2dllzzfooqokbQZJ7q+qiUF1zZzRD/pSk2EvSQ39KcGFvtQkSZtdM0E//49xeAFVknqaGbpZ6EtNkrTZNRP04BeYJGmQZoZuJEmDGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0by2fdJDkG/Pk6dmEb8K11XP+426z7Z7Nu90pt1v21Xtv9kqraPqhiLIN+vSWZXejhQNq8+2ezbvdKbdb9NY7b7dCNJDXOoJekxhn0g+1f7w6Muc26fzbrdq/UZt1fY7fdjtFLUuM8o5ekxhn0kt
S4DRH0Sc5L8tkkjyR5OMk/6cqfl+TOJF/p3s/uyt+W5MEkX0hyT5KLF1vOAuu8MsmXkjya5N0D6v8wyZOLzP+7SR6f3ybJryY5luSB7vXOle6XvmWOzf5JcjDJ1/q271ULzP/SJJ/r5v+PSbZ25a9P8mdJnk7y8w1u97XdvJVkW1/5VJK/6Jv/vYtt+0qM2f5K9zPy5ST/M8m7Fpi/teNk2O0e7XFSVWP/As4BLuk+Pwf4MnAh8H7g3V35u4F/3n1+HXB29/kq4HOLLWfA+rYAXwVeBmwFPt/fDpgA/h3w5CJ9/pvd+p6cV/6rwAda3T/AQeDnh+jzx4C3dp9vBH69+7wTuAi4eanlbNDtfnW3jV8HtvWVTwH/dbP8HAG/1v0b/0g3/YJNcpwMu90jPU5W7aBa5QP2PwN/G/gScE7fP8KXBrQ9Gziy2HIGlE8Cd/RNvwd4T98/4mcZEOILrGPVg37M9s/BIX7wQu+bg2cMWt6wy9lo2z1vWSP5Ad7Ax8n/AF6xCY+TJbd7NY6TDTF00y/JTnq/7T4HvLCqvtFVfRN44YBZ3gF8eonlzPdi4PG+6cNdGcC1wO19612J3d1/Df9TkvNOYznPMAb7B+B3u+37l0nOGjD/84HvVNXTC8y/bBtku5cymeTzST6d5KdWMP/QxmB/vRx4S5LZbnvPHzB/i8fJMNu9lGUfJxsq6JM8G7gV2FtV3+2vq96vu5rX/nJ6/1C/Nexyllj/Xwd+AfhXK9qAnv8C7Kyqi4A7gX97Gss6xXrvn857gAuAnwaeN3/Zq6GR7f4zes8quZje8fWJZc4/tDHZX2cB/696jwr4N8CBZc6/bI1s94qOkw0T9EnOpLdz/0NVfbwr/j9JzunqzwGO9rW/CLgJuLqqji+2nO4iy9zFjX8IHAH6z7TP7cpeDbwCeDTJ14G/1l0w2dI3/+8sth1Vdbyqvt9N3gS8ZkU7ZJ4x2T9U1Teq5/vAHwGXdsu4o5v/JuA48NwkZ8yfv/HtXlBVfbeqnuw+fwo4s/8i3KiMy/6id5Y7t/7b6I23N3+cDLndC1rxcbLcsZ71eNEbq7sZ2Dev/F9w6sWU93efdwCPAq8bZjkD1ncG8BjwUk5eTPmpAe1WMkZ/Tt/nNwP3trR/ODnmGWAfcMMCy/hjTr3I9o/m1R9kuLH+DbXdfcv6OqeOvb6Ik19gvBT4X3PTLf4cATcAb+8+TwH3bZLjZKjtHvVxMrKDaDVfwGX0/lv1IPBA93ojvTG8Q8BXgP8GPK9rfxPw7b62s4stZ4F1vpHeVfWvAr+9QJvF7rp5P73f3j/s3v9ZV3498HD3j/9Z4IKW9g/wp8AXgIeAfw88e4H5X0bvwtSj9H6Yz+rKf7rbX/+X3hndw41t97u67Xsa+N/ATV35tX3Hxb3MC5nWfo6A5wKf7PbZDHDxJjlOht3ukR4nPgJBkhq3YcboJUkrY9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxv1/nA+PdFjtcqMAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Explicit figure/axes, labelled so the chart stands alone; the trailing\n",
"# semicolon keeps the Legend repr out of the cell output.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(zillow_df[\"date\"], zillow_df[\"amount\"], \".\", color=\"blue\", label=\"zillow\")\n",
"ax.set(title=\"Zillow estimate over time\", xlabel=\"Date\", ylabel=\"Estimate\")\n",
"ax.legend();"
]
},
{
"cell_type": "markdown",
"id": "c4f17000-14c9-424e-becf-6a51e8392f5a",
"metadata": {},
"source": [
"# Redfin"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "516c8a93-60fe-4bc1-a30e-32e8de7a2d4a",
"metadata": {},
"outputs": [],
"source": [
"def get_redfin_price(file_name: str) -> float:\n",
"    \"\"\"Extract the Redfin estimate from a gzip-compressed page snapshot.\n",
"\n",
"    The estimate is the number that follows the first ``predictedValue`` key in\n",
"    the page text, up to the next closing brace. The page holds that JSON\n",
"    inside a string, so the key's closing quote is backslash-escaped.\n",
"\n",
"    Args:\n",
"        file_name: Path to the gzip-compressed page file.\n",
"\n",
"    Returns:\n",
"        The predicted value as a float.\n",
"\n",
"    Raises:\n",
"        ValueError: If the page has no ``predictedValue`` entry, or the text\n",
"            that follows it is not a number.\n",
"    \"\"\"\n",
"    # Decode the page as text rather than searching the repr of a bytes\n",
"    # object; undecodable bytes are replaced because only ASCII is matched.\n",
"    with gzip.open(file_name, \"rt\", encoding=\"utf-8\", errors=\"replace\") as fp:\n",
"        file_text = fp.read()\n",
"\n",
"    # One literal backslash sits between the key name and its closing quote.\n",
"    marker = 'predictedValue\\\\\":'\n",
"    _, found, remainder = file_text.partition(marker)\n",
"    if not found:\n",
"        raise ValueError(f\"no Redfin predictedValue found in {file_name!r}\")\n",
"\n",
"    return float(remainder.split(\"}\")[0])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "b4402eea-b253-482d-9850-b1e02d53b06c",
"metadata": {},
"outputs": [],
"source": [
"# Column-oriented records: one timestamp and one price per Redfin snapshot.\n",
"redfin_price_data = {\"date\": [], \"amount\": []}\n",
"\n",
"for path in pathlib.Path(DIRECTORY).iterdir():\n",
"    path_str = str(path)\n",
"\n",
"    # Ignore anything that is not a regular file with \"redfin_1381\" in its path.\n",
"    if not path.is_file() or \"redfin_1381\" not in path_str:\n",
"        continue\n",
"\n",
"    redfin_price_data[\"date\"].append(get_date_from_file(path_str))\n",
"    redfin_price_data[\"amount\"].append(get_redfin_price(path_str))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "023543c1-2cd8-4807-944c-ebb41e5f36c3",
"metadata": {},
"outputs": [],
"source": [
"# Build the Redfin frame in one chain: order by snapshot time, renumber the\n",
"# rows, make the price numeric and tag every row with its website.\n",
"redfin_df = (\n",
"    pd.DataFrame(redfin_price_data)\n",
"    .sort_values(\"date\")\n",
"    .reset_index(drop=True)\n",
"    .assign(\n",
"        amount=lambda frame: frame[\"amount\"].astype(\"float\"),\n",
"        website=\"redfin\",\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "8b0db34f-53b2-4d05-92f6-8e2668b35739",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" amount | \n",
" website | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2022-04-11 01:24:36 | \n",
" 1156960.86 | \n",
" redfin | \n",
"
\n",
" \n",
" 1 | \n",
" 2022-04-11 07:03:49 | \n",
" 1156960.86 | \n",
" redfin | \n",
"
\n",
" \n",
" 2 | \n",
" 2022-04-12 07:04:09 | \n",
" 1156960.86 | \n",
" redfin | \n",
"
\n",
" \n",
" 3 | \n",
" 2022-04-13 07:00:53 | \n",
" 1156960.86 | \n",
" redfin | \n",
"
\n",
" \n",
" 4 | \n",
" 2022-04-14 07:02:36 | \n",
" 1188334.45 | \n",
" redfin | \n",
"
\n",
" \n",
" 5 | \n",
" 2022-04-15 07:01:36 | \n",
" 1188334.45 | \n",
" redfin | \n",
"
\n",
" \n",
" 6 | \n",
" 2022-04-16 07:01:43 | \n",
" 1188334.45 | \n",
" redfin | \n",
"
\n",
" \n",
" 7 | \n",
" 2022-04-17 07:00:37 | \n",
" 1188334.45 | \n",
" redfin | \n",
"
\n",
" \n",
" 8 | \n",
" 2022-04-18 07:02:41 | \n",
" 1188334.45 | \n",
" redfin | \n",
"
\n",
" \n",
" 9 | \n",
" 2022-04-19 07:02:34 | \n",
" 1188334.45 | \n",
" redfin | \n",
"
\n",
" \n",
" 10 | \n",
" 2022-04-20 07:02:47 | \n",
" 1188334.45 | \n",
" redfin | \n",
"
\n",
" \n",
" 11 | \n",
" 2022-04-21 07:03:00 | \n",
" 1188334.45 | \n",
" redfin | \n",
"
\n",
" \n",
" 12 | \n",
" 2022-04-22 07:00:51 | \n",
" 1169268.70 | \n",
" redfin | \n",
"
\n",
" \n",
" 13 | \n",
" 2022-04-23 07:03:32 | \n",
" 1169268.70 | \n",
" redfin | \n",
"
\n",
" \n",
" 14 | \n",
" 2022-04-24 07:05:07 | \n",
" 1169268.70 | \n",
" redfin | \n",
"
\n",
" \n",
" 15 | \n",
" 2022-04-25 07:00:59 | \n",
" 1169268.70 | \n",
" redfin | \n",
"
\n",
" \n",
" 16 | \n",
" 2022-04-26 07:02:12 | \n",
" 1234551.86 | \n",
" redfin | \n",
"
\n",
" \n",
" 17 | \n",
" 2022-04-27 07:03:52 | \n",
" 1253357.70 | \n",
" redfin | \n",
"
\n",
" \n",
" 18 | \n",
" 2022-04-28 07:02:34 | \n",
" 1259087.09 | \n",
" redfin | \n",
"
\n",
" \n",
" 19 | \n",
" 2022-04-29 07:04:12 | \n",
" 1256868.63 | \n",
" redfin | \n",
"
\n",
" \n",
" 20 | \n",
" 2022-04-30 07:02:36 | \n",
" 1256868.63 | \n",
" redfin | \n",
"
\n",
" \n",
" 21 | \n",
" 2022-05-01 07:02:05 | \n",
" 1262774.23 | \n",
" redfin | \n",
"
\n",
" \n",
" 22 | \n",
" 2022-05-02 07:03:49 | \n",
" 1262993.79 | \n",
" redfin | \n",
"
\n",
" \n",
" 23 | \n",
" 2022-05-03 07:22:24 | \n",
" 1262296.79 | \n",
" redfin | \n",
"
\n",
" \n",
" 24 | \n",
" 2022-05-04 07:02:49 | \n",
" 1262292.24 | \n",
" redfin | \n",
"
\n",
" \n",
" 25 | \n",
" 2022-05-05 07:01:20 | \n",
" 1262936.89 | \n",
" redfin | \n",
"
\n",
" \n",
" 26 | \n",
" 2022-05-06 07:02:21 | \n",
" 1262936.89 | \n",
" redfin | \n",
"
\n",
" \n",
" 27 | \n",
" 2022-05-07 07:04:21 | \n",
" 1265774.12 | \n",
" redfin | \n",
"
\n",
" \n",
" 28 | \n",
" 2022-05-08 07:04:54 | \n",
" 1264471.54 | \n",
" redfin | \n",
"
\n",
" \n",
" 29 | \n",
" 2022-05-09 07:02:51 | \n",
" 1263528.52 | \n",
" redfin | \n",
"
\n",
" \n",
" 30 | \n",
" 2022-05-10 07:04:54 | \n",
" 1263528.52 | \n",
" redfin | \n",
"
\n",
" \n",
" 31 | \n",
" 2022-05-11 07:03:39 | \n",
" 1263528.52 | \n",
" redfin | \n",
"
\n",
" \n",
" 32 | \n",
" 2022-05-12 07:01:17 | \n",
" 1265244.53 | \n",
" redfin | \n",
"
\n",
" \n",
" 33 | \n",
" 2022-05-13 07:01:40 | \n",
" 1266800.85 | \n",
" redfin | \n",
"
\n",
" \n",
" 34 | \n",
" 2022-05-14 07:03:30 | \n",
" 1265878.84 | \n",
" redfin | \n",
"
\n",
" \n",
" 35 | \n",
" 2022-05-15 07:01:14 | \n",
" 1265878.84 | \n",
" redfin | \n",
"
\n",
" \n",
" 36 | \n",
" 2022-05-16 07:05:25 | \n",
" 1265878.84 | \n",
" redfin | \n",
"
\n",
" \n",
" 37 | \n",
" 2022-05-17 07:04:54 | \n",
" 1265878.84 | \n",
" redfin | \n",
"
\n",
" \n",
" 38 | \n",
" 2022-05-18 07:02:24 | \n",
" 1265878.84 | \n",
" redfin | \n",
"
\n",
" \n",
" 39 | \n",
" 2022-05-19 07:16:48 | \n",
" 1263741.10 | \n",
" redfin | \n",
"
\n",
" \n",
" 40 | \n",
" 2022-05-20 07:02:36 | \n",
" 1263713.04 | \n",
" redfin | \n",
"
\n",
" \n",
" 41 | \n",
" 2022-05-21 07:02:08 | \n",
" 1263713.04 | \n",
" redfin | \n",
"
\n",
" \n",
" 42 | \n",
" 2022-05-22 07:02:09 | \n",
" 1263297.58 | \n",
" redfin | \n",
"
\n",
" \n",
" 43 | \n",
" 2022-05-23 07:01:05 | \n",
" 1263615.05 | \n",
" redfin | \n",
"
\n",
" \n",
" 44 | \n",
" 2022-05-24 07:01:02 | \n",
" 1263455.00 | \n",
" redfin | \n",
"
\n",
" \n",
" 45 | \n",
" 2022-05-25 07:04:25 | \n",
" 1263455.00 | \n",
" redfin | \n",
"
\n",
" \n",
" 46 | \n",
" 2022-05-26 07:04:37 | \n",
" 1262117.12 | \n",
" redfin | \n",
"
\n",
" \n",
" 47 | \n",
" 2022-05-27 07:05:13 | \n",
" 1262117.12 | \n",
" redfin | \n",
"
\n",
" \n",
" 48 | \n",
" 2022-05-28 07:01:26 | \n",
" 1262117.12 | \n",
" redfin | \n",
"
\n",
" \n",
" 49 | \n",
" 2022-05-29 07:02:07 | \n",
" 1262117.12 | \n",
" redfin | \n",
"
\n",
" \n",
" 50 | \n",
" 2022-05-30 07:05:11 | \n",
" 1262117.12 | \n",
" redfin | \n",
"
\n",
" \n",
" 51 | \n",
" 2022-05-31 07:02:55 | \n",
" 1262117.12 | \n",
" redfin | \n",
"
\n",
" \n",
" 52 | \n",
" 2022-06-01 07:04:31 | \n",
" 1262117.12 | \n",
" redfin | \n",
"
\n",
" \n",
" 53 | \n",
" 2022-06-02 07:05:23 | \n",
" 1259206.48 | \n",
" redfin | \n",
"
\n",
" \n",
" 54 | \n",
" 2022-06-03 07:01:24 | \n",
" 1259206.48 | \n",
" redfin | \n",
"
\n",
" \n",
" 55 | \n",
" 2022-06-04 07:04:13 | \n",
" 1259206.48 | \n",
" redfin | \n",
"
\n",
" \n",
" 56 | \n",
" 2022-06-05 07:01:32 | \n",
" 1252787.11 | \n",
" redfin | \n",
"
\n",
" \n",
" 57 | \n",
" 2022-06-06 07:01:20 | \n",
" 1252787.11 | \n",
" redfin | \n",
"
\n",
" \n",
" 58 | \n",
" 2022-06-08 07:01:59 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 59 | \n",
" 2022-06-09 07:05:10 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 60 | \n",
" 2022-06-10 07:01:00 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 61 | \n",
" 2022-06-11 07:01:19 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 62 | \n",
" 2022-06-12 07:03:14 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 63 | \n",
" 2022-06-13 07:02:41 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 64 | \n",
" 2022-06-14 07:03:38 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 65 | \n",
" 2022-06-15 07:04:07 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 66 | \n",
" 2022-06-16 07:02:39 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 67 | \n",
" 2022-06-17 07:04:27 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
" 68 | \n",
" 2022-06-18 07:04:15 | \n",
" 1200096.93 | \n",
" redfin | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date amount website\n",
"0 2022-04-11 01:24:36 1156960.86 redfin\n",
"1 2022-04-11 07:03:49 1156960.86 redfin\n",
"2 2022-04-12 07:04:09 1156960.86 redfin\n",
"3 2022-04-13 07:00:53 1156960.86 redfin\n",
"4 2022-04-14 07:02:36 1188334.45 redfin\n",
"5 2022-04-15 07:01:36 1188334.45 redfin\n",
"6 2022-04-16 07:01:43 1188334.45 redfin\n",
"7 2022-04-17 07:00:37 1188334.45 redfin\n",
"8 2022-04-18 07:02:41 1188334.45 redfin\n",
"9 2022-04-19 07:02:34 1188334.45 redfin\n",
"10 2022-04-20 07:02:47 1188334.45 redfin\n",
"11 2022-04-21 07:03:00 1188334.45 redfin\n",
"12 2022-04-22 07:00:51 1169268.70 redfin\n",
"13 2022-04-23 07:03:32 1169268.70 redfin\n",
"14 2022-04-24 07:05:07 1169268.70 redfin\n",
"15 2022-04-25 07:00:59 1169268.70 redfin\n",
"16 2022-04-26 07:02:12 1234551.86 redfin\n",
"17 2022-04-27 07:03:52 1253357.70 redfin\n",
"18 2022-04-28 07:02:34 1259087.09 redfin\n",
"19 2022-04-29 07:04:12 1256868.63 redfin\n",
"20 2022-04-30 07:02:36 1256868.63 redfin\n",
"21 2022-05-01 07:02:05 1262774.23 redfin\n",
"22 2022-05-02 07:03:49 1262993.79 redfin\n",
"23 2022-05-03 07:22:24 1262296.79 redfin\n",
"24 2022-05-04 07:02:49 1262292.24 redfin\n",
"25 2022-05-05 07:01:20 1262936.89 redfin\n",
"26 2022-05-06 07:02:21 1262936.89 redfin\n",
"27 2022-05-07 07:04:21 1265774.12 redfin\n",
"28 2022-05-08 07:04:54 1264471.54 redfin\n",
"29 2022-05-09 07:02:51 1263528.52 redfin\n",
"30 2022-05-10 07:04:54 1263528.52 redfin\n",
"31 2022-05-11 07:03:39 1263528.52 redfin\n",
"32 2022-05-12 07:01:17 1265244.53 redfin\n",
"33 2022-05-13 07:01:40 1266800.85 redfin\n",
"34 2022-05-14 07:03:30 1265878.84 redfin\n",
"35 2022-05-15 07:01:14 1265878.84 redfin\n",
"36 2022-05-16 07:05:25 1265878.84 redfin\n",
"37 2022-05-17 07:04:54 1265878.84 redfin\n",
"38 2022-05-18 07:02:24 1265878.84 redfin\n",
"39 2022-05-19 07:16:48 1263741.10 redfin\n",
"40 2022-05-20 07:02:36 1263713.04 redfin\n",
"41 2022-05-21 07:02:08 1263713.04 redfin\n",
"42 2022-05-22 07:02:09 1263297.58 redfin\n",
"43 2022-05-23 07:01:05 1263615.05 redfin\n",
"44 2022-05-24 07:01:02 1263455.00 redfin\n",
"45 2022-05-25 07:04:25 1263455.00 redfin\n",
"46 2022-05-26 07:04:37 1262117.12 redfin\n",
"47 2022-05-27 07:05:13 1262117.12 redfin\n",
"48 2022-05-28 07:01:26 1262117.12 redfin\n",
"49 2022-05-29 07:02:07 1262117.12 redfin\n",
"50 2022-05-30 07:05:11 1262117.12 redfin\n",
"51 2022-05-31 07:02:55 1262117.12 redfin\n",
"52 2022-06-01 07:04:31 1262117.12 redfin\n",
"53 2022-06-02 07:05:23 1259206.48 redfin\n",
"54 2022-06-03 07:01:24 1259206.48 redfin\n",
"55 2022-06-04 07:04:13 1259206.48 redfin\n",
"56 2022-06-05 07:01:32 1252787.11 redfin\n",
"57 2022-06-06 07:01:20 1252787.11 redfin\n",
"58 2022-06-08 07:01:59 1200096.93 redfin\n",
"59 2022-06-09 07:05:10 1200096.93 redfin\n",
"60 2022-06-10 07:01:00 1200096.93 redfin\n",
"61 2022-06-11 07:01:19 1200096.93 redfin\n",
"62 2022-06-12 07:03:14 1200096.93 redfin\n",
"63 2022-06-13 07:02:41 1200096.93 redfin\n",
"64 2022-06-14 07:03:38 1200096.93 redfin\n",
"65 2022-06-15 07:04:07 1200096.93 redfin\n",
"66 2022-06-16 07:02:39 1200096.93 redfin\n",
"67 2022-06-17 07:04:27 1200096.93 redfin\n",
"68 2022-06-18 07:04:15 1200096.93 redfin"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"redfin_df"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "db93c5ea-e25a-48d4-b8b2-1bd7c150ecf1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Explicit figure/axes with a legend so the two series can be told apart; the\n",
"# trailing semicolon keeps the Legend repr out of the cell output.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(redfin_df[\"date\"], redfin_df[\"amount\"], \".\", color=\"red\", label=\"redfin\")\n",
"ax.plot(zillow_df[\"date\"], zillow_df[\"amount\"], \".\", color=\"blue\", label=\"zillow\")\n",
"ax.set(title=\"Estimates over time\", xlabel=\"Date\", ylabel=\"Estimate\")\n",
"ax.legend();"
]
},
{
"cell_type": "markdown",
"id": "afc6164e-8ed9-4e6d-8f8f-60544fc556bc",
"metadata": {},
"source": [
"# realtor.com"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "0f526dad-81b7-4ada-929b-3ec66a0b6686",
"metadata": {},
"outputs": [],
"source": [
"def get_realtorcom_price(file_name: str):\n",
"    \"\"\"Extract the current estimate from a gzip-compressed realtor.com snapshot.\n",
"\n",
"    Reads the JSON held in the ``script#__NEXT_DATA__`` element and takes the\n",
"    first entry of\n",
"    ``props.pageProps.initialPropertyDetails.home.estimates.current_values``.\n",
"\n",
"    Args:\n",
"        file_name: Path to the gzip-compressed HTML file.\n",
"\n",
"    Returns:\n",
"        ``None`` when the page carries no current estimate. Otherwise the\n",
"        6-tuple ``(high, mid, low, source, date, source)``: the ``estimate_high``,\n",
"        ``estimate`` and ``estimate_low`` values, the estimate's ``source.type``,\n",
"        its ``date`` value as stored in the page, and ``source.type`` again\n",
"        (kept because existing callers unpack six values).\n",
"    \"\"\"\n",
"    with gzip.open(file_name) as fp:\n",
"        soup = BeautifulSoup(fp, \"html.parser\")\n",
"\n",
"    item = soup.select_one(\"script#__NEXT_DATA__\").text\n",
"    page_data = json.loads(item)\n",
"\n",
"    try:\n",
"        home = page_data[\"props\"][\"pageProps\"][\"initialPropertyDetails\"][\"home\"]\n",
"        estimates = home[\"estimates\"][\"current_values\"][0]\n",
"    except (KeyError, IndexError, TypeError):\n",
"        # KeyError: a level is missing; TypeError: a level is null;\n",
"        # IndexError: current_values is an empty list.\n",
"        return None\n",
"\n",
"    high = estimates[\"estimate_high\"]\n",
"    mid = estimates[\"estimate\"]\n",
"    low = estimates[\"estimate_low\"]\n",
"    date = estimates[\"date\"]\n",
"    source = estimates[\"source\"][\"type\"]\n",
"\n",
"    return high, mid, low, source, date, source"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "f563c9a7-7fa5-40fe-8014-85d9267d3e25",
"metadata": {},
"outputs": [],
"source": [
"# Column-oriented records, one entry per realtor.com snapshot with an estimate.\n",
"realtorcom_price_data = {\n",
"    column: []\n",
"    for column in (\n",
"        \"date\",\n",
"        \"prediction_date\",\n",
"        \"amount\",\n",
"        \"amount_high\",\n",
"        \"amount_low\",\n",
"        \"source\",\n",
"    )\n",
"}\n",
"\n",
"for path in pathlib.Path(DIRECTORY).iterdir():\n",
"    path_str = str(path)\n",
"\n",
"    # Ignore anything that is not a regular file with \"realtorcom_1381\" in its path.\n",
"    if not path.is_file() or \"realtorcom_1381\" not in path_str:\n",
"        continue\n",
"\n",
"    try:\n",
"        high, mid, low, source, date, source = get_realtorcom_price(path_str)\n",
"    except TypeError:\n",
"        # get_realtorcom_price returned None, which cannot be unpacked.\n",
"        continue\n",
"\n",
"    model_date = datetime.datetime.strptime(date, \"%Y-%m-%d\")\n",
"    read_date = get_date_from_file(path_str)\n",
"\n",
"    row = {\n",
"        \"date\": read_date,\n",
"        \"prediction_date\": model_date,\n",
"        \"amount\": mid,\n",
"        \"amount_high\": high,\n",
"        \"amount_low\": low,\n",
"        \"source\": source,\n",
"    }\n",
"    for column, value in row.items():\n",
"        realtorcom_price_data[column].append(value)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "32cd5885-c58f-45b5-8015-2dd7d1f2692e",
"metadata": {},
"outputs": [],
"source": [
"realtor_df = (pd.DataFrame.from_dict(realtorcom_price_data)).drop_duplicates()\n",
"\n",
"start_date = datetime.date(2022, 4, 22)\n",
"end_date = datetime.date(2022, 5, 27)\n",
"\n",
"dates = []\n",
"for n in range(int((end_date - start_date).days)):\n",
" n_date = start_date + datetime.timedelta(n)\n",
" dates.append([n_date, n_date, None, None, None, None])\n",
"\n",
"append_df = pd.DataFrame(\n",
" columns=realtor_df.columns,\n",
" data=dates,\n",
")\n",
"\n",
"realtor_df = pd.concat([realtor_df, append_df])\n",
"\n",
"# Cast Columns as nullable int\n",
"cast_cols = [\"amount\", \"amount_high\", \"amount_low\"]\n",
"realtor_df[cast_cols] = realtor_df[cast_cols].astype(\"float\")\n",
"\n",
"date_cols = [\"date\", \"prediction_date\"]\n",
"realtor_df[date_cols] = realtor_df[date_cols].astype(\"datetime64[ns]\")\n",
"\n",
"realtor_df[\"error_low\"] = realtor_df[\"amount\"] - realtor_df[\"amount_low\"]\n",
"realtor_df[\"error_high\"] = realtor_df[\"amount_high\"] - realtor_df[\"amount\"]\n",
"\n",
"\n",
"realtor_df = realtor_df.sort_values(\"date\").reset_index(drop=True)\n",
"\n",
"realtor_df[\"website\"] = \"realtor.com\""
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "babff326-eeb7-4fd9-bf4e-485f59fb31d8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" prediction_date | \n",
" amount | \n",
" amount_high | \n",
" amount_low | \n",
" source | \n",
" error_low | \n",
" error_high | \n",
" website | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2022-04-11 01:24:40 | \n",
" 2022-03-31 | \n",
" 1104300.0 | \n",
" 1268500.0 | \n",
" 940200.0 | \n",
" corelogic | \n",
" 164100.0 | \n",
" 164200.0 | \n",
" realtor.com | \n",
"
\n",
" \n",
" 1 | \n",
" 2022-04-11 07:03:53 | \n",
" 2022-03-31 | \n",
" 1104300.0 | \n",
" 1268500.0 | \n",
" 940200.0 | \n",
" corelogic | \n",
" 164100.0 | \n",
" 164200.0 | \n",
" realtor.com | \n",
"
\n",
" \n",
" 2 | \n",
" 2022-04-12 07:04:14 | \n",
" 2022-03-31 | \n",
" 1104300.0 | \n",
" 1268500.0 | \n",
" 940200.0 | \n",
" corelogic | \n",
" 164100.0 | \n",
" 164200.0 | \n",
" realtor.com | \n",
"
\n",
" \n",
" 3 | \n",
" 2022-04-13 07:00:57 | \n",
" 2022-03-31 | \n",
" 1104300.0 | \n",
" 1268500.0 | \n",
" 940200.0 | \n",
" corelogic | \n",
" 164100.0 | \n",
" 164200.0 | \n",
" realtor.com | \n",
"
\n",
" \n",
" 4 | \n",
" 2022-04-14 07:02:41 | \n",
" 2022-03-31 | \n",
" 1104300.0 | \n",
" 1268500.0 | \n",
" 940200.0 | \n",
" corelogic | \n",
" 164100.0 | \n",
" 164200.0 | \n",
" realtor.com | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date prediction_date amount amount_high amount_low \\\n",
"0 2022-04-11 01:24:40 2022-03-31 1104300.0 1268500.0 940200.0 \n",
"1 2022-04-11 07:03:53 2022-03-31 1104300.0 1268500.0 940200.0 \n",
"2 2022-04-12 07:04:14 2022-03-31 1104300.0 1268500.0 940200.0 \n",
"3 2022-04-13 07:00:57 2022-03-31 1104300.0 1268500.0 940200.0 \n",
"4 2022-04-14 07:02:41 2022-03-31 1104300.0 1268500.0 940200.0 \n",
"\n",
" source error_low error_high website \n",
"0 corelogic 164100.0 164200.0 realtor.com \n",
"1 corelogic 164100.0 164200.0 realtor.com \n",
"2 corelogic 164100.0 164200.0 realtor.com \n",
"3 corelogic 164100.0 164200.0 realtor.com \n",
"4 corelogic 164100.0 164200.0 realtor.com "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"realtor_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "1904969d-79a8-40fd-8c78-8cb547ae3ce6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Explicit figure/axes with a legend so the three series can be told apart;\n",
"# the trailing semicolon keeps the Legend repr out of the cell output.\n",
"fig, ax = plt.subplots()\n",
"ax.plot(redfin_df[\"date\"], redfin_df[\"amount\"], \".\", color=\"red\", label=\"redfin\")\n",
"ax.plot(zillow_df[\"date\"], zillow_df[\"amount\"], \".\", color=\"blue\", label=\"zillow\")\n",
"ax.plot(\n",
"    realtor_df[\"date\"],\n",
"    realtor_df[\"amount\"],\n",
"    \".\",\n",
"    color=\"purple\",\n",
"    label=\"realtor.com\",\n",
")\n",
"ax.set(title=\"Estimates over time\", xlabel=\"Date\", ylabel=\"Estimate\")\n",
"ax.legend();"
]
},
{
"cell_type": "markdown",
"id": "1d89832e-ec8a-478c-8b1d-d257b1c5041a",
"metadata": {},
"source": [
"# Combine"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "563c69be-a10f-499a-b31b-d7af998cbc9c",
"metadata": {},
"outputs": [],
"source": [
"combined_df = pd.concat(\n",
" [\n",
" realtor_df,\n",
" redfin_df,\n",
" zillow_df,\n",
" ]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "c6984926-805e-4ecd-9b3a-28ddcdb60f25",
"metadata": {},
"outputs": [],
"source": [
"combined_df.to_json(\"./home_price_estimate_20220701.json\", orient=\"table\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}