-
Notifications
You must be signed in to change notification settings - Fork 42
/
INPUT_SCHEMA.json
167 lines (167 loc) · 6.64 KB
/
INPUT_SCHEMA.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
{
"title": "Zillow.com scraper input schema",
"type": "object",
"schemaVersion": 1,
"properties": {
"search": {
"title": "Search",
"type": "string",
"editor": "textarea",
"prefill": "Los Angeles",
"description": "Query to search on Zillow.com"
},
"type": {
"title": "Type",
"type": "string",
"description": "Listing type to scrape: for sale, for sale by owner, for rent, sold, or all",
"default": "all",
"prefill": "all",
"enum": [
"all",
"sale",
"fsbo",
"rent",
"sold"
],
"enumTitles": [
"All",
"For sale",
"For sale by owner",
"For rent",
"Sold"
]
},
"maxItems": {
"title": "Max items",
"type": "integer",
"minimum": 0,
"prefill": 200,
"default": 200,
"description": "Maximum number of home detail pages to scrape"
},
"simple": {
"title": "Simple results",
"type": "boolean",
"description": "Return only a subset of possible result attributes.",
"default": true
},
"startUrls": {
"title": "Start URLs",
"type": "array",
"editor": "requestListSources",
"description": "List of URLs that will be crawled.",
"sectionCaption": "Start with URL or ZPID",
"sectionDescription": "You can use a direct URL or ZPID instead of a simple search."
},
"zpids": {
"title": "Zillow home IDs",
"type": "array",
"editor": "json",
"description": "List of ZPIDs that will be scraped."
},
"zipcodes": {
"title": "US Zipcodes",
"type": "array",
"editor": "json",
"description": "List of zipcodes that will be scraped. Note that this can take a long time to complete!"
},
"maxLevel": {
"title": "Max zoom level",
"type": "integer",
"minimum": 0,
"prefill": 1,
"default": 1,
"description": "Maximum map splitting level. The higher the number, the longer the scrape will take",
"sectionCaption": "Other filters (zoom level, dates)"
},
"minDate": {
"title": "Minimum date",
"type": "string",
"editor": "textfield",
"pattern": "^(\\d{4}-\\d{2}-\\d{2}|(\\d+)\\s?(month|week|day|year|hour|minute)s?|(to|yester)day)$",
"description": "Oldest date allowed in results. Accepts a literal date in YYYY-MM-DD format or a relative one, such as 3 hours, 1 month, 2 days, today or yesterday"
},
"maxDate": {
"title": "Maximum date",
"type": "string",
"editor": "textfield",
"pattern": "^(\\d{4}-\\d{2}-\\d{2}|(\\d+)\\s?(month|week|day|year|hour|minute)s?|(to|yester)day)$",
"description": "Newest date allowed in results. Accepts a literal date in YYYY-MM-DD format or a relative one, such as 3 hours, 1 month, 2 days, today or yesterday"
},
"includeRelaxedResults": {
"title": "Include more results",
"description": "Enabling this option may include results that are close to, but outside of, the original city / county / state.",
"default": true,
"type": "boolean",
"editor": "checkbox"
},
"proxyConfiguration": {
"title": "Proxy configuration",
"type": "object",
"editor": "proxy",
"description": "The best option is usually Automatic proxy. But you can also use your own proxies or no proxy",
"default": {
"useApifyProxy": true
},
"prefill": {
"useApifyProxy": true
},
"sectionCaption": "Proxy and browser configuration"
},
"maxRetries": {
"title": "Max retries",
"description": "How many retries until the scraper should give up",
"type": "integer",
"editor": "number",
"prefill": 6,
"default": 6
},
"handlePageTimeoutSecs": {
"title": "Timeout seconds",
"description": "Timeout in seconds for processing a single page (the crawler's handlePageTimeoutSecs setting)",
"default": 3600,
"prefill": 3600,
"type": "integer",
"editor": "number"
},
"debugLog": {
"title": "Debug log",
"description": "Enable debug log",
"default": false,
"type": "boolean",
"editor": "checkbox"
},
"extendOutputFunction": {
"title": "Extend output function",
"type": "string",
"editor": "javascript",
"description": "Extend the output item to contain more fields. The raw data is present in the 'data' variable.",
"prefill": "async ({ data, item, customData, Apify }) => {\n return item;\n}",
"sectionCaption": "Extend scraper functionality",
"sectionDescription": "You can change the output of the items for your dataset here, or add additional behavior on the scraper."
},
"extendScraperFunction": {
"title": "Extend scraper function",
"description": "Advanced function that extends the default scraper functionality, letting you manually perform actions on the page",
"type": "string",
"default": "",
"prefill": "async ({ label, page, request, customData, Apify }) => {\n if (label === 'SETUP') {\n // before crawler.run()\n } else if (label === 'GOTO') {\n // inside handleGotoFunction\n } else if (label === 'HANDLE') {\n // inside handlePageFunction\n } else if (label === 'FINISH') {\n // after crawler.run()\n }\n}",
"editor": "javascript"
},
"customData": {
"title": "Custom data",
"description": "Any data that you want to have available inside the Extend Output/Scraper Function",
"default": {},
"prefill": {},
"type": "object",
"editor": "json"
},
"rawOutput": {
"title": "Raw output",
"description": "Output the results without any filtering or formatting. It's recommended to leave this option unchecked.",
"default": false,
"type": "boolean",
"editor": "checkbox"
}
}
}