Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | DOMAIN_OVERBOUNDS | 2 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | PREEMPTIVE_DEFAULT | "false" |
public static final String | PREEMPTIVE_PROPERTY | "httpclient.authentication.preemptive" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | PATH_DELIM | "/" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | VALIDITY_STAMP_FILENAME | "valid" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | LOGS_DIR_NAME | "logs subdirectory" |
public static final String | REPORTS_DIR_NAME | "reports subdirectory" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | F_ADD | "F+ " |
public static final String | F_DISREGARD | "Fd " |
public static final String | F_EMIT | "Fe " |
public static final String | F_FAILURE | "Ff " |
public static final String | F_INCLUDE | "Fi " |
public static final String | F_REENQUEUED | "Fr " |
public static final String | F_SUCCESS | "Fs " |
public static final String | LOGNAME_RECOVER | "frontier.recover.gz" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final int | SEEDS_REDIRECT_NEW_SEEDS_MAX_HOPS | 5 |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final int | GROUP | 2 |
protected static final int | HOST | 1 |
protected static final int | RESPONSE_KB | 3 |
protected static final int | RESPONSES | 2 |
protected static final int | SERVER | 0 |
protected static final int | SUCCESS_KB | 1 |
protected static final int | SUCCESSES | 0 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final char | MANIFEST_CONFIG_FILE | 67 |
public static final char | MANIFEST_LOG_FILE | 76 |
public static final char | MANIFEST_REPORT_FILE | 82 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | SERIALIZED_CLASS_SUFFIX | ".serialized" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | DUPLICATE | "dupByHash" |
public static final String | DUPLICATECOUNT | "dupByHashCount" |
public static final String | NOTMODIFIED | "notModified" |
public static final String | NOTMODIFIEDCOUNT | "notModifiedCount" |
public static final String | NOVEL | "novel" |
public static final String | NOVELCOUNT | "novelCount" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_MAX_PENDING | 10000 |
public static final long | FLUSH_DELAY_FACTOR | 100L |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | LOG_ERROR | "E " |
public static final String | LOG_TIMESTAMP | "T " |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_ANNOTATIONS | "annotations" |
public static final String | A_CONTENT_TYPE | "content-type" |
public static final String | A_CREDENTIALS_KEY | "credentials" |
public static final String | A_DELAY_FACTOR | "delay-factor" |
public static final String | A_DISTANCE_FROM_SEED | "distance-from-seed" |
public static final String | A_DNS_FETCH_TIME | "dns-fetch-time" |
public static final String | A_DNS_SERVER_IP_LABEL | "dns-server-ip" |
public static final String | A_FETCH_BEGAN_TIME | "fetch-began-time" |
public static final String | A_FETCH_COMPLETED_TIME | "fetch-completed-time" |
public static final String | A_FORCE_RETIRE | "force-retire" |
public static final String | A_FTP_CONTROL_CONVERSATION | "ftp-control-conversation" |
public static final String | A_FTP_FETCH_STATUS | "ftp-fetch-status" |
public static final String | A_HERITABLE_KEYS | "heritable" |
public static final String | A_HTML_BASE | "html-base-href" |
public static final String | A_HTTP_AUTH_CHALLENGES | "http-auth-challenges" |
public static final String | A_HTTP_PROXY_HOST | "http-proxy-host" |
public static final String | A_HTTP_PROXY_PORT | "http-proxy-port" |
public static final String | A_MINIMUM_DELAY | "minimum-delay" |
public static final String | A_MIRROR_PATH | "mirror-path" |
public static final String | A_NONFATAL_ERRORS | "nonfatal-errors" |
public static final String | A_PRECALC_PRECEDENCE | "precalc-precedence" |
public static final String | A_PREREQUISITE_URI | "prerequisite-uri" |
public static final String | A_RETRY_DELAY | "retry-delay" |
public static final String | A_RRECORD_SET_LABEL | "dns-records" |
public static final String | A_RUNTIME_EXCEPTION | "runtime-exception" |
public static final String | A_SOURCE_TAG | "source" |
public static final String | A_SUBMIT_DATA | "submit-data" |
public static final String | A_WARC_RESPONSE_HEADERS | "warc-response-headers" |
public static final String | A_WHOIS_SERVER_IP | "whois-server-ip" |
public static final String | HEADER_TRUNC | "headerTrunc" |
public static final String | LENGTH_TRUNC | "lenTrunc" |
public static final String | TIMER_TRUNC | "timeTrunc" |
public static final String | TRUNC_SUFFIX | "Trunc" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | UNCALCULATED | -1 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | HIGH | 1 |
public static final int | HIGHEST | 0 |
public static final int | MEDIUM | 2 |
public static final int | NORMAL | 3 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | HEADER_PREDICTS_MISSING | -1 |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String |
CSS_BACKSLASH_ESCAPE |
"\\\\([,\'\"\\(\\)\\s])" |
protected static final String |
CSS_URI_EXTRACTOR |
"(?i)(?:@import (?:url[(]|)|url[(])\\s*([\\\"\']?)([^\\\"\'].{0,2083}?)\\1\\s*[);]" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_FORM_OFFSETS | "form-offsets" |
public static final String | A_META_ROBOTS | "meta-robots" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String | JAVASCRIPT_STRING_EXTRACTOR | "(\\\\{0,8}+[\'\"])([^\\s\'\"]{1,2083})(?:\\1)" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String | JSSTRING | "javascript:" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String | ABS_HTTP_URI_PATTERN | "^https?://[^\\s<>]*$" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | HEADER_TRUNC | "headerTrunc" |
public static final String | LENGTH_TRUNC | "lenTrunc" |
public static final String | TIMER_TRUNC | "timeTrunc" |
public static final String | TRUNC_SUFFIX | "Trunc" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | HTTP_BIND_ADDRESS | "httpBindAddress" |
public static final String | HTTP_SCHEME | "http" |
public static final String | HTTPS_SCHEME | "https" |
public static final String | RANGE | "Range" |
public static final String | RANGE_PREFIX | "bytes=0-" |
public static final String | REFERER | "Referer" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | S_BLOCKED_BY_CUSTOM_PROCESSOR | -5002 |
public static final int | S_BLOCKED_BY_QUOTA | -5003 |
public static final int | S_BLOCKED_BY_RUNTIME_LIMIT | -5004 |
public static final int | S_BLOCKED_BY_USER | -5001 |
public static final int | S_CONNECT_FAILED | -2 |
public static final int | S_CONNECT_LOST | -3 |
public static final int | S_DEEMED_CHAFF | -4000 |
public static final int | S_DEEMED_NOT_FOUND | -404 |
public static final int | S_DEFERRED | -50 |
public static final int | S_DELETED_BY_USER | -6000 |
public static final int | S_DNS_SUCCESS | 1 |
public static final int | S_DOMAIN_PREREQUISITE_FAILURE | -6 |
public static final int | S_DOMAIN_UNRESOLVABLE | -1 |
public static final int | S_GETBYNAME_SUCCESS | 1001 |
public static final int | S_NOT_FOUND | 404 |
public static final int | S_OTHER_PREREQUISITE_FAILURE | -62 |
public static final int | S_OUT_OF_SCOPE | -5000 |
public static final int | S_PREREQUISITE_UNSCHEDULABLE_FAILURE | -63 |
public static final int | S_PROCESSING_THREAD_KILLED | -7000 |
public static final int | S_ROBOTS_PRECLUDED | -9998 |
public static final int | S_ROBOTS_PREREQUISITE_FAILURE | -61 |
public static final int | S_RUNTIME_EXCEPTION | -5 |
public static final int | S_SERIOUS_ERROR | -3000 |
public static final int | S_TIMEOUT | -4 |
public static final int | S_TOO_MANY_EMBED_HOPS | -4002 |
public static final int | S_TOO_MANY_LINK_HOPS | -4001 |
public static final int | S_TOO_MANY_RETRIES | -8 |
public static final int | S_UNATTEMPTED | 0 |
public static final int | S_UNFETCHABLE_URI | -7 |
public static final int | S_UNQUEUEABLE | -60 |
public static final int | S_WHOIS_GENERIC_FINISHED | 2002 |
public static final int | S_WHOIS_SUCCESS | 2001 |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String | DEFAULT_IP_WHOIS_SERVER | "whois.arin.net" |
public static final String | IP_ADDRESS_REGEX | "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}" |
protected static final String | ULTRA_SUFFIX_WHOIS_SERVER | "whois.iana.org" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_HTML_FORM_OBJECTS | "html-form-objects" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final long | IP_NEVER_EXPIRES | -1L |
public static final long | IP_NEVER_LOOKED_UP | -2L |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final long | MIN_ROBOTS_RETRIES | 3L |
public static final long | ROBOTS_NOT_FETCHED | -1L |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final long | MAX_SIZE | 512000L |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | URI_HISTORY_DBNAME | "uri_history" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_CONTENT_DIGEST | "content-digest" |
public static final String | A_CONTENT_DIGEST_COUNT | "content-digest-count" |
public static final String | A_CONTENT_DIGEST_HISTORY | "content-digest-history" |
public static final String | A_ETAG_HEADER | "etag" |
public static final String | A_FETCH_HISTORY | "fetch-history" |
public static final String | A_LAST_MODIFIED_HEADER | "last-modified" |
public static final String | A_ORIGINAL_DATE | "content-written-date" |
public static final String | A_ORIGINAL_URL | "original-url" |
public static final String | A_REFERENCE_LENGTH | "reference-length" |
public static final String | A_STATUS | "status" |
public static final String | A_WARC_FILE_OFFSET | "warc-file-offset" |
public static final String | A_WARC_FILENAME | "warc-filename" |
public static final String | A_WARC_RECORD_ID | "warc-record-id" |
public static final String | A_WRITE_TAG | "write-tag" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | ARCHIVE_TIME_KEY | "HTTP-Archive-Time" |
public static final String | COLLECTION_KEY | "HTTP-Collection" |
public static final String | CONTENT_LENGTH_KEY | "HTTP-Content-Length" |
public static final String | CONTENT_MD5_KEY | "HTTP-Content-MD5" |
public static final String | CONTENT_TYPE_KEY | "Content-Type" |
public static final String | HARVESTER_KEY | "HTTP-Harvester" |
public static final String | HEADER_LENGTH_KEY | "HTTP-Header-Length" |
public static final String | HEADER_MD5_KEY | "HTTP-Header-MD5" |
public static final String | IP_ADDRESS_KEY | "HTTP-IP-Address" |
public static final String | STATUS_CODE_KEY | "HTTP-Status-Code" |
public static final String | URL_KEY | "HTTP-URL" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | ATTR_MAX_BYTES_WRITTEN | "total-bytes-to-write" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | A_MIRROR_PATH | "mirror-path" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String | ANNOTATION_UNWRITTEN | "unwritten" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final long | ADDRESS_BITS_PER_UNIT | 6L |
protected static final long | BIT_INDEX_MASK | 63L |
protected static final boolean | DEBUG | false |
protected static final int | NUMBER_OF_WEIGHTS | 2083 |
protected static final int | SUBARRAY_LENGTH_IN_LONGS | 67108864 |
protected static final int | SUBARRAY_MASK | 67108863 |
protected static final int | SUBARRAY_POWER_OF_TWO | 26 |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final int | DEFAULT_REPLICAS | 128 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final String | TEST_TMP_SYSTEM_PROPERTY_NAME | "testtmpdir" |
Modifier and Type | Constant Field | Value |
---|---|---|
protected static final String |
LIKELY_URI_PATH |
"(\\.{0,2}[^\\.\\n\\r\\s\"\']*(\\.[^\\.\\n\\r\\s\"\']+)+)" |
protected static final String |
NAIVE_LIKELY_URI_PATTERN |
"[^<>\\s]*[\\./][^<>\\s]*" |
protected static final String |
QNV |
"[a-zA-Z_]+=(?:[\\w-/.]|%[0-9a-fA-F]{2})+" |
protected static final String |
STRING_URI_DETECTOR |
"(?:\\w|[\\.]{0,2}/)[\\S&&[^<>]]*(?:\\.|/)[\\S&&[^<>]]*(?:\\w|/)" |
protected static final String |
VERY_LIKELY_RELATIVE_URI_PATTERN |
"(?:\\.?/)?(?:(?:[\\w-]+|\\.\\.)/)*(?:[\\w-]+(?:\\.[a-zA-Z0-9]{2,5})?)?(?:\\?(?:[a-zA-Z_]+=(?:[\\w-/.]|%[0-9a-fA-F]{2})+)(?:&(?:[a-zA-Z_]+=(?:[\\w-/.]|%[0-9a-fA-F]{2})+))*)?(?:#[\\w-]+)?" |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | DEFAULT_CAPACITY | 1048576 |
public static final int | DEFAULT_SMEAR | 5 |
Modifier and Type | Constant Field | Value |
---|---|---|
public static final int | BLOCK_SIZE | 512 |
Copyright © 2003-2014 Internet Archive. All Rights Reserved.