String cookieComment
String cookieDomain
Date cookieExpiryDate
Date.
String cookiePath
boolean isSecure
boolean hasPathAttribute
boolean hasDomainAttribute
int cookieVersion
String location
private void readObject(ObjectInputStream arg0) throws ClassNotFoundException, IOException
ClassNotFoundException
IOException
private void writeObject(ObjectOutputStream arg0) throws IOException
IOException
int hash
char[] _uri
String protocolCharset
char[] _scheme
char[] _opaque
char[] _authority
char[] _userinfo
char[] _host
int _port
char[] _path
char[] _query
char[] _fragment
boolean _is_hier_part
boolean _is_opaque_part
boolean _is_net_path
boolean _is_abs_path
boolean _is_rel_path
boolean _is_reg_name
boolean _is_server
boolean _is_hostname
boolean _is_IPv4address
boolean _is_IPv6reference
int reasonCode
String reason
int reasonCode
String reason
String cookieCommentURL
int[] cookiePorts
boolean discard
boolean hasPortAttribute
boolean isPortAttributeBlank
boolean hasVersionAttribute
CrawlController controller
CrawlController.State state
String message
CrawlURI curi
CrawlURIDispositionEvent.Disposition disposition
CrawlStatSnapshot snapshot
Checkpoint checkpoint
private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException
IOException
ClassNotFoundException
org.springframework.context.support.AbstractApplicationContext appCtx
CrawlMetadata metadata
ServerCache serverCache
Frontier frontier
ConfigPath scratchDir
StatisticsTracker statisticsTracker
SeedModule seeds
FetchChain fetchChain
DispositionChain dispositionChain
CandidateChain candidateChain
int maxToeThreads
boolean runWhileEmpty
boolean pauseAtStart
int recorderOutBufferBytes
int recorderInBufferBytes
CrawlerLoggerModule loggerModule
boolean isRunning
boolean hasStarted
boolean isStopComplete
Checkpoint recoveryCheckpoint
ServerCache serverCache
ServerCache serverCache
KeyedProperties kp
LongToIntConsistentHash conhash
String classKey
boolean active
long count
long enqueueCount
boolean isManaged
long wakeTime
PrecedenceProvider precedenceProvider
int sessionBudget
int lastCost
long costCount
long totalExpenditure
long expenditureAtLastActivation
long totalBudget
String lastQueued
String lastPeeked
long lastDequeueTime
long errorCount
FetchStats substats
boolean retired
KeyedProperties kp
KeyedProperties kp
Histotable<K> enqueuedCounts
BdbModule bdb
com.sleepycat.collections.StoredSortedMap<K,V> store
com.sleepycat.je.Database historyDb
CrawlJob crawlJob
boolean createdEnvironment
long lastCacheMiss
long lastCacheMissDiff
AtomicLong count
long aggregatedLookupTime
BdbModule bdb
String beanName
boolean isRunning
Checkpoint recoveryCheckpoint
BloomFilter bloom
LongFPSet fpset
UriUniqFilter.CrawlUriReceiver receiver
PrintWriter profileLog
long duplicateCount
long duplicatesAtLastSample
int maxsize
ConcurrentMap<K,V> set
long smallestKnownValue
String smallestKnownKey
long largestKnownValue
String largestKnownKey
IOException decoratedIOException
KeyedProperties kp
Map<K,V> availableRobotsPolicies
String operator
String description
String audience
String organization
String jobName
private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException
IOException
ClassNotFoundException
private void writeObject(ObjectOutputStream stream) throws IOException
IOException
UURI uuri
boolean isSeed
String pathFromSeed
P precondition;
R redirection;
E embedded (as frame, src, link, codebase, etc.);
X speculative embed (as from javascript, some alternate-format extractors);
L link.
For example, LLLE (an embedded image on a page 3 links from seed).
UURI via
LinkContext viaContext
int schedulingDirective
String classKey
int precedence
int fetchStatus
int deferrals
int fetchAttempts
String userAgent
long contentSize
long contentLength
Map<K,V> data
The attribute list is a flexible map of key/value pairs for storing
status of this URI for use by other processors. By convention the
attribute list is keyed by constants found in the
CoreAttributeConstants
interface. Use this list to carry
data or state produced by custom processors rather than change the
classes CrawlURI
or this class, CrawlURI.
boolean forceRevisit
String contentType
boolean prerequisite
PreconditionEnforcer.
This flag is used at least inside the precondition enforcer so that
subsequent prerequisite tests know to let this CrawlURI through because
it's a prerequisite needed by an earlier prerequisite test (e.g. if
this is a robots.txt, then the subsequent login credentials prereq
test must not throw it out because it's not a login curi).
CrawlURI.FetchType fetchType
long ordinal
byte[] contentDigest
String contentDigestScheme
int holderCost
String canonicalString
long politenessDelay
long rescheduleTime
org.json.JSONObject extraInfo
KeyedProperties kp
String domain
KeyedProperties kp
KeyedProperties kp
String comment
SimpleFileLoggerProvider loggerModule
String beanName
boolean isRunning
ExternalGeoLookupInterface lookup
List<E> countryCodes
ServerCache serverCache
String engineName
org.archive.io.ReadSource scriptSource
boolean isolateThreads
org.springframework.context.ApplicationContext appCtx
org.archive.io.ReadSource surtsSource
boolean seedsAsSurtPrefixes
ConfigFile surtsDumpFile
SeedModule seeds
org.archive.util.SurtPrefixSet surtPrefixes
String beanName
Checkpoint recoveryCheckpoint
String path
CharSequence source
CharSequence destination
LinkContext context
Hop hop
Map<K,V> data
See further Link.getData()
String desc
ObjectIdentityCache<V extends IdentityCacheable> servers
ObjectIdentityCache<V extends IdentityCacheable> hosts
long totalScheduled
long fetchSuccesses
long fetchFailures
long fetchDisregards
long fetchResponses
long robotsDenials
long successBytes
long totalBytes
long fetchNonResponses
long novelBytes
long novelUrls
long notModifiedBytes
long notModifiedUrls
long dupByHashBytes
long dupByHashUrls
long lastSuccessTime
BdbModule bdb
boolean isRunning
boolean isCheckpointRecovery
String hostname
String countryCode
InetAddress ip
long ipFetched
FetchStats substats
long ipTTL
TTL: a 32-bit unsigned integer that specifies the time interval (in seconds) that the resource record may be cached before it should be discarded. Zero values are interpreted to mean that the RR can only be used for the transaction in progress, and should not be cached.
long earliestNextURIEmitTime
String server
int port
Robotstxt robotstxt
long robotsFetched
boolean validRobots
FetchStats substats
int consecutiveConnectionErrors
ConcurrentSkipListSet<E> disallows
ConcurrentSkipListSet<E> allows
float crawlDelay
LinkedList<E> namedUserAgents
Map<K,V> agentsToDirectives
RobotsDirectives wildcardDirectives
boolean hasErrors
boolean sourceTagSeeds
Set<E> seedListeners
org.archive.io.ReadSource textSource
int blockAwaitingSeedLines
private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException
IOException
ClassNotFoundException
private void writeObject(ObjectOutputStream stream) throws IOException
IOException
String name
String path
ConfigPath base
ConfigPathConfigurer configurer
int capacityPowerOfTwo
float loadFactor
long count
long m
int power
long expectedInserts
int d
long[][] bits
long[][] weight
int size
AtomicLong count
AtomicLong cacheHit
AtomicLong countOfGets
Count of times the ObjectIdentityBdbCache.get(java.lang.String)
method was called.
AtomicLong diskHit
AtomicLong supplierUsed
AtomicLong expungeStatsDiskPut
AtomicLong count
AtomicLong cacheHit
AtomicLong countOfGets
Count of times the ObjectIdentityBdbManualCache.get(java.lang.String)
method was called.
AtomicLong diskHit
AtomicLong supplierUsed
AtomicLong evictions
Copyright © 2003-2014 Internet Archive. All Rights Reserved.