An accessor to an inverted file. More...
#include <CAcInvertedFile.h>
Public Member Functions | |
virtual bool | operator() () const =0 |
for testing if the inverted file is correctly constructed | |
virtual string | IDToURL (TID inID) const =0 |
<HIER-WIRDS-INTERESSANT-> | |
virtual pair< bool, TID > | URLToID (const string &inURL) const =0 |
Translate a URL to its document ID. | |
virtual list< TID > * | getAllFeatureIDs () const =0 |
Getting a list of all features contained in this. | |
bool | operator() () const |
for testing if the inverted file is correctly constructed | |
CAcInvertedFile (const CXMLElement &inCollectionElement) | |
This opens an existing inverted file, and then initializes this structure. | |
bool | init (bool) |
called by constructors | |
~CAcInvertedFile () | |
Destructor. | |
string | IDToURL (TID inID) const |
Translate a DocumentID to a URL (for output) | |
TID | URLToID (const string &inURL) const |
Translate a URL to its document ID. | |
TID | getMaximumFeatureID () const |
This is interesting for browsing. | |
list< TID > * | getAllFeatureIDs () const |
Getting a list of all features contained in this. | |
The proper inverted file access | |
virtual CDocumentFrequencyList * | FeatureToList (TFeatureID inFID) const =0 |
Give the List of documents containing the feature inFID. | |
virtual CDocumentFrequencyList * | URLToFeatureList (string inURL) const =0 |
List of features contained by a document with URL inURL. | |
virtual CDocumentFrequencyList * | DIDToFeatureList (TID inDID) const =0 |
List of features contained by a document with ID inDID. | |
CDocumentFrequencyList * | FeatureToList (TFeatureID) const |
List of documents containing the feature. | |
CDocumentFrequencyList * | URLToFeatureList (string inURL) const |
List of features contained by a document. | |
CDocumentFrequencyList * | DIDToFeatureList (TID inDID) const |
List of features contained by a document with ID inDID. | |
Accessing information about features | |
virtual double | FeatureToCollectionFrequency (TFeatureID) const =0 |
Collection frequency for a given feature. | |
virtual unsigned int | getFeatureDescription (TID inFeatureID) const =0 |
What kind of feature is the feature with ID inFeatureID? | |
double | FeatureToCollectionFrequency (TFeatureID) const |
Collection frequency for a given feature. | |
unsigned int | getFeatureDescription (TID inFeatureID) const |
What kind of feature is the feature with ID inFeatureID? | |
Accessing additional document information | |
virtual double | DIDToMaxDocumentFrequency (TID) const =0 |
returns the maximum document frequency for one document ID | |
virtual double | DIDToDFSquareSum (TID) const =0 |
Returns the document-frequency square sum for a given document ID. | |
virtual double | DIDToSquareDFLogICFSum (TID) const =0 |
Returns this function for a given document ID. | |
double | DIDToMaxDocumentFrequency (TID) const |
returns the maximum document frequency for one document ID | |
double | DIDToDFSquareSum (TID) const |
Returns the document-frequency square sum for a given document ID. | |
double | DIDToSquareDFLogICFSum (TID) const |
Returns this function for a given document ID. | |
virtual bool | generateInvertedFile ()=0 |
Generating an inverted File, if there is none. | |
virtual bool | checkConsistency ()=0 |
Check the consistency of the inverted file system accessed by this accessor. | |
bool | generateInvertedFile () |
Generating an inverted File, if there is none. | |
bool | newGenerateInvertedFile () |
Generating an inverted File, if there is none. | |
bool | checkConsistency () |
Check the consistency of the inverted file system accessed by this accessor. | |
bool | findWithinStream (TID inFeatureID, TID inDocumentID, double inDocumentFrequency) const |
Is the Document with inDocumentID contained in the document frequency list of the feature inFeatureID and is the associated document frequency the same? | |
![]() | |
const string & | getURLToFeatureFileName () const |
gives back the content of mURLToFeatureFileName | |
CAcURL2FTS (const CXMLElement &inContentElement) | |
Constructor: slurp in an url2fts file and fill the maps. | |
virtual | operator bool () const |
Is this accessor up and working? | |
virtual int | size () const |
Give the number of elements stored in this accessor. | |
pair< bool, string > | URLToFFN (const string &inURL) const |
Gives the feature file name which corresponds to a given URL. Return value: pair of bool (does the feature file exist?) and string (the feature file name). | |
pair< bool, string > | IDToFFN (TID inID) const |
Gives the feature file name which corresponds to a given document ID. Return value: pair of bool (does the feature file exist?) and string (the feature file name). | |
![]() | |
virtual pair< bool, CAccessorElement > | IDToAccessorElement (TID inID) const |
Translate a DocumentID to an accessor Element. | |
void | getAllIDs (list< TID > &) const |
This is useful for making lists of which images are present in a database etc. | |
void | getAllAccessorElements (list< CAccessorElement > &) const |
List of triplets (ID,imageURL,thumbnailURL) of all the documents present in the inverted file. | |
void | getRandomIDs (list< TID > &, list< TID >::size_type) const |
get a given number of random AccessorElement's | |
void | getRandomAccessorElements (list< CAccessorElement > &outResult, list< CAccessorElement >::size_type inSize) const |
For drawing random sets. | |
![]() | |
virtual | ~CAccessor () |
virtual destructor for clean destruction | |
virtual CXMLElement * | prepareDatabase () |
If a new collection is created during runtime, this function prepares the indexing structures such that they are able to accept new objects. | |
virtual bool | isPreparedDatabase () const |
Is the database accessed by this accessor prepared? In other words: is there an index structure to access? |
Protected Types | |
typedef hash_map< TID, unsigned int > | CIDToOffset |
map from feature id to the offset for this feature |
Protected Member Functions | |
void | writeOffsetFileElement (TID inFeatureID, int inPosition, ostream &inOpenOffsetFile) |
add a pair of FeatureID,Offset to the open offset file (helper function for inverted file construction) | |
CDocumentFrequencyList * | getFeatureFile (string inFileName) const |
loads a *.fts file. | |
![]() | |
virtual void | dummy () const |
without this function things like upcasting etc. |
Protected Attributes | |
TID | mMaximumFeatureID |
the maximum feature ID arising in this file | |
CArraySelfDestroyPointer< char > | mInvertedFileBuffer |
A buffer, if the inverted file is to be held in ram. | |
CSelfDestroyPointer< istream > | mInvertedFile |
The inverted file. | |
ifstream | mOffsetFile |
Feature -> Offset in inverted file. | |
ifstream | mFeatureDescriptionFile |
File of feature descriptions. | |
string | mInvertedFileName |
Name of the inverted file. | |
string | mOffsetFileName |
Name of the Offset file. | |
string | mFeatureDescriptionFileName |
Name for the file with the feature description. | |
CIDToOffset | mIDToOffset |
map from feature id to the offset for this feature | |
hash_map< TID, double > | mFeatureToCollectionFrequency |
map from feature to the collection frequency | |
for fast access... | |
hash_map< TID, unsigned int > | mFeatureDescription |
map from the feature ID to the feature description | |
CADIHash | mDocumentInformation |
additional information about the document like, e.g. | |
![]() | |
TID | mID |
the ID of the next element | |
string | mURLPrefix |
the url-prefix for the image list | |
string | mThumbnailURLPrefix |
the thumbnail-url-prefix for the image list | |
CMutex | mMutexURL2FTS |
the mutex for multithreading; the name is intended to be unique and immune against inheritance... | |
string_string_map | mURLToFFN |
map from the url of an image to the name of the feature file for this image | |
TID_string_map | mIDToFFN |
map from the id of an image to the name of the feature file for this image | |
ifstream | mURLToFeatureFile |
URL -> FeatureFileName. | |
string | mURLToFeatureFileName |
Name of the file that contains pairs of URL and the Feature file that belongs to the URL. | |
![]() | |
string_TID_map | mURLToID |
map the url of an image to the id of this image | |
TID_CAccessorElement_map | mIDToAccessorElement |
maps the ID of an image to the URL of this image |
An accessor to an inverted file.
This access is done "by hand" at present. This is not really efficient; however, we plan to move to memory-mapped files.
CAcInvertedFile::CAcInvertedFile | ( | const CXMLElement & | inCollectionElement | ) |
This opens an existing inverted file, and then initializes this structure.
After that it is fully usable.
As a parameter it takes an XMLElement which contains a "collection" element and its content.
If the attribute vi-generate-inverted-file is true, then a new inverted file will be generated using the parameters given in inCollectionElement. You will NOT be able to use *this afterwards.
The REAL constructor.
|
pure virtual |
Check the consistency of the inverted file system accessed by this accessor.
Implemented in CAcIFFileSystem.
bool CAcInvertedFile::checkConsistency | ( | ) |
Check the consistency of the inverted file system accessed by this accessor.
Reimplemented in CAcIFFileSystem.
|
pure virtual |
Give the List of documents containing the feature inFID.
CORNELIA: CDocumentFrequencyList is nothing more than a list of
(int, float) pairs:
struct{ int mID, float mFrequency; }
Implemented in CAcIFFileSystem.
bool CAcInvertedFile::generateInvertedFile | ( | ) |
Generating an inverted File, if there is none.
Fast but stupid in-memory method. This method is very fast if the entire inverted file (and a bit more) can be kept in memory at runtime. If this is not the case, extensive swapping is the result, virtually halting the inverted file creation.
Reimplemented in CAcIFFileSystem.
|
pure virtual |
Getting a list of all features contained in this.
This function is necessary, because in the present system only about 50 percent of the features are really used.
A feature is considered used if it arises in at least one image
Implemented in CAcIFFileSystem.
list<TID>* CAcInvertedFile::getAllFeatureIDs | ( | ) | const |
Getting a list of all features contained in this.
This function is necessary, because in the present system only about 50 percent of the features are really used.
A feature is considered used if it arises in mIDToOffset.
Reimplemented in CAcIFFileSystem.
|
protected |
|
pure virtual |
<HIER-WIRDS-INTERESSANT->
Translate a DocumentID to a URL (for output)
Implements CAccessor.
Implemented in CAcIFFileSystem.
bool CAcInvertedFile::newGenerateInvertedFile | ( | ) |
Generating an inverted File, if there is none.
Employs the two-way merge method described in "Managing Gigabytes", chapter 5.2, "Sort-based inversion" (page 181).
Reimplemented in CAcIFFileSystem.
|
protected |
additional information about the document like, e.g.
the euclidean length of the feature list.